1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/errno.h> 29 #include <sys/debug.h> 30 #include <sys/time.h> 31 #include <sys/sysmacros.h> 32 #include <sys/systm.h> 33 #include <sys/user.h> 34 #include <sys/stropts.h> 35 #include <sys/stream.h> 36 #include <sys/strlog.h> 37 #include <sys/strsubr.h> 38 #include <sys/cmn_err.h> 39 #include <sys/cpu.h> 40 #include <sys/kmem.h> 41 #include <sys/conf.h> 42 #include <sys/ddi.h> 43 #include <sys/sunddi.h> 44 #include <sys/ksynch.h> 45 #include <sys/stat.h> 46 #include <sys/kstat.h> 47 #include <sys/vtrace.h> 48 #include <sys/strsun.h> 49 #include <sys/dlpi.h> 50 #include <sys/ethernet.h> 51 #include <net/if.h> 52 #include <sys/varargs.h> 53 #include <sys/machsystm.h> 54 #include <sys/modctl.h> 55 #include <sys/modhash.h> 56 #include <sys/mac.h> 57 #include <sys/mac_ether.h> 58 #include <sys/taskq.h> 59 #include <sys/note.h> 60 #include <sys/mach_descrip.h> 61 #include <sys/mac.h> 62 #include <sys/mdeg.h> 63 #include <sys/ldc.h> 64 #include <sys/vsw_fdb.h> 65 #include <sys/vsw.h> 66 #include <sys/vio_mailbox.h> 67 #include <sys/vnet_mailbox.h> 68 #include <sys/vnet_common.h> 69 #include <sys/vio_util.h> 70 #include <sys/sdt.h> 71 #include <sys/atomic.h> 72 #include <sys/callb.h> 73 #include <sys/vlan.h> 74 75 /* Port add/deletion/etc routines */ 76 static int vsw_port_delete(vsw_port_t *port); 77 static int vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id); 78 static int vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id); 79 static int vsw_init_ldcs(vsw_port_t *port); 80 static int vsw_uninit_ldcs(vsw_port_t *port); 81 static int vsw_ldc_init(vsw_ldc_t *ldcp); 82 static int vsw_ldc_uninit(vsw_ldc_t *ldcp); 83 static int vsw_drain_ldcs(vsw_port_t *port); 84 static int vsw_drain_port_taskq(vsw_port_t *port); 85 static void vsw_marker_task(void *); 86 static int vsw_plist_del_node(vsw_t *, vsw_port_t *port); 87 int vsw_detach_ports(vsw_t *vswp); 88 int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node); 89 mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr); 90 int vsw_port_detach(vsw_t *vswp, int p_instance); 91 int vsw_portsend(vsw_port_t *port, mblk_t *mp, mblk_t *mpt, uint32_t count); 92 int vsw_port_attach(vsw_port_t *portp); 93 vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance); 94 void vsw_vlan_unaware_port_reset(vsw_port_t *portp); 95 int vsw_send_msg(vsw_ldc_t *, void *, int, boolean_t); 96 void vsw_hio_port_reset(vsw_port_t *portp, boolean_t immediate); 97 98 /* Interrupt routines */ 99 static uint_t vsw_ldc_cb(uint64_t cb, caddr_t arg); 100 101 /* Handshake 
routines */ 102 static void vsw_ldc_reinit(vsw_ldc_t *); 103 static void vsw_process_conn_evt(vsw_ldc_t *, uint16_t); 104 static void vsw_conn_task(void *); 105 static int vsw_check_flag(vsw_ldc_t *, int, uint64_t); 106 static void vsw_next_milestone(vsw_ldc_t *); 107 static int vsw_supported_version(vio_ver_msg_t *); 108 static void vsw_set_vnet_proto_ops(vsw_ldc_t *ldcp); 109 static void vsw_reset_vnet_proto_ops(vsw_ldc_t *ldcp); 110 111 /* Data processing routines */ 112 static void vsw_process_pkt(void *); 113 static void vsw_dispatch_ctrl_task(vsw_ldc_t *, void *, vio_msg_tag_t *); 114 static void vsw_process_ctrl_pkt(void *); 115 static void vsw_process_ctrl_ver_pkt(vsw_ldc_t *, void *); 116 static void vsw_process_ctrl_attr_pkt(vsw_ldc_t *, void *); 117 static void vsw_process_ctrl_mcst_pkt(vsw_ldc_t *, void *); 118 static void vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *, void *); 119 static void vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *, void *); 120 static void vsw_process_ctrl_rdx_pkt(vsw_ldc_t *, void *); 121 static void vsw_process_data_pkt(vsw_ldc_t *, void *, vio_msg_tag_t *, 122 uint32_t); 123 static void vsw_process_data_dring_pkt(vsw_ldc_t *, void *); 124 static void vsw_process_pkt_data_nop(void *, void *, uint32_t); 125 static void vsw_process_pkt_data(void *, void *, uint32_t); 126 static void vsw_process_data_ibnd_pkt(vsw_ldc_t *, void *); 127 static void vsw_process_err_pkt(vsw_ldc_t *, void *, vio_msg_tag_t *); 128 129 /* Switching/data transmit routines */ 130 static int vsw_dringsend(vsw_ldc_t *, mblk_t *); 131 static int vsw_descrsend(vsw_ldc_t *, mblk_t *); 132 static void vsw_ldcsend_pkt(vsw_ldc_t *ldcp, mblk_t *mp); 133 static int vsw_ldcsend(vsw_ldc_t *ldcp, mblk_t *mp, uint32_t retries); 134 static int vsw_ldctx_pri(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count); 135 static int vsw_ldctx(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count); 136 137 /* Packet creation routines */ 138 static void vsw_send_ver(void *); 139 static void vsw_send_attr(vsw_ldc_t *); 140 static vio_dring_reg_msg_t *vsw_create_dring_info_pkt(vsw_ldc_t *); 141 static void vsw_send_dring_info(vsw_ldc_t *); 142 static void vsw_send_rdx(vsw_ldc_t *); 143 144 /* Dring routines */ 145 static dring_info_t *vsw_create_dring(vsw_ldc_t *); 146 static void vsw_create_privring(vsw_ldc_t *); 147 static int vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp); 148 static int vsw_dring_find_free_desc(dring_info_t *, vsw_private_desc_t **, 149 int *); 150 static dring_info_t *vsw_ident2dring(lane_t *, uint64_t); 151 static int vsw_reclaim_dring(dring_info_t *dp, int start); 152 153 static void vsw_set_lane_attr(vsw_t *, lane_t *); 154 static int vsw_check_attr(vnet_attr_msg_t *, vsw_ldc_t *); 155 static int vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg); 156 static int vsw_mem_cookie_match(ldc_mem_cookie_t *, ldc_mem_cookie_t *); 157 static int vsw_check_dring_info(vio_dring_reg_msg_t *); 158 159 /* Rcv/Tx thread routines */ 160 static void vsw_stop_tx_thread(vsw_ldc_t *ldcp); 161 static void vsw_ldc_tx_worker(void *arg); 162 static void vsw_stop_rx_thread(vsw_ldc_t *ldcp); 163 static void vsw_ldc_rx_worker(void *arg); 164 165 /* Misc support routines */ 166 static caddr_t vsw_print_ethaddr(uint8_t *addr, char *ebuf); 167 static void vsw_free_lane_resources(vsw_ldc_t *, uint64_t); 168 static void vsw_free_ring(dring_info_t *); 169 static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr); 170 static int vsw_get_same_dest_list(struct ether_header *ehp, 171 mblk_t **rhead, mblk_t 
**rtail, mblk_t **mpp); 172 static mblk_t *vsw_dupmsgchain(mblk_t *mp); 173 174 /* Debugging routines */ 175 static void dump_flags(uint64_t); 176 static void display_state(void); 177 static void display_lane(lane_t *); 178 static void display_ring(dring_info_t *); 179 180 /* 181 * Functions imported from other files. 182 */ 183 extern int vsw_set_hw(vsw_t *, vsw_port_t *, int); 184 extern int vsw_unset_hw(vsw_t *, vsw_port_t *, int); 185 extern void vsw_reconfig_hw(vsw_t *); 186 extern int vsw_add_rem_mcst(vnet_mcast_msg_t *mcst_pkt, vsw_port_t *port); 187 extern void vsw_del_mcst_port(vsw_port_t *port); 188 extern int vsw_add_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg); 189 extern int vsw_del_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg); 190 extern void vsw_fdbe_add(vsw_t *vswp, void *port); 191 extern void vsw_fdbe_del(vsw_t *vswp, struct ether_addr *eaddr); 192 extern void vsw_create_vlans(void *arg, int type); 193 extern void vsw_destroy_vlans(void *arg, int type); 194 extern void vsw_vlan_add_ids(void *arg, int type); 195 extern void vsw_vlan_remove_ids(void *arg, int type); 196 extern boolean_t vsw_frame_lookup_vid(void *arg, int caller, 197 struct ether_header *ehp, uint16_t *vidp); 198 extern mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp); 199 extern uint32_t vsw_vlan_frame_untag(void *arg, int type, mblk_t **np, 200 mblk_t **npt); 201 extern boolean_t vsw_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid); 202 extern void vsw_hio_start(vsw_t *vswp, vsw_ldc_t *ldcp); 203 extern void vsw_hio_stop(vsw_t *vswp, vsw_ldc_t *ldcp); 204 extern void vsw_process_dds_msg(vsw_t *vswp, vsw_ldc_t *ldcp, void *msg); 205 extern void vsw_hio_stop_port(vsw_port_t *portp); 206 extern void vsw_publish_macaddr(vsw_t *vswp, uint8_t *addr); 207 208 #define VSW_NUM_VMPOOLS 3 /* number of vio mblk pools */ 209 210 /* 211 * Tunables used in this file. 
212 */ 213 extern int vsw_num_handshakes; 214 extern int vsw_wretries; 215 extern int vsw_desc_delay; 216 extern int vsw_read_attempts; 217 extern int vsw_ldc_tx_delay; 218 extern int vsw_ldc_tx_retries; 219 extern boolean_t vsw_ldc_rxthr_enabled; 220 extern boolean_t vsw_ldc_txthr_enabled; 221 extern uint32_t vsw_ntxds; 222 extern uint32_t vsw_max_tx_qcount; 223 extern uint32_t vsw_chain_len; 224 extern uint32_t vsw_mblk_size1; 225 extern uint32_t vsw_mblk_size2; 226 extern uint32_t vsw_mblk_size3; 227 extern uint32_t vsw_num_mblks1; 228 extern uint32_t vsw_num_mblks2; 229 extern uint32_t vsw_num_mblks3; 230 extern boolean_t vsw_obp_ver_proto_workaround; 231 extern uint32_t vsw_publish_macaddr_count; 232 233 #define LDC_ENTER_LOCK(ldcp) \ 234 mutex_enter(&((ldcp)->ldc_cblock));\ 235 mutex_enter(&((ldcp)->ldc_rxlock));\ 236 mutex_enter(&((ldcp)->ldc_txlock)); 237 #define LDC_EXIT_LOCK(ldcp) \ 238 mutex_exit(&((ldcp)->ldc_txlock));\ 239 mutex_exit(&((ldcp)->ldc_rxlock));\ 240 mutex_exit(&((ldcp)->ldc_cblock)); 241 242 #define VSW_VER_EQ(ldcp, major, minor) \ 243 ((ldcp)->lane_out.ver_major == (major) && \ 244 (ldcp)->lane_out.ver_minor == (minor)) 245 246 #define VSW_VER_LT(ldcp, major, minor) \ 247 (((ldcp)->lane_out.ver_major < (major)) || \ 248 ((ldcp)->lane_out.ver_major == (major) && \ 249 (ldcp)->lane_out.ver_minor < (minor))) 250 251 #define VSW_VER_GTEQ(ldcp, major, minor) \ 252 (((ldcp)->lane_out.ver_major > (major)) || \ 253 ((ldcp)->lane_out.ver_major == (major) && \ 254 (ldcp)->lane_out.ver_minor >= (minor))) 255 256 /* supported versions */ 257 static ver_sup_t vsw_versions[] = { {1, 3} }; 258 259 /* 260 * For the moment the state dump routines have their own 261 * private flag. 262 */ 263 #define DUMP_STATE 0 264 265 #if DUMP_STATE 266 267 #define DUMP_TAG(tag) \ 268 { \ 269 D1(NULL, "DUMP_TAG: type 0x%llx", (tag).vio_msgtype); \ 270 D1(NULL, "DUMP_TAG: stype 0x%llx", (tag).vio_subtype); \ 271 D1(NULL, "DUMP_TAG: senv 0x%llx", (tag).vio_subtype_env); \ 272 } 273 274 #define DUMP_TAG_PTR(tag) \ 275 { \ 276 D1(NULL, "DUMP_TAG: type 0x%llx", (tag)->vio_msgtype); \ 277 D1(NULL, "DUMP_TAG: stype 0x%llx", (tag)->vio_subtype); \ 278 D1(NULL, "DUMP_TAG: senv 0x%llx", (tag)->vio_subtype_env); \ 279 } 280 281 #define DUMP_FLAGS(flags) dump_flags(flags); 282 #define DISPLAY_STATE() display_state() 283 284 #else 285 286 #define DUMP_TAG(tag) 287 #define DUMP_TAG_PTR(tag) 288 #define DUMP_FLAGS(state) 289 #define DISPLAY_STATE() 290 291 #endif /* DUMP_STATE */ 292 293 /* 294 * Attach the specified port. 295 * 296 * Returns 0 on success, 1 on failure. 297 */ 298 int 299 vsw_port_attach(vsw_port_t *port) 300 { 301 vsw_t *vswp = port->p_vswp; 302 vsw_port_list_t *plist = &vswp->plist; 303 vsw_port_t *p, **pp; 304 int i; 305 int nids = port->num_ldcs; 306 uint64_t *ldcids; 307 308 D1(vswp, "%s: enter : port %d", __func__, port->p_instance); 309 310 /* port already exists? 
*/ 311 READ_ENTER(&plist->lockrw); 312 for (p = plist->head; p != NULL; p = p->p_next) { 313 if (p->p_instance == port->p_instance) { 314 DWARN(vswp, "%s: port instance %d already attached", 315 __func__, p->p_instance); 316 RW_EXIT(&plist->lockrw); 317 return (1); 318 } 319 } 320 RW_EXIT(&plist->lockrw); 321 322 rw_init(&port->p_ldclist.lockrw, NULL, RW_DRIVER, NULL); 323 324 mutex_init(&port->tx_lock, NULL, MUTEX_DRIVER, NULL); 325 mutex_init(&port->mca_lock, NULL, MUTEX_DRIVER, NULL); 326 327 mutex_init(&port->state_lock, NULL, MUTEX_DRIVER, NULL); 328 cv_init(&port->state_cv, NULL, CV_DRIVER, NULL); 329 port->state = VSW_PORT_INIT; 330 331 D2(vswp, "%s: %d nids", __func__, nids); 332 ldcids = port->ldc_ids; 333 for (i = 0; i < nids; i++) { 334 D2(vswp, "%s: ldcid (%llx)", __func__, (uint64_t)ldcids[i]); 335 if (vsw_ldc_attach(port, (uint64_t)ldcids[i]) != 0) { 336 DERR(vswp, "%s: ldc_attach failed", __func__); 337 338 rw_destroy(&port->p_ldclist.lockrw); 339 340 cv_destroy(&port->state_cv); 341 mutex_destroy(&port->state_lock); 342 343 mutex_destroy(&port->tx_lock); 344 mutex_destroy(&port->mca_lock); 345 kmem_free(port, sizeof (vsw_port_t)); 346 return (1); 347 } 348 } 349 350 if (vswp->switching_setup_done == B_TRUE) { 351 /* 352 * If the underlying physical device has been setup, 353 * program the mac address of this port in it. 354 * Otherwise, port macaddr will be set after the physical 355 * device is successfully setup by the timeout handler. 356 */ 357 mutex_enter(&vswp->hw_lock); 358 (void) vsw_set_hw(vswp, port, VSW_VNETPORT); 359 mutex_exit(&vswp->hw_lock); 360 } 361 362 /* create the fdb entry for this port/mac address */ 363 vsw_fdbe_add(vswp, port); 364 365 vsw_create_vlans(port, VSW_VNETPORT); 366 367 WRITE_ENTER(&plist->lockrw); 368 369 /* link it into the list of ports for this vsw instance */ 370 pp = (vsw_port_t **)(&plist->head); 371 port->p_next = *pp; 372 *pp = port; 373 plist->num_ports++; 374 375 RW_EXIT(&plist->lockrw); 376 377 /* 378 * Initialise the port and any ldc's under it. 379 */ 380 (void) vsw_init_ldcs(port); 381 382 /* announce macaddr of vnet to the physical switch */ 383 if (vsw_publish_macaddr_count != 0) { /* enabled */ 384 vsw_publish_macaddr(vswp, (uint8_t *)&(port->p_macaddr)); 385 } 386 387 D1(vswp, "%s: exit", __func__); 388 return (0); 389 } 390 391 /* 392 * Detach the specified port. 393 * 394 * Returns 0 on success, 1 on failure. 395 */ 396 int 397 vsw_port_detach(vsw_t *vswp, int p_instance) 398 { 399 vsw_port_t *port = NULL; 400 vsw_port_list_t *plist = &vswp->plist; 401 402 D1(vswp, "%s: enter: port id %d", __func__, p_instance); 403 404 WRITE_ENTER(&plist->lockrw); 405 406 if ((port = vsw_lookup_port(vswp, p_instance)) == NULL) { 407 RW_EXIT(&plist->lockrw); 408 return (1); 409 } 410 411 if (vsw_plist_del_node(vswp, port)) { 412 RW_EXIT(&plist->lockrw); 413 return (1); 414 } 415 416 /* cleanup any HybridIO for this port */ 417 vsw_hio_stop_port(port); 418 419 /* 420 * No longer need to hold writer lock on port list now 421 * that we have unlinked the target port from the list. 422 */ 423 RW_EXIT(&plist->lockrw); 424 425 /* Remove the fdb entry for this port/mac address */ 426 vsw_fdbe_del(vswp, &(port->p_macaddr)); 427 vsw_destroy_vlans(port, VSW_VNETPORT); 428 429 /* Remove any multicast addresses.. */ 430 vsw_del_mcst_port(port); 431 432 /* Remove address if was programmed into HW. */ 433 mutex_enter(&vswp->hw_lock); 434 435 /* 436 * Port's address may not have been set in hardware. 
This could 437 * happen if the underlying physical device is not yet available and 438 * vsw_setup_switching_timeout() may be in progress. 439 * We remove its addr from hardware only if it has been set before. 440 */ 441 if (port->addr_set != VSW_ADDR_UNSET) 442 (void) vsw_unset_hw(vswp, port, VSW_VNETPORT); 443 444 if (vswp->recfg_reqd) 445 vsw_reconfig_hw(vswp); 446 447 mutex_exit(&vswp->hw_lock); 448 449 if (vsw_port_delete(port)) { 450 return (1); 451 } 452 453 D1(vswp, "%s: exit: p_instance(%d)", __func__, p_instance); 454 return (0); 455 } 456 457 /* 458 * Detach all active ports. 459 * 460 * Returns 0 on success, 1 on failure. 461 */ 462 int 463 vsw_detach_ports(vsw_t *vswp) 464 { 465 vsw_port_list_t *plist = &vswp->plist; 466 vsw_port_t *port = NULL; 467 468 D1(vswp, "%s: enter", __func__); 469 470 WRITE_ENTER(&plist->lockrw); 471 472 while ((port = plist->head) != NULL) { 473 if (vsw_plist_del_node(vswp, port)) { 474 DERR(vswp, "%s: Error deleting port %d" 475 " from port list", __func__, port->p_instance); 476 RW_EXIT(&plist->lockrw); 477 return (1); 478 } 479 480 /* Remove address if was programmed into HW. */ 481 mutex_enter(&vswp->hw_lock); 482 (void) vsw_unset_hw(vswp, port, VSW_VNETPORT); 483 mutex_exit(&vswp->hw_lock); 484 485 /* Remove the fdb entry for this port/mac address */ 486 vsw_fdbe_del(vswp, &(port->p_macaddr)); 487 vsw_destroy_vlans(port, VSW_VNETPORT); 488 489 /* Remove any multicast addresses.. */ 490 vsw_del_mcst_port(port); 491 492 /* 493 * No longer need to hold the lock on the port list 494 * now that we have unlinked the target port from the 495 * list. 496 */ 497 RW_EXIT(&plist->lockrw); 498 if (vsw_port_delete(port)) { 499 DERR(vswp, "%s: Error deleting port %d", 500 __func__, port->p_instance); 501 return (1); 502 } 503 WRITE_ENTER(&plist->lockrw); 504 } 505 RW_EXIT(&plist->lockrw); 506 507 D1(vswp, "%s: exit", __func__); 508 509 return (0); 510 } 511 512 /* 513 * Delete the specified port. 514 * 515 * Returns 0 on success, 1 on failure. 516 */ 517 static int 518 vsw_port_delete(vsw_port_t *port) 519 { 520 vsw_ldc_list_t *ldcl; 521 vsw_t *vswp = port->p_vswp; 522 int num_ldcs; 523 524 D1(vswp, "%s: enter : port id %d", __func__, port->p_instance); 525 526 (void) vsw_uninit_ldcs(port); 527 528 /* 529 * Wait for any pending ctrl msg tasks which reference this 530 * port to finish. 531 */ 532 if (vsw_drain_port_taskq(port)) 533 return (1); 534 535 /* 536 * Wait for any active callbacks to finish 537 */ 538 if (vsw_drain_ldcs(port)) 539 return (1); 540 541 ldcl = &port->p_ldclist; 542 num_ldcs = port->num_ldcs; 543 WRITE_ENTER(&ldcl->lockrw); 544 while (num_ldcs > 0) { 545 if (vsw_ldc_detach(port, ldcl->head->ldc_id) != 0) { 546 cmn_err(CE_WARN, "!vsw%d: unable to detach ldc %ld", 547 vswp->instance, ldcl->head->ldc_id); 548 RW_EXIT(&ldcl->lockrw); 549 port->num_ldcs = num_ldcs; 550 return (1); 551 } 552 num_ldcs--; 553 } 554 RW_EXIT(&ldcl->lockrw); 555 556 rw_destroy(&port->p_ldclist.lockrw); 557 558 mutex_destroy(&port->mca_lock); 559 mutex_destroy(&port->tx_lock); 560 561 cv_destroy(&port->state_cv); 562 mutex_destroy(&port->state_lock); 563 564 if (port->num_ldcs != 0) { 565 kmem_free(port->ldc_ids, port->num_ldcs * sizeof (uint64_t)); 566 port->num_ldcs = 0; 567 } 568 kmem_free(port, sizeof (vsw_port_t)); 569 570 D1(vswp, "%s: exit", __func__); 571 572 return (0); 573 } 574 575 /* 576 * Attach a logical domain channel (ldc) under a specified port. 577 * 578 * Returns 0 on success, 1 on failure. 
579 */ 580 static int 581 vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id) 582 { 583 vsw_t *vswp = port->p_vswp; 584 vsw_ldc_list_t *ldcl = &port->p_ldclist; 585 vsw_ldc_t *ldcp = NULL; 586 ldc_attr_t attr; 587 ldc_status_t istatus; 588 int status = DDI_FAILURE; 589 int rv; 590 char kname[MAXNAMELEN]; 591 enum { PROG_init = 0x0, PROG_mblks = 0x1, 592 PROG_callback = 0x2, PROG_rx_thread = 0x4, 593 PROG_tx_thread = 0x8} 594 progress; 595 596 progress = PROG_init; 597 598 D1(vswp, "%s: enter", __func__); 599 600 ldcp = kmem_zalloc(sizeof (vsw_ldc_t), KM_NOSLEEP); 601 if (ldcp == NULL) { 602 DERR(vswp, "%s: kmem_zalloc failed", __func__); 603 return (1); 604 } 605 ldcp->ldc_id = ldc_id; 606 607 /* Allocate pools of receive mblks */ 608 rv = vio_init_multipools(&ldcp->vmp, VSW_NUM_VMPOOLS, 609 vsw_mblk_size1, vsw_mblk_size2, vsw_mblk_size3, 610 vsw_num_mblks1, vsw_num_mblks2, vsw_num_mblks3); 611 if (rv) { 612 DWARN(vswp, "%s: unable to create free mblk pools for" 613 " channel %ld (rv %d)", __func__, ldc_id, rv); 614 kmem_free(ldcp, sizeof (vsw_ldc_t)); 615 return (1); 616 } 617 618 progress |= PROG_mblks; 619 620 mutex_init(&ldcp->ldc_txlock, NULL, MUTEX_DRIVER, NULL); 621 mutex_init(&ldcp->ldc_rxlock, NULL, MUTEX_DRIVER, NULL); 622 mutex_init(&ldcp->ldc_cblock, NULL, MUTEX_DRIVER, NULL); 623 mutex_init(&ldcp->drain_cv_lock, NULL, MUTEX_DRIVER, NULL); 624 cv_init(&ldcp->drain_cv, NULL, CV_DRIVER, NULL); 625 rw_init(&ldcp->lane_in.dlistrw, NULL, RW_DRIVER, NULL); 626 rw_init(&ldcp->lane_out.dlistrw, NULL, RW_DRIVER, NULL); 627 628 /* required for handshake with peer */ 629 ldcp->local_session = (uint64_t)ddi_get_lbolt(); 630 ldcp->peer_session = 0; 631 ldcp->session_status = 0; 632 ldcp->hss_id = 1; /* Initial handshake session id */ 633 634 (void) atomic_swap_32(&port->p_hio_capable, B_FALSE); 635 636 /* only set for outbound lane, inbound set by peer */ 637 vsw_set_lane_attr(vswp, &ldcp->lane_out); 638 639 attr.devclass = LDC_DEV_NT_SVC; 640 attr.instance = ddi_get_instance(vswp->dip); 641 attr.mode = LDC_MODE_UNRELIABLE; 642 attr.mtu = VSW_LDC_MTU; 643 status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle); 644 if (status != 0) { 645 DERR(vswp, "%s(%lld): ldc_init failed, rv (%d)", 646 __func__, ldc_id, status); 647 goto ldc_attach_fail; 648 } 649 650 if (vsw_ldc_rxthr_enabled) { 651 ldcp->rx_thr_flags = 0; 652 653 mutex_init(&ldcp->rx_thr_lock, NULL, MUTEX_DRIVER, NULL); 654 cv_init(&ldcp->rx_thr_cv, NULL, CV_DRIVER, NULL); 655 ldcp->rx_thread = thread_create(NULL, 2 * DEFAULTSTKSZ, 656 vsw_ldc_rx_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri); 657 658 progress |= PROG_rx_thread; 659 if (ldcp->rx_thread == NULL) { 660 DWARN(vswp, "%s(%lld): Failed to create worker thread", 661 __func__, ldc_id); 662 goto ldc_attach_fail; 663 } 664 } 665 666 if (vsw_ldc_txthr_enabled) { 667 ldcp->tx_thr_flags = 0; 668 ldcp->tx_mhead = ldcp->tx_mtail = NULL; 669 670 mutex_init(&ldcp->tx_thr_lock, NULL, MUTEX_DRIVER, NULL); 671 cv_init(&ldcp->tx_thr_cv, NULL, CV_DRIVER, NULL); 672 ldcp->tx_thread = thread_create(NULL, 2 * DEFAULTSTKSZ, 673 vsw_ldc_tx_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri); 674 675 progress |= PROG_tx_thread; 676 if (ldcp->tx_thread == NULL) { 677 DWARN(vswp, "%s(%lld): Failed to create worker thread", 678 __func__, ldc_id); 679 goto ldc_attach_fail; 680 } 681 } 682 683 status = ldc_reg_callback(ldcp->ldc_handle, vsw_ldc_cb, (caddr_t)ldcp); 684 if (status != 0) { 685 DERR(vswp, "%s(%lld): ldc_reg_callback failed, rv (%d)", 686 __func__, ldc_id, status); 687 (void) ldc_fini(ldcp->ldc_handle); 688 
goto ldc_attach_fail; 689 } 690 /* 691 * allocate a message for ldc_read()s, big enough to hold ctrl and 692 * data msgs, including raw data msgs used to recv priority frames. 693 */ 694 ldcp->msglen = VIO_PKT_DATA_HDRSIZE + vswp->max_frame_size; 695 ldcp->ldcmsg = kmem_alloc(ldcp->msglen, KM_SLEEP); 696 697 progress |= PROG_callback; 698 699 mutex_init(&ldcp->status_lock, NULL, MUTEX_DRIVER, NULL); 700 701 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) { 702 DERR(vswp, "%s: ldc_status failed", __func__); 703 mutex_destroy(&ldcp->status_lock); 704 goto ldc_attach_fail; 705 } 706 707 ldcp->ldc_status = istatus; 708 ldcp->ldc_port = port; 709 ldcp->ldc_vswp = vswp; 710 711 vsw_reset_vnet_proto_ops(ldcp); 712 713 (void) sprintf(kname, "%sldc0x%lx", DRV_NAME, ldcp->ldc_id); 714 ldcp->ksp = vgen_setup_kstats(DRV_NAME, vswp->instance, 715 kname, &ldcp->ldc_stats); 716 if (ldcp->ksp == NULL) { 717 DERR(vswp, "%s: kstats setup failed", __func__); 718 goto ldc_attach_fail; 719 } 720 721 /* link it into the list of channels for this port */ 722 WRITE_ENTER(&ldcl->lockrw); 723 ldcp->ldc_next = ldcl->head; 724 ldcl->head = ldcp; 725 RW_EXIT(&ldcl->lockrw); 726 727 D1(vswp, "%s: exit", __func__); 728 return (0); 729 730 ldc_attach_fail: 731 732 if (progress & PROG_callback) { 733 (void) ldc_unreg_callback(ldcp->ldc_handle); 734 kmem_free(ldcp->ldcmsg, ldcp->msglen); 735 } 736 737 if (progress & PROG_rx_thread) { 738 if (ldcp->rx_thread != NULL) { 739 vsw_stop_rx_thread(ldcp); 740 } 741 mutex_destroy(&ldcp->rx_thr_lock); 742 cv_destroy(&ldcp->rx_thr_cv); 743 } 744 745 if (progress & PROG_tx_thread) { 746 if (ldcp->tx_thread != NULL) { 747 vsw_stop_tx_thread(ldcp); 748 } 749 mutex_destroy(&ldcp->tx_thr_lock); 750 cv_destroy(&ldcp->tx_thr_cv); 751 } 752 if (ldcp->ksp != NULL) { 753 vgen_destroy_kstats(ldcp->ksp); 754 } 755 mutex_destroy(&ldcp->ldc_txlock); 756 mutex_destroy(&ldcp->ldc_rxlock); 757 mutex_destroy(&ldcp->ldc_cblock); 758 mutex_destroy(&ldcp->drain_cv_lock); 759 760 cv_destroy(&ldcp->drain_cv); 761 762 rw_destroy(&ldcp->lane_in.dlistrw); 763 rw_destroy(&ldcp->lane_out.dlistrw); 764 765 if (progress & PROG_mblks) { 766 vio_destroy_multipools(&ldcp->vmp, &vswp->rxh); 767 } 768 kmem_free(ldcp, sizeof (vsw_ldc_t)); 769 770 return (1); 771 } 772 773 /* 774 * Detach a logical domain channel (ldc) belonging to a 775 * particular port. 776 * 777 * Returns 0 on success, 1 on failure. 
778 */ 779 static int 780 vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id) 781 { 782 vsw_t *vswp = port->p_vswp; 783 vsw_ldc_t *ldcp, *prev_ldcp; 784 vsw_ldc_list_t *ldcl = &port->p_ldclist; 785 int rv; 786 787 prev_ldcp = ldcl->head; 788 for (; (ldcp = prev_ldcp) != NULL; prev_ldcp = ldcp->ldc_next) { 789 if (ldcp->ldc_id == ldc_id) { 790 break; 791 } 792 } 793 794 /* specified ldc id not found */ 795 if (ldcp == NULL) { 796 DERR(vswp, "%s: ldcp = NULL", __func__); 797 return (1); 798 } 799 800 D2(vswp, "%s: detaching channel %lld", __func__, ldcp->ldc_id); 801 802 /* Stop the receive thread */ 803 if (ldcp->rx_thread != NULL) { 804 vsw_stop_rx_thread(ldcp); 805 mutex_destroy(&ldcp->rx_thr_lock); 806 cv_destroy(&ldcp->rx_thr_cv); 807 } 808 kmem_free(ldcp->ldcmsg, ldcp->msglen); 809 810 /* Stop the tx thread */ 811 if (ldcp->tx_thread != NULL) { 812 vsw_stop_tx_thread(ldcp); 813 mutex_destroy(&ldcp->tx_thr_lock); 814 cv_destroy(&ldcp->tx_thr_cv); 815 if (ldcp->tx_mhead != NULL) { 816 freemsgchain(ldcp->tx_mhead); 817 ldcp->tx_mhead = ldcp->tx_mtail = NULL; 818 ldcp->tx_cnt = 0; 819 } 820 } 821 822 /* Destory kstats */ 823 vgen_destroy_kstats(ldcp->ksp); 824 825 /* 826 * Before we can close the channel we must release any mapped 827 * resources (e.g. drings). 828 */ 829 vsw_free_lane_resources(ldcp, INBOUND); 830 vsw_free_lane_resources(ldcp, OUTBOUND); 831 832 /* 833 * If the close fails we are in serious trouble, as won't 834 * be able to delete the parent port. 835 */ 836 if ((rv = ldc_close(ldcp->ldc_handle)) != 0) { 837 DERR(vswp, "%s: error %d closing channel %lld", 838 __func__, rv, ldcp->ldc_id); 839 return (1); 840 } 841 842 (void) ldc_fini(ldcp->ldc_handle); 843 844 ldcp->ldc_status = LDC_INIT; 845 ldcp->ldc_handle = NULL; 846 ldcp->ldc_vswp = NULL; 847 848 849 /* 850 * Most likely some mblks are still in use and 851 * have not been returned to the pool. These mblks are 852 * added to the pool that is maintained in the device instance. 853 * Another attempt will be made to destroy the pool 854 * when the device detaches. 855 */ 856 vio_destroy_multipools(&ldcp->vmp, &vswp->rxh); 857 858 /* unlink it from the list */ 859 prev_ldcp = ldcp->ldc_next; 860 861 mutex_destroy(&ldcp->ldc_txlock); 862 mutex_destroy(&ldcp->ldc_rxlock); 863 mutex_destroy(&ldcp->ldc_cblock); 864 cv_destroy(&ldcp->drain_cv); 865 mutex_destroy(&ldcp->drain_cv_lock); 866 mutex_destroy(&ldcp->status_lock); 867 rw_destroy(&ldcp->lane_in.dlistrw); 868 rw_destroy(&ldcp->lane_out.dlistrw); 869 870 kmem_free(ldcp, sizeof (vsw_ldc_t)); 871 872 return (0); 873 } 874 875 /* 876 * Open and attempt to bring up the channel. Note that channel 877 * can only be brought up if peer has also opened channel. 878 * 879 * Returns 0 if can open and bring up channel, otherwise 880 * returns 1. 
881 */ 882 static int 883 vsw_ldc_init(vsw_ldc_t *ldcp) 884 { 885 vsw_t *vswp = ldcp->ldc_vswp; 886 ldc_status_t istatus = 0; 887 int rv; 888 889 D1(vswp, "%s: enter", __func__); 890 891 LDC_ENTER_LOCK(ldcp); 892 893 /* don't start at 0 in case clients don't like that */ 894 ldcp->next_ident = 1; 895 896 rv = ldc_open(ldcp->ldc_handle); 897 if (rv != 0) { 898 DERR(vswp, "%s: ldc_open failed: id(%lld) rv(%d)", 899 __func__, ldcp->ldc_id, rv); 900 LDC_EXIT_LOCK(ldcp); 901 return (1); 902 } 903 904 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) { 905 DERR(vswp, "%s: unable to get status", __func__); 906 LDC_EXIT_LOCK(ldcp); 907 return (1); 908 909 } else if (istatus != LDC_OPEN && istatus != LDC_READY) { 910 DERR(vswp, "%s: id (%lld) status(%d) is not OPEN/READY", 911 __func__, ldcp->ldc_id, istatus); 912 LDC_EXIT_LOCK(ldcp); 913 return (1); 914 } 915 916 mutex_enter(&ldcp->status_lock); 917 ldcp->ldc_status = istatus; 918 mutex_exit(&ldcp->status_lock); 919 920 rv = ldc_up(ldcp->ldc_handle); 921 if (rv != 0) { 922 /* 923 * Not a fatal error for ldc_up() to fail, as peer 924 * end point may simply not be ready yet. 925 */ 926 D2(vswp, "%s: ldc_up err id(%lld) rv(%d)", __func__, 927 ldcp->ldc_id, rv); 928 LDC_EXIT_LOCK(ldcp); 929 return (1); 930 } 931 932 /* 933 * ldc_up() call is non-blocking so need to explicitly 934 * check channel status to see if in fact the channel 935 * is UP. 936 */ 937 mutex_enter(&ldcp->status_lock); 938 if (ldc_status(ldcp->ldc_handle, &ldcp->ldc_status) != 0) { 939 DERR(vswp, "%s: unable to get status", __func__); 940 mutex_exit(&ldcp->status_lock); 941 LDC_EXIT_LOCK(ldcp); 942 return (1); 943 944 } 945 946 if (ldcp->ldc_status == LDC_UP) { 947 D2(vswp, "%s: channel %ld now UP (%ld)", __func__, 948 ldcp->ldc_id, istatus); 949 mutex_exit(&ldcp->status_lock); 950 LDC_EXIT_LOCK(ldcp); 951 952 vsw_process_conn_evt(ldcp, VSW_CONN_UP); 953 return (0); 954 } 955 956 mutex_exit(&ldcp->status_lock); 957 LDC_EXIT_LOCK(ldcp); 958 959 D1(vswp, "%s: exit", __func__); 960 return (0); 961 } 962 963 /* disable callbacks on the channel */ 964 static int 965 vsw_ldc_uninit(vsw_ldc_t *ldcp) 966 { 967 vsw_t *vswp = ldcp->ldc_vswp; 968 int rv; 969 970 D1(vswp, "vsw_ldc_uninit: enter: id(%lx)\n", ldcp->ldc_id); 971 972 LDC_ENTER_LOCK(ldcp); 973 974 rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE); 975 if (rv != 0) { 976 DERR(vswp, "vsw_ldc_uninit(%lld): error disabling " 977 "interrupts (rv = %d)\n", ldcp->ldc_id, rv); 978 LDC_EXIT_LOCK(ldcp); 979 return (1); 980 } 981 982 mutex_enter(&ldcp->status_lock); 983 ldcp->ldc_status = LDC_INIT; 984 mutex_exit(&ldcp->status_lock); 985 986 LDC_EXIT_LOCK(ldcp); 987 988 D1(vswp, "vsw_ldc_uninit: exit: id(%lx)", ldcp->ldc_id); 989 990 return (0); 991 } 992 993 static int 994 vsw_init_ldcs(vsw_port_t *port) 995 { 996 vsw_ldc_list_t *ldcl = &port->p_ldclist; 997 vsw_ldc_t *ldcp; 998 999 READ_ENTER(&ldcl->lockrw); 1000 ldcp = ldcl->head; 1001 for (; ldcp != NULL; ldcp = ldcp->ldc_next) { 1002 (void) vsw_ldc_init(ldcp); 1003 } 1004 RW_EXIT(&ldcl->lockrw); 1005 1006 return (0); 1007 } 1008 1009 static int 1010 vsw_uninit_ldcs(vsw_port_t *port) 1011 { 1012 vsw_ldc_list_t *ldcl = &port->p_ldclist; 1013 vsw_ldc_t *ldcp; 1014 1015 D1(NULL, "vsw_uninit_ldcs: enter\n"); 1016 1017 READ_ENTER(&ldcl->lockrw); 1018 ldcp = ldcl->head; 1019 for (; ldcp != NULL; ldcp = ldcp->ldc_next) { 1020 (void) vsw_ldc_uninit(ldcp); 1021 } 1022 RW_EXIT(&ldcl->lockrw); 1023 1024 D1(NULL, "vsw_uninit_ldcs: exit\n"); 1025 1026 return (0); 1027 } 1028 1029 /* 1030 * Wait 
until the callback(s) associated with the ldcs under the specified 1031 * port have completed. 1032 * 1033 * Prior to this function being invoked each channel under this port 1034 * should have been quiesced via ldc_set_cb_mode(DISABLE). 1035 * 1036 * A short explaination of what we are doing below.. 1037 * 1038 * The simplest approach would be to have a reference counter in 1039 * the ldc structure which is increment/decremented by the callbacks as 1040 * they use the channel. The drain function could then simply disable any 1041 * further callbacks and do a cv_wait for the ref to hit zero. Unfortunately 1042 * there is a tiny window here - before the callback is able to get the lock 1043 * on the channel it is interrupted and this function gets to execute. It 1044 * sees that the ref count is zero and believes its free to delete the 1045 * associated data structures. 1046 * 1047 * We get around this by taking advantage of the fact that before the ldc 1048 * framework invokes a callback it sets a flag to indicate that there is a 1049 * callback active (or about to become active). If when we attempt to 1050 * unregister a callback when this active flag is set then the unregister 1051 * will fail with EWOULDBLOCK. 1052 * 1053 * If the unregister fails we do a cv_timedwait. We will either be signaled 1054 * by the callback as it is exiting (note we have to wait a short period to 1055 * allow the callback to return fully to the ldc framework and it to clear 1056 * the active flag), or by the timer expiring. In either case we again attempt 1057 * the unregister. We repeat this until we can succesfully unregister the 1058 * callback. 1059 * 1060 * The reason we use a cv_timedwait rather than a simple cv_wait is to catch 1061 * the case where the callback has finished but the ldc framework has not yet 1062 * cleared the active flag. In this case we would never get a cv_signal. 1063 */ 1064 static int 1065 vsw_drain_ldcs(vsw_port_t *port) 1066 { 1067 vsw_ldc_list_t *ldcl = &port->p_ldclist; 1068 vsw_ldc_t *ldcp; 1069 vsw_t *vswp = port->p_vswp; 1070 1071 D1(vswp, "%s: enter", __func__); 1072 1073 READ_ENTER(&ldcl->lockrw); 1074 1075 ldcp = ldcl->head; 1076 1077 for (; ldcp != NULL; ldcp = ldcp->ldc_next) { 1078 /* 1079 * If we can unregister the channel callback then we 1080 * know that there is no callback either running or 1081 * scheduled to run for this channel so move on to next 1082 * channel in the list. 1083 */ 1084 mutex_enter(&ldcp->drain_cv_lock); 1085 1086 /* prompt active callbacks to quit */ 1087 ldcp->drain_state = VSW_LDC_DRAINING; 1088 1089 if ((ldc_unreg_callback(ldcp->ldc_handle)) == 0) { 1090 D2(vswp, "%s: unreg callback for chan %ld", __func__, 1091 ldcp->ldc_id); 1092 mutex_exit(&ldcp->drain_cv_lock); 1093 continue; 1094 } else { 1095 /* 1096 * If we end up here we know that either 1) a callback 1097 * is currently executing, 2) is about to start (i.e. 1098 * the ldc framework has set the active flag but 1099 * has not actually invoked the callback yet, or 3) 1100 * has finished and has returned to the ldc framework 1101 * but the ldc framework has not yet cleared the 1102 * active bit. 1103 * 1104 * Wait for it to finish. 
1105 */ 1106 while (ldc_unreg_callback(ldcp->ldc_handle) 1107 == EWOULDBLOCK) 1108 (void) cv_timedwait(&ldcp->drain_cv, 1109 &ldcp->drain_cv_lock, lbolt + hz); 1110 1111 mutex_exit(&ldcp->drain_cv_lock); 1112 D2(vswp, "%s: unreg callback for chan %ld after " 1113 "timeout", __func__, ldcp->ldc_id); 1114 } 1115 } 1116 RW_EXIT(&ldcl->lockrw); 1117 1118 D1(vswp, "%s: exit", __func__); 1119 return (0); 1120 } 1121 1122 /* 1123 * Wait until all tasks which reference this port have completed. 1124 * 1125 * Prior to this function being invoked each channel under this port 1126 * should have been quiesced via ldc_set_cb_mode(DISABLE). 1127 */ 1128 static int 1129 vsw_drain_port_taskq(vsw_port_t *port) 1130 { 1131 vsw_t *vswp = port->p_vswp; 1132 1133 D1(vswp, "%s: enter", __func__); 1134 1135 /* 1136 * Mark the port as in the process of being detached, and 1137 * dispatch a marker task to the queue so we know when all 1138 * relevant tasks have completed. 1139 */ 1140 mutex_enter(&port->state_lock); 1141 port->state = VSW_PORT_DETACHING; 1142 1143 if ((vswp->taskq_p == NULL) || 1144 (ddi_taskq_dispatch(vswp->taskq_p, vsw_marker_task, 1145 port, DDI_NOSLEEP) != DDI_SUCCESS)) { 1146 DERR(vswp, "%s: unable to dispatch marker task", 1147 __func__); 1148 mutex_exit(&port->state_lock); 1149 return (1); 1150 } 1151 1152 /* 1153 * Wait for the marker task to finish. 1154 */ 1155 while (port->state != VSW_PORT_DETACHABLE) 1156 cv_wait(&port->state_cv, &port->state_lock); 1157 1158 mutex_exit(&port->state_lock); 1159 1160 D1(vswp, "%s: exit", __func__); 1161 1162 return (0); 1163 } 1164 1165 static void 1166 vsw_marker_task(void *arg) 1167 { 1168 vsw_port_t *port = arg; 1169 vsw_t *vswp = port->p_vswp; 1170 1171 D1(vswp, "%s: enter", __func__); 1172 1173 mutex_enter(&port->state_lock); 1174 1175 /* 1176 * No further tasks should be dispatched which reference 1177 * this port so ok to mark it as safe to detach. 1178 */ 1179 port->state = VSW_PORT_DETACHABLE; 1180 1181 cv_signal(&port->state_cv); 1182 1183 mutex_exit(&port->state_lock); 1184 1185 D1(vswp, "%s: exit", __func__); 1186 } 1187 1188 vsw_port_t * 1189 vsw_lookup_port(vsw_t *vswp, int p_instance) 1190 { 1191 vsw_port_list_t *plist = &vswp->plist; 1192 vsw_port_t *port; 1193 1194 for (port = plist->head; port != NULL; port = port->p_next) { 1195 if (port->p_instance == p_instance) { 1196 D2(vswp, "vsw_lookup_port: found p_instance\n"); 1197 return (port); 1198 } 1199 } 1200 1201 return (NULL); 1202 } 1203 1204 void 1205 vsw_vlan_unaware_port_reset(vsw_port_t *portp) 1206 { 1207 vsw_ldc_list_t *ldclp; 1208 vsw_ldc_t *ldcp; 1209 1210 ldclp = &portp->p_ldclist; 1211 1212 READ_ENTER(&ldclp->lockrw); 1213 1214 /* 1215 * NOTE: for now, we will assume we have a single channel. 1216 */ 1217 if (ldclp->head == NULL) { 1218 RW_EXIT(&ldclp->lockrw); 1219 return; 1220 } 1221 ldcp = ldclp->head; 1222 1223 mutex_enter(&ldcp->ldc_cblock); 1224 1225 /* 1226 * If the peer is vlan_unaware(ver < 1.3), reset channel and terminate 1227 * the connection. See comments in vsw_set_vnet_proto_ops(). 
1228 */ 1229 if (ldcp->hphase == VSW_MILESTONE4 && VSW_VER_LT(ldcp, 1, 3) && 1230 portp->nvids != 0) { 1231 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1232 } 1233 1234 mutex_exit(&ldcp->ldc_cblock); 1235 1236 RW_EXIT(&ldclp->lockrw); 1237 } 1238 1239 void 1240 vsw_hio_port_reset(vsw_port_t *portp, boolean_t immediate) 1241 { 1242 vsw_ldc_list_t *ldclp; 1243 vsw_ldc_t *ldcp; 1244 1245 ldclp = &portp->p_ldclist; 1246 1247 READ_ENTER(&ldclp->lockrw); 1248 1249 /* 1250 * NOTE: for now, we will assume we have a single channel. 1251 */ 1252 if (ldclp->head == NULL) { 1253 RW_EXIT(&ldclp->lockrw); 1254 return; 1255 } 1256 ldcp = ldclp->head; 1257 1258 mutex_enter(&ldcp->ldc_cblock); 1259 1260 /* 1261 * If the peer is HybridIO capable (ver >= 1.3), reset channel 1262 * to trigger re-negotiation, which inturn trigger HybridIO 1263 * setup/cleanup. 1264 */ 1265 if ((ldcp->hphase == VSW_MILESTONE4) && 1266 (portp->p_hio_capable == B_TRUE)) { 1267 if (immediate == B_TRUE) { 1268 (void) ldc_down(ldcp->ldc_handle); 1269 } else { 1270 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1271 } 1272 } 1273 1274 mutex_exit(&ldcp->ldc_cblock); 1275 1276 RW_EXIT(&ldclp->lockrw); 1277 } 1278 1279 /* 1280 * Search for and remove the specified port from the port 1281 * list. Returns 0 if able to locate and remove port, otherwise 1282 * returns 1. 1283 */ 1284 static int 1285 vsw_plist_del_node(vsw_t *vswp, vsw_port_t *port) 1286 { 1287 vsw_port_list_t *plist = &vswp->plist; 1288 vsw_port_t *curr_p, *prev_p; 1289 1290 if (plist->head == NULL) 1291 return (1); 1292 1293 curr_p = prev_p = plist->head; 1294 1295 while (curr_p != NULL) { 1296 if (curr_p == port) { 1297 if (prev_p == curr_p) { 1298 plist->head = curr_p->p_next; 1299 } else { 1300 prev_p->p_next = curr_p->p_next; 1301 } 1302 plist->num_ports--; 1303 break; 1304 } else { 1305 prev_p = curr_p; 1306 curr_p = curr_p->p_next; 1307 } 1308 } 1309 return (0); 1310 } 1311 1312 /* 1313 * Interrupt handler for ldc messages. 1314 */ 1315 static uint_t 1316 vsw_ldc_cb(uint64_t event, caddr_t arg) 1317 { 1318 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 1319 vsw_t *vswp = ldcp->ldc_vswp; 1320 1321 D1(vswp, "%s: enter: ldcid (%lld)\n", __func__, ldcp->ldc_id); 1322 1323 mutex_enter(&ldcp->ldc_cblock); 1324 ldcp->ldc_stats.callbacks++; 1325 1326 mutex_enter(&ldcp->status_lock); 1327 if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) { 1328 mutex_exit(&ldcp->status_lock); 1329 mutex_exit(&ldcp->ldc_cblock); 1330 return (LDC_SUCCESS); 1331 } 1332 mutex_exit(&ldcp->status_lock); 1333 1334 if (event & LDC_EVT_UP) { 1335 /* 1336 * Channel has come up. 1337 */ 1338 D2(vswp, "%s: id(%ld) event(%llx) UP: status(%ld)", 1339 __func__, ldcp->ldc_id, event, ldcp->ldc_status); 1340 1341 vsw_process_conn_evt(ldcp, VSW_CONN_UP); 1342 1343 ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0); 1344 } 1345 1346 if (event & LDC_EVT_READ) { 1347 /* 1348 * Data available for reading. 1349 */ 1350 D2(vswp, "%s: id(ld) event(%llx) data READ", 1351 __func__, ldcp->ldc_id, event); 1352 1353 if (ldcp->rx_thread != NULL) { 1354 /* 1355 * If the receive thread is enabled, then 1356 * wakeup the receive thread to process the 1357 * LDC messages. 
1358 */ 1359 mutex_exit(&ldcp->ldc_cblock); 1360 mutex_enter(&ldcp->rx_thr_lock); 1361 if (!(ldcp->rx_thr_flags & VSW_WTHR_DATARCVD)) { 1362 ldcp->rx_thr_flags |= VSW_WTHR_DATARCVD; 1363 cv_signal(&ldcp->rx_thr_cv); 1364 } 1365 mutex_exit(&ldcp->rx_thr_lock); 1366 mutex_enter(&ldcp->ldc_cblock); 1367 } else { 1368 vsw_process_pkt(ldcp); 1369 } 1370 1371 ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0); 1372 1373 goto vsw_cb_exit; 1374 } 1375 1376 if (event & (LDC_EVT_DOWN | LDC_EVT_RESET)) { 1377 D2(vswp, "%s: id(%ld) event (%lx) DOWN/RESET: status(%ld)", 1378 __func__, ldcp->ldc_id, event, ldcp->ldc_status); 1379 1380 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 1381 } 1382 1383 /* 1384 * Catch either LDC_EVT_WRITE which we don't support or any 1385 * unknown event. 1386 */ 1387 if (event & 1388 ~(LDC_EVT_UP | LDC_EVT_RESET | LDC_EVT_DOWN | LDC_EVT_READ)) { 1389 DERR(vswp, "%s: id(%ld) Unexpected event=(%llx) status(%ld)", 1390 __func__, ldcp->ldc_id, event, ldcp->ldc_status); 1391 } 1392 1393 vsw_cb_exit: 1394 mutex_exit(&ldcp->ldc_cblock); 1395 1396 /* 1397 * Let the drain function know we are finishing if it 1398 * is waiting. 1399 */ 1400 mutex_enter(&ldcp->drain_cv_lock); 1401 if (ldcp->drain_state == VSW_LDC_DRAINING) 1402 cv_signal(&ldcp->drain_cv); 1403 mutex_exit(&ldcp->drain_cv_lock); 1404 1405 return (LDC_SUCCESS); 1406 } 1407 1408 /* 1409 * Reinitialise data structures associated with the channel. 1410 */ 1411 static void 1412 vsw_ldc_reinit(vsw_ldc_t *ldcp) 1413 { 1414 vsw_t *vswp = ldcp->ldc_vswp; 1415 vsw_port_t *port; 1416 vsw_ldc_list_t *ldcl; 1417 1418 D1(vswp, "%s: enter", __func__); 1419 1420 port = ldcp->ldc_port; 1421 ldcl = &port->p_ldclist; 1422 1423 READ_ENTER(&ldcl->lockrw); 1424 1425 D2(vswp, "%s: in 0x%llx : out 0x%llx", __func__, 1426 ldcp->lane_in.lstate, ldcp->lane_out.lstate); 1427 1428 vsw_free_lane_resources(ldcp, INBOUND); 1429 vsw_free_lane_resources(ldcp, OUTBOUND); 1430 RW_EXIT(&ldcl->lockrw); 1431 1432 ldcp->lane_in.lstate = 0; 1433 ldcp->lane_out.lstate = 0; 1434 1435 /* Remove the fdb entry for this port/mac address */ 1436 vsw_fdbe_del(vswp, &(port->p_macaddr)); 1437 1438 /* remove the port from vlans it has been assigned to */ 1439 vsw_vlan_remove_ids(port, VSW_VNETPORT); 1440 1441 /* 1442 * Remove parent port from any multicast groups 1443 * it may have registered with. Client must resend 1444 * multicast add command after handshake completes. 1445 */ 1446 vsw_del_mcst_port(port); 1447 1448 ldcp->peer_session = 0; 1449 ldcp->session_status = 0; 1450 ldcp->hcnt = 0; 1451 ldcp->hphase = VSW_MILESTONE0; 1452 1453 vsw_reset_vnet_proto_ops(ldcp); 1454 1455 D1(vswp, "%s: exit", __func__); 1456 } 1457 1458 /* 1459 * Process a connection event. 1460 * 1461 * Note - care must be taken to ensure that this function is 1462 * not called with the dlistrw lock held. 1463 */ 1464 static void 1465 vsw_process_conn_evt(vsw_ldc_t *ldcp, uint16_t evt) 1466 { 1467 vsw_t *vswp = ldcp->ldc_vswp; 1468 vsw_conn_evt_t *conn = NULL; 1469 1470 D1(vswp, "%s: enter", __func__); 1471 1472 /* 1473 * Check if either a reset or restart event is pending 1474 * or in progress. If so just return. 1475 * 1476 * A VSW_CONN_RESET event originates either with a LDC_RESET_EVT 1477 * being received by the callback handler, or a ECONNRESET error 1478 * code being returned from a ldc_read() or ldc_write() call. 
1479 * 1480 * A VSW_CONN_RESTART event occurs when some error checking code 1481 * decides that there is a problem with data from the channel, 1482 * and that the handshake should be restarted. 1483 */ 1484 if (((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART)) && 1485 (ldstub((uint8_t *)&ldcp->reset_active))) 1486 return; 1487 1488 /* 1489 * If it is an LDC_UP event we first check the recorded 1490 * state of the channel. If this is UP then we know that 1491 * the channel moving to the UP state has already been dealt 1492 * with and don't need to dispatch a new task. 1493 * 1494 * The reason for this check is that when we do a ldc_up(), 1495 * depending on the state of the peer, we may or may not get 1496 * a LDC_UP event. As we can't depend on getting a LDC_UP evt 1497 * every time we do ldc_up() we explicitly check the channel 1498 * status to see has it come up (ldc_up() is asynch and will 1499 * complete at some undefined time), and take the appropriate 1500 * action. 1501 * 1502 * The flip side of this is that we may get a LDC_UP event 1503 * when we have already seen that the channel is up and have 1504 * dealt with that. 1505 */ 1506 mutex_enter(&ldcp->status_lock); 1507 if (evt == VSW_CONN_UP) { 1508 if ((ldcp->ldc_status == LDC_UP) || (ldcp->reset_active != 0)) { 1509 mutex_exit(&ldcp->status_lock); 1510 return; 1511 } 1512 } 1513 mutex_exit(&ldcp->status_lock); 1514 1515 /* 1516 * The transaction group id allows us to identify and discard 1517 * any tasks which are still pending on the taskq and refer 1518 * to the handshake session we are about to restart or reset. 1519 * These stale messages no longer have any real meaning. 1520 */ 1521 (void) atomic_inc_32(&ldcp->hss_id); 1522 1523 ASSERT(vswp->taskq_p != NULL); 1524 1525 if ((conn = kmem_zalloc(sizeof (vsw_conn_evt_t), KM_NOSLEEP)) == NULL) { 1526 cmn_err(CE_WARN, "!vsw%d: unable to allocate memory for" 1527 " connection event", vswp->instance); 1528 goto err_exit; 1529 } 1530 1531 conn->evt = evt; 1532 conn->ldcp = ldcp; 1533 1534 if (ddi_taskq_dispatch(vswp->taskq_p, vsw_conn_task, conn, 1535 DDI_NOSLEEP) != DDI_SUCCESS) { 1536 cmn_err(CE_WARN, "!vsw%d: Can't dispatch connection task", 1537 vswp->instance); 1538 1539 kmem_free(conn, sizeof (vsw_conn_evt_t)); 1540 goto err_exit; 1541 } 1542 1543 D1(vswp, "%s: exit", __func__); 1544 return; 1545 1546 err_exit: 1547 /* 1548 * Have mostly likely failed due to memory shortage. Clear the flag so 1549 * that future requests will at least be attempted and will hopefully 1550 * succeed. 1551 */ 1552 if ((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART)) 1553 ldcp->reset_active = 0; 1554 } 1555 1556 /* 1557 * Deal with events relating to a connection. Invoked from a taskq. 
1558 */ 1559 static void 1560 vsw_conn_task(void *arg) 1561 { 1562 vsw_conn_evt_t *conn = (vsw_conn_evt_t *)arg; 1563 vsw_ldc_t *ldcp = NULL; 1564 vsw_port_t *portp; 1565 vsw_t *vswp = NULL; 1566 uint16_t evt; 1567 ldc_status_t curr_status; 1568 1569 ldcp = conn->ldcp; 1570 evt = conn->evt; 1571 vswp = ldcp->ldc_vswp; 1572 portp = ldcp->ldc_port; 1573 1574 D1(vswp, "%s: enter", __func__); 1575 1576 /* can safely free now have copied out data */ 1577 kmem_free(conn, sizeof (vsw_conn_evt_t)); 1578 1579 mutex_enter(&ldcp->status_lock); 1580 if (ldc_status(ldcp->ldc_handle, &curr_status) != 0) { 1581 cmn_err(CE_WARN, "!vsw%d: Unable to read status of " 1582 "channel %ld", vswp->instance, ldcp->ldc_id); 1583 mutex_exit(&ldcp->status_lock); 1584 return; 1585 } 1586 1587 /* 1588 * If we wish to restart the handshake on this channel, then if 1589 * the channel is UP we bring it DOWN to flush the underlying 1590 * ldc queue. 1591 */ 1592 if ((evt == VSW_CONN_RESTART) && (curr_status == LDC_UP)) 1593 (void) ldc_down(ldcp->ldc_handle); 1594 1595 if ((portp->p_hio_capable) && (portp->p_hio_enabled)) { 1596 vsw_hio_stop(vswp, ldcp); 1597 } 1598 1599 /* 1600 * re-init all the associated data structures. 1601 */ 1602 vsw_ldc_reinit(ldcp); 1603 1604 /* 1605 * Bring the channel back up (note it does no harm to 1606 * do this even if the channel is already UP, Just 1607 * becomes effectively a no-op). 1608 */ 1609 (void) ldc_up(ldcp->ldc_handle); 1610 1611 /* 1612 * Check if channel is now UP. This will only happen if 1613 * peer has also done a ldc_up(). 1614 */ 1615 if (ldc_status(ldcp->ldc_handle, &curr_status) != 0) { 1616 cmn_err(CE_WARN, "!vsw%d: Unable to read status of " 1617 "channel %ld", vswp->instance, ldcp->ldc_id); 1618 mutex_exit(&ldcp->status_lock); 1619 return; 1620 } 1621 1622 ldcp->ldc_status = curr_status; 1623 1624 /* channel UP so restart handshake by sending version info */ 1625 if (curr_status == LDC_UP) { 1626 if (ldcp->hcnt++ > vsw_num_handshakes) { 1627 cmn_err(CE_WARN, "!vsw%d: exceeded number of permitted" 1628 " handshake attempts (%d) on channel %ld", 1629 vswp->instance, ldcp->hcnt, ldcp->ldc_id); 1630 mutex_exit(&ldcp->status_lock); 1631 return; 1632 } 1633 1634 if (vsw_obp_ver_proto_workaround == B_FALSE && 1635 (ddi_taskq_dispatch(vswp->taskq_p, vsw_send_ver, ldcp, 1636 DDI_NOSLEEP) != DDI_SUCCESS)) { 1637 cmn_err(CE_WARN, "!vsw%d: Can't dispatch version task", 1638 vswp->instance); 1639 1640 /* 1641 * Don't count as valid restart attempt if couldn't 1642 * send version msg. 1643 */ 1644 if (ldcp->hcnt > 0) 1645 ldcp->hcnt--; 1646 } 1647 } 1648 1649 /* 1650 * Mark that the process is complete by clearing the flag. 1651 * 1652 * Note is it possible that the taskq dispatch above may have failed, 1653 * most likely due to memory shortage. We still clear the flag so 1654 * future attempts will at least be attempted and will hopefully 1655 * succeed. 1656 */ 1657 if ((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART)) 1658 ldcp->reset_active = 0; 1659 1660 mutex_exit(&ldcp->status_lock); 1661 1662 D1(vswp, "%s: exit", __func__); 1663 } 1664 1665 /* 1666 * returns 0 if legal for event signified by flag to have 1667 * occured at the time it did. Otherwise returns 1. 
1668 */ 1669 int 1670 vsw_check_flag(vsw_ldc_t *ldcp, int dir, uint64_t flag) 1671 { 1672 vsw_t *vswp = ldcp->ldc_vswp; 1673 uint64_t state; 1674 uint64_t phase; 1675 1676 if (dir == INBOUND) 1677 state = ldcp->lane_in.lstate; 1678 else 1679 state = ldcp->lane_out.lstate; 1680 1681 phase = ldcp->hphase; 1682 1683 switch (flag) { 1684 case VSW_VER_INFO_RECV: 1685 if (phase > VSW_MILESTONE0) { 1686 DERR(vswp, "vsw_check_flag (%d): VER_INFO_RECV" 1687 " when in state %d\n", ldcp->ldc_id, phase); 1688 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1689 return (1); 1690 } 1691 break; 1692 1693 case VSW_VER_ACK_RECV: 1694 case VSW_VER_NACK_RECV: 1695 if (!(state & VSW_VER_INFO_SENT)) { 1696 DERR(vswp, "vsw_check_flag (%d): spurious VER_ACK or " 1697 "VER_NACK when in state %d\n", ldcp->ldc_id, phase); 1698 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1699 return (1); 1700 } else 1701 state &= ~VSW_VER_INFO_SENT; 1702 break; 1703 1704 case VSW_ATTR_INFO_RECV: 1705 if ((phase < VSW_MILESTONE1) || (phase >= VSW_MILESTONE2)) { 1706 DERR(vswp, "vsw_check_flag (%d): ATTR_INFO_RECV" 1707 " when in state %d\n", ldcp->ldc_id, phase); 1708 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1709 return (1); 1710 } 1711 break; 1712 1713 case VSW_ATTR_ACK_RECV: 1714 case VSW_ATTR_NACK_RECV: 1715 if (!(state & VSW_ATTR_INFO_SENT)) { 1716 DERR(vswp, "vsw_check_flag (%d): spurious ATTR_ACK" 1717 " or ATTR_NACK when in state %d\n", 1718 ldcp->ldc_id, phase); 1719 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1720 return (1); 1721 } else 1722 state &= ~VSW_ATTR_INFO_SENT; 1723 break; 1724 1725 case VSW_DRING_INFO_RECV: 1726 if (phase < VSW_MILESTONE1) { 1727 DERR(vswp, "vsw_check_flag (%d): DRING_INFO_RECV" 1728 " when in state %d\n", ldcp->ldc_id, phase); 1729 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1730 return (1); 1731 } 1732 break; 1733 1734 case VSW_DRING_ACK_RECV: 1735 case VSW_DRING_NACK_RECV: 1736 if (!(state & VSW_DRING_INFO_SENT)) { 1737 DERR(vswp, "vsw_check_flag (%d): spurious DRING_ACK " 1738 " or DRING_NACK when in state %d\n", 1739 ldcp->ldc_id, phase); 1740 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1741 return (1); 1742 } else 1743 state &= ~VSW_DRING_INFO_SENT; 1744 break; 1745 1746 case VSW_RDX_INFO_RECV: 1747 if (phase < VSW_MILESTONE3) { 1748 DERR(vswp, "vsw_check_flag (%d): RDX_INFO_RECV" 1749 " when in state %d\n", ldcp->ldc_id, phase); 1750 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1751 return (1); 1752 } 1753 break; 1754 1755 case VSW_RDX_ACK_RECV: 1756 case VSW_RDX_NACK_RECV: 1757 if (!(state & VSW_RDX_INFO_SENT)) { 1758 DERR(vswp, "vsw_check_flag (%d): spurious RDX_ACK or " 1759 "RDX_NACK when in state %d\n", ldcp->ldc_id, phase); 1760 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1761 return (1); 1762 } else 1763 state &= ~VSW_RDX_INFO_SENT; 1764 break; 1765 1766 case VSW_MCST_INFO_RECV: 1767 if (phase < VSW_MILESTONE3) { 1768 DERR(vswp, "vsw_check_flag (%d): VSW_MCST_INFO_RECV" 1769 " when in state %d\n", ldcp->ldc_id, phase); 1770 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1771 return (1); 1772 } 1773 break; 1774 1775 default: 1776 DERR(vswp, "vsw_check_flag (%lld): unknown flag (%llx)", 1777 ldcp->ldc_id, flag); 1778 return (1); 1779 } 1780 1781 if (dir == INBOUND) 1782 ldcp->lane_in.lstate = state; 1783 else 1784 ldcp->lane_out.lstate = state; 1785 1786 D1(vswp, "vsw_check_flag (chan %lld): exit", ldcp->ldc_id); 1787 1788 return (0); 1789 } 1790 1791 void 1792 vsw_next_milestone(vsw_ldc_t *ldcp) 1793 { 1794 vsw_t *vswp = ldcp->ldc_vswp; 1795 vsw_port_t *portp = 
ldcp->ldc_port; 1796 1797 D1(vswp, "%s (chan %lld): enter (phase %ld)", __func__, 1798 ldcp->ldc_id, ldcp->hphase); 1799 1800 DUMP_FLAGS(ldcp->lane_in.lstate); 1801 DUMP_FLAGS(ldcp->lane_out.lstate); 1802 1803 switch (ldcp->hphase) { 1804 1805 case VSW_MILESTONE0: 1806 /* 1807 * If we haven't started to handshake with our peer, 1808 * start to do so now. 1809 */ 1810 if (ldcp->lane_out.lstate == 0) { 1811 D2(vswp, "%s: (chan %lld) starting handshake " 1812 "with peer", __func__, ldcp->ldc_id); 1813 vsw_process_conn_evt(ldcp, VSW_CONN_UP); 1814 } 1815 1816 /* 1817 * Only way to pass this milestone is to have successfully 1818 * negotiated version info. 1819 */ 1820 if ((ldcp->lane_in.lstate & VSW_VER_ACK_SENT) && 1821 (ldcp->lane_out.lstate & VSW_VER_ACK_RECV)) { 1822 1823 D2(vswp, "%s: (chan %lld) leaving milestone 0", 1824 __func__, ldcp->ldc_id); 1825 1826 vsw_set_vnet_proto_ops(ldcp); 1827 1828 /* 1829 * Next milestone is passed when attribute 1830 * information has been successfully exchanged. 1831 */ 1832 ldcp->hphase = VSW_MILESTONE1; 1833 vsw_send_attr(ldcp); 1834 1835 } 1836 break; 1837 1838 case VSW_MILESTONE1: 1839 /* 1840 * Only way to pass this milestone is to have successfully 1841 * negotiated attribute information. 1842 */ 1843 if (ldcp->lane_in.lstate & VSW_ATTR_ACK_SENT) { 1844 1845 ldcp->hphase = VSW_MILESTONE2; 1846 1847 /* 1848 * If the peer device has said it wishes to 1849 * use descriptor rings then we send it our ring 1850 * info, otherwise we just set up a private ring 1851 * which we use an internal buffer 1852 */ 1853 if ((VSW_VER_GTEQ(ldcp, 1, 2) && 1854 (ldcp->lane_in.xfer_mode & VIO_DRING_MODE_V1_2)) || 1855 (VSW_VER_LT(ldcp, 1, 2) && 1856 (ldcp->lane_in.xfer_mode == 1857 VIO_DRING_MODE_V1_0))) { 1858 vsw_send_dring_info(ldcp); 1859 } 1860 } 1861 break; 1862 1863 case VSW_MILESTONE2: 1864 /* 1865 * If peer has indicated in its attribute message that 1866 * it wishes to use descriptor rings then the only way 1867 * to pass this milestone is for us to have received 1868 * valid dring info. 1869 * 1870 * If peer is not using descriptor rings then just fall 1871 * through. 1872 */ 1873 if ((VSW_VER_GTEQ(ldcp, 1, 2) && 1874 (ldcp->lane_in.xfer_mode & VIO_DRING_MODE_V1_2)) || 1875 (VSW_VER_LT(ldcp, 1, 2) && 1876 (ldcp->lane_in.xfer_mode == 1877 VIO_DRING_MODE_V1_0))) { 1878 if (!(ldcp->lane_in.lstate & VSW_DRING_ACK_SENT)) 1879 break; 1880 } 1881 1882 D2(vswp, "%s: (chan %lld) leaving milestone 2", 1883 __func__, ldcp->ldc_id); 1884 1885 ldcp->hphase = VSW_MILESTONE3; 1886 vsw_send_rdx(ldcp); 1887 break; 1888 1889 case VSW_MILESTONE3: 1890 /* 1891 * Pass this milestone when all paramaters have been 1892 * successfully exchanged and RDX sent in both directions. 1893 * 1894 * Mark outbound lane as available to transmit data. 
1895 */ 1896 if ((ldcp->lane_out.lstate & VSW_RDX_ACK_SENT) && 1897 (ldcp->lane_in.lstate & VSW_RDX_ACK_RECV)) { 1898 1899 D2(vswp, "%s: (chan %lld) leaving milestone 3", 1900 __func__, ldcp->ldc_id); 1901 D2(vswp, "%s: ** handshake complete (0x%llx : " 1902 "0x%llx) **", __func__, ldcp->lane_in.lstate, 1903 ldcp->lane_out.lstate); 1904 ldcp->lane_out.lstate |= VSW_LANE_ACTIVE; 1905 ldcp->hphase = VSW_MILESTONE4; 1906 ldcp->hcnt = 0; 1907 DISPLAY_STATE(); 1908 /* Start HIO if enabled and capable */ 1909 if ((portp->p_hio_enabled) && (portp->p_hio_capable)) { 1910 D2(vswp, "%s: start HybridIO setup", __func__); 1911 vsw_hio_start(vswp, ldcp); 1912 } 1913 } else { 1914 D2(vswp, "%s: still in milestone 3 (0x%llx : 0x%llx)", 1915 __func__, ldcp->lane_in.lstate, 1916 ldcp->lane_out.lstate); 1917 } 1918 break; 1919 1920 case VSW_MILESTONE4: 1921 D2(vswp, "%s: (chan %lld) in milestone 4", __func__, 1922 ldcp->ldc_id); 1923 break; 1924 1925 default: 1926 DERR(vswp, "%s: (chan %lld) Unknown Phase %x", __func__, 1927 ldcp->ldc_id, ldcp->hphase); 1928 } 1929 1930 D1(vswp, "%s (chan %lld): exit (phase %ld)", __func__, ldcp->ldc_id, 1931 ldcp->hphase); 1932 } 1933 1934 /* 1935 * Check if major version is supported. 1936 * 1937 * Returns 0 if finds supported major number, and if necessary 1938 * adjusts the minor field. 1939 * 1940 * Returns 1 if can't match major number exactly. Sets mjor/minor 1941 * to next lowest support values, or to zero if no other values possible. 1942 */ 1943 static int 1944 vsw_supported_version(vio_ver_msg_t *vp) 1945 { 1946 int i; 1947 1948 D1(NULL, "vsw_supported_version: enter"); 1949 1950 for (i = 0; i < VSW_NUM_VER; i++) { 1951 if (vsw_versions[i].ver_major == vp->ver_major) { 1952 /* 1953 * Matching or lower major version found. Update 1954 * minor number if necessary. 1955 */ 1956 if (vp->ver_minor > vsw_versions[i].ver_minor) { 1957 D2(NULL, "%s: adjusting minor value from %d " 1958 "to %d", __func__, vp->ver_minor, 1959 vsw_versions[i].ver_minor); 1960 vp->ver_minor = vsw_versions[i].ver_minor; 1961 } 1962 1963 return (0); 1964 } 1965 1966 /* 1967 * If the message contains a higher major version number, set 1968 * the message's major/minor versions to the current values 1969 * and return false, so this message will get resent with 1970 * these values. 1971 */ 1972 if (vsw_versions[i].ver_major < vp->ver_major) { 1973 D2(NULL, "%s: adjusting major and minor " 1974 "values to %d, %d\n", 1975 __func__, vsw_versions[i].ver_major, 1976 vsw_versions[i].ver_minor); 1977 vp->ver_major = vsw_versions[i].ver_major; 1978 vp->ver_minor = vsw_versions[i].ver_minor; 1979 return (1); 1980 } 1981 } 1982 1983 /* No match was possible, zero out fields */ 1984 vp->ver_major = 0; 1985 vp->ver_minor = 0; 1986 1987 D1(NULL, "vsw_supported_version: exit"); 1988 1989 return (1); 1990 } 1991 1992 /* 1993 * Set vnet-protocol-version dependent functions based on version. 1994 */ 1995 static void 1996 vsw_set_vnet_proto_ops(vsw_ldc_t *ldcp) 1997 { 1998 vsw_t *vswp = ldcp->ldc_vswp; 1999 lane_t *lp = &ldcp->lane_out; 2000 2001 if (VSW_VER_GTEQ(ldcp, 1, 3)) { 2002 /* 2003 * If the version negotiated with peer is >= 1.3, 2004 * set the mtu in our attributes to max_frame_size. 2005 */ 2006 lp->mtu = vswp->max_frame_size; 2007 } else { 2008 vsw_port_t *portp = ldcp->ldc_port; 2009 /* 2010 * Pre-1.3 peers expect max frame size of ETHERMAX. 
2011 * We can negotiate that size with those peers provided the 2012 * following conditions are true: 2013 * - Our max_frame_size is greater only by VLAN_TAGSZ (4). 2014 * - Only pvid is defined for our peer and there are no vids. 2015 * If the above conditions are true, then we can send/recv only 2016 * untagged frames of max size ETHERMAX. Note that pvid of the 2017 * peer can be different, as vsw has to serve the vnet in that 2018 * vlan even if itself is not assigned to that vlan. 2019 */ 2020 if ((vswp->max_frame_size == ETHERMAX + VLAN_TAGSZ) && 2021 portp->nvids == 0) { 2022 lp->mtu = ETHERMAX; 2023 } 2024 } 2025 2026 if (VSW_VER_GTEQ(ldcp, 1, 2)) { 2027 /* Versions >= 1.2 */ 2028 2029 if (VSW_PRI_ETH_DEFINED(vswp)) { 2030 /* 2031 * enable priority routines and pkt mode only if 2032 * at least one pri-eth-type is specified in MD. 2033 */ 2034 ldcp->tx = vsw_ldctx_pri; 2035 ldcp->rx_pktdata = vsw_process_pkt_data; 2036 2037 /* set xfer mode for vsw_send_attr() */ 2038 lp->xfer_mode = VIO_PKT_MODE | VIO_DRING_MODE_V1_2; 2039 } else { 2040 /* no priority eth types defined in MD */ 2041 2042 ldcp->tx = vsw_ldctx; 2043 ldcp->rx_pktdata = vsw_process_pkt_data_nop; 2044 2045 /* set xfer mode for vsw_send_attr() */ 2046 lp->xfer_mode = VIO_DRING_MODE_V1_2; 2047 } 2048 2049 } else { 2050 /* Versions prior to 1.2 */ 2051 2052 vsw_reset_vnet_proto_ops(ldcp); 2053 } 2054 } 2055 2056 /* 2057 * Reset vnet-protocol-version dependent functions to v1.0. 2058 */ 2059 static void 2060 vsw_reset_vnet_proto_ops(vsw_ldc_t *ldcp) 2061 { 2062 lane_t *lp = &ldcp->lane_out; 2063 2064 ldcp->tx = vsw_ldctx; 2065 ldcp->rx_pktdata = vsw_process_pkt_data_nop; 2066 2067 /* set xfer mode for vsw_send_attr() */ 2068 lp->xfer_mode = VIO_DRING_MODE_V1_0; 2069 } 2070 2071 /* 2072 * Main routine for processing messages received over LDC. 2073 */ 2074 static void 2075 vsw_process_pkt(void *arg) 2076 { 2077 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 2078 vsw_t *vswp = ldcp->ldc_vswp; 2079 size_t msglen; 2080 vio_msg_tag_t *tagp; 2081 uint64_t *ldcmsg; 2082 int rv = 0; 2083 2084 2085 D1(vswp, "%s enter: ldcid (%lld)\n", __func__, ldcp->ldc_id); 2086 2087 ASSERT(MUTEX_HELD(&ldcp->ldc_cblock)); 2088 2089 ldcmsg = ldcp->ldcmsg; 2090 /* 2091 * If channel is up read messages until channel is empty. 2092 */ 2093 do { 2094 msglen = ldcp->msglen; 2095 rv = ldc_read(ldcp->ldc_handle, (caddr_t)ldcmsg, &msglen); 2096 2097 if (rv != 0) { 2098 DERR(vswp, "%s :ldc_read err id(%lld) rv(%d) len(%d)\n", 2099 __func__, ldcp->ldc_id, rv, msglen); 2100 } 2101 2102 /* channel has been reset */ 2103 if (rv == ECONNRESET) { 2104 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 2105 break; 2106 } 2107 2108 if (msglen == 0) { 2109 D2(vswp, "%s: ldc_read id(%lld) NODATA", __func__, 2110 ldcp->ldc_id); 2111 break; 2112 } 2113 2114 D2(vswp, "%s: ldc_read id(%lld): msglen(%d)", __func__, 2115 ldcp->ldc_id, msglen); 2116 2117 /* 2118 * Figure out what sort of packet we have gotten by 2119 * examining the msg tag, and then switch it appropriately. 
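	 *
	 * Control messages are dispatched to the taskq (except RDX
	 * ACKs, which are handled in place), data messages are
	 * processed directly below, and error messages are currently
	 * just logged.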
2120 */ 2121 tagp = (vio_msg_tag_t *)ldcmsg; 2122 2123 switch (tagp->vio_msgtype) { 2124 case VIO_TYPE_CTRL: 2125 vsw_dispatch_ctrl_task(ldcp, ldcmsg, tagp); 2126 break; 2127 case VIO_TYPE_DATA: 2128 vsw_process_data_pkt(ldcp, ldcmsg, tagp, msglen); 2129 break; 2130 case VIO_TYPE_ERR: 2131 vsw_process_err_pkt(ldcp, ldcmsg, tagp); 2132 break; 2133 default: 2134 DERR(vswp, "%s: Unknown tag(%lx) ", __func__, 2135 "id(%lx)\n", tagp->vio_msgtype, ldcp->ldc_id); 2136 break; 2137 } 2138 } while (msglen); 2139 2140 D1(vswp, "%s exit: ldcid (%lld)\n", __func__, ldcp->ldc_id); 2141 } 2142 2143 /* 2144 * Dispatch a task to process a VIO control message. 2145 */ 2146 static void 2147 vsw_dispatch_ctrl_task(vsw_ldc_t *ldcp, void *cpkt, vio_msg_tag_t *tagp) 2148 { 2149 vsw_ctrl_task_t *ctaskp = NULL; 2150 vsw_port_t *port = ldcp->ldc_port; 2151 vsw_t *vswp = port->p_vswp; 2152 2153 D1(vswp, "%s: enter", __func__); 2154 2155 /* 2156 * We need to handle RDX ACK messages in-band as once they 2157 * are exchanged it is possible that we will get an 2158 * immediate (legitimate) data packet. 2159 */ 2160 if ((tagp->vio_subtype_env == VIO_RDX) && 2161 (tagp->vio_subtype == VIO_SUBTYPE_ACK)) { 2162 2163 if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_ACK_RECV)) 2164 return; 2165 2166 ldcp->lane_in.lstate |= VSW_RDX_ACK_RECV; 2167 D2(vswp, "%s (%ld) handling RDX_ACK in place " 2168 "(ostate 0x%llx : hphase %d)", __func__, 2169 ldcp->ldc_id, ldcp->lane_in.lstate, ldcp->hphase); 2170 vsw_next_milestone(ldcp); 2171 return; 2172 } 2173 2174 ctaskp = kmem_alloc(sizeof (vsw_ctrl_task_t), KM_NOSLEEP); 2175 2176 if (ctaskp == NULL) { 2177 DERR(vswp, "%s: unable to alloc space for ctrl msg", __func__); 2178 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2179 return; 2180 } 2181 2182 ctaskp->ldcp = ldcp; 2183 bcopy((def_msg_t *)cpkt, &ctaskp->pktp, sizeof (def_msg_t)); 2184 ctaskp->hss_id = ldcp->hss_id; 2185 2186 /* 2187 * Dispatch task to processing taskq if port is not in 2188 * the process of being detached. 2189 */ 2190 mutex_enter(&port->state_lock); 2191 if (port->state == VSW_PORT_INIT) { 2192 if ((vswp->taskq_p == NULL) || 2193 (ddi_taskq_dispatch(vswp->taskq_p, vsw_process_ctrl_pkt, 2194 ctaskp, DDI_NOSLEEP) != DDI_SUCCESS)) { 2195 mutex_exit(&port->state_lock); 2196 DERR(vswp, "%s: unable to dispatch task to taskq", 2197 __func__); 2198 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2199 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2200 return; 2201 } 2202 } else { 2203 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2204 DWARN(vswp, "%s: port %d detaching, not dispatching " 2205 "task", __func__, port->p_instance); 2206 } 2207 2208 mutex_exit(&port->state_lock); 2209 2210 D2(vswp, "%s: dispatched task to taskq for chan %d", __func__, 2211 ldcp->ldc_id); 2212 D1(vswp, "%s: exit", __func__); 2213 } 2214 2215 /* 2216 * Process a VIO ctrl message. Invoked from taskq. 
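 *
 * The task was queued by vsw_dispatch_ctrl_task() and carries a copy of
 * the message together with the handshake session id (hss_id) current at
 * dispatch time; that id is used below to discard packets belonging to an
 * earlier handshake session.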
2217 */ 2218 static void 2219 vsw_process_ctrl_pkt(void *arg) 2220 { 2221 vsw_ctrl_task_t *ctaskp = (vsw_ctrl_task_t *)arg; 2222 vsw_ldc_t *ldcp = ctaskp->ldcp; 2223 vsw_t *vswp = ldcp->ldc_vswp; 2224 vio_msg_tag_t tag; 2225 uint16_t env; 2226 2227 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 2228 2229 bcopy(&ctaskp->pktp, &tag, sizeof (vio_msg_tag_t)); 2230 env = tag.vio_subtype_env; 2231 2232 /* stale pkt check */ 2233 if (ctaskp->hss_id < ldcp->hss_id) { 2234 DWARN(vswp, "%s: discarding stale packet belonging to earlier" 2235 " (%ld) handshake session", __func__, ctaskp->hss_id); 2236 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2237 return; 2238 } 2239 2240 /* session id check */ 2241 if (ldcp->session_status & VSW_PEER_SESSION) { 2242 if (ldcp->peer_session != tag.vio_sid) { 2243 DERR(vswp, "%s (chan %d): invalid session id (%llx)", 2244 __func__, ldcp->ldc_id, tag.vio_sid); 2245 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2246 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2247 return; 2248 } 2249 } 2250 2251 /* 2252 * Switch on vio_subtype envelope, then let lower routines 2253 * decide if its an INFO, ACK or NACK packet. 2254 */ 2255 switch (env) { 2256 case VIO_VER_INFO: 2257 vsw_process_ctrl_ver_pkt(ldcp, &ctaskp->pktp); 2258 break; 2259 case VIO_DRING_REG: 2260 vsw_process_ctrl_dring_reg_pkt(ldcp, &ctaskp->pktp); 2261 break; 2262 case VIO_DRING_UNREG: 2263 vsw_process_ctrl_dring_unreg_pkt(ldcp, &ctaskp->pktp); 2264 break; 2265 case VIO_ATTR_INFO: 2266 vsw_process_ctrl_attr_pkt(ldcp, &ctaskp->pktp); 2267 break; 2268 case VNET_MCAST_INFO: 2269 vsw_process_ctrl_mcst_pkt(ldcp, &ctaskp->pktp); 2270 break; 2271 case VIO_RDX: 2272 vsw_process_ctrl_rdx_pkt(ldcp, &ctaskp->pktp); 2273 break; 2274 case VIO_DDS_INFO: 2275 vsw_process_dds_msg(vswp, ldcp, &ctaskp->pktp); 2276 break; 2277 default: 2278 DERR(vswp, "%s: unknown vio_subtype_env (%x)\n", __func__, env); 2279 } 2280 2281 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2282 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 2283 } 2284 2285 /* 2286 * Version negotiation. We can end up here either because our peer 2287 * has responded to a handshake message we have sent it, or our peer 2288 * has initiated a handshake with us. If its the former then can only 2289 * be ACK or NACK, if its the later can only be INFO. 2290 * 2291 * If its an ACK we move to the next stage of the handshake, namely 2292 * attribute exchange. If its a NACK we see if we can specify another 2293 * version, if we can't we stop. 2294 * 2295 * If it is an INFO we reset all params associated with communication 2296 * in that direction over this channel (remember connection is 2297 * essentially 2 independent simplex channels). 2298 */ 2299 void 2300 vsw_process_ctrl_ver_pkt(vsw_ldc_t *ldcp, void *pkt) 2301 { 2302 vio_ver_msg_t *ver_pkt; 2303 vsw_t *vswp = ldcp->ldc_vswp; 2304 2305 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 2306 2307 /* 2308 * We know this is a ctrl/version packet so 2309 * cast it into the correct structure. 2310 */ 2311 ver_pkt = (vio_ver_msg_t *)pkt; 2312 2313 switch (ver_pkt->tag.vio_subtype) { 2314 case VIO_SUBTYPE_INFO: 2315 D2(vswp, "vsw_process_ctrl_ver_pkt: VIO_SUBTYPE_INFO\n"); 2316 2317 /* 2318 * Record the session id, which we will use from now 2319 * until we see another VER_INFO msg. Even then the 2320 * session id in most cases will be unchanged, execpt 2321 * if channel was reset. 
2322		 */
2323		if ((ldcp->session_status & VSW_PEER_SESSION) &&
2324		    (ldcp->peer_session != ver_pkt->tag.vio_sid)) {
2325			DERR(vswp, "%s: updating session id for chan %lld "
2326			    "from %llx to %llx", __func__, ldcp->ldc_id,
2327			    ldcp->peer_session, ver_pkt->tag.vio_sid);
2328		}
2329
2330		ldcp->peer_session = ver_pkt->tag.vio_sid;
2331		ldcp->session_status |= VSW_PEER_SESSION;
2332
2333		/* Legal message at this time ? */
2334		if (vsw_check_flag(ldcp, INBOUND, VSW_VER_INFO_RECV))
2335			return;
2336
2337		/*
2338		 * First check the device class. Currently we only expect
2339		 * to be talking to a network device. In the future we may
2340		 * also talk to another switch.
2341		 */
2342		if (ver_pkt->dev_class != VDEV_NETWORK) {
2343			DERR(vswp, "%s: illegal device class %d", __func__,
2344			    ver_pkt->dev_class);
2345
2346			ver_pkt->tag.vio_sid = ldcp->local_session;
2347			ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;
2348
2349			DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt);
2350
2351			(void) vsw_send_msg(ldcp, (void *)ver_pkt,
2352			    sizeof (vio_ver_msg_t), B_TRUE);
2353
2354			ldcp->lane_in.lstate |= VSW_VER_NACK_SENT;
2355			vsw_next_milestone(ldcp);
2356			return;
2357		} else {
2358			ldcp->dev_class = ver_pkt->dev_class;
2359		}
2360
2361		/*
2362		 * Now check the version.
2363		 */
2364		if (vsw_supported_version(ver_pkt) == 0) {
2365			/*
2366			 * Support this major version and possibly
2367			 * adjusted minor version.
2368			 */
2369
2370			D2(vswp, "%s: accepted ver %d:%d", __func__,
2371			    ver_pkt->ver_major, ver_pkt->ver_minor);
2372
2373			/* Store accepted values */
2374			ldcp->lane_in.ver_major = ver_pkt->ver_major;
2375			ldcp->lane_in.ver_minor = ver_pkt->ver_minor;
2376
2377			ver_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
2378
2379			ldcp->lane_in.lstate |= VSW_VER_ACK_SENT;
2380
2381			if (vsw_obp_ver_proto_workaround == B_TRUE) {
2382				/*
2383				 * Send a version info message
2384				 * using the accepted version that
2385				 * we are about to ack. Also note that
2386				 * we send our ver info before we ack.
2387				 * Otherwise, as soon as it receives the
2388				 * ack, obp sends an attr info msg, which
2389				 * breaks vsw_check_flag() invoked
2390				 * from vsw_process_ctrl_attr_pkt();
2391				 * as we also need VSW_VER_ACK_RECV to
2392				 * be set in lane_out.lstate, before
2393				 * we can receive attr info.
2394				 */
2395				vsw_send_ver(ldcp);
2396			}
2397		} else {
2398			/*
2399			 * NACK back with the next lower major/minor
2400			 * pairing we support (if we don't support any more
2401			 * versions then they will be set to zero).
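			 *
			 * vsw_supported_version() has already rewritten the
			 * major/minor fields of the message to that lower
			 * pairing (or to zero), so below we simply record
			 * those values and send the NACK.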
2402 */ 2403 2404 D2(vswp, "%s: replying with ver %d:%d", __func__, 2405 ver_pkt->ver_major, ver_pkt->ver_minor); 2406 2407 /* Store updated values */ 2408 ldcp->lane_in.ver_major = ver_pkt->ver_major; 2409 ldcp->lane_in.ver_minor = ver_pkt->ver_minor; 2410 2411 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2412 2413 ldcp->lane_in.lstate |= VSW_VER_NACK_SENT; 2414 } 2415 2416 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 2417 ver_pkt->tag.vio_sid = ldcp->local_session; 2418 (void) vsw_send_msg(ldcp, (void *)ver_pkt, 2419 sizeof (vio_ver_msg_t), B_TRUE); 2420 2421 vsw_next_milestone(ldcp); 2422 break; 2423 2424 case VIO_SUBTYPE_ACK: 2425 D2(vswp, "%s: VIO_SUBTYPE_ACK\n", __func__); 2426 2427 if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_ACK_RECV)) 2428 return; 2429 2430 /* Store updated values */ 2431 ldcp->lane_out.ver_major = ver_pkt->ver_major; 2432 ldcp->lane_out.ver_minor = ver_pkt->ver_minor; 2433 2434 ldcp->lane_out.lstate |= VSW_VER_ACK_RECV; 2435 vsw_next_milestone(ldcp); 2436 2437 break; 2438 2439 case VIO_SUBTYPE_NACK: 2440 D2(vswp, "%s: VIO_SUBTYPE_NACK\n", __func__); 2441 2442 if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_NACK_RECV)) 2443 return; 2444 2445 /* 2446 * If our peer sent us a NACK with the ver fields set to 2447 * zero then there is nothing more we can do. Otherwise see 2448 * if we support either the version suggested, or a lesser 2449 * one. 2450 */ 2451 if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) { 2452 DERR(vswp, "%s: peer unable to negotiate any " 2453 "further.", __func__); 2454 ldcp->lane_out.lstate |= VSW_VER_NACK_RECV; 2455 vsw_next_milestone(ldcp); 2456 return; 2457 } 2458 2459 /* 2460 * Check to see if we support this major version or 2461 * a lower one. If we don't then maj/min will be set 2462 * to zero. 2463 */ 2464 (void) vsw_supported_version(ver_pkt); 2465 if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) { 2466 /* Nothing more we can do */ 2467 DERR(vswp, "%s: version negotiation failed.\n", 2468 __func__); 2469 ldcp->lane_out.lstate |= VSW_VER_NACK_RECV; 2470 vsw_next_milestone(ldcp); 2471 } else { 2472 /* found a supported major version */ 2473 ldcp->lane_out.ver_major = ver_pkt->ver_major; 2474 ldcp->lane_out.ver_minor = ver_pkt->ver_minor; 2475 2476 D2(vswp, "%s: resending with updated values (%x, %x)", 2477 __func__, ver_pkt->ver_major, ver_pkt->ver_minor); 2478 2479 ldcp->lane_out.lstate |= VSW_VER_INFO_SENT; 2480 ver_pkt->tag.vio_sid = ldcp->local_session; 2481 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO; 2482 2483 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 2484 2485 (void) vsw_send_msg(ldcp, (void *)ver_pkt, 2486 sizeof (vio_ver_msg_t), B_TRUE); 2487 2488 vsw_next_milestone(ldcp); 2489 2490 } 2491 break; 2492 2493 default: 2494 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 2495 ver_pkt->tag.vio_subtype); 2496 } 2497 2498 D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id); 2499 } 2500 2501 /* 2502 * Process an attribute packet. We can end up here either because our peer 2503 * has ACK/NACK'ed back to an earlier ATTR msg we had sent it, or our 2504 * peer has sent us an attribute INFO message 2505 * 2506 * If its an ACK we then move to the next stage of the handshake which 2507 * is to send our descriptor ring info to our peer. If its a NACK then 2508 * there is nothing more we can (currently) do. 2509 * 2510 * If we get a valid/acceptable INFO packet (and we have already negotiated 2511 * a version) we ACK back and set channel state to ATTR_RECV, otherwise we 2512 * NACK back and reset channel state to INACTIV. 
2513 *
2514 * FUTURE: in time we will probably negotiate over attributes, but for
2515 * the moment unacceptable attributes are regarded as a fatal error.
2516 *
2517 */
2518 void
2519 vsw_process_ctrl_attr_pkt(vsw_ldc_t *ldcp, void *pkt)
2520 {
2521	vnet_attr_msg_t	*attr_pkt;
2522	vsw_t		*vswp = ldcp->ldc_vswp;
2523	vsw_port_t	*port = ldcp->ldc_port;
2524	uint64_t	macaddr = 0;
2525	int		i;
2526
2527	D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id);
2528
2529	/*
2530	 * We know this is a ctrl/attr packet so
2531	 * cast it into the correct structure.
2532	 */
2533	attr_pkt = (vnet_attr_msg_t *)pkt;
2534
2535	switch (attr_pkt->tag.vio_subtype) {
2536	case VIO_SUBTYPE_INFO:
2537		D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__);
2538
2539		if (vsw_check_flag(ldcp, INBOUND, VSW_ATTR_INFO_RECV))
2540			return;
2541
2542		/*
2543		 * If the attributes are unacceptable then we NACK back.
2544		 */
2545		if (vsw_check_attr(attr_pkt, ldcp)) {
2546
2547			DERR(vswp, "%s (chan %d): invalid attributes",
2548			    __func__, ldcp->ldc_id);
2549
2550			vsw_free_lane_resources(ldcp, INBOUND);
2551
2552			attr_pkt->tag.vio_sid = ldcp->local_session;
2553			attr_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;
2554
2555			DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt);
2556			ldcp->lane_in.lstate |= VSW_ATTR_NACK_SENT;
2557			(void) vsw_send_msg(ldcp, (void *)attr_pkt,
2558			    sizeof (vnet_attr_msg_t), B_TRUE);
2559
2560			vsw_next_milestone(ldcp);
2561			return;
2562		}
2563
2564		/*
2565		 * Otherwise store attributes for this lane and update
2566		 * lane state.
2567		 */
2568		ldcp->lane_in.mtu = attr_pkt->mtu;
2569		ldcp->lane_in.addr = attr_pkt->addr;
2570		ldcp->lane_in.addr_type = attr_pkt->addr_type;
2571		ldcp->lane_in.xfer_mode = attr_pkt->xfer_mode;
2572		ldcp->lane_in.ack_freq = attr_pkt->ack_freq;
2573
2574		macaddr = ldcp->lane_in.addr;
2575		for (i = ETHERADDRL - 1; i >= 0; i--) {
2576			port->p_macaddr.ether_addr_octet[i] = macaddr & 0xFF;
2577			macaddr >>= 8;
2578		}
2579
2580		/* create the fdb entry for this port/mac address */
2581		vsw_fdbe_add(vswp, port);
2582
2583		/* add the port to the specified vlans */
2584		vsw_vlan_add_ids(port, VSW_VNETPORT);
2585
2586		/* set up device specific xmit routines */
2587		mutex_enter(&port->tx_lock);
2588		if ((VSW_VER_GTEQ(ldcp, 1, 2) &&
2589		    (ldcp->lane_in.xfer_mode & VIO_DRING_MODE_V1_2)) ||
2590		    (VSW_VER_LT(ldcp, 1, 2) &&
2591		    (ldcp->lane_in.xfer_mode == VIO_DRING_MODE_V1_0))) {
2592			D2(vswp, "%s: mode = VIO_DRING_MODE", __func__);
2593			port->transmit = vsw_dringsend;
2594		} else if (ldcp->lane_in.xfer_mode == VIO_DESC_MODE) {
2595			D2(vswp, "%s: mode = VIO_DESC_MODE", __func__);
2596			vsw_create_privring(ldcp);
2597			port->transmit = vsw_descrsend;
2598			ldcp->lane_out.xfer_mode = VIO_DESC_MODE;
2599		}
2600
2601		/*
2602		 * HybridIO is supported only by vnet, not by OBP.
2603		 * So, set hio_capable to true only when in DRING mode.
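		 * (In-band descriptor mode is typically only used by OBP,
		 * which cannot take part in HybridIO, hence the check on the
		 * negotiated xfer_mode below.)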
2604 */ 2605 if (VSW_VER_GTEQ(ldcp, 1, 3) && 2606 (ldcp->lane_in.xfer_mode != VIO_DESC_MODE)) { 2607 (void) atomic_swap_32(&port->p_hio_capable, B_TRUE); 2608 } else { 2609 (void) atomic_swap_32(&port->p_hio_capable, B_FALSE); 2610 } 2611 2612 mutex_exit(&port->tx_lock); 2613 2614 attr_pkt->tag.vio_sid = ldcp->local_session; 2615 attr_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 2616 2617 DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt); 2618 2619 ldcp->lane_in.lstate |= VSW_ATTR_ACK_SENT; 2620 2621 (void) vsw_send_msg(ldcp, (void *)attr_pkt, 2622 sizeof (vnet_attr_msg_t), B_TRUE); 2623 2624 vsw_next_milestone(ldcp); 2625 break; 2626 2627 case VIO_SUBTYPE_ACK: 2628 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 2629 2630 if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_ACK_RECV)) 2631 return; 2632 2633 ldcp->lane_out.lstate |= VSW_ATTR_ACK_RECV; 2634 vsw_next_milestone(ldcp); 2635 break; 2636 2637 case VIO_SUBTYPE_NACK: 2638 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 2639 2640 if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_NACK_RECV)) 2641 return; 2642 2643 ldcp->lane_out.lstate |= VSW_ATTR_NACK_RECV; 2644 vsw_next_milestone(ldcp); 2645 break; 2646 2647 default: 2648 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 2649 attr_pkt->tag.vio_subtype); 2650 } 2651 2652 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 2653 } 2654 2655 /* 2656 * Process a dring info packet. We can end up here either because our peer 2657 * has ACK/NACK'ed back to an earlier DRING msg we had sent it, or our 2658 * peer has sent us a dring INFO message. 2659 * 2660 * If we get a valid/acceptable INFO packet (and we have already negotiated 2661 * a version) we ACK back and update the lane state, otherwise we NACK back. 2662 * 2663 * FUTURE: nothing to stop client from sending us info on multiple dring's 2664 * but for the moment we will just use the first one we are given. 2665 * 2666 */ 2667 void 2668 vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *ldcp, void *pkt) 2669 { 2670 vio_dring_reg_msg_t *dring_pkt; 2671 vsw_t *vswp = ldcp->ldc_vswp; 2672 ldc_mem_info_t minfo; 2673 dring_info_t *dp, *dbp; 2674 int dring_found = 0; 2675 2676 /* 2677 * We know this is a ctrl/dring packet so 2678 * cast it into the correct structure. 2679 */ 2680 dring_pkt = (vio_dring_reg_msg_t *)pkt; 2681 2682 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 2683 2684 switch (dring_pkt->tag.vio_subtype) { 2685 case VIO_SUBTYPE_INFO: 2686 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 2687 2688 if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV)) 2689 return; 2690 2691 /* 2692 * If the dring params are unacceptable then we NACK back. 2693 */ 2694 if (vsw_check_dring_info(dring_pkt)) { 2695 2696 DERR(vswp, "%s (%lld): invalid dring info", 2697 __func__, ldcp->ldc_id); 2698 2699 vsw_free_lane_resources(ldcp, INBOUND); 2700 2701 dring_pkt->tag.vio_sid = ldcp->local_session; 2702 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2703 2704 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 2705 2706 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 2707 2708 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 2709 sizeof (vio_dring_reg_msg_t), B_TRUE); 2710 2711 vsw_next_milestone(ldcp); 2712 return; 2713 } 2714 2715 /* 2716 * Otherwise, attempt to map in the dring using the 2717 * cookie. If that succeeds we send back a unique dring 2718 * identifier that the sending side will use in future 2719 * to refer to this descriptor ring. 
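		 *
		 * The identifier is simply a monotonically increasing counter
		 * (ldcp->next_ident), and the mapped ring is linked onto the
		 * end of the inbound lane's dring list before being ACK'd.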
2720 */ 2721 dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 2722 2723 dp->num_descriptors = dring_pkt->num_descriptors; 2724 dp->descriptor_size = dring_pkt->descriptor_size; 2725 dp->options = dring_pkt->options; 2726 dp->ncookies = dring_pkt->ncookies; 2727 2728 /* 2729 * Note: should only get one cookie. Enforced in 2730 * the ldc layer. 2731 */ 2732 bcopy(&dring_pkt->cookie[0], &dp->cookie[0], 2733 sizeof (ldc_mem_cookie_t)); 2734 2735 D2(vswp, "%s: num_desc %ld : desc_size %ld", __func__, 2736 dp->num_descriptors, dp->descriptor_size); 2737 D2(vswp, "%s: options 0x%lx: ncookies %ld", __func__, 2738 dp->options, dp->ncookies); 2739 2740 if ((ldc_mem_dring_map(ldcp->ldc_handle, &dp->cookie[0], 2741 dp->ncookies, dp->num_descriptors, dp->descriptor_size, 2742 LDC_DIRECT_MAP, &(dp->handle))) != 0) { 2743 2744 DERR(vswp, "%s: dring_map failed\n", __func__); 2745 2746 kmem_free(dp, sizeof (dring_info_t)); 2747 vsw_free_lane_resources(ldcp, INBOUND); 2748 2749 dring_pkt->tag.vio_sid = ldcp->local_session; 2750 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2751 2752 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 2753 2754 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 2755 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 2756 sizeof (vio_dring_reg_msg_t), B_TRUE); 2757 2758 vsw_next_milestone(ldcp); 2759 return; 2760 } 2761 2762 if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) { 2763 2764 DERR(vswp, "%s: dring_addr failed\n", __func__); 2765 2766 kmem_free(dp, sizeof (dring_info_t)); 2767 vsw_free_lane_resources(ldcp, INBOUND); 2768 2769 dring_pkt->tag.vio_sid = ldcp->local_session; 2770 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2771 2772 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 2773 2774 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 2775 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 2776 sizeof (vio_dring_reg_msg_t), B_TRUE); 2777 2778 vsw_next_milestone(ldcp); 2779 return; 2780 } else { 2781 /* store the address of the pub part of ring */ 2782 dp->pub_addr = minfo.vaddr; 2783 2784 /* cache the dring mtype */ 2785 dp->dring_mtype = minfo.mtype; 2786 } 2787 2788 /* no private section as we are importing */ 2789 dp->priv_addr = NULL; 2790 2791 /* 2792 * Using simple mono increasing int for ident at 2793 * the moment. 2794 */ 2795 dp->ident = ldcp->next_ident; 2796 ldcp->next_ident++; 2797 2798 dp->end_idx = 0; 2799 dp->next = NULL; 2800 2801 /* 2802 * Link it onto the end of the list of drings 2803 * for this lane. 2804 */ 2805 if (ldcp->lane_in.dringp == NULL) { 2806 D2(vswp, "%s: adding first INBOUND dring", __func__); 2807 ldcp->lane_in.dringp = dp; 2808 } else { 2809 dbp = ldcp->lane_in.dringp; 2810 2811 while (dbp->next != NULL) 2812 dbp = dbp->next; 2813 2814 dbp->next = dp; 2815 } 2816 2817 /* acknowledge it */ 2818 dring_pkt->tag.vio_sid = ldcp->local_session; 2819 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 2820 dring_pkt->dring_ident = dp->ident; 2821 2822 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 2823 sizeof (vio_dring_reg_msg_t), B_TRUE); 2824 2825 ldcp->lane_in.lstate |= VSW_DRING_ACK_SENT; 2826 vsw_next_milestone(ldcp); 2827 break; 2828 2829 case VIO_SUBTYPE_ACK: 2830 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 2831 2832 if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_ACK_RECV)) 2833 return; 2834 2835 /* 2836 * Peer is acknowledging our dring info and will have 2837 * sent us a dring identifier which we will use to 2838 * refer to this ring w.r.t. our peer. 
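		 *
		 * We search our outbound dring list for the ring that matches
		 * the info in this ACK (vsw_dring_match()); if no match is
		 * found the connection is restarted.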
2839 */ 2840 dp = ldcp->lane_out.dringp; 2841 if (dp != NULL) { 2842 /* 2843 * Find the ring this ident should be associated 2844 * with. 2845 */ 2846 if (vsw_dring_match(dp, dring_pkt)) { 2847 dring_found = 1; 2848 2849 } else while (dp != NULL) { 2850 if (vsw_dring_match(dp, dring_pkt)) { 2851 dring_found = 1; 2852 break; 2853 } 2854 dp = dp->next; 2855 } 2856 2857 if (dring_found == 0) { 2858 DERR(NULL, "%s: unrecognised ring cookie", 2859 __func__); 2860 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2861 return; 2862 } 2863 2864 } else { 2865 DERR(vswp, "%s: DRING ACK received but no drings " 2866 "allocated", __func__); 2867 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2868 return; 2869 } 2870 2871 /* store ident */ 2872 dp->ident = dring_pkt->dring_ident; 2873 ldcp->lane_out.lstate |= VSW_DRING_ACK_RECV; 2874 vsw_next_milestone(ldcp); 2875 break; 2876 2877 case VIO_SUBTYPE_NACK: 2878 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 2879 2880 if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_NACK_RECV)) 2881 return; 2882 2883 ldcp->lane_out.lstate |= VSW_DRING_NACK_RECV; 2884 vsw_next_milestone(ldcp); 2885 break; 2886 2887 default: 2888 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 2889 dring_pkt->tag.vio_subtype); 2890 } 2891 2892 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 2893 } 2894 2895 /* 2896 * Process a request from peer to unregister a dring. 2897 * 2898 * For the moment we just restart the handshake if our 2899 * peer endpoint attempts to unregister a dring. 2900 */ 2901 void 2902 vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *ldcp, void *pkt) 2903 { 2904 vsw_t *vswp = ldcp->ldc_vswp; 2905 vio_dring_unreg_msg_t *dring_pkt; 2906 2907 /* 2908 * We know this is a ctrl/dring packet so 2909 * cast it into the correct structure. 2910 */ 2911 dring_pkt = (vio_dring_unreg_msg_t *)pkt; 2912 2913 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 2914 2915 switch (dring_pkt->tag.vio_subtype) { 2916 case VIO_SUBTYPE_INFO: 2917 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 2918 2919 DWARN(vswp, "%s: restarting handshake..", __func__); 2920 break; 2921 2922 case VIO_SUBTYPE_ACK: 2923 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 2924 2925 DWARN(vswp, "%s: restarting handshake..", __func__); 2926 break; 2927 2928 case VIO_SUBTYPE_NACK: 2929 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 2930 2931 DWARN(vswp, "%s: restarting handshake..", __func__); 2932 break; 2933 2934 default: 2935 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 2936 dring_pkt->tag.vio_subtype); 2937 } 2938 2939 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2940 2941 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 2942 } 2943 2944 #define SND_MCST_NACK(ldcp, pkt) \ 2945 pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \ 2946 pkt->tag.vio_sid = ldcp->local_session; \ 2947 (void) vsw_send_msg(ldcp, (void *)pkt, \ 2948 sizeof (vnet_mcast_msg_t), B_TRUE); 2949 2950 /* 2951 * Process a multicast request from a vnet. 2952 * 2953 * Vnet's specify a multicast address that they are interested in. This 2954 * address is used as a key into the hash table which forms the multicast 2955 * forwarding database (mFDB). 2956 * 2957 * The table keys are the multicast addresses, while the table entries 2958 * are pointers to lists of ports which wish to receive packets for the 2959 * specified multicast address. 2960 * 2961 * When a multicast packet is being switched we use the address as a key 2962 * into the hash table, and then walk the appropriate port list forwarding 2963 * the pkt to each port in turn. 
2964 * 2965 * If a vnet is no longer interested in a particular multicast grouping 2966 * we simply find the correct location in the hash table and then delete 2967 * the relevant port from the port list. 2968 * 2969 * To deal with the case whereby a port is being deleted without first 2970 * removing itself from the lists in the hash table, we maintain a list 2971 * of multicast addresses the port has registered an interest in, within 2972 * the port structure itself. We then simply walk that list of addresses 2973 * using them as keys into the hash table and remove the port from the 2974 * appropriate lists. 2975 */ 2976 static void 2977 vsw_process_ctrl_mcst_pkt(vsw_ldc_t *ldcp, void *pkt) 2978 { 2979 vnet_mcast_msg_t *mcst_pkt; 2980 vsw_port_t *port = ldcp->ldc_port; 2981 vsw_t *vswp = ldcp->ldc_vswp; 2982 int i; 2983 2984 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 2985 2986 /* 2987 * We know this is a ctrl/mcast packet so 2988 * cast it into the correct structure. 2989 */ 2990 mcst_pkt = (vnet_mcast_msg_t *)pkt; 2991 2992 switch (mcst_pkt->tag.vio_subtype) { 2993 case VIO_SUBTYPE_INFO: 2994 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 2995 2996 /* 2997 * Check if in correct state to receive a multicast 2998 * message (i.e. handshake complete). If not reset 2999 * the handshake. 3000 */ 3001 if (vsw_check_flag(ldcp, INBOUND, VSW_MCST_INFO_RECV)) 3002 return; 3003 3004 /* 3005 * Before attempting to add or remove address check 3006 * that they are valid multicast addresses. 3007 * If not, then NACK back. 3008 */ 3009 for (i = 0; i < mcst_pkt->count; i++) { 3010 if ((mcst_pkt->mca[i].ether_addr_octet[0] & 01) != 1) { 3011 DERR(vswp, "%s: invalid multicast address", 3012 __func__); 3013 SND_MCST_NACK(ldcp, mcst_pkt); 3014 return; 3015 } 3016 } 3017 3018 /* 3019 * Now add/remove the addresses. If this fails we 3020 * NACK back. 3021 */ 3022 if (vsw_add_rem_mcst(mcst_pkt, port) != 0) { 3023 SND_MCST_NACK(ldcp, mcst_pkt); 3024 return; 3025 } 3026 3027 mcst_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3028 mcst_pkt->tag.vio_sid = ldcp->local_session; 3029 3030 DUMP_TAG_PTR((vio_msg_tag_t *)mcst_pkt); 3031 3032 (void) vsw_send_msg(ldcp, (void *)mcst_pkt, 3033 sizeof (vnet_mcast_msg_t), B_TRUE); 3034 break; 3035 3036 case VIO_SUBTYPE_ACK: 3037 DWARN(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3038 3039 /* 3040 * We shouldn't ever get a multicast ACK message as 3041 * at the moment we never request multicast addresses 3042 * to be set on some other device. This may change in 3043 * the future if we have cascading switches. 3044 */ 3045 if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_ACK_RECV)) 3046 return; 3047 3048 /* Do nothing */ 3049 break; 3050 3051 case VIO_SUBTYPE_NACK: 3052 DWARN(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3053 3054 /* 3055 * We shouldn't get a multicast NACK packet for the 3056 * same reasons as we shouldn't get a ACK packet. 3057 */ 3058 if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_NACK_RECV)) 3059 return; 3060 3061 /* Do nothing */ 3062 break; 3063 3064 default: 3065 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 3066 mcst_pkt->tag.vio_subtype); 3067 } 3068 3069 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3070 } 3071 3072 static void 3073 vsw_process_ctrl_rdx_pkt(vsw_ldc_t *ldcp, void *pkt) 3074 { 3075 vio_rdx_msg_t *rdx_pkt; 3076 vsw_t *vswp = ldcp->ldc_vswp; 3077 3078 /* 3079 * We know this is a ctrl/rdx packet so 3080 * cast it into the correct structure. 
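	 *
	 * An RDX INFO is ACK'd below and VSW_RDX_ACK_SENT recorded in the
	 * outbound lane state; RDX ACKs themselves never arrive here as they
	 * are handled in-band by vsw_dispatch_ctrl_task().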
3081 */ 3082 rdx_pkt = (vio_rdx_msg_t *)pkt; 3083 3084 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 3085 3086 switch (rdx_pkt->tag.vio_subtype) { 3087 case VIO_SUBTYPE_INFO: 3088 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3089 3090 if (vsw_check_flag(ldcp, OUTBOUND, VSW_RDX_INFO_RECV)) 3091 return; 3092 3093 rdx_pkt->tag.vio_sid = ldcp->local_session; 3094 rdx_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3095 3096 DUMP_TAG_PTR((vio_msg_tag_t *)rdx_pkt); 3097 3098 ldcp->lane_out.lstate |= VSW_RDX_ACK_SENT; 3099 3100 (void) vsw_send_msg(ldcp, (void *)rdx_pkt, 3101 sizeof (vio_rdx_msg_t), B_TRUE); 3102 3103 vsw_next_milestone(ldcp); 3104 break; 3105 3106 case VIO_SUBTYPE_ACK: 3107 /* 3108 * Should be handled in-band by callback handler. 3109 */ 3110 DERR(vswp, "%s: Unexpected VIO_SUBTYPE_ACK", __func__); 3111 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3112 break; 3113 3114 case VIO_SUBTYPE_NACK: 3115 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3116 3117 if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_NACK_RECV)) 3118 return; 3119 3120 ldcp->lane_in.lstate |= VSW_RDX_NACK_RECV; 3121 vsw_next_milestone(ldcp); 3122 break; 3123 3124 default: 3125 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 3126 rdx_pkt->tag.vio_subtype); 3127 } 3128 3129 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3130 } 3131 3132 static void 3133 vsw_process_data_pkt(vsw_ldc_t *ldcp, void *dpkt, vio_msg_tag_t *tagp, 3134 uint32_t msglen) 3135 { 3136 uint16_t env = tagp->vio_subtype_env; 3137 vsw_t *vswp = ldcp->ldc_vswp; 3138 3139 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3140 3141 /* session id check */ 3142 if (ldcp->session_status & VSW_PEER_SESSION) { 3143 if (ldcp->peer_session != tagp->vio_sid) { 3144 DERR(vswp, "%s (chan %d): invalid session id (%llx)", 3145 __func__, ldcp->ldc_id, tagp->vio_sid); 3146 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3147 return; 3148 } 3149 } 3150 3151 /* 3152 * It is an error for us to be getting data packets 3153 * before the handshake has completed. 3154 */ 3155 if (ldcp->hphase != VSW_MILESTONE4) { 3156 DERR(vswp, "%s: got data packet before handshake complete " 3157 "hphase %d (%x: %x)", __func__, ldcp->hphase, 3158 ldcp->lane_in.lstate, ldcp->lane_out.lstate); 3159 DUMP_FLAGS(ldcp->lane_in.lstate); 3160 DUMP_FLAGS(ldcp->lane_out.lstate); 3161 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3162 return; 3163 } 3164 3165 /* 3166 * To reduce the locking contention, release the 3167 * ldc_cblock here and re-acquire it once we are done 3168 * receiving packets. 3169 */ 3170 mutex_exit(&ldcp->ldc_cblock); 3171 mutex_enter(&ldcp->ldc_rxlock); 3172 3173 /* 3174 * Switch on vio_subtype envelope, then let lower routines 3175 * decide if its an INFO, ACK or NACK packet. 
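	 *
	 * VIO_DRING_DATA is handled by the dring routine, VIO_PKT_DATA
	 * (raw/priority frames) goes through the per-version rx_pktdata hook,
	 * and VIO_DESC_DATA is processed as an in-band descriptor.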
3176 */ 3177 if (env == VIO_DRING_DATA) { 3178 vsw_process_data_dring_pkt(ldcp, dpkt); 3179 } else if (env == VIO_PKT_DATA) { 3180 ldcp->rx_pktdata(ldcp, dpkt, msglen); 3181 } else if (env == VIO_DESC_DATA) { 3182 vsw_process_data_ibnd_pkt(ldcp, dpkt); 3183 } else { 3184 DERR(vswp, "%s: unknown vio_subtype_env (%x)\n", __func__, env); 3185 } 3186 3187 mutex_exit(&ldcp->ldc_rxlock); 3188 mutex_enter(&ldcp->ldc_cblock); 3189 3190 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3191 } 3192 3193 #define SND_DRING_NACK(ldcp, pkt) \ 3194 pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \ 3195 pkt->tag.vio_sid = ldcp->local_session; \ 3196 (void) vsw_send_msg(ldcp, (void *)pkt, \ 3197 sizeof (vio_dring_msg_t), B_TRUE); 3198 3199 static void 3200 vsw_process_data_dring_pkt(vsw_ldc_t *ldcp, void *dpkt) 3201 { 3202 vio_dring_msg_t *dring_pkt; 3203 vnet_public_desc_t desc, *pub_addr = NULL; 3204 vsw_private_desc_t *priv_addr = NULL; 3205 dring_info_t *dp = NULL; 3206 vsw_t *vswp = ldcp->ldc_vswp; 3207 mblk_t *mp = NULL; 3208 mblk_t *bp = NULL; 3209 mblk_t *bpt = NULL; 3210 size_t nbytes = 0; 3211 uint64_t chain = 0; 3212 uint64_t len; 3213 uint32_t pos, start; 3214 uint32_t range_start, range_end; 3215 int32_t end, num, cnt = 0; 3216 int i, rv, rng_rv = 0, msg_rv = 0; 3217 boolean_t prev_desc_ack = B_FALSE; 3218 int read_attempts = 0; 3219 struct ether_header *ehp; 3220 3221 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3222 3223 /* 3224 * We know this is a data/dring packet so 3225 * cast it into the correct structure. 3226 */ 3227 dring_pkt = (vio_dring_msg_t *)dpkt; 3228 3229 /* 3230 * Switch on the vio_subtype. If its INFO then we need to 3231 * process the data. If its an ACK we need to make sure 3232 * it makes sense (i.e did we send an earlier data/info), 3233 * and if its a NACK then we maybe attempt a retry. 3234 */ 3235 switch (dring_pkt->tag.vio_subtype) { 3236 case VIO_SUBTYPE_INFO: 3237 D2(vswp, "%s(%lld): VIO_SUBTYPE_INFO", __func__, ldcp->ldc_id); 3238 3239 READ_ENTER(&ldcp->lane_in.dlistrw); 3240 if ((dp = vsw_ident2dring(&ldcp->lane_in, 3241 dring_pkt->dring_ident)) == NULL) { 3242 RW_EXIT(&ldcp->lane_in.dlistrw); 3243 3244 DERR(vswp, "%s(%lld): unable to find dring from " 3245 "ident 0x%llx", __func__, ldcp->ldc_id, 3246 dring_pkt->dring_ident); 3247 3248 SND_DRING_NACK(ldcp, dring_pkt); 3249 return; 3250 } 3251 3252 start = pos = dring_pkt->start_idx; 3253 end = dring_pkt->end_idx; 3254 len = dp->num_descriptors; 3255 3256 range_start = range_end = pos; 3257 3258 D2(vswp, "%s(%lld): start index %ld : end %ld\n", 3259 __func__, ldcp->ldc_id, start, end); 3260 3261 if (end == -1) { 3262 num = -1; 3263 } else if (end >= 0) { 3264 num = end >= pos ? 
end - pos + 1: (len - pos + 1) + end; 3265 3266 /* basic sanity check */ 3267 if (end > len) { 3268 RW_EXIT(&ldcp->lane_in.dlistrw); 3269 DERR(vswp, "%s(%lld): endpoint %lld outside " 3270 "ring length %lld", __func__, 3271 ldcp->ldc_id, end, len); 3272 3273 SND_DRING_NACK(ldcp, dring_pkt); 3274 return; 3275 } 3276 } else { 3277 RW_EXIT(&ldcp->lane_in.dlistrw); 3278 DERR(vswp, "%s(%lld): invalid endpoint %lld", 3279 __func__, ldcp->ldc_id, end); 3280 SND_DRING_NACK(ldcp, dring_pkt); 3281 return; 3282 } 3283 3284 while (cnt != num) { 3285 vsw_recheck_desc: 3286 pub_addr = (vnet_public_desc_t *)dp->pub_addr + pos; 3287 3288 if ((rng_rv = vnet_dring_entry_copy(pub_addr, 3289 &desc, dp->dring_mtype, dp->handle, 3290 pos, pos)) != 0) { 3291 DERR(vswp, "%s(%lld): unable to copy " 3292 "descriptor at pos %d: err %d", 3293 __func__, pos, ldcp->ldc_id, rng_rv); 3294 ldcp->ldc_stats.ierrors++; 3295 break; 3296 } 3297 3298 /* 3299 * When given a bounded range of descriptors 3300 * to process, its an error to hit a descriptor 3301 * which is not ready. In the non-bounded case 3302 * (end_idx == -1) this simply indicates we have 3303 * reached the end of the current active range. 3304 */ 3305 if (desc.hdr.dstate != VIO_DESC_READY) { 3306 /* unbound - no error */ 3307 if (end == -1) { 3308 if (read_attempts == vsw_read_attempts) 3309 break; 3310 3311 delay(drv_usectohz(vsw_desc_delay)); 3312 read_attempts++; 3313 goto vsw_recheck_desc; 3314 } 3315 3316 /* bounded - error - so NACK back */ 3317 RW_EXIT(&ldcp->lane_in.dlistrw); 3318 DERR(vswp, "%s(%lld): descriptor not READY " 3319 "(%d)", __func__, ldcp->ldc_id, 3320 desc.hdr.dstate); 3321 SND_DRING_NACK(ldcp, dring_pkt); 3322 return; 3323 } 3324 3325 DTRACE_PROBE1(read_attempts, int, read_attempts); 3326 3327 range_end = pos; 3328 3329 /* 3330 * If we ACK'd the previous descriptor then now 3331 * record the new range start position for later 3332 * ACK's. 3333 */ 3334 if (prev_desc_ack) { 3335 range_start = pos; 3336 3337 D2(vswp, "%s(%lld): updating range start to be " 3338 "%d", __func__, ldcp->ldc_id, range_start); 3339 3340 prev_desc_ack = B_FALSE; 3341 } 3342 3343 D2(vswp, "%s(%lld): processing desc %lld at pos" 3344 " 0x%llx : dstate 0x%lx : datalen 0x%lx", 3345 __func__, ldcp->ldc_id, pos, &desc, 3346 desc.hdr.dstate, desc.nbytes); 3347 3348 /* 3349 * Ensure that we ask ldc for an aligned 3350 * number of bytes. Data is padded to align on 8 3351 * byte boundary, desc.nbytes is actual data length, 3352 * i.e. minus that padding. 3353 */ 3354 nbytes = (desc.nbytes + VNET_IPALIGN + 7) & ~7; 3355 3356 mp = vio_multipool_allocb(&ldcp->vmp, nbytes); 3357 if (mp == NULL) { 3358 ldcp->ldc_stats.rx_vio_allocb_fail++; 3359 /* 3360 * No free receive buffers available, so 3361 * fallback onto allocb(9F). Make sure that 3362 * we get a data buffer which is a multiple 3363 * of 8 as this is required by ldc_mem_copy. 
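				 * (The extra 8 bytes requested below leave
				 * room for the copy length, which was rounded
				 * up to a multiple of 8 above.)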
3364 */ 3365 DTRACE_PROBE(allocb); 3366 if ((mp = allocb(desc.nbytes + VNET_IPALIGN + 8, 3367 BPRI_MED)) == NULL) { 3368 DERR(vswp, "%s(%ld): allocb failed", 3369 __func__, ldcp->ldc_id); 3370 rng_rv = vnet_dring_entry_set_dstate( 3371 pub_addr, dp->dring_mtype, 3372 dp->handle, pos, pos, 3373 VIO_DESC_DONE); 3374 ldcp->ldc_stats.ierrors++; 3375 ldcp->ldc_stats.rx_allocb_fail++; 3376 break; 3377 } 3378 } 3379 3380 rv = ldc_mem_copy(ldcp->ldc_handle, 3381 (caddr_t)mp->b_rptr, 0, &nbytes, 3382 desc.memcookie, desc.ncookies, LDC_COPY_IN); 3383 if (rv != 0) { 3384 DERR(vswp, "%s(%d): unable to copy in data " 3385 "from %d cookies in desc %d (rv %d)", 3386 __func__, ldcp->ldc_id, desc.ncookies, 3387 pos, rv); 3388 freemsg(mp); 3389 3390 rng_rv = vnet_dring_entry_set_dstate(pub_addr, 3391 dp->dring_mtype, dp->handle, pos, pos, 3392 VIO_DESC_DONE); 3393 ldcp->ldc_stats.ierrors++; 3394 break; 3395 } else { 3396 D2(vswp, "%s(%d): copied in %ld bytes" 3397 " using %d cookies", __func__, 3398 ldcp->ldc_id, nbytes, desc.ncookies); 3399 } 3400 3401 /* adjust the read pointer to skip over the padding */ 3402 mp->b_rptr += VNET_IPALIGN; 3403 3404 /* point to the actual end of data */ 3405 mp->b_wptr = mp->b_rptr + desc.nbytes; 3406 3407 /* update statistics */ 3408 ehp = (struct ether_header *)mp->b_rptr; 3409 if (IS_BROADCAST(ehp)) 3410 ldcp->ldc_stats.brdcstrcv++; 3411 else if (IS_MULTICAST(ehp)) 3412 ldcp->ldc_stats.multircv++; 3413 3414 ldcp->ldc_stats.ipackets++; 3415 ldcp->ldc_stats.rbytes += desc.nbytes; 3416 3417 /* 3418 * IPALIGN space can be used for VLAN_TAG 3419 */ 3420 (void) vsw_vlan_frame_pretag(ldcp->ldc_port, 3421 VSW_VNETPORT, mp); 3422 3423 /* build a chain of received packets */ 3424 if (bp == NULL) { 3425 /* first pkt */ 3426 bp = mp; 3427 bp->b_next = bp->b_prev = NULL; 3428 bpt = bp; 3429 chain = 1; 3430 } else { 3431 mp->b_next = mp->b_prev = NULL; 3432 bpt->b_next = mp; 3433 bpt = mp; 3434 chain++; 3435 } 3436 3437 /* mark we are finished with this descriptor */ 3438 if ((rng_rv = vnet_dring_entry_set_dstate(pub_addr, 3439 dp->dring_mtype, dp->handle, pos, pos, 3440 VIO_DESC_DONE)) != 0) { 3441 DERR(vswp, "%s(%lld): unable to update " 3442 "dstate at pos %d: err %d", 3443 __func__, pos, ldcp->ldc_id, rng_rv); 3444 ldcp->ldc_stats.ierrors++; 3445 break; 3446 } 3447 3448 /* 3449 * Send an ACK back to peer if requested. 3450 */ 3451 if (desc.hdr.ack) { 3452 dring_pkt->start_idx = range_start; 3453 dring_pkt->end_idx = range_end; 3454 3455 DERR(vswp, "%s(%lld): processed %d %d, ACK" 3456 " requested", __func__, ldcp->ldc_id, 3457 dring_pkt->start_idx, dring_pkt->end_idx); 3458 3459 dring_pkt->dring_process_state = VIO_DP_ACTIVE; 3460 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3461 dring_pkt->tag.vio_sid = ldcp->local_session; 3462 3463 msg_rv = vsw_send_msg(ldcp, (void *)dring_pkt, 3464 sizeof (vio_dring_msg_t), B_FALSE); 3465 3466 /* 3467 * Check if ACK was successfully sent. If not 3468 * we break and deal with that below. 3469 */ 3470 if (msg_rv != 0) 3471 break; 3472 3473 prev_desc_ack = B_TRUE; 3474 range_start = pos; 3475 } 3476 3477 /* next descriptor */ 3478 pos = (pos + 1) % len; 3479 cnt++; 3480 3481 /* 3482 * Break out of loop here and stop processing to 3483 * allow some other network device (or disk) to 3484 * get access to the cpu. 
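			 *
			 * Any frames already chained are still switched
			 * below, and the STOPPED ACK sent at the end of this
			 * case tells the peer how far we got.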
3485 */ 3486 if (chain > vsw_chain_len) { 3487 D3(vswp, "%s(%lld): switching chain of %d " 3488 "msgs", __func__, ldcp->ldc_id, chain); 3489 break; 3490 } 3491 } 3492 RW_EXIT(&ldcp->lane_in.dlistrw); 3493 3494 /* send the chain of packets to be switched */ 3495 if (bp != NULL) { 3496 DTRACE_PROBE1(vsw_rcv_msgs, int, chain); 3497 D3(vswp, "%s(%lld): switching chain of %d msgs", 3498 __func__, ldcp->ldc_id, chain); 3499 vswp->vsw_switch_frame(vswp, bp, VSW_VNETPORT, 3500 ldcp->ldc_port, NULL); 3501 } 3502 3503 /* 3504 * If when we encountered an error when attempting to 3505 * access an imported dring, initiate a connection reset. 3506 */ 3507 if (rng_rv != 0) { 3508 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3509 break; 3510 } 3511 3512 /* 3513 * If when we attempted to send the ACK we found that the 3514 * channel had been reset then now handle this. We deal with 3515 * it here as we cannot reset the channel while holding the 3516 * dlistrw lock, and we don't want to acquire/release it 3517 * continuously in the above loop, as a channel reset should 3518 * be a rare event. 3519 */ 3520 if (msg_rv == ECONNRESET) { 3521 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 3522 break; 3523 } 3524 3525 DTRACE_PROBE1(msg_cnt, int, cnt); 3526 3527 /* 3528 * We are now finished so ACK back with the state 3529 * set to STOPPING so our peer knows we are finished 3530 */ 3531 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3532 dring_pkt->tag.vio_sid = ldcp->local_session; 3533 3534 dring_pkt->dring_process_state = VIO_DP_STOPPED; 3535 3536 DTRACE_PROBE(stop_process_sent); 3537 3538 /* 3539 * We have not processed any more descriptors beyond 3540 * the last one we ACK'd. 3541 */ 3542 if (prev_desc_ack) 3543 range_start = range_end; 3544 3545 dring_pkt->start_idx = range_start; 3546 dring_pkt->end_idx = range_end; 3547 3548 D2(vswp, "%s(%lld) processed : %d : %d, now stopping", 3549 __func__, ldcp->ldc_id, dring_pkt->start_idx, 3550 dring_pkt->end_idx); 3551 3552 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 3553 sizeof (vio_dring_msg_t), B_TRUE); 3554 break; 3555 3556 case VIO_SUBTYPE_ACK: 3557 D2(vswp, "%s(%lld): VIO_SUBTYPE_ACK", __func__, ldcp->ldc_id); 3558 /* 3559 * Verify that the relevant descriptors are all 3560 * marked as DONE 3561 */ 3562 READ_ENTER(&ldcp->lane_out.dlistrw); 3563 if ((dp = vsw_ident2dring(&ldcp->lane_out, 3564 dring_pkt->dring_ident)) == NULL) { 3565 RW_EXIT(&ldcp->lane_out.dlistrw); 3566 DERR(vswp, "%s: unknown ident in ACK", __func__); 3567 return; 3568 } 3569 3570 start = end = 0; 3571 start = dring_pkt->start_idx; 3572 end = dring_pkt->end_idx; 3573 len = dp->num_descriptors; 3574 3575 3576 mutex_enter(&dp->dlock); 3577 dp->last_ack_recv = end; 3578 ldcp->ldc_stats.dring_data_acks++; 3579 mutex_exit(&dp->dlock); 3580 3581 (void) vsw_reclaim_dring(dp, start); 3582 3583 /* 3584 * If our peer is stopping processing descriptors then 3585 * we check to make sure it has processed all the descriptors 3586 * we have updated. If not then we send it a new message 3587 * to prompt it to restart. 3588 */ 3589 if (dring_pkt->dring_process_state == VIO_DP_STOPPED) { 3590 DTRACE_PROBE(stop_process_recv); 3591 D2(vswp, "%s(%lld): got stopping msg : %d : %d", 3592 __func__, ldcp->ldc_id, dring_pkt->start_idx, 3593 dring_pkt->end_idx); 3594 3595 /* 3596 * Check next descriptor in public section of ring. 3597 * If its marked as READY then we need to prompt our 3598 * peer to start processing the ring again. 
3599 */ 3600 i = (end + 1) % len; 3601 pub_addr = (vnet_public_desc_t *)dp->pub_addr + i; 3602 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i; 3603 3604 /* 3605 * Hold the restart lock across all of this to 3606 * make sure that its not possible for us to 3607 * decide that a msg needs to be sent in the future 3608 * but the sending code having already checked is 3609 * about to exit. 3610 */ 3611 mutex_enter(&dp->restart_lock); 3612 ldcp->ldc_stats.dring_stopped_acks++; 3613 mutex_enter(&priv_addr->dstate_lock); 3614 if (pub_addr->hdr.dstate == VIO_DESC_READY) { 3615 3616 mutex_exit(&priv_addr->dstate_lock); 3617 3618 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO; 3619 dring_pkt->tag.vio_sid = ldcp->local_session; 3620 3621 dring_pkt->start_idx = (end + 1) % len; 3622 dring_pkt->end_idx = -1; 3623 3624 D2(vswp, "%s(%lld) : sending restart msg:" 3625 " %d : %d", __func__, ldcp->ldc_id, 3626 dring_pkt->start_idx, dring_pkt->end_idx); 3627 3628 msg_rv = vsw_send_msg(ldcp, (void *)dring_pkt, 3629 sizeof (vio_dring_msg_t), B_FALSE); 3630 ldcp->ldc_stats.dring_data_msgs++; 3631 3632 } else { 3633 mutex_exit(&priv_addr->dstate_lock); 3634 dp->restart_reqd = B_TRUE; 3635 } 3636 mutex_exit(&dp->restart_lock); 3637 } 3638 RW_EXIT(&ldcp->lane_out.dlistrw); 3639 3640 /* only do channel reset after dropping dlistrw lock */ 3641 if (msg_rv == ECONNRESET) 3642 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 3643 3644 break; 3645 3646 case VIO_SUBTYPE_NACK: 3647 DWARN(vswp, "%s(%lld): VIO_SUBTYPE_NACK", 3648 __func__, ldcp->ldc_id); 3649 /* 3650 * Something is badly wrong if we are getting NACK's 3651 * for our data pkts. So reset the channel. 3652 */ 3653 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3654 3655 break; 3656 3657 default: 3658 DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__, 3659 ldcp->ldc_id, dring_pkt->tag.vio_subtype); 3660 } 3661 3662 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 3663 } 3664 3665 /* 3666 * dummy pkt data handler function for vnet protocol version 1.0 3667 */ 3668 static void 3669 vsw_process_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen) 3670 { 3671 _NOTE(ARGUNUSED(arg1, arg2, msglen)) 3672 } 3673 3674 /* 3675 * This function handles raw pkt data messages received over the channel. 3676 * Currently, only priority-eth-type frames are received through this mechanism. 3677 * In this case, the frame(data) is present within the message itself which 3678 * is copied into an mblk before switching it. 
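 *
 * This handler is only installed by vsw_set_vnet_proto_ops() when one or
 * more priority ethertypes are configured in the MD; otherwise the nop
 * handler above is used and such messages are ignored.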
3679 */ 3680 static void 3681 vsw_process_pkt_data(void *arg1, void *arg2, uint32_t msglen) 3682 { 3683 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg1; 3684 vio_raw_data_msg_t *dpkt = (vio_raw_data_msg_t *)arg2; 3685 uint32_t size; 3686 mblk_t *mp; 3687 vsw_t *vswp = ldcp->ldc_vswp; 3688 vgen_stats_t *statsp = &ldcp->ldc_stats; 3689 lane_t *lp = &ldcp->lane_out; 3690 3691 size = msglen - VIO_PKT_DATA_HDRSIZE; 3692 if (size < ETHERMIN || size > lp->mtu) { 3693 (void) atomic_inc_32(&statsp->rx_pri_fail); 3694 DWARN(vswp, "%s(%lld) invalid size(%d)\n", __func__, 3695 ldcp->ldc_id, size); 3696 return; 3697 } 3698 3699 mp = vio_multipool_allocb(&ldcp->vmp, size + VLAN_TAGSZ); 3700 if (mp == NULL) { 3701 mp = allocb(size + VLAN_TAGSZ, BPRI_MED); 3702 if (mp == NULL) { 3703 (void) atomic_inc_32(&statsp->rx_pri_fail); 3704 DWARN(vswp, "%s(%lld) allocb failure, " 3705 "unable to process priority frame\n", __func__, 3706 ldcp->ldc_id); 3707 return; 3708 } 3709 } 3710 3711 /* skip over the extra space for vlan tag */ 3712 mp->b_rptr += VLAN_TAGSZ; 3713 3714 /* copy the frame from the payload of raw data msg into the mblk */ 3715 bcopy(dpkt->data, mp->b_rptr, size); 3716 mp->b_wptr = mp->b_rptr + size; 3717 3718 /* update stats */ 3719 (void) atomic_inc_64(&statsp->rx_pri_packets); 3720 (void) atomic_add_64(&statsp->rx_pri_bytes, size); 3721 3722 /* 3723 * VLAN_TAGSZ of extra space has been pre-alloc'd if tag is needed. 3724 */ 3725 (void) vsw_vlan_frame_pretag(ldcp->ldc_port, VSW_VNETPORT, mp); 3726 3727 /* switch the frame to destination */ 3728 vswp->vsw_switch_frame(vswp, mp, VSW_VNETPORT, ldcp->ldc_port, NULL); 3729 } 3730 3731 /* 3732 * Process an in-band descriptor message (most likely from 3733 * OBP). 3734 */ 3735 static void 3736 vsw_process_data_ibnd_pkt(vsw_ldc_t *ldcp, void *pkt) 3737 { 3738 vnet_ibnd_desc_t *ibnd_desc; 3739 dring_info_t *dp = NULL; 3740 vsw_private_desc_t *priv_addr = NULL; 3741 vsw_t *vswp = ldcp->ldc_vswp; 3742 mblk_t *mp = NULL; 3743 size_t nbytes = 0; 3744 size_t off = 0; 3745 uint64_t idx = 0; 3746 uint32_t num = 1, len, datalen = 0; 3747 uint64_t ncookies = 0; 3748 int i, rv; 3749 int j = 0; 3750 3751 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3752 3753 ibnd_desc = (vnet_ibnd_desc_t *)pkt; 3754 3755 switch (ibnd_desc->hdr.tag.vio_subtype) { 3756 case VIO_SUBTYPE_INFO: 3757 D1(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3758 3759 if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV)) 3760 return; 3761 3762 /* 3763 * Data is padded to align on a 8 byte boundary, 3764 * nbytes is actual data length, i.e. minus that 3765 * padding. 3766 */ 3767 datalen = ibnd_desc->nbytes; 3768 3769 D2(vswp, "%s(%lld): processing inband desc : " 3770 ": datalen 0x%lx", __func__, ldcp->ldc_id, datalen); 3771 3772 ncookies = ibnd_desc->ncookies; 3773 3774 /* 3775 * allocb(9F) returns an aligned data block. We 3776 * need to ensure that we ask ldc for an aligned 3777 * number of bytes also. 
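		 * nbytes is therefore rounded up to the next multiple of 8
		 * below before being handed to ldc_mem_copy().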
3778 */ 3779 nbytes = datalen; 3780 if (nbytes & 0x7) { 3781 off = 8 - (nbytes & 0x7); 3782 nbytes += off; 3783 } 3784 3785 /* alloc extra space for VLAN_TAG */ 3786 mp = allocb(datalen + 8, BPRI_MED); 3787 if (mp == NULL) { 3788 DERR(vswp, "%s(%lld): allocb failed", 3789 __func__, ldcp->ldc_id); 3790 ldcp->ldc_stats.rx_allocb_fail++; 3791 return; 3792 } 3793 3794 /* skip over the extra space for VLAN_TAG */ 3795 mp->b_rptr += 8; 3796 3797 rv = ldc_mem_copy(ldcp->ldc_handle, (caddr_t)mp->b_rptr, 3798 0, &nbytes, ibnd_desc->memcookie, (uint64_t)ncookies, 3799 LDC_COPY_IN); 3800 3801 if (rv != 0) { 3802 DERR(vswp, "%s(%d): unable to copy in data from " 3803 "%d cookie(s)", __func__, ldcp->ldc_id, ncookies); 3804 freemsg(mp); 3805 ldcp->ldc_stats.ierrors++; 3806 return; 3807 } 3808 3809 D2(vswp, "%s(%d): copied in %ld bytes using %d cookies", 3810 __func__, ldcp->ldc_id, nbytes, ncookies); 3811 3812 /* point to the actual end of data */ 3813 mp->b_wptr = mp->b_rptr + datalen; 3814 ldcp->ldc_stats.ipackets++; 3815 ldcp->ldc_stats.rbytes += datalen; 3816 3817 /* 3818 * We ACK back every in-band descriptor message we process 3819 */ 3820 ibnd_desc->hdr.tag.vio_subtype = VIO_SUBTYPE_ACK; 3821 ibnd_desc->hdr.tag.vio_sid = ldcp->local_session; 3822 (void) vsw_send_msg(ldcp, (void *)ibnd_desc, 3823 sizeof (vnet_ibnd_desc_t), B_TRUE); 3824 3825 /* 3826 * there is extra space alloc'd for VLAN_TAG 3827 */ 3828 (void) vsw_vlan_frame_pretag(ldcp->ldc_port, VSW_VNETPORT, mp); 3829 3830 /* send the packet to be switched */ 3831 vswp->vsw_switch_frame(vswp, mp, VSW_VNETPORT, 3832 ldcp->ldc_port, NULL); 3833 3834 break; 3835 3836 case VIO_SUBTYPE_ACK: 3837 D1(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3838 3839 /* Verify the ACK is valid */ 3840 idx = ibnd_desc->hdr.desc_handle; 3841 3842 if (idx >= vsw_ntxds) { 3843 cmn_err(CE_WARN, "!vsw%d: corrupted ACK received " 3844 "(idx %ld)", vswp->instance, idx); 3845 return; 3846 } 3847 3848 if ((dp = ldcp->lane_out.dringp) == NULL) { 3849 DERR(vswp, "%s: no dring found", __func__); 3850 return; 3851 } 3852 3853 len = dp->num_descriptors; 3854 /* 3855 * If the descriptor we are being ACK'ed for is not the 3856 * one we expected, then pkts were lost somwhere, either 3857 * when we tried to send a msg, or a previous ACK msg from 3858 * our peer. In either case we now reclaim the descriptors 3859 * in the range from the last ACK we received up to the 3860 * current ACK. 3861 */ 3862 if (idx != dp->last_ack_recv) { 3863 DWARN(vswp, "%s: dropped pkts detected, (%ld, %ld)", 3864 __func__, dp->last_ack_recv, idx); 3865 num = idx >= dp->last_ack_recv ? 3866 idx - dp->last_ack_recv + 1: 3867 (len - dp->last_ack_recv + 1) + idx; 3868 } 3869 3870 /* 3871 * When we sent the in-band message to our peer we 3872 * marked the copy in our private ring as READY. We now 3873 * check that the descriptor we are being ACK'ed for is in 3874 * fact READY, i.e. it is one we have shared with our peer. 3875 * 3876 * If its not we flag an error, but still reset the descr 3877 * back to FREE. 
3878 */ 3879 for (i = dp->last_ack_recv; j < num; i = (i + 1) % len, j++) { 3880 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i; 3881 mutex_enter(&priv_addr->dstate_lock); 3882 if (priv_addr->dstate != VIO_DESC_READY) { 3883 DERR(vswp, "%s: (%ld) desc at index %ld not " 3884 "READY (0x%lx)", __func__, 3885 ldcp->ldc_id, idx, priv_addr->dstate); 3886 DERR(vswp, "%s: bound %d: ncookies %ld : " 3887 "datalen %ld", __func__, 3888 priv_addr->bound, priv_addr->ncookies, 3889 priv_addr->datalen); 3890 } 3891 D2(vswp, "%s: (%lld) freeing descp at %lld", __func__, 3892 ldcp->ldc_id, idx); 3893 /* release resources associated with sent msg */ 3894 priv_addr->datalen = 0; 3895 priv_addr->dstate = VIO_DESC_FREE; 3896 mutex_exit(&priv_addr->dstate_lock); 3897 } 3898 /* update to next expected value */ 3899 dp->last_ack_recv = (idx + 1) % dp->num_descriptors; 3900 3901 break; 3902 3903 case VIO_SUBTYPE_NACK: 3904 DERR(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3905 3906 /* 3907 * We should only get a NACK if our peer doesn't like 3908 * something about a message we have sent it. If this 3909 * happens we just release the resources associated with 3910 * the message. (We are relying on higher layers to decide 3911 * whether or not to resend. 3912 */ 3913 3914 /* limit check */ 3915 idx = ibnd_desc->hdr.desc_handle; 3916 3917 if (idx >= vsw_ntxds) { 3918 DERR(vswp, "%s: corrupted NACK received (idx %lld)", 3919 __func__, idx); 3920 return; 3921 } 3922 3923 if ((dp = ldcp->lane_out.dringp) == NULL) { 3924 DERR(vswp, "%s: no dring found", __func__); 3925 return; 3926 } 3927 3928 priv_addr = (vsw_private_desc_t *)dp->priv_addr; 3929 3930 /* move to correct location in ring */ 3931 priv_addr += idx; 3932 3933 /* release resources associated with sent msg */ 3934 mutex_enter(&priv_addr->dstate_lock); 3935 priv_addr->datalen = 0; 3936 priv_addr->dstate = VIO_DESC_FREE; 3937 mutex_exit(&priv_addr->dstate_lock); 3938 3939 break; 3940 3941 default: 3942 DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__, 3943 ldcp->ldc_id, ibnd_desc->hdr.tag.vio_subtype); 3944 } 3945 3946 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 3947 } 3948 3949 static void 3950 vsw_process_err_pkt(vsw_ldc_t *ldcp, void *epkt, vio_msg_tag_t *tagp) 3951 { 3952 _NOTE(ARGUNUSED(epkt)) 3953 3954 vsw_t *vswp = ldcp->ldc_vswp; 3955 uint16_t env = tagp->vio_subtype_env; 3956 3957 D1(vswp, "%s (%lld): enter\n", __func__, ldcp->ldc_id); 3958 3959 /* 3960 * Error vio_subtypes have yet to be defined. So for 3961 * the moment we can't do anything. 3962 */ 3963 D2(vswp, "%s: (%x) vio_subtype env", __func__, env); 3964 3965 D1(vswp, "%s (%lld): exit\n", __func__, ldcp->ldc_id); 3966 } 3967 3968 /* transmit the packet over the given port */ 3969 int 3970 vsw_portsend(vsw_port_t *port, mblk_t *mp, mblk_t *mpt, uint32_t count) 3971 { 3972 vsw_ldc_list_t *ldcl = &port->p_ldclist; 3973 vsw_ldc_t *ldcp; 3974 int status = 0; 3975 uint32_t n; 3976 3977 READ_ENTER(&ldcl->lockrw); 3978 /* 3979 * Note for now, we have a single channel. 
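	 *
	 * The chain is first run through vsw_vlan_frame_untag() and the frame
	 * count adjusted; if nothing remains there is nothing to send,
	 * otherwise the remaining chain is passed to the channel's tx routine
	 * (ldcp->tx).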
3980 */ 3981 ldcp = ldcl->head; 3982 if (ldcp == NULL) { 3983 DERR(port->p_vswp, "vsw_portsend: no ldc: dropping packet\n"); 3984 freemsgchain(mp); 3985 RW_EXIT(&ldcl->lockrw); 3986 return (1); 3987 } 3988 3989 n = vsw_vlan_frame_untag(port, VSW_VNETPORT, &mp, &mpt); 3990 3991 count -= n; 3992 if (count == 0) { 3993 goto vsw_portsend_exit; 3994 } 3995 3996 status = ldcp->tx(ldcp, mp, mpt, count); 3997 3998 vsw_portsend_exit: 3999 RW_EXIT(&ldcl->lockrw); 4000 4001 return (status); 4002 } 4003 4004 /* 4005 * Break up frames into 2 seperate chains: normal and 4006 * priority, based on the frame type. The number of 4007 * priority frames is also counted and returned. 4008 * 4009 * Params: 4010 * vswp: pointer to the instance of vsw 4011 * np: head of packet chain to be broken 4012 * npt: tail of packet chain to be broken 4013 * 4014 * Returns: 4015 * np: head of normal data packets 4016 * npt: tail of normal data packets 4017 * hp: head of high priority packets 4018 * hpt: tail of high priority packets 4019 */ 4020 static uint32_t 4021 vsw_get_pri_packets(vsw_t *vswp, mblk_t **np, mblk_t **npt, 4022 mblk_t **hp, mblk_t **hpt) 4023 { 4024 mblk_t *tmp = NULL; 4025 mblk_t *smp = NULL; 4026 mblk_t *hmp = NULL; /* high prio pkts head */ 4027 mblk_t *hmpt = NULL; /* high prio pkts tail */ 4028 mblk_t *nmp = NULL; /* normal pkts head */ 4029 mblk_t *nmpt = NULL; /* normal pkts tail */ 4030 uint32_t count = 0; 4031 int i; 4032 struct ether_header *ehp; 4033 uint32_t num_types; 4034 uint16_t *types; 4035 4036 tmp = *np; 4037 while (tmp != NULL) { 4038 4039 smp = tmp; 4040 tmp = tmp->b_next; 4041 smp->b_next = NULL; 4042 smp->b_prev = NULL; 4043 4044 ehp = (struct ether_header *)smp->b_rptr; 4045 num_types = vswp->pri_num_types; 4046 types = vswp->pri_types; 4047 for (i = 0; i < num_types; i++) { 4048 if (ehp->ether_type == types[i]) { 4049 /* high priority frame */ 4050 4051 if (hmp != NULL) { 4052 hmpt->b_next = smp; 4053 hmpt = smp; 4054 } else { 4055 hmp = hmpt = smp; 4056 } 4057 count++; 4058 break; 4059 } 4060 } 4061 if (i == num_types) { 4062 /* normal data frame */ 4063 4064 if (nmp != NULL) { 4065 nmpt->b_next = smp; 4066 nmpt = smp; 4067 } else { 4068 nmp = nmpt = smp; 4069 } 4070 } 4071 } 4072 4073 *hp = hmp; 4074 *hpt = hmpt; 4075 *np = nmp; 4076 *npt = nmpt; 4077 4078 return (count); 4079 } 4080 4081 /* 4082 * Wrapper function to transmit normal and/or priority frames over the channel. 4083 */ 4084 static int 4085 vsw_ldctx_pri(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count) 4086 { 4087 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 4088 mblk_t *tmp; 4089 mblk_t *smp; 4090 mblk_t *hmp; /* high prio pkts head */ 4091 mblk_t *hmpt; /* high prio pkts tail */ 4092 mblk_t *nmp; /* normal pkts head */ 4093 mblk_t *nmpt; /* normal pkts tail */ 4094 uint32_t n = 0; 4095 vsw_t *vswp = ldcp->ldc_vswp; 4096 4097 ASSERT(VSW_PRI_ETH_DEFINED(vswp)); 4098 ASSERT(count != 0); 4099 4100 nmp = mp; 4101 nmpt = mpt; 4102 4103 /* gather any priority frames from the chain of packets */ 4104 n = vsw_get_pri_packets(vswp, &nmp, &nmpt, &hmp, &hmpt); 4105 4106 /* transmit priority frames */ 4107 tmp = hmp; 4108 while (tmp != NULL) { 4109 smp = tmp; 4110 tmp = tmp->b_next; 4111 smp->b_next = NULL; 4112 vsw_ldcsend_pkt(ldcp, smp); 4113 } 4114 4115 count -= n; 4116 4117 if (count == 0) { 4118 /* no normal data frames to process */ 4119 return (0); 4120 } 4121 4122 return (vsw_ldctx(ldcp, nmp, nmpt, count)); 4123 } 4124 4125 /* 4126 * Wrapper function to transmit normal frames over the channel. 
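 *
 * When a dedicated TX thread exists for the channel, frames are
 * queued on the tx_mhead/tx_mtail chain under tx_thr_lock (and are
 * dropped, with tx_qfull accounting, once vsw_max_tx_qcount is
 * reached) and the thread is woken via tx_thr_cv;
 * vsw_ldc_tx_worker() further below drains that chain and calls
 * vsw_ldcsend() on each mblk. Without a TX thread the frames are
 * transmitted inline, one mblk at a time.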
4127 */ 4128 static int 4129 vsw_ldctx(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count) 4130 { 4131 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 4132 mblk_t *tmp = NULL; 4133 4134 ASSERT(count != 0); 4135 /* 4136 * If the TX thread is enabled, then queue the 4137 * ordinary frames and signal the tx thread. 4138 */ 4139 if (ldcp->tx_thread != NULL) { 4140 4141 mutex_enter(&ldcp->tx_thr_lock); 4142 4143 if ((ldcp->tx_cnt + count) >= vsw_max_tx_qcount) { 4144 /* 4145 * If we reached queue limit, 4146 * do not queue new packets, 4147 * drop them. 4148 */ 4149 ldcp->ldc_stats.tx_qfull += count; 4150 mutex_exit(&ldcp->tx_thr_lock); 4151 freemsgchain(mp); 4152 goto exit; 4153 } 4154 if (ldcp->tx_mhead == NULL) { 4155 ldcp->tx_mhead = mp; 4156 ldcp->tx_mtail = mpt; 4157 cv_signal(&ldcp->tx_thr_cv); 4158 } else { 4159 ldcp->tx_mtail->b_next = mp; 4160 ldcp->tx_mtail = mpt; 4161 } 4162 ldcp->tx_cnt += count; 4163 mutex_exit(&ldcp->tx_thr_lock); 4164 } else { 4165 while (mp != NULL) { 4166 tmp = mp->b_next; 4167 mp->b_next = mp->b_prev = NULL; 4168 (void) vsw_ldcsend(ldcp, mp, 1); 4169 mp = tmp; 4170 } 4171 } 4172 4173 exit: 4174 return (0); 4175 } 4176 4177 /* 4178 * This function transmits the frame in the payload of a raw data 4179 * (VIO_PKT_DATA) message. Thus, it provides an Out-Of-Band path to 4180 * send special frames with high priorities, without going through 4181 * the normal data path which uses descriptor ring mechanism. 4182 */ 4183 static void 4184 vsw_ldcsend_pkt(vsw_ldc_t *ldcp, mblk_t *mp) 4185 { 4186 vio_raw_data_msg_t *pkt; 4187 mblk_t *bp; 4188 mblk_t *nmp = NULL; 4189 caddr_t dst; 4190 uint32_t mblksz; 4191 uint32_t size; 4192 uint32_t nbytes; 4193 int rv; 4194 vsw_t *vswp = ldcp->ldc_vswp; 4195 vgen_stats_t *statsp = &ldcp->ldc_stats; 4196 4197 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 4198 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 4199 (void) atomic_inc_32(&statsp->tx_pri_fail); 4200 DWARN(vswp, "%s(%lld) status(%d) lstate(0x%llx), dropping " 4201 "packet\n", __func__, ldcp->ldc_id, ldcp->ldc_status, 4202 ldcp->lane_out.lstate); 4203 goto send_pkt_exit; 4204 } 4205 4206 size = msgsize(mp); 4207 4208 /* frame size bigger than available payload len of raw data msg ? 
*/ 4209 if (size > (size_t)(ldcp->msglen - VIO_PKT_DATA_HDRSIZE)) { 4210 (void) atomic_inc_32(&statsp->tx_pri_fail); 4211 DWARN(vswp, "%s(%lld) invalid size(%d)\n", __func__, 4212 ldcp->ldc_id, size); 4213 goto send_pkt_exit; 4214 } 4215 4216 if (size < ETHERMIN) 4217 size = ETHERMIN; 4218 4219 /* alloc space for a raw data message */ 4220 nmp = vio_allocb(vswp->pri_tx_vmp); 4221 if (nmp == NULL) { 4222 (void) atomic_inc_32(&statsp->tx_pri_fail); 4223 DWARN(vswp, "vio_allocb failed\n"); 4224 goto send_pkt_exit; 4225 } 4226 pkt = (vio_raw_data_msg_t *)nmp->b_rptr; 4227 4228 /* copy frame into the payload of raw data message */ 4229 dst = (caddr_t)pkt->data; 4230 for (bp = mp; bp != NULL; bp = bp->b_cont) { 4231 mblksz = MBLKL(bp); 4232 bcopy(bp->b_rptr, dst, mblksz); 4233 dst += mblksz; 4234 } 4235 4236 /* setup the raw data msg */ 4237 pkt->tag.vio_msgtype = VIO_TYPE_DATA; 4238 pkt->tag.vio_subtype = VIO_SUBTYPE_INFO; 4239 pkt->tag.vio_subtype_env = VIO_PKT_DATA; 4240 pkt->tag.vio_sid = ldcp->local_session; 4241 nbytes = VIO_PKT_DATA_HDRSIZE + size; 4242 4243 /* send the msg over ldc */ 4244 rv = vsw_send_msg(ldcp, (void *)pkt, nbytes, B_TRUE); 4245 if (rv != 0) { 4246 (void) atomic_inc_32(&statsp->tx_pri_fail); 4247 DWARN(vswp, "%s(%lld) Error sending priority frame\n", __func__, 4248 ldcp->ldc_id); 4249 goto send_pkt_exit; 4250 } 4251 4252 /* update stats */ 4253 (void) atomic_inc_64(&statsp->tx_pri_packets); 4254 (void) atomic_add_64(&statsp->tx_pri_bytes, size); 4255 4256 send_pkt_exit: 4257 if (nmp != NULL) 4258 freemsg(nmp); 4259 freemsg(mp); 4260 } 4261 4262 /* 4263 * Transmit the packet over the given LDC channel. 4264 * 4265 * The 'retries' argument indicates how many times a packet 4266 * is retried before it is dropped. Note, the retry is done 4267 * only for a resource-related failure; for all other failures 4268 * the packet is dropped immediately. 4269 */ 4270 static int 4271 vsw_ldcsend(vsw_ldc_t *ldcp, mblk_t *mp, uint32_t retries) 4272 { 4273 int i; 4274 int rc; 4275 int status = 0; 4276 vsw_port_t *port = ldcp->ldc_port; 4277 dring_info_t *dp = NULL; 4278 4279 4280 for (i = 0; i < retries; ) { 4281 /* 4282 * Send the message out using the appropriate 4283 * transmit function, which will free the mblk when it 4284 * is finished with it. 4285 */ 4286 mutex_enter(&port->tx_lock); 4287 if (port->transmit != NULL) { 4288 status = (*port->transmit)(ldcp, mp); 4289 } 4290 if (status == LDC_TX_SUCCESS) { 4291 mutex_exit(&port->tx_lock); 4292 break; 4293 } 4294 i++; /* increment the counter here */ 4295 4296 /* If it's the last retry, then update the oerrors count */ 4297 if ((i == retries) && (status == LDC_TX_NORESOURCES)) { 4298 ldcp->ldc_stats.oerrors++; 4299 } 4300 mutex_exit(&port->tx_lock); 4301 4302 if (status != LDC_TX_NORESOURCES) { 4303 /* 4304 * No retry is required for errors unrelated 4305 * to resources. 4306 */ 4307 break; 4308 } 4309 READ_ENTER(&ldcp->lane_out.dlistrw); 4310 if (((dp = ldcp->lane_out.dringp) != NULL) && 4311 ((VSW_VER_GTEQ(ldcp, 1, 2) && 4312 (ldcp->lane_out.xfer_mode & VIO_DRING_MODE_V1_2)) || 4313 ((VSW_VER_LT(ldcp, 1, 2) && 4314 (ldcp->lane_out.xfer_mode == VIO_DRING_MODE_V1_0))))) { 4315 rc = vsw_reclaim_dring(dp, dp->end_idx); 4316 } else { 4317 /* 4318 * If there is no dring or the xfer_mode is 4319 * set to DESC_MODE (i.e., OBP), then simply break here. 4320 */ 4321 RW_EXIT(&ldcp->lane_out.dlistrw); 4322 break; 4323 } 4324 RW_EXIT(&ldcp->lane_out.dlistrw); 4325 4326 /* 4327 * Delay only if none were reclaimed 4328 * and it's not the last retry.
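 *
 * In other words, on LDC_TX_NORESOURCES the loop first tries to
 * reclaim descriptors that the peer has already marked DONE (via
 * vsw_reclaim_dring() above); if at least one was reclaimed the
 * send is retried immediately, otherwise the thread sleeps for
 * vsw_ldc_tx_delay microseconds (converted to ticks with
 * drv_usectohz()) before the next attempt, up to 'retries'
 * attempts in total.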
4329 */ 4330 if ((rc == 0) && (i < retries)) { 4331 delay(drv_usectohz(vsw_ldc_tx_delay)); 4332 } 4333 } 4334 freemsg(mp); 4335 return (status); 4336 } 4337 4338 /* 4339 * Send packet out via descriptor ring to a logical device. 4340 */ 4341 static int 4342 vsw_dringsend(vsw_ldc_t *ldcp, mblk_t *mp) 4343 { 4344 vio_dring_msg_t dring_pkt; 4345 dring_info_t *dp = NULL; 4346 vsw_private_desc_t *priv_desc = NULL; 4347 vnet_public_desc_t *pub = NULL; 4348 vsw_t *vswp = ldcp->ldc_vswp; 4349 mblk_t *bp; 4350 size_t n, size; 4351 caddr_t bufp; 4352 int idx; 4353 int status = LDC_TX_SUCCESS; 4354 struct ether_header *ehp = (struct ether_header *)mp->b_rptr; 4355 lane_t *lp = &ldcp->lane_out; 4356 4357 D1(vswp, "%s(%lld): enter\n", __func__, ldcp->ldc_id); 4358 4359 /* TODO: make test a macro */ 4360 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 4361 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 4362 DWARN(vswp, "%s(%lld) status(%d) lstate(0x%llx), dropping " 4363 "packet\n", __func__, ldcp->ldc_id, ldcp->ldc_status, 4364 ldcp->lane_out.lstate); 4365 ldcp->ldc_stats.oerrors++; 4366 return (LDC_TX_FAILURE); 4367 } 4368 4369 /* 4370 * Note - using first ring only, this may change 4371 * in the future. 4372 */ 4373 READ_ENTER(&ldcp->lane_out.dlistrw); 4374 if ((dp = ldcp->lane_out.dringp) == NULL) { 4375 RW_EXIT(&ldcp->lane_out.dlistrw); 4376 DERR(vswp, "%s(%lld): no dring for outbound lane on" 4377 " channel %d", __func__, ldcp->ldc_id, ldcp->ldc_id); 4378 ldcp->ldc_stats.oerrors++; 4379 return (LDC_TX_FAILURE); 4380 } 4381 4382 size = msgsize(mp); 4383 if (size > (size_t)lp->mtu) { 4384 RW_EXIT(&ldcp->lane_out.dlistrw); 4385 DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__, 4386 ldcp->ldc_id, size); 4387 ldcp->ldc_stats.oerrors++; 4388 return (LDC_TX_FAILURE); 4389 } 4390 4391 /* 4392 * Find a free descriptor 4393 * 4394 * Note: for the moment we are assuming that we will only 4395 * have one dring going from the switch to each of its 4396 * peers. This may change in the future. 4397 */ 4398 if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) { 4399 D2(vswp, "%s(%lld): no descriptor available for ring " 4400 "at 0x%llx", __func__, ldcp->ldc_id, dp); 4401 4402 /* nothing more we can do */ 4403 status = LDC_TX_NORESOURCES; 4404 ldcp->ldc_stats.tx_no_desc++; 4405 goto vsw_dringsend_free_exit; 4406 } else { 4407 D2(vswp, "%s(%lld): free private descriptor found at pos %ld " 4408 "addr 0x%llx\n", __func__, ldcp->ldc_id, idx, priv_desc); 4409 } 4410 4411 /* copy data into the descriptor */ 4412 bufp = priv_desc->datap; 4413 bufp += VNET_IPALIGN; 4414 for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) { 4415 n = MBLKL(bp); 4416 bcopy(bp->b_rptr, bufp, n); 4417 bufp += n; 4418 } 4419 4420 priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size; 4421 4422 pub = priv_desc->descp; 4423 pub->nbytes = priv_desc->datalen; 4424 4425 /* update statistics */ 4426 if (IS_BROADCAST(ehp)) 4427 ldcp->ldc_stats.brdcstxmt++; 4428 else if (IS_MULTICAST(ehp)) 4429 ldcp->ldc_stats.multixmt++; 4430 ldcp->ldc_stats.opackets++; 4431 ldcp->ldc_stats.obytes += priv_desc->datalen; 4432 4433 mutex_enter(&priv_desc->dstate_lock); 4434 pub->hdr.dstate = VIO_DESC_READY; 4435 mutex_exit(&priv_desc->dstate_lock); 4436 4437 /* 4438 * Determine whether or not we need to send a message to our 4439 * peer prompting them to read our newly updated descriptor(s). 
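 *
 * restart_reqd starts out B_TRUE when the ring is created (see
 * vsw_create_dring() below) and is cleared here once we have
 * prompted the peer with a VIO_DRING_DATA message whose start_idx
 * points at the first descriptor it has not yet ACKed. While it
 * stays clear, later transmits simply mark descriptors READY
 * without sending a message; it is presumably re-armed by the
 * dring-data ACK processing (not shown in this section) once the
 * peer indicates it has stopped reading the ring.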
4440 */ 4441 mutex_enter(&dp->restart_lock); 4442 if (dp->restart_reqd) { 4443 dp->restart_reqd = B_FALSE; 4444 ldcp->ldc_stats.dring_data_msgs++; 4445 mutex_exit(&dp->restart_lock); 4446 4447 /* 4448 * Send a vio_dring_msg to peer to prompt them to read 4449 * the updated descriptor ring. 4450 */ 4451 dring_pkt.tag.vio_msgtype = VIO_TYPE_DATA; 4452 dring_pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 4453 dring_pkt.tag.vio_subtype_env = VIO_DRING_DATA; 4454 dring_pkt.tag.vio_sid = ldcp->local_session; 4455 4456 /* Note - for now using first ring */ 4457 dring_pkt.dring_ident = dp->ident; 4458 4459 /* 4460 * If last_ack_recv is -1 then we know we've not 4461 * received any ack's yet, so this must be the first 4462 * msg sent, so set the start to the begining of the ring. 4463 */ 4464 mutex_enter(&dp->dlock); 4465 if (dp->last_ack_recv == -1) { 4466 dring_pkt.start_idx = 0; 4467 } else { 4468 dring_pkt.start_idx = 4469 (dp->last_ack_recv + 1) % dp->num_descriptors; 4470 } 4471 dring_pkt.end_idx = -1; 4472 mutex_exit(&dp->dlock); 4473 4474 D3(vswp, "%s(%lld): dring 0x%llx : ident 0x%llx\n", __func__, 4475 ldcp->ldc_id, dp, dring_pkt.dring_ident); 4476 D3(vswp, "%s(%lld): start %lld : end %lld :\n", 4477 __func__, ldcp->ldc_id, dring_pkt.start_idx, 4478 dring_pkt.end_idx); 4479 4480 RW_EXIT(&ldcp->lane_out.dlistrw); 4481 4482 (void) vsw_send_msg(ldcp, (void *)&dring_pkt, 4483 sizeof (vio_dring_msg_t), B_TRUE); 4484 4485 return (status); 4486 4487 } else { 4488 mutex_exit(&dp->restart_lock); 4489 D2(vswp, "%s(%lld): updating descp %d", __func__, 4490 ldcp->ldc_id, idx); 4491 } 4492 4493 vsw_dringsend_free_exit: 4494 4495 RW_EXIT(&ldcp->lane_out.dlistrw); 4496 4497 D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id); 4498 return (status); 4499 } 4500 4501 /* 4502 * Send an in-band descriptor message over ldc. 4503 */ 4504 static int 4505 vsw_descrsend(vsw_ldc_t *ldcp, mblk_t *mp) 4506 { 4507 vsw_t *vswp = ldcp->ldc_vswp; 4508 vnet_ibnd_desc_t ibnd_msg; 4509 vsw_private_desc_t *priv_desc = NULL; 4510 dring_info_t *dp = NULL; 4511 size_t n, size = 0; 4512 caddr_t bufp; 4513 mblk_t *bp; 4514 int idx, i; 4515 int status = LDC_TX_SUCCESS; 4516 static int warn_msg = 1; 4517 lane_t *lp = &ldcp->lane_out; 4518 4519 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 4520 4521 ASSERT(mp != NULL); 4522 4523 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 4524 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 4525 DERR(vswp, "%s(%lld) status(%d) state (0x%llx), dropping pkt", 4526 __func__, ldcp->ldc_id, ldcp->ldc_status, 4527 ldcp->lane_out.lstate); 4528 ldcp->ldc_stats.oerrors++; 4529 return (LDC_TX_FAILURE); 4530 } 4531 4532 /* 4533 * only expect single dring to exist, which we use 4534 * as an internal buffer, rather than a transfer channel. 
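 *
 * In this in-band (descriptor) mode the peer does not read from a
 * mapped ring; the frame is copied into the private descriptor's
 * buffer and the LDC memory cookies describing that buffer are
 * carried in the vnet_ibnd_desc_t message itself. The peer pulls
 * the payload across with ldc_mem_copy(), just as the receive side
 * of this driver does in vsw_process_data_ibnd_pkt() above, and
 * then ACKs the descriptor handle so the entry can be marked FREE.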
4535 */ 4536 READ_ENTER(&ldcp->lane_out.dlistrw); 4537 if ((dp = ldcp->lane_out.dringp) == NULL) { 4538 DERR(vswp, "%s(%lld): no dring for outbound lane", 4539 __func__, ldcp->ldc_id); 4540 DERR(vswp, "%s(%lld) status(%d) state (0x%llx)", __func__, 4541 ldcp->ldc_id, ldcp->ldc_status, ldcp->lane_out.lstate); 4542 RW_EXIT(&ldcp->lane_out.dlistrw); 4543 ldcp->ldc_stats.oerrors++; 4544 return (LDC_TX_FAILURE); 4545 } 4546 4547 size = msgsize(mp); 4548 if (size > (size_t)lp->mtu) { 4549 RW_EXIT(&ldcp->lane_out.dlistrw); 4550 DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__, 4551 ldcp->ldc_id, size); 4552 ldcp->ldc_stats.oerrors++; 4553 return (LDC_TX_FAILURE); 4554 } 4555 4556 /* 4557 * Find a free descriptor in our buffer ring 4558 */ 4559 if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) { 4560 RW_EXIT(&ldcp->lane_out.dlistrw); 4561 if (warn_msg) { 4562 DERR(vswp, "%s(%lld): no descriptor available for ring " 4563 "at 0x%llx", __func__, ldcp->ldc_id, dp); 4564 warn_msg = 0; 4565 } 4566 4567 /* nothing more we can do */ 4568 status = LDC_TX_NORESOURCES; 4569 goto vsw_descrsend_free_exit; 4570 } else { 4571 D2(vswp, "%s(%lld): free private descriptor found at pos " 4572 "%ld addr 0x%x\n", __func__, ldcp->ldc_id, idx, priv_desc); 4573 warn_msg = 1; 4574 } 4575 4576 /* copy data into the descriptor */ 4577 bufp = priv_desc->datap; 4578 for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) { 4579 n = MBLKL(bp); 4580 bcopy(bp->b_rptr, bufp, n); 4581 bufp += n; 4582 } 4583 4584 priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size; 4585 4586 /* create and send the in-band descp msg */ 4587 ibnd_msg.hdr.tag.vio_msgtype = VIO_TYPE_DATA; 4588 ibnd_msg.hdr.tag.vio_subtype = VIO_SUBTYPE_INFO; 4589 ibnd_msg.hdr.tag.vio_subtype_env = VIO_DESC_DATA; 4590 ibnd_msg.hdr.tag.vio_sid = ldcp->local_session; 4591 4592 /* 4593 * Copy the mem cookies describing the data from the 4594 * private region of the descriptor ring into the inband 4595 * descriptor. 
*/ 4597 for (i = 0; i < priv_desc->ncookies; i++) { 4598 bcopy(&priv_desc->memcookie[i], &ibnd_msg.memcookie[i], 4599 sizeof (ldc_mem_cookie_t)); 4600 } 4601 4602 ibnd_msg.hdr.desc_handle = idx; 4603 ibnd_msg.ncookies = priv_desc->ncookies; 4604 ibnd_msg.nbytes = size; 4605 4606 ldcp->ldc_stats.opackets++; 4607 ldcp->ldc_stats.obytes += size; 4608 4609 RW_EXIT(&ldcp->lane_out.dlistrw); 4610 4611 (void) vsw_send_msg(ldcp, (void *)&ibnd_msg, 4612 sizeof (vnet_ibnd_desc_t), B_TRUE); 4613 4614 vsw_descrsend_free_exit: 4615 4616 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 4617 return (status); 4618 } 4619 4620 static void 4621 vsw_send_ver(void *arg) 4622 { 4623 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 4624 vsw_t *vswp = ldcp->ldc_vswp; 4625 lane_t *lp = &ldcp->lane_out; 4626 vio_ver_msg_t ver_msg; 4627 4628 D1(vswp, "%s enter", __func__); 4629 4630 ver_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 4631 ver_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 4632 ver_msg.tag.vio_subtype_env = VIO_VER_INFO; 4633 ver_msg.tag.vio_sid = ldcp->local_session; 4634 4635 if (vsw_obp_ver_proto_workaround == B_FALSE) { 4636 ver_msg.ver_major = vsw_versions[0].ver_major; 4637 ver_msg.ver_minor = vsw_versions[0].ver_minor; 4638 } else { 4639 /* use the major,minor that we've ack'd */ 4640 lane_t *lpi = &ldcp->lane_in; 4641 ver_msg.ver_major = lpi->ver_major; 4642 ver_msg.ver_minor = lpi->ver_minor; 4643 } 4644 ver_msg.dev_class = VDEV_NETWORK_SWITCH; 4645 4646 lp->lstate |= VSW_VER_INFO_SENT; 4647 lp->ver_major = ver_msg.ver_major; 4648 lp->ver_minor = ver_msg.ver_minor; 4649 4650 DUMP_TAG(ver_msg.tag); 4651 4652 (void) vsw_send_msg(ldcp, &ver_msg, sizeof (vio_ver_msg_t), B_TRUE); 4653 4654 D1(vswp, "%s (%d): exit", __func__, ldcp->ldc_id); 4655 } 4656 4657 static void 4658 vsw_send_attr(vsw_ldc_t *ldcp) 4659 { 4660 vsw_t *vswp = ldcp->ldc_vswp; 4661 lane_t *lp = &ldcp->lane_out; 4662 vnet_attr_msg_t attr_msg; 4663 4664 D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id); 4665 4666 /* 4667 * Subtype is set to INFO by default 4668 */ 4669 attr_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 4670 attr_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 4671 attr_msg.tag.vio_subtype_env = VIO_ATTR_INFO; 4672 attr_msg.tag.vio_sid = ldcp->local_session; 4673 4674 /* payload copied from default settings for lane */ 4675 attr_msg.mtu = lp->mtu; 4676 attr_msg.addr_type = lp->addr_type; 4677 attr_msg.xfer_mode = lp->xfer_mode; 4678 attr_msg.ack_freq = lp->ack_freq; 4679 4680 READ_ENTER(&vswp->if_lockrw); 4681 attr_msg.addr = vnet_macaddr_strtoul((vswp->if_addr).ether_addr_octet); 4682 RW_EXIT(&vswp->if_lockrw); 4683 4684 ldcp->lane_out.lstate |= VSW_ATTR_INFO_SENT; 4685 4686 DUMP_TAG(attr_msg.tag); 4687 4688 (void) vsw_send_msg(ldcp, &attr_msg, sizeof (vnet_attr_msg_t), B_TRUE); 4689 4690 D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id); 4691 } 4692 4693 /* 4694 * Create dring info msg (which also results in the creation of 4695 * a dring). 4696 */ 4697 static vio_dring_reg_msg_t * 4698 vsw_create_dring_info_pkt(vsw_ldc_t *ldcp) 4699 { 4700 vio_dring_reg_msg_t *mp; 4701 dring_info_t *dp; 4702 vsw_t *vswp = ldcp->ldc_vswp; 4703 4704 D1(vswp, "vsw_create_dring_info_pkt enter\n"); 4705 4706 /* 4707 * If we can't create a dring, there is obviously no point in 4708 * sending a message.
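 *
 * The registration message built below simply advertises the ring
 * we just exported: its descriptor count and size, the VIO_TX_DRING
 * option and the single LDC memory cookie obtained when the ring
 * was bound to the channel (vsw_check_dring_info() later in this
 * file likewise insists on exactly one cookie when validating a
 * peer's registration). dring_ident is left at zero here; the
 * identifier used on the data path is presumably settled during the
 * registration exchange with the peer.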
4709 */ 4710 if ((dp = vsw_create_dring(ldcp)) == NULL) 4711 return (NULL); 4712 4713 mp = kmem_zalloc(sizeof (vio_dring_reg_msg_t), KM_SLEEP); 4714 4715 mp->tag.vio_msgtype = VIO_TYPE_CTRL; 4716 mp->tag.vio_subtype = VIO_SUBTYPE_INFO; 4717 mp->tag.vio_subtype_env = VIO_DRING_REG; 4718 mp->tag.vio_sid = ldcp->local_session; 4719 4720 /* payload */ 4721 mp->num_descriptors = dp->num_descriptors; 4722 mp->descriptor_size = dp->descriptor_size; 4723 mp->options = dp->options; 4724 mp->ncookies = dp->ncookies; 4725 bcopy(&dp->cookie[0], &mp->cookie[0], sizeof (ldc_mem_cookie_t)); 4726 4727 mp->dring_ident = 0; 4728 4729 D1(vswp, "vsw_create_dring_info_pkt exit\n"); 4730 4731 return (mp); 4732 } 4733 4734 static void 4735 vsw_send_dring_info(vsw_ldc_t *ldcp) 4736 { 4737 vio_dring_reg_msg_t *dring_msg; 4738 vsw_t *vswp = ldcp->ldc_vswp; 4739 4740 D1(vswp, "%s: (%ld) enter", __func__, ldcp->ldc_id); 4741 4742 dring_msg = vsw_create_dring_info_pkt(ldcp); 4743 if (dring_msg == NULL) { 4744 cmn_err(CE_WARN, "!vsw%d: %s: error creating msg", 4745 vswp->instance, __func__); 4746 return; 4747 } 4748 4749 ldcp->lane_out.lstate |= VSW_DRING_INFO_SENT; 4750 4751 DUMP_TAG_PTR((vio_msg_tag_t *)dring_msg); 4752 4753 (void) vsw_send_msg(ldcp, dring_msg, 4754 sizeof (vio_dring_reg_msg_t), B_TRUE); 4755 4756 kmem_free(dring_msg, sizeof (vio_dring_reg_msg_t)); 4757 4758 D1(vswp, "%s: (%ld) exit", __func__, ldcp->ldc_id); 4759 } 4760 4761 static void 4762 vsw_send_rdx(vsw_ldc_t *ldcp) 4763 { 4764 vsw_t *vswp = ldcp->ldc_vswp; 4765 vio_rdx_msg_t rdx_msg; 4766 4767 D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id); 4768 4769 rdx_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 4770 rdx_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 4771 rdx_msg.tag.vio_subtype_env = VIO_RDX; 4772 rdx_msg.tag.vio_sid = ldcp->local_session; 4773 4774 ldcp->lane_in.lstate |= VSW_RDX_INFO_SENT; 4775 4776 DUMP_TAG(rdx_msg.tag); 4777 4778 (void) vsw_send_msg(ldcp, &rdx_msg, sizeof (vio_rdx_msg_t), B_TRUE); 4779 4780 D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id); 4781 } 4782 4783 /* 4784 * Generic routine to send message out over ldc channel. 4785 * 4786 * It is possible that when we attempt to write over the ldc channel 4787 * that we get notified that it has been reset. Depending on the value 4788 * of the handle_reset flag we either handle that event here or simply 4789 * notify the caller that the channel was reset. 
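 *
 * Callers which hold locks that must not be held across a channel
 * reset (notably the dlistrw rwlock, see the note in the body
 * below) are expected to pass handle_reset == B_FALSE and act on
 * the ECONNRESET return themselves; the handshake and data paths
 * above pass B_TRUE so that vsw_process_conn_evt(ldcp,
 * VSW_CONN_RESET) is invoked on their behalf.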
4790 */ 4791 int 4792 vsw_send_msg(vsw_ldc_t *ldcp, void *msgp, int size, boolean_t handle_reset) 4793 { 4794 int rv; 4795 size_t msglen = size; 4796 vio_msg_tag_t *tag = (vio_msg_tag_t *)msgp; 4797 vsw_t *vswp = ldcp->ldc_vswp; 4798 vio_dring_msg_t *dmsg; 4799 vio_raw_data_msg_t *rmsg; 4800 vnet_ibnd_desc_t *imsg; 4801 boolean_t data_msg = B_FALSE; 4802 4803 D1(vswp, "vsw_send_msg (%lld) enter : sending %d bytes", 4804 ldcp->ldc_id, size); 4805 4806 D2(vswp, "send_msg: type 0x%llx", tag->vio_msgtype); 4807 D2(vswp, "send_msg: stype 0x%llx", tag->vio_subtype); 4808 D2(vswp, "send_msg: senv 0x%llx", tag->vio_subtype_env); 4809 4810 mutex_enter(&ldcp->ldc_txlock); 4811 4812 if (tag->vio_subtype == VIO_SUBTYPE_INFO) { 4813 if (tag->vio_subtype_env == VIO_DRING_DATA) { 4814 dmsg = (vio_dring_msg_t *)tag; 4815 dmsg->seq_num = ldcp->lane_out.seq_num; 4816 data_msg = B_TRUE; 4817 } else if (tag->vio_subtype_env == VIO_PKT_DATA) { 4818 rmsg = (vio_raw_data_msg_t *)tag; 4819 rmsg->seq_num = ldcp->lane_out.seq_num; 4820 data_msg = B_TRUE; 4821 } else if (tag->vio_subtype_env == VIO_DESC_DATA) { 4822 imsg = (vnet_ibnd_desc_t *)tag; 4823 imsg->hdr.seq_num = ldcp->lane_out.seq_num; 4824 data_msg = B_TRUE; 4825 } 4826 } 4827 4828 do { 4829 msglen = size; 4830 rv = ldc_write(ldcp->ldc_handle, (caddr_t)msgp, &msglen); 4831 } while (rv == EWOULDBLOCK && --vsw_wretries > 0); 4832 4833 if (rv == 0 && data_msg == B_TRUE) { 4834 ldcp->lane_out.seq_num++; 4835 } 4836 4837 if ((rv != 0) || (msglen != size)) { 4838 DERR(vswp, "vsw_send_msg:ldc_write failed: chan(%lld) rv(%d) " 4839 "size (%d) msglen(%d)\n", ldcp->ldc_id, rv, size, msglen); 4840 ldcp->ldc_stats.oerrors++; 4841 } 4842 4843 mutex_exit(&ldcp->ldc_txlock); 4844 4845 /* 4846 * If channel has been reset we either handle it here or 4847 * simply report back that it has been reset and let caller 4848 * decide what to do. 4849 */ 4850 if (rv == ECONNRESET) { 4851 DWARN(vswp, "%s (%lld) channel reset", __func__, ldcp->ldc_id); 4852 4853 /* 4854 * N.B - must never be holding the dlistrw lock when 4855 * we do a reset of the channel. 4856 */ 4857 if (handle_reset) { 4858 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 4859 } 4860 } 4861 4862 return (rv); 4863 } 4864 4865 /* 4866 * Remove the specified address from the list of address maintained 4867 * in this port node. 
4868 */ 4869 mcst_addr_t * 4870 vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr) 4871 { 4872 vsw_t *vswp = NULL; 4873 vsw_port_t *port = NULL; 4874 mcst_addr_t *prev_p = NULL; 4875 mcst_addr_t *curr_p = NULL; 4876 4877 D1(NULL, "%s: enter : devtype %d : addr 0x%llx", 4878 __func__, devtype, addr); 4879 4880 if (devtype == VSW_VNETPORT) { 4881 port = (vsw_port_t *)arg; 4882 mutex_enter(&port->mca_lock); 4883 prev_p = curr_p = port->mcap; 4884 } else { 4885 vswp = (vsw_t *)arg; 4886 mutex_enter(&vswp->mca_lock); 4887 prev_p = curr_p = vswp->mcap; 4888 } 4889 4890 while (curr_p != NULL) { 4891 if (curr_p->addr == addr) { 4892 D2(NULL, "%s: address found", __func__); 4893 /* match found */ 4894 if (prev_p == curr_p) { 4895 /* list head */ 4896 if (devtype == VSW_VNETPORT) 4897 port->mcap = curr_p->nextp; 4898 else 4899 vswp->mcap = curr_p->nextp; 4900 } else { 4901 prev_p->nextp = curr_p->nextp; 4902 } 4903 break; 4904 } else { 4905 prev_p = curr_p; 4906 curr_p = curr_p->nextp; 4907 } 4908 } 4909 4910 if (devtype == VSW_VNETPORT) 4911 mutex_exit(&port->mca_lock); 4912 else 4913 mutex_exit(&vswp->mca_lock); 4914 4915 D1(NULL, "%s: exit", __func__); 4916 4917 return (curr_p); 4918 } 4919 4920 /* 4921 * Creates a descriptor ring (dring) and links it into the 4922 * link of outbound drings for this channel. 4923 * 4924 * Returns NULL if creation failed. 4925 */ 4926 static dring_info_t * 4927 vsw_create_dring(vsw_ldc_t *ldcp) 4928 { 4929 vsw_private_desc_t *priv_addr = NULL; 4930 vsw_t *vswp = ldcp->ldc_vswp; 4931 ldc_mem_info_t minfo; 4932 dring_info_t *dp, *tp; 4933 int i; 4934 4935 dp = (dring_info_t *)kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 4936 4937 mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL); 4938 4939 /* create public section of ring */ 4940 if ((ldc_mem_dring_create(vsw_ntxds, 4941 VSW_PUB_SIZE, &dp->handle)) != 0) { 4942 4943 DERR(vswp, "vsw_create_dring(%lld): ldc dring create " 4944 "failed", ldcp->ldc_id); 4945 goto create_fail_exit; 4946 } 4947 4948 ASSERT(dp->handle != NULL); 4949 4950 /* 4951 * Get the base address of the public section of the ring. 4952 */ 4953 if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) { 4954 DERR(vswp, "vsw_create_dring(%lld): dring info failed\n", 4955 ldcp->ldc_id); 4956 goto dring_fail_exit; 4957 } else { 4958 ASSERT(minfo.vaddr != 0); 4959 dp->pub_addr = minfo.vaddr; 4960 } 4961 4962 dp->num_descriptors = vsw_ntxds; 4963 dp->descriptor_size = VSW_PUB_SIZE; 4964 dp->options = VIO_TX_DRING; 4965 dp->ncookies = 1; /* guaranteed by ldc */ 4966 4967 /* 4968 * create private portion of ring 4969 */ 4970 dp->priv_addr = (vsw_private_desc_t *)kmem_zalloc( 4971 (sizeof (vsw_private_desc_t) * vsw_ntxds), KM_SLEEP); 4972 4973 if (vsw_setup_ring(ldcp, dp)) { 4974 DERR(vswp, "%s: unable to setup ring", __func__); 4975 goto dring_fail_exit; 4976 } 4977 4978 /* haven't used any descriptors yet */ 4979 dp->end_idx = 0; 4980 dp->last_ack_recv = -1; 4981 4982 /* bind dring to the channel */ 4983 if ((ldc_mem_dring_bind(ldcp->ldc_handle, dp->handle, 4984 LDC_DIRECT_MAP | LDC_SHADOW_MAP, LDC_MEM_RW, 4985 &dp->cookie[0], &dp->ncookies)) != 0) { 4986 DERR(vswp, "vsw_create_dring: unable to bind to channel " 4987 "%lld", ldcp->ldc_id); 4988 goto dring_fail_exit; 4989 } 4990 4991 mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL); 4992 dp->restart_reqd = B_TRUE; 4993 4994 /* 4995 * Only ever create rings for outgoing lane. Link it onto 4996 * end of list. 
4997 */ 4998 WRITE_ENTER(&ldcp->lane_out.dlistrw); 4999 if (ldcp->lane_out.dringp == NULL) { 5000 D2(vswp, "vsw_create_dring: adding first outbound ring"); 5001 ldcp->lane_out.dringp = dp; 5002 } else { 5003 tp = ldcp->lane_out.dringp; 5004 while (tp->next != NULL) 5005 tp = tp->next; 5006 5007 tp->next = dp; 5008 } 5009 RW_EXIT(&ldcp->lane_out.dlistrw); 5010 5011 return (dp); 5012 5013 dring_fail_exit: 5014 (void) ldc_mem_dring_destroy(dp->handle); 5015 5016 create_fail_exit: 5017 if (dp->priv_addr != NULL) { 5018 priv_addr = dp->priv_addr; 5019 for (i = 0; i < vsw_ntxds; i++) { 5020 if (priv_addr->memhandle != NULL) 5021 (void) ldc_mem_free_handle( 5022 priv_addr->memhandle); 5023 priv_addr++; 5024 } 5025 kmem_free(dp->priv_addr, 5026 (sizeof (vsw_private_desc_t) * vsw_ntxds)); 5027 } 5028 mutex_destroy(&dp->dlock); 5029 5030 kmem_free(dp, sizeof (dring_info_t)); 5031 return (NULL); 5032 } 5033 5034 /* 5035 * Create a ring consisting of just a private portion and link 5036 * it into the list of rings for the outbound lane. 5037 * 5038 * These type of rings are used primarily for temporary data 5039 * storage (i.e. as data buffers). 5040 */ 5041 void 5042 vsw_create_privring(vsw_ldc_t *ldcp) 5043 { 5044 dring_info_t *dp, *tp; 5045 vsw_t *vswp = ldcp->ldc_vswp; 5046 5047 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 5048 5049 dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 5050 5051 mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL); 5052 5053 /* no public section */ 5054 dp->pub_addr = NULL; 5055 5056 dp->priv_addr = kmem_zalloc( 5057 (sizeof (vsw_private_desc_t) * vsw_ntxds), KM_SLEEP); 5058 5059 dp->num_descriptors = vsw_ntxds; 5060 5061 if (vsw_setup_ring(ldcp, dp)) { 5062 DERR(vswp, "%s: setup of ring failed", __func__); 5063 kmem_free(dp->priv_addr, 5064 (sizeof (vsw_private_desc_t) * vsw_ntxds)); 5065 mutex_destroy(&dp->dlock); 5066 kmem_free(dp, sizeof (dring_info_t)); 5067 return; 5068 } 5069 5070 /* haven't used any descriptors yet */ 5071 dp->end_idx = 0; 5072 5073 mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL); 5074 dp->restart_reqd = B_TRUE; 5075 5076 /* 5077 * Only ever create rings for outgoing lane. Link it onto 5078 * end of list. 5079 */ 5080 WRITE_ENTER(&ldcp->lane_out.dlistrw); 5081 if (ldcp->lane_out.dringp == NULL) { 5082 D2(vswp, "%s: adding first outbound privring", __func__); 5083 ldcp->lane_out.dringp = dp; 5084 } else { 5085 tp = ldcp->lane_out.dringp; 5086 while (tp->next != NULL) 5087 tp = tp->next; 5088 5089 tp->next = dp; 5090 } 5091 RW_EXIT(&ldcp->lane_out.dlistrw); 5092 5093 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 5094 } 5095 5096 /* 5097 * Setup the descriptors in the dring. Returns 0 on success, 1 on 5098 * failure. 5099 */ 5100 int 5101 vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp) 5102 { 5103 vnet_public_desc_t *pub_addr = NULL; 5104 vsw_private_desc_t *priv_addr = NULL; 5105 vsw_t *vswp = ldcp->ldc_vswp; 5106 uint64_t *tmpp; 5107 uint64_t offset = 0; 5108 uint32_t ncookies = 0; 5109 static char *name = "vsw_setup_ring"; 5110 int i, j, nc, rv; 5111 size_t data_sz; 5112 5113 priv_addr = dp->priv_addr; 5114 pub_addr = dp->pub_addr; 5115 5116 /* public section may be null but private should never be */ 5117 ASSERT(priv_addr != NULL); 5118 5119 /* 5120 * Allocate the region of memory which will be used to hold 5121 * the data the descriptors will refer to. 
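 *
 * Sizing sketch (the exact constants live in the vnet/vsw headers):
 * each descriptor is given max_frame_size plus VNET_IPALIGN and
 * VNET_LDCALIGN bytes of headroom, rounded up to a 2KB multiple by
 * VNET_ROUNDUP_2K(); for a standard ~1514 byte Ethernet frame this
 * works out to one 2048 byte buffer per descriptor. The whole ring
 * shares a single kmem_alloc()'d region of vsw_ntxds * desc_data_sz
 * bytes, and tmpp below walks that region in desc_data_sz byte
 * strides (offset is expressed in 8-byte units since tmpp is a
 * uint64_t pointer).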
5122 */ 5123 data_sz = vswp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN; 5124 data_sz = VNET_ROUNDUP_2K(data_sz); 5125 dp->desc_data_sz = data_sz; 5126 dp->data_sz = vsw_ntxds * data_sz; 5127 dp->data_addr = kmem_alloc(dp->data_sz, KM_SLEEP); 5128 5129 D2(vswp, "%s: allocated %lld bytes at 0x%llx\n", name, 5130 dp->data_sz, dp->data_addr); 5131 5132 tmpp = (uint64_t *)dp->data_addr; 5133 offset = dp->desc_data_sz/sizeof (tmpp); 5134 5135 /* 5136 * Initialise some of the private and public (if they exist) 5137 * descriptor fields. 5138 */ 5139 for (i = 0; i < vsw_ntxds; i++) { 5140 mutex_init(&priv_addr->dstate_lock, NULL, MUTEX_DRIVER, NULL); 5141 5142 if ((ldc_mem_alloc_handle(ldcp->ldc_handle, 5143 &priv_addr->memhandle)) != 0) { 5144 DERR(vswp, "%s: alloc mem handle failed", name); 5145 goto setup_ring_cleanup; 5146 } 5147 5148 priv_addr->datap = (void *)tmpp; 5149 5150 rv = ldc_mem_bind_handle(priv_addr->memhandle, 5151 (caddr_t)priv_addr->datap, dp->desc_data_sz, 5152 LDC_SHADOW_MAP, LDC_MEM_R|LDC_MEM_W, 5153 &(priv_addr->memcookie[0]), &ncookies); 5154 if (rv != 0) { 5155 DERR(vswp, "%s(%lld): ldc_mem_bind_handle failed " 5156 "(rv %d)", name, ldcp->ldc_id, rv); 5157 goto setup_ring_cleanup; 5158 } 5159 priv_addr->bound = 1; 5160 5161 D2(vswp, "%s: %d: memcookie 0 : addr 0x%llx : size 0x%llx", 5162 name, i, priv_addr->memcookie[0].addr, 5163 priv_addr->memcookie[0].size); 5164 5165 if (ncookies >= (uint32_t)(VSW_MAX_COOKIES + 1)) { 5166 DERR(vswp, "%s(%lld) ldc_mem_bind_handle returned " 5167 "invalid num of cookies (%d) for size 0x%llx", 5168 name, ldcp->ldc_id, ncookies, VSW_RING_EL_DATA_SZ); 5169 5170 goto setup_ring_cleanup; 5171 } else { 5172 for (j = 1; j < ncookies; j++) { 5173 rv = ldc_mem_nextcookie(priv_addr->memhandle, 5174 &(priv_addr->memcookie[j])); 5175 if (rv != 0) { 5176 DERR(vswp, "%s: ldc_mem_nextcookie " 5177 "failed rv (%d)", name, rv); 5178 goto setup_ring_cleanup; 5179 } 5180 D3(vswp, "%s: memcookie %d : addr 0x%llx : " 5181 "size 0x%llx", name, j, 5182 priv_addr->memcookie[j].addr, 5183 priv_addr->memcookie[j].size); 5184 } 5185 5186 } 5187 priv_addr->ncookies = ncookies; 5188 priv_addr->dstate = VIO_DESC_FREE; 5189 5190 if (pub_addr != NULL) { 5191 5192 /* link pub and private sides */ 5193 priv_addr->descp = pub_addr; 5194 5195 pub_addr->ncookies = priv_addr->ncookies; 5196 5197 for (nc = 0; nc < pub_addr->ncookies; nc++) { 5198 bcopy(&priv_addr->memcookie[nc], 5199 &pub_addr->memcookie[nc], 5200 sizeof (ldc_mem_cookie_t)); 5201 } 5202 5203 pub_addr->hdr.dstate = VIO_DESC_FREE; 5204 pub_addr++; 5205 } 5206 5207 /* 5208 * move to next element in the dring and the next 5209 * position in the data buffer. 5210 */ 5211 priv_addr++; 5212 tmpp += offset; 5213 } 5214 5215 return (0); 5216 5217 setup_ring_cleanup: 5218 priv_addr = dp->priv_addr; 5219 5220 for (j = 0; j < i; j++) { 5221 (void) ldc_mem_unbind_handle(priv_addr->memhandle); 5222 (void) ldc_mem_free_handle(priv_addr->memhandle); 5223 5224 mutex_destroy(&priv_addr->dstate_lock); 5225 5226 priv_addr++; 5227 } 5228 kmem_free(dp->data_addr, dp->data_sz); 5229 5230 return (1); 5231 } 5232 5233 /* 5234 * Searches the private section of a ring for a free descriptor, 5235 * starting at the location of the last free descriptor found 5236 * previously. 5237 * 5238 * Returns 0 if free descriptor is available, and updates state 5239 * of private descriptor to VIO_DESC_READY, otherwise returns 1. 
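 *
 * Typical usage, as in vsw_dringsend() and vsw_descrsend() above
 * (illustrative sketch only):
 *
 *	vsw_private_desc_t	*priv;
 *	int			idx;
 *
 *	if (vsw_dring_find_free_desc(dp, &priv, &idx) != 0)
 *		return (LDC_TX_NORESOURCES);	... ring currently full
 *
 * On success priv has already been marked VIO_DESC_READY; the
 * caller copies the frame into priv->datap, sets priv->datalen and
 * then publishes the descriptor to the peer.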
5240 * 5241 * FUTURE: might need to return contiguous range of descriptors 5242 * as dring info msg assumes all will be contiguous. 5243 */ 5244 static int 5245 vsw_dring_find_free_desc(dring_info_t *dringp, 5246 vsw_private_desc_t **priv_p, int *idx) 5247 { 5248 vsw_private_desc_t *addr = NULL; 5249 int num = vsw_ntxds; 5250 int ret = 1; 5251 5252 D1(NULL, "%s enter\n", __func__); 5253 5254 ASSERT(dringp->priv_addr != NULL); 5255 5256 D2(NULL, "%s: searching ring, dringp 0x%llx : start pos %lld", 5257 __func__, dringp, dringp->end_idx); 5258 5259 addr = (vsw_private_desc_t *)dringp->priv_addr + dringp->end_idx; 5260 5261 mutex_enter(&addr->dstate_lock); 5262 if (addr->dstate == VIO_DESC_FREE) { 5263 addr->dstate = VIO_DESC_READY; 5264 *priv_p = addr; 5265 *idx = dringp->end_idx; 5266 dringp->end_idx = (dringp->end_idx + 1) % num; 5267 ret = 0; 5268 5269 } 5270 mutex_exit(&addr->dstate_lock); 5271 5272 /* ring full */ 5273 if (ret == 1) { 5274 D2(NULL, "%s: no desp free: started at %d", __func__, 5275 dringp->end_idx); 5276 } 5277 5278 D1(NULL, "%s: exit\n", __func__); 5279 5280 return (ret); 5281 } 5282 5283 /* 5284 * Map from a dring identifier to the ring itself. Returns 5285 * pointer to ring or NULL if no match found. 5286 * 5287 * Should be called with dlistrw rwlock held as reader. 5288 */ 5289 static dring_info_t * 5290 vsw_ident2dring(lane_t *lane, uint64_t ident) 5291 { 5292 dring_info_t *dp = NULL; 5293 5294 if ((dp = lane->dringp) == NULL) { 5295 return (NULL); 5296 } else { 5297 if (dp->ident == ident) 5298 return (dp); 5299 5300 while (dp != NULL) { 5301 if (dp->ident == ident) 5302 break; 5303 dp = dp->next; 5304 } 5305 } 5306 5307 return (dp); 5308 } 5309 5310 /* 5311 * Set the default lane attributes. These are copied into 5312 * the attr msg we send to our peer. If they are not acceptable 5313 * then (currently) the handshake ends. 5314 */ 5315 static void 5316 vsw_set_lane_attr(vsw_t *vswp, lane_t *lp) 5317 { 5318 bzero(lp, sizeof (lane_t)); 5319 5320 READ_ENTER(&vswp->if_lockrw); 5321 ether_copy(&(vswp->if_addr), &(lp->addr)); 5322 RW_EXIT(&vswp->if_lockrw); 5323 5324 lp->mtu = vswp->max_frame_size; 5325 lp->addr_type = ADDR_TYPE_MAC; 5326 lp->xfer_mode = VIO_DRING_MODE_V1_0; 5327 lp->ack_freq = 0; /* for shared mode */ 5328 lp->seq_num = VNET_ISS; 5329 } 5330 5331 /* 5332 * Verify that the attributes are acceptable. 5333 * 5334 * FUTURE: If some attributes are not acceptable, change them 5335 * our desired values. 5336 */ 5337 static int 5338 vsw_check_attr(vnet_attr_msg_t *pkt, vsw_ldc_t *ldcp) 5339 { 5340 int ret = 0; 5341 struct ether_addr ea; 5342 vsw_port_t *port = ldcp->ldc_port; 5343 lane_t *lp = &ldcp->lane_out; 5344 5345 D1(NULL, "vsw_check_attr enter\n"); 5346 5347 if ((pkt->xfer_mode != VIO_DESC_MODE) && 5348 (pkt->xfer_mode != lp->xfer_mode)) { 5349 D2(NULL, "vsw_check_attr: unknown mode %x\n", pkt->xfer_mode); 5350 ret = 1; 5351 } 5352 5353 /* Only support MAC addresses at moment. */ 5354 if ((pkt->addr_type != ADDR_TYPE_MAC) || (pkt->addr == 0)) { 5355 D2(NULL, "vsw_check_attr: invalid addr_type %x, " 5356 "or address 0x%llx\n", pkt->addr_type, pkt->addr); 5357 ret = 1; 5358 } 5359 5360 /* 5361 * MAC address supplied by device should match that stored 5362 * in the vsw-port OBP node. Need to decide what to do if they 5363 * don't match, for the moment just warn but don't fail. 
5364 */ 5365 vnet_macaddr_ultostr(pkt->addr, ea.ether_addr_octet); 5366 if (ether_cmp(&ea, &port->p_macaddr) != 0) { 5367 DERR(NULL, "vsw_check_attr: device supplied address " 5368 "0x%llx doesn't match node address 0x%llx\n", 5369 pkt->addr, port->p_macaddr); 5370 } 5371 5372 /* 5373 * Ack freq only makes sense in pkt mode; in shared 5374 * mode the ring descriptors say whether or not to 5375 * send back an ACK. 5376 */ 5377 if ((VSW_VER_GTEQ(ldcp, 1, 2) && 5378 (pkt->xfer_mode & VIO_DRING_MODE_V1_2)) || 5379 (VSW_VER_LT(ldcp, 1, 2) && 5380 (pkt->xfer_mode == VIO_DRING_MODE_V1_0))) { 5381 if (pkt->ack_freq > 0) { 5382 D2(NULL, "vsw_check_attr: non-zero ack freq " 5383 " in SHM mode\n"); 5384 ret = 1; 5385 } 5386 } 5387 5388 /* 5389 * Note: for the moment we only support ETHER 5390 * frames. This may change in the future. 5391 */ 5392 if ((pkt->mtu > lp->mtu) || (pkt->mtu <= 0)) { 5393 D2(NULL, "vsw_check_attr: invalid MTU (0x%llx)\n", 5394 pkt->mtu); 5395 ret = 1; 5396 } 5397 5398 D1(NULL, "vsw_check_attr exit\n"); 5399 5400 return (ret); 5401 } 5402 5403 /* 5404 * Returns 1 if there is a problem, 0 otherwise. 5405 */ 5406 static int 5407 vsw_check_dring_info(vio_dring_reg_msg_t *pkt) 5408 { 5409 5410 5411 int ret = 0; 5412 5413 D1(NULL, "vsw_check_dring_info enter\n"); 5414 5415 if ((pkt->num_descriptors == 0) || 5416 (pkt->descriptor_size == 0) || 5417 (pkt->ncookies != 1)) { 5418 DERR(NULL, "vsw_check_dring_info: invalid dring msg"); 5419 ret = 1; 5420 } 5421 5422 D1(NULL, "vsw_check_dring_info exit\n"); 5423 5424 return (ret); 5425 } 5426 5427 /* 5428 * Returns 1 if two memory cookies match. Otherwise returns 0. 5429 */ 5430 static int 5431 vsw_mem_cookie_match(ldc_mem_cookie_t *m1, ldc_mem_cookie_t *m2) 5432 { 5433 if ((m1->addr != m2->addr) || 5434 (m1->size != m2->size)) { 5435 return (0); 5436 } else { 5437 return (1); 5438 } 5439 } 5440 5441 /* 5442 * Returns 1 if the ring described in the reg message matches that 5443 * described by the dring_info structure. Otherwise returns 0. 5444 */ 5445 static int 5446 vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg) 5447 { 5448 if ((msg->descriptor_size != dp->descriptor_size) || 5449 (msg->num_descriptors != dp->num_descriptors) || 5450 (msg->ncookies != dp->ncookies) || 5451 !(vsw_mem_cookie_match(&msg->cookie[0], &dp->cookie[0]))) { 5452 return (0); 5453 } else { 5454 return (1); 5455 } 5456 5457 } 5458 5459 static caddr_t 5460 vsw_print_ethaddr(uint8_t *a, char *ebuf) 5461 { 5462 (void) sprintf(ebuf, "%x:%x:%x:%x:%x:%x", 5463 a[0], a[1], a[2], a[3], a[4], a[5]); 5464 return (ebuf); 5465 } 5466 5467 /* 5468 * Reset and free all the resources associated with 5469 * the channel.
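 *
 * The two lanes are torn down differently: the INBOUND lane only
 * holds rings that were exported by the peer and mapped locally, so
 * each one is simply unmapped with ldc_mem_dring_unmap() and its
 * dring_info_t freed, whereas the OUTBOUND lane owns the exported
 * ring (including its data buffers and memory handles), which
 * vsw_free_ring() below unbinds, destroys and frees.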
5470 */ 5471 static void 5472 vsw_free_lane_resources(vsw_ldc_t *ldcp, uint64_t dir) 5473 { 5474 dring_info_t *dp, *dpp; 5475 lane_t *lp = NULL; 5476 5477 ASSERT(ldcp != NULL); 5478 5479 D1(ldcp->ldc_vswp, "%s (%lld): enter", __func__, ldcp->ldc_id); 5480 5481 if (dir == INBOUND) { 5482 D2(ldcp->ldc_vswp, "%s: freeing INBOUND lane" 5483 " of channel %lld", __func__, ldcp->ldc_id); 5484 lp = &ldcp->lane_in; 5485 } else { 5486 D2(ldcp->ldc_vswp, "%s: freeing OUTBOUND lane" 5487 " of channel %lld", __func__, ldcp->ldc_id); 5488 lp = &ldcp->lane_out; 5489 } 5490 5491 lp->lstate = VSW_LANE_INACTIV; 5492 lp->seq_num = VNET_ISS; 5493 5494 if (lp->dringp) { 5495 if (dir == INBOUND) { 5496 WRITE_ENTER(&lp->dlistrw); 5497 dp = lp->dringp; 5498 while (dp != NULL) { 5499 dpp = dp->next; 5500 if (dp->handle != NULL) 5501 (void) ldc_mem_dring_unmap(dp->handle); 5502 kmem_free(dp, sizeof (dring_info_t)); 5503 dp = dpp; 5504 } 5505 RW_EXIT(&lp->dlistrw); 5506 } else { 5507 /* 5508 * unbind, destroy exported dring, free dring struct 5509 */ 5510 WRITE_ENTER(&lp->dlistrw); 5511 dp = lp->dringp; 5512 vsw_free_ring(dp); 5513 RW_EXIT(&lp->dlistrw); 5514 } 5515 lp->dringp = NULL; 5516 } 5517 5518 D1(ldcp->ldc_vswp, "%s (%lld): exit", __func__, ldcp->ldc_id); 5519 } 5520 5521 /* 5522 * Free ring and all associated resources. 5523 * 5524 * Should be called with dlistrw rwlock held as writer. 5525 */ 5526 static void 5527 vsw_free_ring(dring_info_t *dp) 5528 { 5529 vsw_private_desc_t *paddr = NULL; 5530 dring_info_t *dpp; 5531 int i; 5532 5533 while (dp != NULL) { 5534 mutex_enter(&dp->dlock); 5535 dpp = dp->next; 5536 if (dp->priv_addr != NULL) { 5537 /* 5538 * First unbind and free the memory handles 5539 * stored in each descriptor within the ring. 5540 */ 5541 for (i = 0; i < vsw_ntxds; i++) { 5542 paddr = (vsw_private_desc_t *) 5543 dp->priv_addr + i; 5544 if (paddr->memhandle != NULL) { 5545 if (paddr->bound == 1) { 5546 if (ldc_mem_unbind_handle( 5547 paddr->memhandle) != 0) { 5548 DERR(NULL, "error " 5549 "unbinding handle for " 5550 "ring 0x%llx at pos %d", 5551 dp, i); 5552 continue; 5553 } 5554 paddr->bound = 0; 5555 } 5556 5557 if (ldc_mem_free_handle( 5558 paddr->memhandle) != 0) { 5559 DERR(NULL, "error freeing " 5560 "handle for ring 0x%llx " 5561 "at pos %d", dp, i); 5562 continue; 5563 } 5564 paddr->memhandle = NULL; 5565 } 5566 mutex_destroy(&paddr->dstate_lock); 5567 } 5568 kmem_free(dp->priv_addr, 5569 (sizeof (vsw_private_desc_t) * vsw_ntxds)); 5570 } 5571 5572 /* 5573 * Now unbind and destroy the ring itself. 5574 */ 5575 if (dp->handle != NULL) { 5576 (void) ldc_mem_dring_unbind(dp->handle); 5577 (void) ldc_mem_dring_destroy(dp->handle); 5578 } 5579 5580 if (dp->data_addr != NULL) { 5581 kmem_free(dp->data_addr, dp->data_sz); 5582 } 5583 5584 mutex_exit(&dp->dlock); 5585 mutex_destroy(&dp->dlock); 5586 mutex_destroy(&dp->restart_lock); 5587 kmem_free(dp, sizeof (dring_info_t)); 5588 5589 dp = dpp; 5590 } 5591 } 5592 5593 /* 5594 * vsw_ldc_rx_worker -- A per LDC worker thread to receive data. 5595 * This thread is woken up by the LDC interrupt handler to process 5596 * LDC packets and receive data. 
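 *
 * The wakeup protocol is a flag word protected by rx_thr_lock:
 * VSW_WTHR_DATARCVD is set by the interrupt path when it hands work
 * to this thread and rx_thr_cv is signalled, while VSW_WTHR_STOP is
 * set by vsw_stop_rx_thread() during teardown. The thread keeps
 * VSW_WTHR_RUNNING asserted for its lifetime and wraps its cv_wait()
 * in CALLB_CPR_SAFE_BEGIN/END so that it co-operates with CPR
 * (suspend/resume) while idle.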
5597 */ 5598 static void 5599 vsw_ldc_rx_worker(void *arg) 5600 { 5601 callb_cpr_t cprinfo; 5602 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 5603 vsw_t *vswp = ldcp->ldc_vswp; 5604 5605 D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id); 5606 CALLB_CPR_INIT(&cprinfo, &ldcp->rx_thr_lock, callb_generic_cpr, 5607 "vsw_rx_thread"); 5608 mutex_enter(&ldcp->rx_thr_lock); 5609 ldcp->rx_thr_flags |= VSW_WTHR_RUNNING; 5610 while (!(ldcp->rx_thr_flags & VSW_WTHR_STOP)) { 5611 5612 CALLB_CPR_SAFE_BEGIN(&cprinfo); 5613 /* 5614 * Wait until the data is received or a stop 5615 * request is received. 5616 */ 5617 while (!(ldcp->rx_thr_flags & 5618 (VSW_WTHR_DATARCVD | VSW_WTHR_STOP))) { 5619 cv_wait(&ldcp->rx_thr_cv, &ldcp->rx_thr_lock); 5620 } 5621 CALLB_CPR_SAFE_END(&cprinfo, &ldcp->rx_thr_lock) 5622 5623 /* 5624 * First process the stop request. 5625 */ 5626 if (ldcp->rx_thr_flags & VSW_WTHR_STOP) { 5627 D2(vswp, "%s(%lld):Rx thread stopped\n", 5628 __func__, ldcp->ldc_id); 5629 break; 5630 } 5631 ldcp->rx_thr_flags &= ~VSW_WTHR_DATARCVD; 5632 mutex_exit(&ldcp->rx_thr_lock); 5633 D1(vswp, "%s(%lld):calling vsw_process_pkt\n", 5634 __func__, ldcp->ldc_id); 5635 mutex_enter(&ldcp->ldc_cblock); 5636 vsw_process_pkt(ldcp); 5637 mutex_exit(&ldcp->ldc_cblock); 5638 mutex_enter(&ldcp->rx_thr_lock); 5639 } 5640 5641 /* 5642 * Update the run status and wakeup the thread that 5643 * has sent the stop request. 5644 */ 5645 ldcp->rx_thr_flags &= ~VSW_WTHR_RUNNING; 5646 cv_signal(&ldcp->rx_thr_cv); 5647 CALLB_CPR_EXIT(&cprinfo); 5648 D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id); 5649 thread_exit(); 5650 } 5651 5652 /* vsw_stop_rx_thread -- Co-ordinate with receive thread to stop it */ 5653 static void 5654 vsw_stop_rx_thread(vsw_ldc_t *ldcp) 5655 { 5656 vsw_t *vswp = ldcp->ldc_vswp; 5657 5658 D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id); 5659 /* 5660 * Send a stop request by setting the stop flag and 5661 * wait until the receive thread stops. 5662 */ 5663 mutex_enter(&ldcp->rx_thr_lock); 5664 if (ldcp->rx_thr_flags & VSW_WTHR_RUNNING) { 5665 ldcp->rx_thr_flags |= VSW_WTHR_STOP; 5666 cv_signal(&ldcp->rx_thr_cv); 5667 while (ldcp->rx_thr_flags & VSW_WTHR_RUNNING) { 5668 cv_wait(&ldcp->rx_thr_cv, &ldcp->rx_thr_lock); 5669 } 5670 } 5671 mutex_exit(&ldcp->rx_thr_lock); 5672 ldcp->rx_thread = NULL; 5673 D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id); 5674 } 5675 5676 /* 5677 * vsw_ldc_tx_worker -- A per LDC worker thread to transmit data. 5678 * This thread is woken up by the vsw_portsend to transmit 5679 * packets. 5680 */ 5681 static void 5682 vsw_ldc_tx_worker(void *arg) 5683 { 5684 callb_cpr_t cprinfo; 5685 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 5686 vsw_t *vswp = ldcp->ldc_vswp; 5687 mblk_t *mp; 5688 mblk_t *tmp; 5689 5690 D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id); 5691 CALLB_CPR_INIT(&cprinfo, &ldcp->tx_thr_lock, callb_generic_cpr, 5692 "vnet_tx_thread"); 5693 mutex_enter(&ldcp->tx_thr_lock); 5694 ldcp->tx_thr_flags |= VSW_WTHR_RUNNING; 5695 while (!(ldcp->tx_thr_flags & VSW_WTHR_STOP)) { 5696 5697 CALLB_CPR_SAFE_BEGIN(&cprinfo); 5698 /* 5699 * Wait until the data is received or a stop 5700 * request is received. 5701 */ 5702 while (!(ldcp->tx_thr_flags & VSW_WTHR_STOP) && 5703 (ldcp->tx_mhead == NULL)) { 5704 cv_wait(&ldcp->tx_thr_cv, &ldcp->tx_thr_lock); 5705 } 5706 CALLB_CPR_SAFE_END(&cprinfo, &ldcp->tx_thr_lock) 5707 5708 /* 5709 * First process the stop request. 
5710 */ 5711 if (ldcp->tx_thr_flags & VSW_WTHR_STOP) { 5712 D2(vswp, "%s(%lld):tx thread stopped\n", 5713 __func__, ldcp->ldc_id); 5714 break; 5715 } 5716 mp = ldcp->tx_mhead; 5717 ldcp->tx_mhead = ldcp->tx_mtail = NULL; 5718 ldcp->tx_cnt = 0; 5719 mutex_exit(&ldcp->tx_thr_lock); 5720 D2(vswp, "%s(%lld):calling vsw_ldcsend\n", 5721 __func__, ldcp->ldc_id); 5722 while (mp != NULL) { 5723 tmp = mp->b_next; 5724 mp->b_next = mp->b_prev = NULL; 5725 (void) vsw_ldcsend(ldcp, mp, vsw_ldc_tx_retries); 5726 mp = tmp; 5727 } 5728 mutex_enter(&ldcp->tx_thr_lock); 5729 } 5730 5731 /* 5732 * Update the run status and wakeup the thread that 5733 * has sent the stop request. 5734 */ 5735 ldcp->tx_thr_flags &= ~VSW_WTHR_RUNNING; 5736 cv_signal(&ldcp->tx_thr_cv); 5737 CALLB_CPR_EXIT(&cprinfo); 5738 D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id); 5739 thread_exit(); 5740 } 5741 5742 /* vsw_stop_tx_thread -- Co-ordinate with receive thread to stop it */ 5743 static void 5744 vsw_stop_tx_thread(vsw_ldc_t *ldcp) 5745 { 5746 vsw_t *vswp = ldcp->ldc_vswp; 5747 5748 D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id); 5749 /* 5750 * Send a stop request by setting the stop flag and 5751 * wait until the receive thread stops. 5752 */ 5753 mutex_enter(&ldcp->tx_thr_lock); 5754 if (ldcp->tx_thr_flags & VSW_WTHR_RUNNING) { 5755 ldcp->tx_thr_flags |= VSW_WTHR_STOP; 5756 cv_signal(&ldcp->tx_thr_cv); 5757 while (ldcp->tx_thr_flags & VSW_WTHR_RUNNING) { 5758 cv_wait(&ldcp->tx_thr_cv, &ldcp->tx_thr_lock); 5759 } 5760 } 5761 mutex_exit(&ldcp->tx_thr_lock); 5762 ldcp->tx_thread = NULL; 5763 D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id); 5764 } 5765 5766 /* vsw_reclaim_dring -- reclaim descriptors */ 5767 static int 5768 vsw_reclaim_dring(dring_info_t *dp, int start) 5769 { 5770 int i, j, len; 5771 vsw_private_desc_t *priv_addr; 5772 vnet_public_desc_t *pub_addr; 5773 5774 pub_addr = (vnet_public_desc_t *)dp->pub_addr; 5775 priv_addr = (vsw_private_desc_t *)dp->priv_addr; 5776 len = dp->num_descriptors; 5777 5778 D2(NULL, "%s: start index %ld\n", __func__, start); 5779 5780 j = 0; 5781 for (i = start; j < len; i = (i + 1) % len, j++) { 5782 pub_addr = (vnet_public_desc_t *)dp->pub_addr + i; 5783 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i; 5784 5785 mutex_enter(&priv_addr->dstate_lock); 5786 if (pub_addr->hdr.dstate != VIO_DESC_DONE) { 5787 mutex_exit(&priv_addr->dstate_lock); 5788 break; 5789 } 5790 pub_addr->hdr.dstate = VIO_DESC_FREE; 5791 priv_addr->dstate = VIO_DESC_FREE; 5792 /* clear all the fields */ 5793 priv_addr->datalen = 0; 5794 pub_addr->hdr.ack = 0; 5795 mutex_exit(&priv_addr->dstate_lock); 5796 5797 D3(NULL, "claiming descp:%d pub state:0x%llx priv state 0x%llx", 5798 i, pub_addr->hdr.dstate, priv_addr->dstate); 5799 } 5800 return (j); 5801 } 5802 5803 /* 5804 * Debugging routines 5805 */ 5806 static void 5807 display_state(void) 5808 { 5809 vsw_t *vswp; 5810 vsw_port_list_t *plist; 5811 vsw_port_t *port; 5812 vsw_ldc_list_t *ldcl; 5813 vsw_ldc_t *ldcp; 5814 extern vsw_t *vsw_head; 5815 5816 cmn_err(CE_NOTE, "***** system state *****"); 5817 5818 for (vswp = vsw_head; vswp; vswp = vswp->next) { 5819 plist = &vswp->plist; 5820 READ_ENTER(&plist->lockrw); 5821 cmn_err(CE_CONT, "vsw instance %d has %d ports attached\n", 5822 vswp->instance, plist->num_ports); 5823 5824 for (port = plist->head; port != NULL; port = port->p_next) { 5825 ldcl = &port->p_ldclist; 5826 cmn_err(CE_CONT, "port %d : %d ldcs attached\n", 5827 port->p_instance, port->num_ldcs); 5828 READ_ENTER(&ldcl->lockrw); 5829 ldcp 
= ldcl->head; 5830 for (; ldcp != NULL; ldcp = ldcp->ldc_next) { 5831 cmn_err(CE_CONT, "chan %lu : dev %d : " 5832 "status %d : phase %u\n", 5833 ldcp->ldc_id, ldcp->dev_class, 5834 ldcp->ldc_status, ldcp->hphase); 5835 cmn_err(CE_CONT, "chan %lu : lsession %lu : " 5836 "psession %lu\n", ldcp->ldc_id, 5837 ldcp->local_session, ldcp->peer_session); 5838 5839 cmn_err(CE_CONT, "Inbound lane:\n"); 5840 display_lane(&ldcp->lane_in); 5841 cmn_err(CE_CONT, "Outbound lane:\n"); 5842 display_lane(&ldcp->lane_out); 5843 } 5844 RW_EXIT(&ldcl->lockrw); 5845 } 5846 RW_EXIT(&plist->lockrw); 5847 } 5848 cmn_err(CE_NOTE, "***** system state *****"); 5849 } 5850 5851 static void 5852 display_lane(lane_t *lp) 5853 { 5854 dring_info_t *drp; 5855 5856 cmn_err(CE_CONT, "ver 0x%x:0x%x : state %lx : mtu 0x%lx\n", 5857 lp->ver_major, lp->ver_minor, lp->lstate, lp->mtu); 5858 cmn_err(CE_CONT, "addr_type %d : addr 0x%lx : xmode %d\n", 5859 lp->addr_type, lp->addr, lp->xfer_mode); 5860 cmn_err(CE_CONT, "dringp 0x%lx\n", (uint64_t)lp->dringp); 5861 5862 cmn_err(CE_CONT, "Dring info:\n"); 5863 for (drp = lp->dringp; drp != NULL; drp = drp->next) { 5864 cmn_err(CE_CONT, "\tnum_desc %u : dsize %u\n", 5865 drp->num_descriptors, drp->descriptor_size); 5866 cmn_err(CE_CONT, "\thandle 0x%lx\n", drp->handle); 5867 cmn_err(CE_CONT, "\tpub_addr 0x%lx : priv_addr 0x%lx\n", 5868 (uint64_t)drp->pub_addr, (uint64_t)drp->priv_addr); 5869 cmn_err(CE_CONT, "\tident 0x%lx : end_idx %lu\n", 5870 drp->ident, drp->end_idx); 5871 display_ring(drp); 5872 } 5873 } 5874 5875 static void 5876 display_ring(dring_info_t *dringp) 5877 { 5878 uint64_t i; 5879 uint64_t priv_count = 0; 5880 uint64_t pub_count = 0; 5881 vnet_public_desc_t *pub_addr = NULL; 5882 vsw_private_desc_t *priv_addr = NULL; 5883 5884 for (i = 0; i < vsw_ntxds; i++) { 5885 if (dringp->pub_addr != NULL) { 5886 pub_addr = (vnet_public_desc_t *)dringp->pub_addr + i; 5887 5888 if (pub_addr->hdr.dstate == VIO_DESC_FREE) 5889 pub_count++; 5890 } 5891 5892 if (dringp->priv_addr != NULL) { 5893 priv_addr = (vsw_private_desc_t *)dringp->priv_addr + i; 5894 5895 if (priv_addr->dstate == VIO_DESC_FREE) 5896 priv_count++; 5897 } 5898 } 5899 cmn_err(CE_CONT, "\t%lu elements: %lu priv free: %lu pub free\n", 5900 i, priv_count, pub_count); 5901 } 5902 5903 static void 5904 dump_flags(uint64_t state) 5905 { 5906 int i; 5907 5908 typedef struct flag_name { 5909 int flag_val; 5910 char *flag_name; 5911 } flag_name_t; 5912 5913 flag_name_t flags[] = { 5914 VSW_VER_INFO_SENT, "VSW_VER_INFO_SENT", 5915 VSW_VER_INFO_RECV, "VSW_VER_INFO_RECV", 5916 VSW_VER_ACK_RECV, "VSW_VER_ACK_RECV", 5917 VSW_VER_ACK_SENT, "VSW_VER_ACK_SENT", 5918 VSW_VER_NACK_RECV, "VSW_VER_NACK_RECV", 5919 VSW_VER_NACK_SENT, "VSW_VER_NACK_SENT", 5920 VSW_ATTR_INFO_SENT, "VSW_ATTR_INFO_SENT", 5921 VSW_ATTR_INFO_RECV, "VSW_ATTR_INFO_RECV", 5922 VSW_ATTR_ACK_SENT, "VSW_ATTR_ACK_SENT", 5923 VSW_ATTR_ACK_RECV, "VSW_ATTR_ACK_RECV", 5924 VSW_ATTR_NACK_SENT, "VSW_ATTR_NACK_SENT", 5925 VSW_ATTR_NACK_RECV, "VSW_ATTR_NACK_RECV", 5926 VSW_DRING_INFO_SENT, "VSW_DRING_INFO_SENT", 5927 VSW_DRING_INFO_RECV, "VSW_DRING_INFO_RECV", 5928 VSW_DRING_ACK_SENT, "VSW_DRING_ACK_SENT", 5929 VSW_DRING_ACK_RECV, "VSW_DRING_ACK_RECV", 5930 VSW_DRING_NACK_SENT, "VSW_DRING_NACK_SENT", 5931 VSW_DRING_NACK_RECV, "VSW_DRING_NACK_RECV", 5932 VSW_RDX_INFO_SENT, "VSW_RDX_INFO_SENT", 5933 VSW_RDX_INFO_RECV, "VSW_RDX_INFO_RECV", 5934 VSW_RDX_ACK_SENT, "VSW_RDX_ACK_SENT", 5935 VSW_RDX_ACK_RECV, "VSW_RDX_ACK_RECV", 5936 VSW_RDX_NACK_SENT, "VSW_RDX_NACK_SENT", 5937 
VSW_RDX_NACK_RECV, "VSW_RDX_NACK_RECV", 5938 VSW_MCST_INFO_SENT, "VSW_MCST_INFO_SENT", 5939 VSW_MCST_INFO_RECV, "VSW_MCST_INFO_RECV", 5940 VSW_MCST_ACK_SENT, "VSW_MCST_ACK_SENT", 5941 VSW_MCST_ACK_RECV, "VSW_MCST_ACK_RECV", 5942 VSW_MCST_NACK_SENT, "VSW_MCST_NACK_SENT", 5943 VSW_MCST_NACK_RECV, "VSW_MCST_NACK_RECV", 5944 VSW_LANE_ACTIVE, "VSW_LANE_ACTIVE"}; 5945 5946 DERR(NULL, "DUMP_FLAGS: %llx\n", state); 5947 for (i = 0; i < sizeof (flags)/sizeof (flag_name_t); i++) { 5948 if (state & flags[i].flag_val) 5949 DERR(NULL, "DUMP_FLAGS %s", flags[i].flag_name); 5950 } 5951 } 5952