1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/errno.h> 31 #include <sys/debug.h> 32 #include <sys/time.h> 33 #include <sys/sysmacros.h> 34 #include <sys/systm.h> 35 #include <sys/user.h> 36 #include <sys/stropts.h> 37 #include <sys/stream.h> 38 #include <sys/strlog.h> 39 #include <sys/strsubr.h> 40 #include <sys/cmn_err.h> 41 #include <sys/cpu.h> 42 #include <sys/kmem.h> 43 #include <sys/conf.h> 44 #include <sys/ddi.h> 45 #include <sys/sunddi.h> 46 #include <sys/ksynch.h> 47 #include <sys/stat.h> 48 #include <sys/kstat.h> 49 #include <sys/vtrace.h> 50 #include <sys/strsun.h> 51 #include <sys/dlpi.h> 52 #include <sys/ethernet.h> 53 #include <net/if.h> 54 #include <sys/varargs.h> 55 #include <sys/machsystm.h> 56 #include <sys/modctl.h> 57 #include <sys/modhash.h> 58 #include <sys/mac.h> 59 #include <sys/mac_ether.h> 60 #include <sys/taskq.h> 61 #include <sys/note.h> 62 #include <sys/mach_descrip.h> 63 #include <sys/mac.h> 64 #include <sys/mdeg.h> 65 #include <sys/ldc.h> 66 #include <sys/vsw_fdb.h> 67 #include <sys/vsw.h> 68 #include <sys/vio_mailbox.h> 69 #include <sys/vnet_mailbox.h> 70 #include <sys/vnet_common.h> 71 #include <sys/vio_util.h> 72 #include <sys/sdt.h> 73 #include <sys/atomic.h> 74 #include <sys/callb.h> 75 #include <sys/vlan.h> 76 77 /* Port add/deletion/etc routines */ 78 static int vsw_port_delete(vsw_port_t *port); 79 static int vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id); 80 static int vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id); 81 static int vsw_init_ldcs(vsw_port_t *port); 82 static int vsw_uninit_ldcs(vsw_port_t *port); 83 static int vsw_ldc_init(vsw_ldc_t *ldcp); 84 static int vsw_ldc_uninit(vsw_ldc_t *ldcp); 85 static int vsw_drain_ldcs(vsw_port_t *port); 86 static int vsw_drain_port_taskq(vsw_port_t *port); 87 static void vsw_marker_task(void *); 88 static int vsw_plist_del_node(vsw_t *, vsw_port_t *port); 89 int vsw_detach_ports(vsw_t *vswp); 90 int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node); 91 mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr); 92 int vsw_port_detach(vsw_t *vswp, int p_instance); 93 int vsw_portsend(vsw_port_t *port, mblk_t *mp, mblk_t *mpt, uint32_t count); 94 int vsw_port_attach(vsw_port_t *portp); 95 vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance); 96 void vsw_vlan_unaware_port_reset(vsw_port_t *portp); 97 int vsw_send_msg(vsw_ldc_t *, void *, int, boolean_t); 98 void vsw_hio_port_reset(vsw_port_t *portp, boolean_t immediate); 99 100 /* Interrupt routines */ 101 static uint_t vsw_ldc_cb(uint64_t cb, 
caddr_t arg); 102 103 /* Handshake routines */ 104 static void vsw_ldc_reinit(vsw_ldc_t *); 105 static void vsw_process_conn_evt(vsw_ldc_t *, uint16_t); 106 static void vsw_conn_task(void *); 107 static int vsw_check_flag(vsw_ldc_t *, int, uint64_t); 108 static void vsw_next_milestone(vsw_ldc_t *); 109 static int vsw_supported_version(vio_ver_msg_t *); 110 static void vsw_set_vnet_proto_ops(vsw_ldc_t *ldcp); 111 static void vsw_reset_vnet_proto_ops(vsw_ldc_t *ldcp); 112 113 /* Data processing routines */ 114 static void vsw_process_pkt(void *); 115 static void vsw_dispatch_ctrl_task(vsw_ldc_t *, void *, vio_msg_tag_t *); 116 static void vsw_process_ctrl_pkt(void *); 117 static void vsw_process_ctrl_ver_pkt(vsw_ldc_t *, void *); 118 static void vsw_process_ctrl_attr_pkt(vsw_ldc_t *, void *); 119 static void vsw_process_ctrl_mcst_pkt(vsw_ldc_t *, void *); 120 static void vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *, void *); 121 static void vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *, void *); 122 static void vsw_process_ctrl_rdx_pkt(vsw_ldc_t *, void *); 123 static void vsw_process_data_pkt(vsw_ldc_t *, void *, vio_msg_tag_t *, 124 uint32_t); 125 static void vsw_process_data_dring_pkt(vsw_ldc_t *, void *); 126 static void vsw_process_pkt_data_nop(void *, void *, uint32_t); 127 static void vsw_process_pkt_data(void *, void *, uint32_t); 128 static void vsw_process_data_ibnd_pkt(vsw_ldc_t *, void *); 129 static void vsw_process_err_pkt(vsw_ldc_t *, void *, vio_msg_tag_t *); 130 131 /* Switching/data transmit routines */ 132 static int vsw_dringsend(vsw_ldc_t *, mblk_t *); 133 static int vsw_descrsend(vsw_ldc_t *, mblk_t *); 134 static void vsw_ldcsend_pkt(vsw_ldc_t *ldcp, mblk_t *mp); 135 static int vsw_ldcsend(vsw_ldc_t *ldcp, mblk_t *mp, uint32_t retries); 136 static int vsw_ldctx_pri(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count); 137 static int vsw_ldctx(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count); 138 139 /* Packet creation routines */ 140 static void vsw_send_ver(void *); 141 static void vsw_send_attr(vsw_ldc_t *); 142 static vio_dring_reg_msg_t *vsw_create_dring_info_pkt(vsw_ldc_t *); 143 static void vsw_send_dring_info(vsw_ldc_t *); 144 static void vsw_send_rdx(vsw_ldc_t *); 145 146 /* Dring routines */ 147 static dring_info_t *vsw_create_dring(vsw_ldc_t *); 148 static void vsw_create_privring(vsw_ldc_t *); 149 static int vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp); 150 static int vsw_dring_find_free_desc(dring_info_t *, vsw_private_desc_t **, 151 int *); 152 static dring_info_t *vsw_ident2dring(lane_t *, uint64_t); 153 static int vsw_reclaim_dring(dring_info_t *dp, int start); 154 155 static void vsw_set_lane_attr(vsw_t *, lane_t *); 156 static int vsw_check_attr(vnet_attr_msg_t *, vsw_ldc_t *); 157 static int vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg); 158 static int vsw_mem_cookie_match(ldc_mem_cookie_t *, ldc_mem_cookie_t *); 159 static int vsw_check_dring_info(vio_dring_reg_msg_t *); 160 161 /* Rcv/Tx thread routines */ 162 static void vsw_stop_tx_thread(vsw_ldc_t *ldcp); 163 static void vsw_ldc_tx_worker(void *arg); 164 static void vsw_stop_rx_thread(vsw_ldc_t *ldcp); 165 static void vsw_ldc_rx_worker(void *arg); 166 167 /* Misc support routines */ 168 static caddr_t vsw_print_ethaddr(uint8_t *addr, char *ebuf); 169 static void vsw_free_lane_resources(vsw_ldc_t *, uint64_t); 170 static int vsw_free_ring(dring_info_t *); 171 static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr); 172 static int vsw_get_same_dest_list(struct ether_header 
*ehp, 173 mblk_t **rhead, mblk_t **rtail, mblk_t **mpp); 174 static mblk_t *vsw_dupmsgchain(mblk_t *mp); 175 176 /* Debugging routines */ 177 static void dump_flags(uint64_t); 178 static void display_state(void); 179 static void display_lane(lane_t *); 180 static void display_ring(dring_info_t *); 181 182 /* 183 * Functions imported from other files. 184 */ 185 extern int vsw_set_hw(vsw_t *, vsw_port_t *, int); 186 extern int vsw_unset_hw(vsw_t *, vsw_port_t *, int); 187 extern void vsw_reconfig_hw(vsw_t *); 188 extern int vsw_add_rem_mcst(vnet_mcast_msg_t *mcst_pkt, vsw_port_t *port); 189 extern void vsw_del_mcst_port(vsw_port_t *port); 190 extern int vsw_add_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg); 191 extern int vsw_del_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg); 192 extern void vsw_fdbe_add(vsw_t *vswp, void *port); 193 extern void vsw_fdbe_del(vsw_t *vswp, struct ether_addr *eaddr); 194 extern void vsw_create_vlans(void *arg, int type); 195 extern void vsw_destroy_vlans(void *arg, int type); 196 extern void vsw_vlan_add_ids(void *arg, int type); 197 extern void vsw_vlan_remove_ids(void *arg, int type); 198 extern boolean_t vsw_frame_lookup_vid(void *arg, int caller, 199 struct ether_header *ehp, uint16_t *vidp); 200 extern mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp); 201 extern uint32_t vsw_vlan_frame_untag(void *arg, int type, mblk_t **np, 202 mblk_t **npt); 203 extern boolean_t vsw_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid); 204 extern void vsw_hio_start(vsw_t *vswp, vsw_ldc_t *ldcp); 205 extern void vsw_hio_stop(vsw_t *vswp, vsw_ldc_t *ldcp); 206 extern void vsw_process_dds_msg(vsw_t *vswp, vsw_ldc_t *ldcp, void *msg); 207 extern void vsw_hio_stop_port(vsw_port_t *portp); 208 extern void vsw_publish_macaddr(vsw_t *vswp, uint8_t *addr); 209 210 #define VSW_NUM_VMPOOLS 3 /* number of vio mblk pools */ 211 212 /* 213 * Tunables used in this file. 
214 */ 215 extern int vsw_num_handshakes; 216 extern int vsw_wretries; 217 extern int vsw_desc_delay; 218 extern int vsw_read_attempts; 219 extern int vsw_ldc_tx_delay; 220 extern int vsw_ldc_tx_retries; 221 extern boolean_t vsw_ldc_rxthr_enabled; 222 extern boolean_t vsw_ldc_txthr_enabled; 223 extern uint32_t vsw_ntxds; 224 extern uint32_t vsw_max_tx_qcount; 225 extern uint32_t vsw_chain_len; 226 extern uint32_t vsw_mblk_size1; 227 extern uint32_t vsw_mblk_size2; 228 extern uint32_t vsw_mblk_size3; 229 extern uint32_t vsw_num_mblks1; 230 extern uint32_t vsw_num_mblks2; 231 extern uint32_t vsw_num_mblks3; 232 extern boolean_t vsw_obp_ver_proto_workaround; 233 extern uint32_t vsw_publish_macaddr_count; 234 235 #define LDC_ENTER_LOCK(ldcp) \ 236 mutex_enter(&((ldcp)->ldc_cblock));\ 237 mutex_enter(&((ldcp)->ldc_rxlock));\ 238 mutex_enter(&((ldcp)->ldc_txlock)); 239 #define LDC_EXIT_LOCK(ldcp) \ 240 mutex_exit(&((ldcp)->ldc_txlock));\ 241 mutex_exit(&((ldcp)->ldc_rxlock));\ 242 mutex_exit(&((ldcp)->ldc_cblock)); 243 244 #define VSW_VER_EQ(ldcp, major, minor) \ 245 ((ldcp)->lane_out.ver_major == (major) && \ 246 (ldcp)->lane_out.ver_minor == (minor)) 247 248 #define VSW_VER_LT(ldcp, major, minor) \ 249 (((ldcp)->lane_out.ver_major < (major)) || \ 250 ((ldcp)->lane_out.ver_major == (major) && \ 251 (ldcp)->lane_out.ver_minor < (minor))) 252 253 #define VSW_VER_GTEQ(ldcp, major, minor) \ 254 (((ldcp)->lane_out.ver_major > (major)) || \ 255 ((ldcp)->lane_out.ver_major == (major) && \ 256 (ldcp)->lane_out.ver_minor >= (minor))) 257 258 /* supported versions */ 259 static ver_sup_t vsw_versions[] = { {1, 3} }; 260 261 /* 262 * For the moment the state dump routines have their own 263 * private flag. 264 */ 265 #define DUMP_STATE 0 266 267 #if DUMP_STATE 268 269 #define DUMP_TAG(tag) \ 270 { \ 271 D1(NULL, "DUMP_TAG: type 0x%llx", (tag).vio_msgtype); \ 272 D1(NULL, "DUMP_TAG: stype 0x%llx", (tag).vio_subtype); \ 273 D1(NULL, "DUMP_TAG: senv 0x%llx", (tag).vio_subtype_env); \ 274 } 275 276 #define DUMP_TAG_PTR(tag) \ 277 { \ 278 D1(NULL, "DUMP_TAG: type 0x%llx", (tag)->vio_msgtype); \ 279 D1(NULL, "DUMP_TAG: stype 0x%llx", (tag)->vio_subtype); \ 280 D1(NULL, "DUMP_TAG: senv 0x%llx", (tag)->vio_subtype_env); \ 281 } 282 283 #define DUMP_FLAGS(flags) dump_flags(flags); 284 #define DISPLAY_STATE() display_state() 285 286 #else 287 288 #define DUMP_TAG(tag) 289 #define DUMP_TAG_PTR(tag) 290 #define DUMP_FLAGS(state) 291 #define DISPLAY_STATE() 292 293 #endif /* DUMP_STATE */ 294 295 /* 296 * Attach the specified port. 297 * 298 * Returns 0 on success, 1 on failure. 299 */ 300 int 301 vsw_port_attach(vsw_port_t *port) 302 { 303 vsw_t *vswp = port->p_vswp; 304 vsw_port_list_t *plist = &vswp->plist; 305 vsw_port_t *p, **pp; 306 int i; 307 int nids = port->num_ldcs; 308 uint64_t *ldcids; 309 310 D1(vswp, "%s: enter : port %d", __func__, port->p_instance); 311 312 /* port already exists? 
*/ 313 READ_ENTER(&plist->lockrw); 314 for (p = plist->head; p != NULL; p = p->p_next) { 315 if (p->p_instance == port->p_instance) { 316 DWARN(vswp, "%s: port instance %d already attached", 317 __func__, p->p_instance); 318 RW_EXIT(&plist->lockrw); 319 return (1); 320 } 321 } 322 RW_EXIT(&plist->lockrw); 323 324 rw_init(&port->p_ldclist.lockrw, NULL, RW_DRIVER, NULL); 325 326 mutex_init(&port->tx_lock, NULL, MUTEX_DRIVER, NULL); 327 mutex_init(&port->mca_lock, NULL, MUTEX_DRIVER, NULL); 328 329 mutex_init(&port->state_lock, NULL, MUTEX_DRIVER, NULL); 330 cv_init(&port->state_cv, NULL, CV_DRIVER, NULL); 331 port->state = VSW_PORT_INIT; 332 333 D2(vswp, "%s: %d nids", __func__, nids); 334 ldcids = port->ldc_ids; 335 for (i = 0; i < nids; i++) { 336 D2(vswp, "%s: ldcid (%llx)", __func__, (uint64_t)ldcids[i]); 337 if (vsw_ldc_attach(port, (uint64_t)ldcids[i]) != 0) { 338 DERR(vswp, "%s: ldc_attach failed", __func__); 339 340 rw_destroy(&port->p_ldclist.lockrw); 341 342 cv_destroy(&port->state_cv); 343 mutex_destroy(&port->state_lock); 344 345 mutex_destroy(&port->tx_lock); 346 mutex_destroy(&port->mca_lock); 347 kmem_free(port, sizeof (vsw_port_t)); 348 return (1); 349 } 350 } 351 352 if (vswp->switching_setup_done == B_TRUE) { 353 /* 354 * If the underlying physical device has been setup, 355 * program the mac address of this port in it. 356 * Otherwise, port macaddr will be set after the physical 357 * device is successfully setup by the timeout handler. 358 */ 359 mutex_enter(&vswp->hw_lock); 360 (void) vsw_set_hw(vswp, port, VSW_VNETPORT); 361 mutex_exit(&vswp->hw_lock); 362 } 363 364 /* create the fdb entry for this port/mac address */ 365 vsw_fdbe_add(vswp, port); 366 367 vsw_create_vlans(port, VSW_VNETPORT); 368 369 WRITE_ENTER(&plist->lockrw); 370 371 /* link it into the list of ports for this vsw instance */ 372 pp = (vsw_port_t **)(&plist->head); 373 port->p_next = *pp; 374 *pp = port; 375 plist->num_ports++; 376 377 RW_EXIT(&plist->lockrw); 378 379 /* 380 * Initialise the port and any ldc's under it. 381 */ 382 (void) vsw_init_ldcs(port); 383 384 /* announce macaddr of vnet to the physical switch */ 385 if (vsw_publish_macaddr_count != 0) { /* enabled */ 386 vsw_publish_macaddr(vswp, (uint8_t *)&(port->p_macaddr)); 387 } 388 389 D1(vswp, "%s: exit", __func__); 390 return (0); 391 } 392 393 /* 394 * Detach the specified port. 395 * 396 * Returns 0 on success, 1 on failure. 397 */ 398 int 399 vsw_port_detach(vsw_t *vswp, int p_instance) 400 { 401 vsw_port_t *port = NULL; 402 vsw_port_list_t *plist = &vswp->plist; 403 404 D1(vswp, "%s: enter: port id %d", __func__, p_instance); 405 406 WRITE_ENTER(&plist->lockrw); 407 408 if ((port = vsw_lookup_port(vswp, p_instance)) == NULL) { 409 RW_EXIT(&plist->lockrw); 410 return (1); 411 } 412 413 if (vsw_plist_del_node(vswp, port)) { 414 RW_EXIT(&plist->lockrw); 415 return (1); 416 } 417 418 /* cleanup any HybridIO for this port */ 419 vsw_hio_stop_port(port); 420 421 /* 422 * No longer need to hold writer lock on port list now 423 * that we have unlinked the target port from the list. 424 */ 425 RW_EXIT(&plist->lockrw); 426 427 /* Remove the fdb entry for this port/mac address */ 428 vsw_fdbe_del(vswp, &(port->p_macaddr)); 429 vsw_destroy_vlans(port, VSW_VNETPORT); 430 431 /* Remove any multicast addresses.. */ 432 vsw_del_mcst_port(port); 433 434 /* Remove address if was programmed into HW. */ 435 mutex_enter(&vswp->hw_lock); 436 437 /* 438 * Port's address may not have been set in hardware. 
This could 439 * happen if the underlying physical device is not yet available and 440 * vsw_setup_switching_timeout() may be in progress. 441 * We remove its addr from hardware only if it has been set before. 442 */ 443 if (port->addr_set != VSW_ADDR_UNSET) 444 (void) vsw_unset_hw(vswp, port, VSW_VNETPORT); 445 446 if (vswp->recfg_reqd) 447 vsw_reconfig_hw(vswp); 448 449 mutex_exit(&vswp->hw_lock); 450 451 if (vsw_port_delete(port)) { 452 return (1); 453 } 454 455 D1(vswp, "%s: exit: p_instance(%d)", __func__, p_instance); 456 return (0); 457 } 458 459 /* 460 * Detach all active ports. 461 * 462 * Returns 0 on success, 1 on failure. 463 */ 464 int 465 vsw_detach_ports(vsw_t *vswp) 466 { 467 vsw_port_list_t *plist = &vswp->plist; 468 vsw_port_t *port = NULL; 469 470 D1(vswp, "%s: enter", __func__); 471 472 WRITE_ENTER(&plist->lockrw); 473 474 while ((port = plist->head) != NULL) { 475 if (vsw_plist_del_node(vswp, port)) { 476 DERR(vswp, "%s: Error deleting port %d" 477 " from port list", __func__, port->p_instance); 478 RW_EXIT(&plist->lockrw); 479 return (1); 480 } 481 482 /* Remove address if was programmed into HW. */ 483 mutex_enter(&vswp->hw_lock); 484 (void) vsw_unset_hw(vswp, port, VSW_VNETPORT); 485 mutex_exit(&vswp->hw_lock); 486 487 /* Remove the fdb entry for this port/mac address */ 488 vsw_fdbe_del(vswp, &(port->p_macaddr)); 489 vsw_destroy_vlans(port, VSW_VNETPORT); 490 491 /* Remove any multicast addresses.. */ 492 vsw_del_mcst_port(port); 493 494 /* 495 * No longer need to hold the lock on the port list 496 * now that we have unlinked the target port from the 497 * list. 498 */ 499 RW_EXIT(&plist->lockrw); 500 if (vsw_port_delete(port)) { 501 DERR(vswp, "%s: Error deleting port %d", 502 __func__, port->p_instance); 503 return (1); 504 } 505 WRITE_ENTER(&plist->lockrw); 506 } 507 RW_EXIT(&plist->lockrw); 508 509 D1(vswp, "%s: exit", __func__); 510 511 return (0); 512 } 513 514 /* 515 * Delete the specified port. 516 * 517 * Returns 0 on success, 1 on failure. 518 */ 519 static int 520 vsw_port_delete(vsw_port_t *port) 521 { 522 vsw_ldc_list_t *ldcl; 523 vsw_t *vswp = port->p_vswp; 524 int num_ldcs; 525 526 D1(vswp, "%s: enter : port id %d", __func__, port->p_instance); 527 528 (void) vsw_uninit_ldcs(port); 529 530 /* 531 * Wait for any pending ctrl msg tasks which reference this 532 * port to finish. 533 */ 534 if (vsw_drain_port_taskq(port)) 535 return (1); 536 537 /* 538 * Wait for any active callbacks to finish 539 */ 540 if (vsw_drain_ldcs(port)) 541 return (1); 542 543 ldcl = &port->p_ldclist; 544 num_ldcs = port->num_ldcs; 545 WRITE_ENTER(&ldcl->lockrw); 546 while (num_ldcs > 0) { 547 if (vsw_ldc_detach(port, ldcl->head->ldc_id) != 0) { 548 cmn_err(CE_WARN, "!vsw%d: unable to detach ldc %ld", 549 vswp->instance, ldcl->head->ldc_id); 550 RW_EXIT(&ldcl->lockrw); 551 port->num_ldcs = num_ldcs; 552 return (1); 553 } 554 num_ldcs--; 555 } 556 RW_EXIT(&ldcl->lockrw); 557 558 rw_destroy(&port->p_ldclist.lockrw); 559 560 mutex_destroy(&port->mca_lock); 561 mutex_destroy(&port->tx_lock); 562 563 cv_destroy(&port->state_cv); 564 mutex_destroy(&port->state_lock); 565 566 if (port->num_ldcs != 0) { 567 kmem_free(port->ldc_ids, port->num_ldcs * sizeof (uint64_t)); 568 port->num_ldcs = 0; 569 } 570 kmem_free(port, sizeof (vsw_port_t)); 571 572 D1(vswp, "%s: exit", __func__); 573 574 return (0); 575 } 576 577 /* 578 * Attach a logical domain channel (ldc) under a specified port. 579 * 580 * Returns 0 on success, 1 on failure. 
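 *
 * Cleanup on failure is driven by the "progress" bitmask below: each
 * resource that is set up successfully records a PROG_* bit, and the
 * ldc_attach_fail path tears down only what those bits say was built.
 * Roughly (a sketch of the pattern used in the function body):
 *
 *	progress |= PROG_mblks;
 *	...
 * ldc_attach_fail:
 *	if (progress & PROG_mblks)
 *		vio_destroy_multipools(&ldcp->vmp, &vswp->rxh);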
581 */ 582 static int 583 vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id) 584 { 585 vsw_t *vswp = port->p_vswp; 586 vsw_ldc_list_t *ldcl = &port->p_ldclist; 587 vsw_ldc_t *ldcp = NULL; 588 ldc_attr_t attr; 589 ldc_status_t istatus; 590 int status = DDI_FAILURE; 591 int rv; 592 char kname[MAXNAMELEN]; 593 enum { PROG_init = 0x0, PROG_mblks = 0x1, 594 PROG_callback = 0x2, PROG_rx_thread = 0x4, 595 PROG_tx_thread = 0x8} 596 progress; 597 598 progress = PROG_init; 599 600 D1(vswp, "%s: enter", __func__); 601 602 ldcp = kmem_zalloc(sizeof (vsw_ldc_t), KM_NOSLEEP); 603 if (ldcp == NULL) { 604 DERR(vswp, "%s: kmem_zalloc failed", __func__); 605 return (1); 606 } 607 ldcp->ldc_id = ldc_id; 608 609 /* Allocate pools of receive mblks */ 610 rv = vio_init_multipools(&ldcp->vmp, VSW_NUM_VMPOOLS, 611 vsw_mblk_size1, vsw_mblk_size2, vsw_mblk_size3, 612 vsw_num_mblks1, vsw_num_mblks2, vsw_num_mblks3); 613 if (rv) { 614 DWARN(vswp, "%s: unable to create free mblk pools for" 615 " channel %ld (rv %d)", __func__, ldc_id, rv); 616 kmem_free(ldcp, sizeof (vsw_ldc_t)); 617 return (1); 618 } 619 620 progress |= PROG_mblks; 621 622 mutex_init(&ldcp->ldc_txlock, NULL, MUTEX_DRIVER, NULL); 623 mutex_init(&ldcp->ldc_rxlock, NULL, MUTEX_DRIVER, NULL); 624 mutex_init(&ldcp->ldc_cblock, NULL, MUTEX_DRIVER, NULL); 625 mutex_init(&ldcp->drain_cv_lock, NULL, MUTEX_DRIVER, NULL); 626 cv_init(&ldcp->drain_cv, NULL, CV_DRIVER, NULL); 627 rw_init(&ldcp->lane_in.dlistrw, NULL, RW_DRIVER, NULL); 628 rw_init(&ldcp->lane_out.dlistrw, NULL, RW_DRIVER, NULL); 629 630 /* required for handshake with peer */ 631 ldcp->local_session = (uint64_t)ddi_get_lbolt(); 632 ldcp->peer_session = 0; 633 ldcp->session_status = 0; 634 ldcp->hss_id = 1; /* Initial handshake session id */ 635 636 (void) atomic_swap_32(&port->p_hio_capable, B_FALSE); 637 638 /* only set for outbound lane, inbound set by peer */ 639 vsw_set_lane_attr(vswp, &ldcp->lane_out); 640 641 attr.devclass = LDC_DEV_NT_SVC; 642 attr.instance = ddi_get_instance(vswp->dip); 643 attr.mode = LDC_MODE_UNRELIABLE; 644 attr.mtu = VSW_LDC_MTU; 645 status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle); 646 if (status != 0) { 647 DERR(vswp, "%s(%lld): ldc_init failed, rv (%d)", 648 __func__, ldc_id, status); 649 goto ldc_attach_fail; 650 } 651 652 if (vsw_ldc_rxthr_enabled) { 653 ldcp->rx_thr_flags = 0; 654 655 mutex_init(&ldcp->rx_thr_lock, NULL, MUTEX_DRIVER, NULL); 656 cv_init(&ldcp->rx_thr_cv, NULL, CV_DRIVER, NULL); 657 ldcp->rx_thread = thread_create(NULL, 2 * DEFAULTSTKSZ, 658 vsw_ldc_rx_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri); 659 660 progress |= PROG_rx_thread; 661 if (ldcp->rx_thread == NULL) { 662 DWARN(vswp, "%s(%lld): Failed to create worker thread", 663 __func__, ldc_id); 664 goto ldc_attach_fail; 665 } 666 } 667 668 if (vsw_ldc_txthr_enabled) { 669 ldcp->tx_thr_flags = 0; 670 ldcp->tx_mhead = ldcp->tx_mtail = NULL; 671 672 mutex_init(&ldcp->tx_thr_lock, NULL, MUTEX_DRIVER, NULL); 673 cv_init(&ldcp->tx_thr_cv, NULL, CV_DRIVER, NULL); 674 ldcp->tx_thread = thread_create(NULL, 2 * DEFAULTSTKSZ, 675 vsw_ldc_tx_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri); 676 677 progress |= PROG_tx_thread; 678 if (ldcp->tx_thread == NULL) { 679 DWARN(vswp, "%s(%lld): Failed to create worker thread", 680 __func__, ldc_id); 681 goto ldc_attach_fail; 682 } 683 } 684 685 status = ldc_reg_callback(ldcp->ldc_handle, vsw_ldc_cb, (caddr_t)ldcp); 686 if (status != 0) { 687 DERR(vswp, "%s(%lld): ldc_reg_callback failed, rv (%d)", 688 __func__, ldc_id, status); 689 (void) ldc_fini(ldcp->ldc_handle); 690 
goto ldc_attach_fail; 691 } 692 /* 693 * allocate a message for ldc_read()s, big enough to hold ctrl and 694 * data msgs, including raw data msgs used to recv priority frames. 695 */ 696 ldcp->msglen = VIO_PKT_DATA_HDRSIZE + vswp->max_frame_size; 697 ldcp->ldcmsg = kmem_alloc(ldcp->msglen, KM_SLEEP); 698 699 progress |= PROG_callback; 700 701 mutex_init(&ldcp->status_lock, NULL, MUTEX_DRIVER, NULL); 702 703 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) { 704 DERR(vswp, "%s: ldc_status failed", __func__); 705 mutex_destroy(&ldcp->status_lock); 706 goto ldc_attach_fail; 707 } 708 709 ldcp->ldc_status = istatus; 710 ldcp->ldc_port = port; 711 ldcp->ldc_vswp = vswp; 712 713 vsw_reset_vnet_proto_ops(ldcp); 714 715 (void) sprintf(kname, "%sldc0x%lx", DRV_NAME, ldcp->ldc_id); 716 ldcp->ksp = vgen_setup_kstats(DRV_NAME, vswp->instance, 717 kname, &ldcp->ldc_stats); 718 if (ldcp->ksp == NULL) { 719 DERR(vswp, "%s: kstats setup failed", __func__); 720 goto ldc_attach_fail; 721 } 722 723 /* link it into the list of channels for this port */ 724 WRITE_ENTER(&ldcl->lockrw); 725 ldcp->ldc_next = ldcl->head; 726 ldcl->head = ldcp; 727 RW_EXIT(&ldcl->lockrw); 728 729 D1(vswp, "%s: exit", __func__); 730 return (0); 731 732 ldc_attach_fail: 733 734 if (progress & PROG_callback) { 735 (void) ldc_unreg_callback(ldcp->ldc_handle); 736 kmem_free(ldcp->ldcmsg, ldcp->msglen); 737 } 738 739 if (progress & PROG_rx_thread) { 740 if (ldcp->rx_thread != NULL) { 741 vsw_stop_rx_thread(ldcp); 742 } 743 mutex_destroy(&ldcp->rx_thr_lock); 744 cv_destroy(&ldcp->rx_thr_cv); 745 } 746 747 if (progress & PROG_tx_thread) { 748 if (ldcp->tx_thread != NULL) { 749 vsw_stop_tx_thread(ldcp); 750 } 751 mutex_destroy(&ldcp->tx_thr_lock); 752 cv_destroy(&ldcp->tx_thr_cv); 753 } 754 if (ldcp->ksp != NULL) { 755 vgen_destroy_kstats(ldcp->ksp); 756 } 757 mutex_destroy(&ldcp->ldc_txlock); 758 mutex_destroy(&ldcp->ldc_rxlock); 759 mutex_destroy(&ldcp->ldc_cblock); 760 mutex_destroy(&ldcp->drain_cv_lock); 761 762 cv_destroy(&ldcp->drain_cv); 763 764 rw_destroy(&ldcp->lane_in.dlistrw); 765 rw_destroy(&ldcp->lane_out.dlistrw); 766 767 if (progress & PROG_mblks) { 768 vio_destroy_multipools(&ldcp->vmp, &vswp->rxh); 769 } 770 kmem_free(ldcp, sizeof (vsw_ldc_t)); 771 772 return (1); 773 } 774 775 /* 776 * Detach a logical domain channel (ldc) belonging to a 777 * particular port. 778 * 779 * Returns 0 on success, 1 on failure. 
 */
static int
vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id)
{
	vsw_t		*vswp = port->p_vswp;
	vsw_ldc_t	*ldcp, *prev_ldcp;
	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
	int		rv;

	prev_ldcp = ldcl->head;
	for (; (ldcp = prev_ldcp) != NULL; prev_ldcp = ldcp->ldc_next) {
		if (ldcp->ldc_id == ldc_id) {
			break;
		}
	}

	/* specified ldc id not found */
	if (ldcp == NULL) {
		DERR(vswp, "%s: ldcp = NULL", __func__);
		return (1);
	}

	D2(vswp, "%s: detaching channel %lld", __func__, ldcp->ldc_id);

	/* Stop the receive thread */
	if (ldcp->rx_thread != NULL) {
		vsw_stop_rx_thread(ldcp);
		mutex_destroy(&ldcp->rx_thr_lock);
		cv_destroy(&ldcp->rx_thr_cv);
	}
	kmem_free(ldcp->ldcmsg, ldcp->msglen);

	/* Stop the tx thread */
	if (ldcp->tx_thread != NULL) {
		vsw_stop_tx_thread(ldcp);
		mutex_destroy(&ldcp->tx_thr_lock);
		cv_destroy(&ldcp->tx_thr_cv);
		if (ldcp->tx_mhead != NULL) {
			freemsgchain(ldcp->tx_mhead);
			ldcp->tx_mhead = ldcp->tx_mtail = NULL;
			ldcp->tx_cnt = 0;
		}
	}

	/* Destroy kstats */
	vgen_destroy_kstats(ldcp->ksp);

	/*
	 * Before we can close the channel we must release any mapped
	 * resources (e.g. drings).
	 */
	vsw_free_lane_resources(ldcp, INBOUND);
	vsw_free_lane_resources(ldcp, OUTBOUND);

	/*
	 * If the close fails we are in serious trouble, as we won't
	 * be able to delete the parent port.
	 */
	if ((rv = ldc_close(ldcp->ldc_handle)) != 0) {
		DERR(vswp, "%s: error %d closing channel %lld",
		    __func__, rv, ldcp->ldc_id);
		return (1);
	}

	(void) ldc_fini(ldcp->ldc_handle);

	ldcp->ldc_status = LDC_INIT;
	ldcp->ldc_handle = NULL;
	ldcp->ldc_vswp = NULL;

	/*
	 * Most likely some mblks are still in use and
	 * have not been returned to the pool. These mblks are
	 * added to the pool that is maintained in the device instance.
	 * Another attempt will be made to destroy the pool
	 * when the device detaches.
	 */
	vio_destroy_multipools(&ldcp->vmp, &vswp->rxh);

	/* unlink it from the list */
	prev_ldcp = ldcp->ldc_next;

	mutex_destroy(&ldcp->ldc_txlock);
	mutex_destroy(&ldcp->ldc_rxlock);
	mutex_destroy(&ldcp->ldc_cblock);
	cv_destroy(&ldcp->drain_cv);
	mutex_destroy(&ldcp->drain_cv_lock);
	mutex_destroy(&ldcp->status_lock);
	rw_destroy(&ldcp->lane_in.dlistrw);
	rw_destroy(&ldcp->lane_out.dlistrw);

	kmem_free(ldcp, sizeof (vsw_ldc_t));

	return (0);
}

/*
 * Open and attempt to bring up the channel. Note that the channel
 * can only be brought up if the peer has also opened the channel.
 *
 * Returns 0 if the channel can be opened and brought up, otherwise
 * returns 1.
883 */ 884 static int 885 vsw_ldc_init(vsw_ldc_t *ldcp) 886 { 887 vsw_t *vswp = ldcp->ldc_vswp; 888 ldc_status_t istatus = 0; 889 int rv; 890 891 D1(vswp, "%s: enter", __func__); 892 893 LDC_ENTER_LOCK(ldcp); 894 895 /* don't start at 0 in case clients don't like that */ 896 ldcp->next_ident = 1; 897 898 rv = ldc_open(ldcp->ldc_handle); 899 if (rv != 0) { 900 DERR(vswp, "%s: ldc_open failed: id(%lld) rv(%d)", 901 __func__, ldcp->ldc_id, rv); 902 LDC_EXIT_LOCK(ldcp); 903 return (1); 904 } 905 906 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) { 907 DERR(vswp, "%s: unable to get status", __func__); 908 LDC_EXIT_LOCK(ldcp); 909 return (1); 910 911 } else if (istatus != LDC_OPEN && istatus != LDC_READY) { 912 DERR(vswp, "%s: id (%lld) status(%d) is not OPEN/READY", 913 __func__, ldcp->ldc_id, istatus); 914 LDC_EXIT_LOCK(ldcp); 915 return (1); 916 } 917 918 mutex_enter(&ldcp->status_lock); 919 ldcp->ldc_status = istatus; 920 mutex_exit(&ldcp->status_lock); 921 922 rv = ldc_up(ldcp->ldc_handle); 923 if (rv != 0) { 924 /* 925 * Not a fatal error for ldc_up() to fail, as peer 926 * end point may simply not be ready yet. 927 */ 928 D2(vswp, "%s: ldc_up err id(%lld) rv(%d)", __func__, 929 ldcp->ldc_id, rv); 930 LDC_EXIT_LOCK(ldcp); 931 return (1); 932 } 933 934 /* 935 * ldc_up() call is non-blocking so need to explicitly 936 * check channel status to see if in fact the channel 937 * is UP. 938 */ 939 mutex_enter(&ldcp->status_lock); 940 if (ldc_status(ldcp->ldc_handle, &ldcp->ldc_status) != 0) { 941 DERR(vswp, "%s: unable to get status", __func__); 942 mutex_exit(&ldcp->status_lock); 943 LDC_EXIT_LOCK(ldcp); 944 return (1); 945 946 } 947 948 if (ldcp->ldc_status == LDC_UP) { 949 D2(vswp, "%s: channel %ld now UP (%ld)", __func__, 950 ldcp->ldc_id, istatus); 951 mutex_exit(&ldcp->status_lock); 952 LDC_EXIT_LOCK(ldcp); 953 954 vsw_process_conn_evt(ldcp, VSW_CONN_UP); 955 return (0); 956 } 957 958 mutex_exit(&ldcp->status_lock); 959 LDC_EXIT_LOCK(ldcp); 960 961 D1(vswp, "%s: exit", __func__); 962 return (0); 963 } 964 965 /* disable callbacks on the channel */ 966 static int 967 vsw_ldc_uninit(vsw_ldc_t *ldcp) 968 { 969 vsw_t *vswp = ldcp->ldc_vswp; 970 int rv; 971 972 D1(vswp, "vsw_ldc_uninit: enter: id(%lx)\n", ldcp->ldc_id); 973 974 LDC_ENTER_LOCK(ldcp); 975 976 rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE); 977 if (rv != 0) { 978 DERR(vswp, "vsw_ldc_uninit(%lld): error disabling " 979 "interrupts (rv = %d)\n", ldcp->ldc_id, rv); 980 LDC_EXIT_LOCK(ldcp); 981 return (1); 982 } 983 984 mutex_enter(&ldcp->status_lock); 985 ldcp->ldc_status = LDC_INIT; 986 mutex_exit(&ldcp->status_lock); 987 988 LDC_EXIT_LOCK(ldcp); 989 990 D1(vswp, "vsw_ldc_uninit: exit: id(%lx)", ldcp->ldc_id); 991 992 return (0); 993 } 994 995 static int 996 vsw_init_ldcs(vsw_port_t *port) 997 { 998 vsw_ldc_list_t *ldcl = &port->p_ldclist; 999 vsw_ldc_t *ldcp; 1000 1001 READ_ENTER(&ldcl->lockrw); 1002 ldcp = ldcl->head; 1003 for (; ldcp != NULL; ldcp = ldcp->ldc_next) { 1004 (void) vsw_ldc_init(ldcp); 1005 } 1006 RW_EXIT(&ldcl->lockrw); 1007 1008 return (0); 1009 } 1010 1011 static int 1012 vsw_uninit_ldcs(vsw_port_t *port) 1013 { 1014 vsw_ldc_list_t *ldcl = &port->p_ldclist; 1015 vsw_ldc_t *ldcp; 1016 1017 D1(NULL, "vsw_uninit_ldcs: enter\n"); 1018 1019 READ_ENTER(&ldcl->lockrw); 1020 ldcp = ldcl->head; 1021 for (; ldcp != NULL; ldcp = ldcp->ldc_next) { 1022 (void) vsw_ldc_uninit(ldcp); 1023 } 1024 RW_EXIT(&ldcl->lockrw); 1025 1026 D1(NULL, "vsw_uninit_ldcs: exit\n"); 1027 1028 return (0); 1029 } 1030 1031 /* 1032 * Wait 
until the callback(s) associated with the ldcs under the specified
 * port have completed.
 *
 * Prior to this function being invoked each channel under this port
 * should have been quiesced via ldc_set_cb_mode(DISABLE).
 *
 * A short explanation of what we are doing below:
 *
 * The simplest approach would be to have a reference counter in the
 * ldc structure which is incremented/decremented by the callbacks as
 * they use the channel. The drain function could then simply disable
 * any further callbacks and cv_wait for the ref count to hit zero.
 * Unfortunately there is a tiny window here - before the callback is
 * able to get the lock on the channel it is interrupted and this
 * function gets to execute. It sees that the ref count is zero and
 * believes it is free to delete the associated data structures.
 *
 * We get around this by taking advantage of the fact that before the
 * ldc framework invokes a callback it sets a flag to indicate that
 * there is a callback active (or about to become active). If we
 * attempt to unregister a callback while this active flag is set, the
 * unregister fails with EWOULDBLOCK.
 *
 * If the unregister fails we do a cv_timedwait. We will either be
 * signaled by the callback as it is exiting (note we have to wait a
 * short period to allow the callback to return fully to the ldc
 * framework and for it to clear the active flag), or by the timer
 * expiring. In either case we again attempt the unregister. We repeat
 * this until we can successfully unregister the callback.
 *
 * The reason we use a cv_timedwait rather than a simple cv_wait is to
 * catch the case where the callback has finished but the ldc framework
 * has not yet cleared the active flag. In this case we would never get
 * a cv_signal.
 */
static int
vsw_drain_ldcs(vsw_port_t *port)
{
	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
	vsw_ldc_t	*ldcp;
	vsw_t		*vswp = port->p_vswp;

	D1(vswp, "%s: enter", __func__);

	READ_ENTER(&ldcl->lockrw);

	ldcp = ldcl->head;

	for (; ldcp != NULL; ldcp = ldcp->ldc_next) {
		/*
		 * If we can unregister the channel callback then we
		 * know that there is no callback either running or
		 * scheduled to run for this channel so move on to next
		 * channel in the list.
		 */
		mutex_enter(&ldcp->drain_cv_lock);

		/* prompt active callbacks to quit */
		ldcp->drain_state = VSW_LDC_DRAINING;

		if ((ldc_unreg_callback(ldcp->ldc_handle)) == 0) {
			D2(vswp, "%s: unreg callback for chan %ld", __func__,
			    ldcp->ldc_id);
			mutex_exit(&ldcp->drain_cv_lock);
			continue;
		} else {
			/*
			 * If we end up here we know that either 1) a callback
			 * is currently executing, 2) a callback is about to
			 * start (i.e. the ldc framework has set the active
			 * flag but has not actually invoked the callback yet),
			 * or 3) a callback has finished and returned to the
			 * ldc framework but the framework has not yet cleared
			 * the active bit.
			 *
			 * Wait for it to finish.
1107 */ 1108 while (ldc_unreg_callback(ldcp->ldc_handle) 1109 == EWOULDBLOCK) 1110 (void) cv_timedwait(&ldcp->drain_cv, 1111 &ldcp->drain_cv_lock, lbolt + hz); 1112 1113 mutex_exit(&ldcp->drain_cv_lock); 1114 D2(vswp, "%s: unreg callback for chan %ld after " 1115 "timeout", __func__, ldcp->ldc_id); 1116 } 1117 } 1118 RW_EXIT(&ldcl->lockrw); 1119 1120 D1(vswp, "%s: exit", __func__); 1121 return (0); 1122 } 1123 1124 /* 1125 * Wait until all tasks which reference this port have completed. 1126 * 1127 * Prior to this function being invoked each channel under this port 1128 * should have been quiesced via ldc_set_cb_mode(DISABLE). 1129 */ 1130 static int 1131 vsw_drain_port_taskq(vsw_port_t *port) 1132 { 1133 vsw_t *vswp = port->p_vswp; 1134 1135 D1(vswp, "%s: enter", __func__); 1136 1137 /* 1138 * Mark the port as in the process of being detached, and 1139 * dispatch a marker task to the queue so we know when all 1140 * relevant tasks have completed. 1141 */ 1142 mutex_enter(&port->state_lock); 1143 port->state = VSW_PORT_DETACHING; 1144 1145 if ((vswp->taskq_p == NULL) || 1146 (ddi_taskq_dispatch(vswp->taskq_p, vsw_marker_task, 1147 port, DDI_NOSLEEP) != DDI_SUCCESS)) { 1148 DERR(vswp, "%s: unable to dispatch marker task", 1149 __func__); 1150 mutex_exit(&port->state_lock); 1151 return (1); 1152 } 1153 1154 /* 1155 * Wait for the marker task to finish. 1156 */ 1157 while (port->state != VSW_PORT_DETACHABLE) 1158 cv_wait(&port->state_cv, &port->state_lock); 1159 1160 mutex_exit(&port->state_lock); 1161 1162 D1(vswp, "%s: exit", __func__); 1163 1164 return (0); 1165 } 1166 1167 static void 1168 vsw_marker_task(void *arg) 1169 { 1170 vsw_port_t *port = arg; 1171 vsw_t *vswp = port->p_vswp; 1172 1173 D1(vswp, "%s: enter", __func__); 1174 1175 mutex_enter(&port->state_lock); 1176 1177 /* 1178 * No further tasks should be dispatched which reference 1179 * this port so ok to mark it as safe to detach. 1180 */ 1181 port->state = VSW_PORT_DETACHABLE; 1182 1183 cv_signal(&port->state_cv); 1184 1185 mutex_exit(&port->state_lock); 1186 1187 D1(vswp, "%s: exit", __func__); 1188 } 1189 1190 vsw_port_t * 1191 vsw_lookup_port(vsw_t *vswp, int p_instance) 1192 { 1193 vsw_port_list_t *plist = &vswp->plist; 1194 vsw_port_t *port; 1195 1196 for (port = plist->head; port != NULL; port = port->p_next) { 1197 if (port->p_instance == p_instance) { 1198 D2(vswp, "vsw_lookup_port: found p_instance\n"); 1199 return (port); 1200 } 1201 } 1202 1203 return (NULL); 1204 } 1205 1206 void 1207 vsw_vlan_unaware_port_reset(vsw_port_t *portp) 1208 { 1209 vsw_ldc_list_t *ldclp; 1210 vsw_ldc_t *ldcp; 1211 1212 ldclp = &portp->p_ldclist; 1213 1214 READ_ENTER(&ldclp->lockrw); 1215 1216 /* 1217 * NOTE: for now, we will assume we have a single channel. 1218 */ 1219 if (ldclp->head == NULL) { 1220 RW_EXIT(&ldclp->lockrw); 1221 return; 1222 } 1223 ldcp = ldclp->head; 1224 1225 mutex_enter(&ldcp->ldc_cblock); 1226 1227 /* 1228 * If the peer is vlan_unaware(ver < 1.3), reset channel and terminate 1229 * the connection. See comments in vsw_set_vnet_proto_ops(). 
	 */
	if (ldcp->hphase == VSW_MILESTONE4 && VSW_VER_LT(ldcp, 1, 3) &&
	    portp->nvids != 0) {
		vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
	}

	mutex_exit(&ldcp->ldc_cblock);

	RW_EXIT(&ldclp->lockrw);
}

void
vsw_hio_port_reset(vsw_port_t *portp, boolean_t immediate)
{
	vsw_ldc_list_t	*ldclp;
	vsw_ldc_t	*ldcp;

	ldclp = &portp->p_ldclist;

	READ_ENTER(&ldclp->lockrw);

	/*
	 * NOTE: for now, we will assume we have a single channel.
	 */
	if (ldclp->head == NULL) {
		RW_EXIT(&ldclp->lockrw);
		return;
	}
	ldcp = ldclp->head;

	mutex_enter(&ldcp->ldc_cblock);

	/*
	 * If the peer is HybridIO capable (ver >= 1.3), reset the channel
	 * to trigger re-negotiation, which in turn triggers HybridIO
	 * setup/cleanup.
	 */
	if ((ldcp->hphase == VSW_MILESTONE4) &&
	    (portp->p_hio_capable == B_TRUE)) {
		if (immediate == B_TRUE) {
			(void) ldc_down(ldcp->ldc_handle);
		} else {
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
		}
	}

	mutex_exit(&ldcp->ldc_cblock);

	RW_EXIT(&ldclp->lockrw);
}

/*
 * Search for and remove the specified port from the port
 * list. Returns 0 if able to locate and remove port, otherwise
 * returns 1.
 */
static int
vsw_plist_del_node(vsw_t *vswp, vsw_port_t *port)
{
	vsw_port_list_t	*plist = &vswp->plist;
	vsw_port_t	*curr_p, *prev_p;

	if (plist->head == NULL)
		return (1);

	curr_p = prev_p = plist->head;

	while (curr_p != NULL) {
		if (curr_p == port) {
			if (prev_p == curr_p) {
				plist->head = curr_p->p_next;
			} else {
				prev_p->p_next = curr_p->p_next;
			}
			plist->num_ports--;
			break;
		} else {
			prev_p = curr_p;
			curr_p = curr_p->p_next;
		}
	}
	return (0);
}

/*
 * Interrupt handler for ldc messages.
 */
static uint_t
vsw_ldc_cb(uint64_t event, caddr_t arg)
{
	vsw_ldc_t	*ldcp = (vsw_ldc_t *)arg;
	vsw_t		*vswp = ldcp->ldc_vswp;

	D1(vswp, "%s: enter: ldcid (%lld)\n", __func__, ldcp->ldc_id);

	mutex_enter(&ldcp->ldc_cblock);
	ldcp->ldc_stats.callbacks++;

	mutex_enter(&ldcp->status_lock);
	if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) {
		mutex_exit(&ldcp->status_lock);
		mutex_exit(&ldcp->ldc_cblock);
		return (LDC_SUCCESS);
	}
	mutex_exit(&ldcp->status_lock);

	if (event & LDC_EVT_UP) {
		/*
		 * Channel has come up.
		 */
		D2(vswp, "%s: id(%ld) event(%llx) UP: status(%ld)",
		    __func__, ldcp->ldc_id, event, ldcp->ldc_status);

		vsw_process_conn_evt(ldcp, VSW_CONN_UP);

		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
	}

	if (event & LDC_EVT_READ) {
		/*
		 * Data available for reading.
		 */
		D2(vswp, "%s: id(%ld) event(%llx) data READ",
		    __func__, ldcp->ldc_id, event);

		if (ldcp->rx_thread != NULL) {
			/*
			 * If the receive thread is enabled, then
			 * wake up the receive thread to process the
			 * LDC messages.
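			 * Note that ldc_cblock is dropped around the
			 * rx_thr_lock/cv_signal handoff below and then
			 * re-acquired, presumably so that this callback
			 * never holds both locks at once while waking
			 * the worker.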
1360 */ 1361 mutex_exit(&ldcp->ldc_cblock); 1362 mutex_enter(&ldcp->rx_thr_lock); 1363 if (!(ldcp->rx_thr_flags & VSW_WTHR_DATARCVD)) { 1364 ldcp->rx_thr_flags |= VSW_WTHR_DATARCVD; 1365 cv_signal(&ldcp->rx_thr_cv); 1366 } 1367 mutex_exit(&ldcp->rx_thr_lock); 1368 mutex_enter(&ldcp->ldc_cblock); 1369 } else { 1370 vsw_process_pkt(ldcp); 1371 } 1372 1373 ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0); 1374 1375 goto vsw_cb_exit; 1376 } 1377 1378 if (event & (LDC_EVT_DOWN | LDC_EVT_RESET)) { 1379 D2(vswp, "%s: id(%ld) event (%lx) DOWN/RESET: status(%ld)", 1380 __func__, ldcp->ldc_id, event, ldcp->ldc_status); 1381 1382 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 1383 } 1384 1385 /* 1386 * Catch either LDC_EVT_WRITE which we don't support or any 1387 * unknown event. 1388 */ 1389 if (event & 1390 ~(LDC_EVT_UP | LDC_EVT_RESET | LDC_EVT_DOWN | LDC_EVT_READ)) { 1391 DERR(vswp, "%s: id(%ld) Unexpected event=(%llx) status(%ld)", 1392 __func__, ldcp->ldc_id, event, ldcp->ldc_status); 1393 } 1394 1395 vsw_cb_exit: 1396 mutex_exit(&ldcp->ldc_cblock); 1397 1398 /* 1399 * Let the drain function know we are finishing if it 1400 * is waiting. 1401 */ 1402 mutex_enter(&ldcp->drain_cv_lock); 1403 if (ldcp->drain_state == VSW_LDC_DRAINING) 1404 cv_signal(&ldcp->drain_cv); 1405 mutex_exit(&ldcp->drain_cv_lock); 1406 1407 return (LDC_SUCCESS); 1408 } 1409 1410 /* 1411 * Reinitialise data structures associated with the channel. 1412 */ 1413 static void 1414 vsw_ldc_reinit(vsw_ldc_t *ldcp) 1415 { 1416 vsw_t *vswp = ldcp->ldc_vswp; 1417 vsw_port_t *port; 1418 vsw_ldc_list_t *ldcl; 1419 1420 D1(vswp, "%s: enter", __func__); 1421 1422 port = ldcp->ldc_port; 1423 ldcl = &port->p_ldclist; 1424 1425 READ_ENTER(&ldcl->lockrw); 1426 1427 D2(vswp, "%s: in 0x%llx : out 0x%llx", __func__, 1428 ldcp->lane_in.lstate, ldcp->lane_out.lstate); 1429 1430 vsw_free_lane_resources(ldcp, INBOUND); 1431 vsw_free_lane_resources(ldcp, OUTBOUND); 1432 RW_EXIT(&ldcl->lockrw); 1433 1434 ldcp->lane_in.lstate = 0; 1435 ldcp->lane_out.lstate = 0; 1436 1437 /* Remove the fdb entry for this port/mac address */ 1438 vsw_fdbe_del(vswp, &(port->p_macaddr)); 1439 1440 /* remove the port from vlans it has been assigned to */ 1441 vsw_vlan_remove_ids(port, VSW_VNETPORT); 1442 1443 /* 1444 * Remove parent port from any multicast groups 1445 * it may have registered with. Client must resend 1446 * multicast add command after handshake completes. 1447 */ 1448 vsw_del_mcst_port(port); 1449 1450 ldcp->peer_session = 0; 1451 ldcp->session_status = 0; 1452 ldcp->hcnt = 0; 1453 ldcp->hphase = VSW_MILESTONE0; 1454 1455 vsw_reset_vnet_proto_ops(ldcp); 1456 1457 D1(vswp, "%s: exit", __func__); 1458 } 1459 1460 /* 1461 * Process a connection event. 1462 * 1463 * Note - care must be taken to ensure that this function is 1464 * not called with the dlistrw lock held. 1465 */ 1466 static void 1467 vsw_process_conn_evt(vsw_ldc_t *ldcp, uint16_t evt) 1468 { 1469 vsw_t *vswp = ldcp->ldc_vswp; 1470 vsw_conn_evt_t *conn = NULL; 1471 1472 D1(vswp, "%s: enter", __func__); 1473 1474 /* 1475 * Check if either a reset or restart event is pending 1476 * or in progress. If so just return. 1477 * 1478 * A VSW_CONN_RESET event originates either with a LDC_RESET_EVT 1479 * being received by the callback handler, or a ECONNRESET error 1480 * code being returned from a ldc_read() or ldc_write() call. 
1481 * 1482 * A VSW_CONN_RESTART event occurs when some error checking code 1483 * decides that there is a problem with data from the channel, 1484 * and that the handshake should be restarted. 1485 */ 1486 if (((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART)) && 1487 (ldstub((uint8_t *)&ldcp->reset_active))) 1488 return; 1489 1490 /* 1491 * If it is an LDC_UP event we first check the recorded 1492 * state of the channel. If this is UP then we know that 1493 * the channel moving to the UP state has already been dealt 1494 * with and don't need to dispatch a new task. 1495 * 1496 * The reason for this check is that when we do a ldc_up(), 1497 * depending on the state of the peer, we may or may not get 1498 * a LDC_UP event. As we can't depend on getting a LDC_UP evt 1499 * every time we do ldc_up() we explicitly check the channel 1500 * status to see has it come up (ldc_up() is asynch and will 1501 * complete at some undefined time), and take the appropriate 1502 * action. 1503 * 1504 * The flip side of this is that we may get a LDC_UP event 1505 * when we have already seen that the channel is up and have 1506 * dealt with that. 1507 */ 1508 mutex_enter(&ldcp->status_lock); 1509 if (evt == VSW_CONN_UP) { 1510 if ((ldcp->ldc_status == LDC_UP) || (ldcp->reset_active != 0)) { 1511 mutex_exit(&ldcp->status_lock); 1512 return; 1513 } 1514 } 1515 mutex_exit(&ldcp->status_lock); 1516 1517 /* 1518 * The transaction group id allows us to identify and discard 1519 * any tasks which are still pending on the taskq and refer 1520 * to the handshake session we are about to restart or reset. 1521 * These stale messages no longer have any real meaning. 1522 */ 1523 (void) atomic_inc_32(&ldcp->hss_id); 1524 1525 ASSERT(vswp->taskq_p != NULL); 1526 1527 if ((conn = kmem_zalloc(sizeof (vsw_conn_evt_t), KM_NOSLEEP)) == NULL) { 1528 cmn_err(CE_WARN, "!vsw%d: unable to allocate memory for" 1529 " connection event", vswp->instance); 1530 goto err_exit; 1531 } 1532 1533 conn->evt = evt; 1534 conn->ldcp = ldcp; 1535 1536 if (ddi_taskq_dispatch(vswp->taskq_p, vsw_conn_task, conn, 1537 DDI_NOSLEEP) != DDI_SUCCESS) { 1538 cmn_err(CE_WARN, "!vsw%d: Can't dispatch connection task", 1539 vswp->instance); 1540 1541 kmem_free(conn, sizeof (vsw_conn_evt_t)); 1542 goto err_exit; 1543 } 1544 1545 D1(vswp, "%s: exit", __func__); 1546 return; 1547 1548 err_exit: 1549 /* 1550 * Have mostly likely failed due to memory shortage. Clear the flag so 1551 * that future requests will at least be attempted and will hopefully 1552 * succeed. 1553 */ 1554 if ((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART)) 1555 ldcp->reset_active = 0; 1556 } 1557 1558 /* 1559 * Deal with events relating to a connection. Invoked from a taskq. 
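 *
 * In outline: for a restart the channel is first brought down to flush
 * the underlying ldc queue, any active HybridIO is stopped, the
 * per-channel state is rebuilt via vsw_ldc_reinit(), ldc_up() is
 * re-attempted, and if the channel is then UP the handshake is
 * restarted by dispatching a version message (bounded by
 * vsw_num_handshakes attempts).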
1560 */ 1561 static void 1562 vsw_conn_task(void *arg) 1563 { 1564 vsw_conn_evt_t *conn = (vsw_conn_evt_t *)arg; 1565 vsw_ldc_t *ldcp = NULL; 1566 vsw_port_t *portp; 1567 vsw_t *vswp = NULL; 1568 uint16_t evt; 1569 ldc_status_t curr_status; 1570 1571 ldcp = conn->ldcp; 1572 evt = conn->evt; 1573 vswp = ldcp->ldc_vswp; 1574 portp = ldcp->ldc_port; 1575 1576 D1(vswp, "%s: enter", __func__); 1577 1578 /* can safely free now have copied out data */ 1579 kmem_free(conn, sizeof (vsw_conn_evt_t)); 1580 1581 mutex_enter(&ldcp->status_lock); 1582 if (ldc_status(ldcp->ldc_handle, &curr_status) != 0) { 1583 cmn_err(CE_WARN, "!vsw%d: Unable to read status of " 1584 "channel %ld", vswp->instance, ldcp->ldc_id); 1585 mutex_exit(&ldcp->status_lock); 1586 return; 1587 } 1588 1589 /* 1590 * If we wish to restart the handshake on this channel, then if 1591 * the channel is UP we bring it DOWN to flush the underlying 1592 * ldc queue. 1593 */ 1594 if ((evt == VSW_CONN_RESTART) && (curr_status == LDC_UP)) 1595 (void) ldc_down(ldcp->ldc_handle); 1596 1597 if ((portp->p_hio_capable) && (portp->p_hio_enabled)) { 1598 vsw_hio_stop(vswp, ldcp); 1599 } 1600 1601 /* 1602 * re-init all the associated data structures. 1603 */ 1604 vsw_ldc_reinit(ldcp); 1605 1606 /* 1607 * Bring the channel back up (note it does no harm to 1608 * do this even if the channel is already UP, Just 1609 * becomes effectively a no-op). 1610 */ 1611 (void) ldc_up(ldcp->ldc_handle); 1612 1613 /* 1614 * Check if channel is now UP. This will only happen if 1615 * peer has also done a ldc_up(). 1616 */ 1617 if (ldc_status(ldcp->ldc_handle, &curr_status) != 0) { 1618 cmn_err(CE_WARN, "!vsw%d: Unable to read status of " 1619 "channel %ld", vswp->instance, ldcp->ldc_id); 1620 mutex_exit(&ldcp->status_lock); 1621 return; 1622 } 1623 1624 ldcp->ldc_status = curr_status; 1625 1626 /* channel UP so restart handshake by sending version info */ 1627 if (curr_status == LDC_UP) { 1628 if (ldcp->hcnt++ > vsw_num_handshakes) { 1629 cmn_err(CE_WARN, "!vsw%d: exceeded number of permitted" 1630 " handshake attempts (%d) on channel %ld", 1631 vswp->instance, ldcp->hcnt, ldcp->ldc_id); 1632 mutex_exit(&ldcp->status_lock); 1633 return; 1634 } 1635 1636 if (vsw_obp_ver_proto_workaround == B_FALSE && 1637 (ddi_taskq_dispatch(vswp->taskq_p, vsw_send_ver, ldcp, 1638 DDI_NOSLEEP) != DDI_SUCCESS)) { 1639 cmn_err(CE_WARN, "!vsw%d: Can't dispatch version task", 1640 vswp->instance); 1641 1642 /* 1643 * Don't count as valid restart attempt if couldn't 1644 * send version msg. 1645 */ 1646 if (ldcp->hcnt > 0) 1647 ldcp->hcnt--; 1648 } 1649 } 1650 1651 /* 1652 * Mark that the process is complete by clearing the flag. 1653 * 1654 * Note is it possible that the taskq dispatch above may have failed, 1655 * most likely due to memory shortage. We still clear the flag so 1656 * future attempts will at least be attempted and will hopefully 1657 * succeed. 1658 */ 1659 if ((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART)) 1660 ldcp->reset_active = 0; 1661 1662 mutex_exit(&ldcp->status_lock); 1663 1664 D1(vswp, "%s: exit", __func__); 1665 } 1666 1667 /* 1668 * returns 0 if legal for event signified by flag to have 1669 * occured at the time it did. Otherwise returns 1. 
1670 */ 1671 int 1672 vsw_check_flag(vsw_ldc_t *ldcp, int dir, uint64_t flag) 1673 { 1674 vsw_t *vswp = ldcp->ldc_vswp; 1675 uint64_t state; 1676 uint64_t phase; 1677 1678 if (dir == INBOUND) 1679 state = ldcp->lane_in.lstate; 1680 else 1681 state = ldcp->lane_out.lstate; 1682 1683 phase = ldcp->hphase; 1684 1685 switch (flag) { 1686 case VSW_VER_INFO_RECV: 1687 if (phase > VSW_MILESTONE0) { 1688 DERR(vswp, "vsw_check_flag (%d): VER_INFO_RECV" 1689 " when in state %d\n", ldcp->ldc_id, phase); 1690 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1691 return (1); 1692 } 1693 break; 1694 1695 case VSW_VER_ACK_RECV: 1696 case VSW_VER_NACK_RECV: 1697 if (!(state & VSW_VER_INFO_SENT)) { 1698 DERR(vswp, "vsw_check_flag (%d): spurious VER_ACK or " 1699 "VER_NACK when in state %d\n", ldcp->ldc_id, phase); 1700 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1701 return (1); 1702 } else 1703 state &= ~VSW_VER_INFO_SENT; 1704 break; 1705 1706 case VSW_ATTR_INFO_RECV: 1707 if ((phase < VSW_MILESTONE1) || (phase >= VSW_MILESTONE2)) { 1708 DERR(vswp, "vsw_check_flag (%d): ATTR_INFO_RECV" 1709 " when in state %d\n", ldcp->ldc_id, phase); 1710 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1711 return (1); 1712 } 1713 break; 1714 1715 case VSW_ATTR_ACK_RECV: 1716 case VSW_ATTR_NACK_RECV: 1717 if (!(state & VSW_ATTR_INFO_SENT)) { 1718 DERR(vswp, "vsw_check_flag (%d): spurious ATTR_ACK" 1719 " or ATTR_NACK when in state %d\n", 1720 ldcp->ldc_id, phase); 1721 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1722 return (1); 1723 } else 1724 state &= ~VSW_ATTR_INFO_SENT; 1725 break; 1726 1727 case VSW_DRING_INFO_RECV: 1728 if (phase < VSW_MILESTONE1) { 1729 DERR(vswp, "vsw_check_flag (%d): DRING_INFO_RECV" 1730 " when in state %d\n", ldcp->ldc_id, phase); 1731 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1732 return (1); 1733 } 1734 break; 1735 1736 case VSW_DRING_ACK_RECV: 1737 case VSW_DRING_NACK_RECV: 1738 if (!(state & VSW_DRING_INFO_SENT)) { 1739 DERR(vswp, "vsw_check_flag (%d): spurious DRING_ACK " 1740 " or DRING_NACK when in state %d\n", 1741 ldcp->ldc_id, phase); 1742 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1743 return (1); 1744 } else 1745 state &= ~VSW_DRING_INFO_SENT; 1746 break; 1747 1748 case VSW_RDX_INFO_RECV: 1749 if (phase < VSW_MILESTONE3) { 1750 DERR(vswp, "vsw_check_flag (%d): RDX_INFO_RECV" 1751 " when in state %d\n", ldcp->ldc_id, phase); 1752 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1753 return (1); 1754 } 1755 break; 1756 1757 case VSW_RDX_ACK_RECV: 1758 case VSW_RDX_NACK_RECV: 1759 if (!(state & VSW_RDX_INFO_SENT)) { 1760 DERR(vswp, "vsw_check_flag (%d): spurious RDX_ACK or " 1761 "RDX_NACK when in state %d\n", ldcp->ldc_id, phase); 1762 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1763 return (1); 1764 } else 1765 state &= ~VSW_RDX_INFO_SENT; 1766 break; 1767 1768 case VSW_MCST_INFO_RECV: 1769 if (phase < VSW_MILESTONE3) { 1770 DERR(vswp, "vsw_check_flag (%d): VSW_MCST_INFO_RECV" 1771 " when in state %d\n", ldcp->ldc_id, phase); 1772 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1773 return (1); 1774 } 1775 break; 1776 1777 default: 1778 DERR(vswp, "vsw_check_flag (%lld): unknown flag (%llx)", 1779 ldcp->ldc_id, flag); 1780 return (1); 1781 } 1782 1783 if (dir == INBOUND) 1784 ldcp->lane_in.lstate = state; 1785 else 1786 ldcp->lane_out.lstate = state; 1787 1788 D1(vswp, "vsw_check_flag (chan %lld): exit", ldcp->ldc_id); 1789 1790 return (0); 1791 } 1792 1793 void 1794 vsw_next_milestone(vsw_ldc_t *ldcp) 1795 { 1796 vsw_t *vswp = ldcp->ldc_vswp; 1797 vsw_port_t *portp = 
ldcp->ldc_port;

	D1(vswp, "%s (chan %lld): enter (phase %ld)", __func__,
	    ldcp->ldc_id, ldcp->hphase);

	DUMP_FLAGS(ldcp->lane_in.lstate);
	DUMP_FLAGS(ldcp->lane_out.lstate);

	switch (ldcp->hphase) {

	case VSW_MILESTONE0:
		/*
		 * If we haven't started to handshake with our peer,
		 * start to do so now.
		 */
		if (ldcp->lane_out.lstate == 0) {
			D2(vswp, "%s: (chan %lld) starting handshake "
			    "with peer", __func__, ldcp->ldc_id);
			vsw_process_conn_evt(ldcp, VSW_CONN_UP);
		}

		/*
		 * Only way to pass this milestone is to have successfully
		 * negotiated version info.
		 */
		if ((ldcp->lane_in.lstate & VSW_VER_ACK_SENT) &&
		    (ldcp->lane_out.lstate & VSW_VER_ACK_RECV)) {

			D2(vswp, "%s: (chan %lld) leaving milestone 0",
			    __func__, ldcp->ldc_id);

			vsw_set_vnet_proto_ops(ldcp);

			/*
			 * Next milestone is passed when attribute
			 * information has been successfully exchanged.
			 */
			ldcp->hphase = VSW_MILESTONE1;
			vsw_send_attr(ldcp);

		}
		break;

	case VSW_MILESTONE1:
		/*
		 * Only way to pass this milestone is to have successfully
		 * negotiated attribute information.
		 */
		if (ldcp->lane_in.lstate & VSW_ATTR_ACK_SENT) {

			ldcp->hphase = VSW_MILESTONE2;

			/*
			 * If the peer device has said it wishes to
			 * use descriptor rings then we send it our ring
			 * info, otherwise we just set up a private ring
			 * which uses an internal buffer.
			 */
			if ((VSW_VER_GTEQ(ldcp, 1, 2) &&
			    (ldcp->lane_in.xfer_mode & VIO_DRING_MODE_V1_2)) ||
			    (VSW_VER_LT(ldcp, 1, 2) &&
			    (ldcp->lane_in.xfer_mode ==
			    VIO_DRING_MODE_V1_0))) {
				vsw_send_dring_info(ldcp);
			}
		}
		break;

	case VSW_MILESTONE2:
		/*
		 * If peer has indicated in its attribute message that
		 * it wishes to use descriptor rings then the only way
		 * to pass this milestone is for us to have received
		 * valid dring info.
		 *
		 * If peer is not using descriptor rings then just fall
		 * through.
		 */
		if ((VSW_VER_GTEQ(ldcp, 1, 2) &&
		    (ldcp->lane_in.xfer_mode & VIO_DRING_MODE_V1_2)) ||
		    (VSW_VER_LT(ldcp, 1, 2) &&
		    (ldcp->lane_in.xfer_mode ==
		    VIO_DRING_MODE_V1_0))) {
			if (!(ldcp->lane_in.lstate & VSW_DRING_ACK_SENT))
				break;
		}

		D2(vswp, "%s: (chan %lld) leaving milestone 2",
		    __func__, ldcp->ldc_id);

		ldcp->hphase = VSW_MILESTONE3;
		vsw_send_rdx(ldcp);
		break;

	case VSW_MILESTONE3:
		/*
		 * Pass this milestone when all parameters have been
		 * successfully exchanged and RDX sent in both directions.
		 *
		 * Mark outbound lane as available to transmit data.
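		 *
		 * For reference, the overall progression is roughly:
		 *
		 *	MILESTONE0: version negotiated   -> send attr info
		 *	MILESTONE1: attr info exchanged  -> send dring info
		 *	MILESTONE2: dring info accepted  -> send RDX
		 *	MILESTONE3: RDX acked both ways  -> lane active
		 *	MILESTONE4: handshake complete, data may flow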
1897 */ 1898 if ((ldcp->lane_out.lstate & VSW_RDX_ACK_SENT) && 1899 (ldcp->lane_in.lstate & VSW_RDX_ACK_RECV)) { 1900 1901 D2(vswp, "%s: (chan %lld) leaving milestone 3", 1902 __func__, ldcp->ldc_id); 1903 D2(vswp, "%s: ** handshake complete (0x%llx : " 1904 "0x%llx) **", __func__, ldcp->lane_in.lstate, 1905 ldcp->lane_out.lstate); 1906 ldcp->lane_out.lstate |= VSW_LANE_ACTIVE; 1907 ldcp->hphase = VSW_MILESTONE4; 1908 ldcp->hcnt = 0; 1909 DISPLAY_STATE(); 1910 /* Start HIO if enabled and capable */ 1911 if ((portp->p_hio_enabled) && (portp->p_hio_capable)) { 1912 D2(vswp, "%s: start HybridIO setup", __func__); 1913 vsw_hio_start(vswp, ldcp); 1914 } 1915 } else { 1916 D2(vswp, "%s: still in milestone 3 (0x%llx : 0x%llx)", 1917 __func__, ldcp->lane_in.lstate, 1918 ldcp->lane_out.lstate); 1919 } 1920 break; 1921 1922 case VSW_MILESTONE4: 1923 D2(vswp, "%s: (chan %lld) in milestone 4", __func__, 1924 ldcp->ldc_id); 1925 break; 1926 1927 default: 1928 DERR(vswp, "%s: (chan %lld) Unknown Phase %x", __func__, 1929 ldcp->ldc_id, ldcp->hphase); 1930 } 1931 1932 D1(vswp, "%s (chan %lld): exit (phase %ld)", __func__, ldcp->ldc_id, 1933 ldcp->hphase); 1934 } 1935 1936 /* 1937 * Check if major version is supported. 1938 * 1939 * Returns 0 if finds supported major number, and if necessary 1940 * adjusts the minor field. 1941 * 1942 * Returns 1 if can't match major number exactly. Sets mjor/minor 1943 * to next lowest support values, or to zero if no other values possible. 1944 */ 1945 static int 1946 vsw_supported_version(vio_ver_msg_t *vp) 1947 { 1948 int i; 1949 1950 D1(NULL, "vsw_supported_version: enter"); 1951 1952 for (i = 0; i < VSW_NUM_VER; i++) { 1953 if (vsw_versions[i].ver_major == vp->ver_major) { 1954 /* 1955 * Matching or lower major version found. Update 1956 * minor number if necessary. 1957 */ 1958 if (vp->ver_minor > vsw_versions[i].ver_minor) { 1959 D2(NULL, "%s: adjusting minor value from %d " 1960 "to %d", __func__, vp->ver_minor, 1961 vsw_versions[i].ver_minor); 1962 vp->ver_minor = vsw_versions[i].ver_minor; 1963 } 1964 1965 return (0); 1966 } 1967 1968 /* 1969 * If the message contains a higher major version number, set 1970 * the message's major/minor versions to the current values 1971 * and return false, so this message will get resent with 1972 * these values. 1973 */ 1974 if (vsw_versions[i].ver_major < vp->ver_major) { 1975 D2(NULL, "%s: adjusting major and minor " 1976 "values to %d, %d\n", 1977 __func__, vsw_versions[i].ver_major, 1978 vsw_versions[i].ver_minor); 1979 vp->ver_major = vsw_versions[i].ver_major; 1980 vp->ver_minor = vsw_versions[i].ver_minor; 1981 return (1); 1982 } 1983 } 1984 1985 /* No match was possible, zero out fields */ 1986 vp->ver_major = 0; 1987 vp->ver_minor = 0; 1988 1989 D1(NULL, "vsw_supported_version: exit"); 1990 1991 return (1); 1992 } 1993 1994 /* 1995 * Set vnet-protocol-version dependent functions based on version. 1996 */ 1997 static void 1998 vsw_set_vnet_proto_ops(vsw_ldc_t *ldcp) 1999 { 2000 vsw_t *vswp = ldcp->ldc_vswp; 2001 lane_t *lp = &ldcp->lane_out; 2002 2003 if (VSW_VER_GTEQ(ldcp, 1, 3)) { 2004 /* 2005 * If the version negotiated with peer is >= 1.3, 2006 * set the mtu in our attributes to max_frame_size. 2007 */ 2008 lp->mtu = vswp->max_frame_size; 2009 } else { 2010 vsw_port_t *portp = ldcp->ldc_port; 2011 /* 2012 * Pre-1.3 peers expect max frame size of ETHERMAX. 
2013 * We can negotiate that size with those peers provided the 2014 * following conditions are true: 2015 * - Our max_frame_size is greater only by VLAN_TAGSZ (4). 2016 * - Only pvid is defined for our peer and there are no vids. 2017 * If the above conditions are true, then we can send/recv only 2018 * untagged frames of max size ETHERMAX. Note that pvid of the 2019 * peer can be different, as vsw has to serve the vnet in that 2020 * vlan even if itself is not assigned to that vlan. 2021 */ 2022 if ((vswp->max_frame_size == ETHERMAX + VLAN_TAGSZ) && 2023 portp->nvids == 0) { 2024 lp->mtu = ETHERMAX; 2025 } 2026 } 2027 2028 if (VSW_VER_GTEQ(ldcp, 1, 2)) { 2029 /* Versions >= 1.2 */ 2030 2031 if (VSW_PRI_ETH_DEFINED(vswp)) { 2032 /* 2033 * enable priority routines and pkt mode only if 2034 * at least one pri-eth-type is specified in MD. 2035 */ 2036 ldcp->tx = vsw_ldctx_pri; 2037 ldcp->rx_pktdata = vsw_process_pkt_data; 2038 2039 /* set xfer mode for vsw_send_attr() */ 2040 lp->xfer_mode = VIO_PKT_MODE | VIO_DRING_MODE_V1_2; 2041 } else { 2042 /* no priority eth types defined in MD */ 2043 2044 ldcp->tx = vsw_ldctx; 2045 ldcp->rx_pktdata = vsw_process_pkt_data_nop; 2046 2047 /* set xfer mode for vsw_send_attr() */ 2048 lp->xfer_mode = VIO_DRING_MODE_V1_2; 2049 } 2050 2051 } else { 2052 /* Versions prior to 1.2 */ 2053 2054 vsw_reset_vnet_proto_ops(ldcp); 2055 } 2056 } 2057 2058 /* 2059 * Reset vnet-protocol-version dependent functions to v1.0. 2060 */ 2061 static void 2062 vsw_reset_vnet_proto_ops(vsw_ldc_t *ldcp) 2063 { 2064 lane_t *lp = &ldcp->lane_out; 2065 2066 ldcp->tx = vsw_ldctx; 2067 ldcp->rx_pktdata = vsw_process_pkt_data_nop; 2068 2069 /* set xfer mode for vsw_send_attr() */ 2070 lp->xfer_mode = VIO_DRING_MODE_V1_0; 2071 } 2072 2073 /* 2074 * Main routine for processing messages received over LDC. 2075 */ 2076 static void 2077 vsw_process_pkt(void *arg) 2078 { 2079 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 2080 vsw_t *vswp = ldcp->ldc_vswp; 2081 size_t msglen; 2082 vio_msg_tag_t *tagp; 2083 uint64_t *ldcmsg; 2084 int rv = 0; 2085 2086 2087 D1(vswp, "%s enter: ldcid (%lld)\n", __func__, ldcp->ldc_id); 2088 2089 ASSERT(MUTEX_HELD(&ldcp->ldc_cblock)); 2090 2091 ldcmsg = ldcp->ldcmsg; 2092 /* 2093 * If channel is up read messages until channel is empty. 2094 */ 2095 do { 2096 msglen = ldcp->msglen; 2097 rv = ldc_read(ldcp->ldc_handle, (caddr_t)ldcmsg, &msglen); 2098 2099 if (rv != 0) { 2100 DERR(vswp, "%s :ldc_read err id(%lld) rv(%d) len(%d)\n", 2101 __func__, ldcp->ldc_id, rv, msglen); 2102 } 2103 2104 /* channel has been reset */ 2105 if (rv == ECONNRESET) { 2106 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 2107 break; 2108 } 2109 2110 if (msglen == 0) { 2111 D2(vswp, "%s: ldc_read id(%lld) NODATA", __func__, 2112 ldcp->ldc_id); 2113 break; 2114 } 2115 2116 D2(vswp, "%s: ldc_read id(%lld): msglen(%d)", __func__, 2117 ldcp->ldc_id, msglen); 2118 2119 /* 2120 * Figure out what sort of packet we have gotten by 2121 * examining the msg tag, and then switch it appropriately. 
2122 */ 2123 tagp = (vio_msg_tag_t *)ldcmsg; 2124 2125 switch (tagp->vio_msgtype) { 2126 case VIO_TYPE_CTRL: 2127 vsw_dispatch_ctrl_task(ldcp, ldcmsg, tagp); 2128 break; 2129 case VIO_TYPE_DATA: 2130 vsw_process_data_pkt(ldcp, ldcmsg, tagp, msglen); 2131 break; 2132 case VIO_TYPE_ERR: 2133 vsw_process_err_pkt(ldcp, ldcmsg, tagp); 2134 break; 2135 default: 2136 DERR(vswp, "%s: Unknown tag(%lx) ", __func__, 2137 "id(%lx)\n", tagp->vio_msgtype, ldcp->ldc_id); 2138 break; 2139 } 2140 } while (msglen); 2141 2142 D1(vswp, "%s exit: ldcid (%lld)\n", __func__, ldcp->ldc_id); 2143 } 2144 2145 /* 2146 * Dispatch a task to process a VIO control message. 2147 */ 2148 static void 2149 vsw_dispatch_ctrl_task(vsw_ldc_t *ldcp, void *cpkt, vio_msg_tag_t *tagp) 2150 { 2151 vsw_ctrl_task_t *ctaskp = NULL; 2152 vsw_port_t *port = ldcp->ldc_port; 2153 vsw_t *vswp = port->p_vswp; 2154 2155 D1(vswp, "%s: enter", __func__); 2156 2157 /* 2158 * We need to handle RDX ACK messages in-band as once they 2159 * are exchanged it is possible that we will get an 2160 * immediate (legitimate) data packet. 2161 */ 2162 if ((tagp->vio_subtype_env == VIO_RDX) && 2163 (tagp->vio_subtype == VIO_SUBTYPE_ACK)) { 2164 2165 if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_ACK_RECV)) 2166 return; 2167 2168 ldcp->lane_in.lstate |= VSW_RDX_ACK_RECV; 2169 D2(vswp, "%s (%ld) handling RDX_ACK in place " 2170 "(ostate 0x%llx : hphase %d)", __func__, 2171 ldcp->ldc_id, ldcp->lane_in.lstate, ldcp->hphase); 2172 vsw_next_milestone(ldcp); 2173 return; 2174 } 2175 2176 ctaskp = kmem_alloc(sizeof (vsw_ctrl_task_t), KM_NOSLEEP); 2177 2178 if (ctaskp == NULL) { 2179 DERR(vswp, "%s: unable to alloc space for ctrl msg", __func__); 2180 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2181 return; 2182 } 2183 2184 ctaskp->ldcp = ldcp; 2185 bcopy((def_msg_t *)cpkt, &ctaskp->pktp, sizeof (def_msg_t)); 2186 ctaskp->hss_id = ldcp->hss_id; 2187 2188 /* 2189 * Dispatch task to processing taskq if port is not in 2190 * the process of being detached. 2191 */ 2192 mutex_enter(&port->state_lock); 2193 if (port->state == VSW_PORT_INIT) { 2194 if ((vswp->taskq_p == NULL) || 2195 (ddi_taskq_dispatch(vswp->taskq_p, vsw_process_ctrl_pkt, 2196 ctaskp, DDI_NOSLEEP) != DDI_SUCCESS)) { 2197 DERR(vswp, "%s: unable to dispatch task to taskq", 2198 __func__); 2199 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2200 mutex_exit(&port->state_lock); 2201 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2202 return; 2203 } 2204 } else { 2205 DWARN(vswp, "%s: port %d detaching, not dispatching " 2206 "task", __func__, port->p_instance); 2207 } 2208 2209 mutex_exit(&port->state_lock); 2210 2211 D2(vswp, "%s: dispatched task to taskq for chan %d", __func__, 2212 ldcp->ldc_id); 2213 D1(vswp, "%s: exit", __func__); 2214 } 2215 2216 /* 2217 * Process a VIO ctrl message. Invoked from taskq. 
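 * The dispatch side (vsw_dispatch_ctrl_task above) allocated a
 * vsw_ctrl_task_t carrying a private copy of the message plus the
 * handshake session id current at dispatch time; this routine consumes
 * and then frees it.  Roughly:
 *
 *   ctaskp->ldcp    channel the message arrived on
 *   ctaskp->pktp    copy of the control message (def_msg_t sized)
 *   ctaskp->hss_id  session id at dispatch time, compared against
 *                   ldcp->hss_id below to discard stale packets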
2218 */ 2219 static void 2220 vsw_process_ctrl_pkt(void *arg) 2221 { 2222 vsw_ctrl_task_t *ctaskp = (vsw_ctrl_task_t *)arg; 2223 vsw_ldc_t *ldcp = ctaskp->ldcp; 2224 vsw_t *vswp = ldcp->ldc_vswp; 2225 vio_msg_tag_t tag; 2226 uint16_t env; 2227 2228 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 2229 2230 bcopy(&ctaskp->pktp, &tag, sizeof (vio_msg_tag_t)); 2231 env = tag.vio_subtype_env; 2232 2233 /* stale pkt check */ 2234 if (ctaskp->hss_id < ldcp->hss_id) { 2235 DWARN(vswp, "%s: discarding stale packet belonging to earlier" 2236 " (%ld) handshake session", __func__, ctaskp->hss_id); 2237 return; 2238 } 2239 2240 /* session id check */ 2241 if (ldcp->session_status & VSW_PEER_SESSION) { 2242 if (ldcp->peer_session != tag.vio_sid) { 2243 DERR(vswp, "%s (chan %d): invalid session id (%llx)", 2244 __func__, ldcp->ldc_id, tag.vio_sid); 2245 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2246 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2247 return; 2248 } 2249 } 2250 2251 /* 2252 * Switch on vio_subtype envelope, then let lower routines 2253 * decide if its an INFO, ACK or NACK packet. 2254 */ 2255 switch (env) { 2256 case VIO_VER_INFO: 2257 vsw_process_ctrl_ver_pkt(ldcp, &ctaskp->pktp); 2258 break; 2259 case VIO_DRING_REG: 2260 vsw_process_ctrl_dring_reg_pkt(ldcp, &ctaskp->pktp); 2261 break; 2262 case VIO_DRING_UNREG: 2263 vsw_process_ctrl_dring_unreg_pkt(ldcp, &ctaskp->pktp); 2264 break; 2265 case VIO_ATTR_INFO: 2266 vsw_process_ctrl_attr_pkt(ldcp, &ctaskp->pktp); 2267 break; 2268 case VNET_MCAST_INFO: 2269 vsw_process_ctrl_mcst_pkt(ldcp, &ctaskp->pktp); 2270 break; 2271 case VIO_RDX: 2272 vsw_process_ctrl_rdx_pkt(ldcp, &ctaskp->pktp); 2273 break; 2274 case VIO_DDS_INFO: 2275 vsw_process_dds_msg(vswp, ldcp, &ctaskp->pktp); 2276 break; 2277 default: 2278 DERR(vswp, "%s: unknown vio_subtype_env (%x)\n", __func__, env); 2279 } 2280 2281 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2282 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 2283 } 2284 2285 /* 2286 * Version negotiation. We can end up here either because our peer 2287 * has responded to a handshake message we have sent it, or our peer 2288 * has initiated a handshake with us. If its the former then can only 2289 * be ACK or NACK, if its the later can only be INFO. 2290 * 2291 * If its an ACK we move to the next stage of the handshake, namely 2292 * attribute exchange. If its a NACK we see if we can specify another 2293 * version, if we can't we stop. 2294 * 2295 * If it is an INFO we reset all params associated with communication 2296 * in that direction over this channel (remember connection is 2297 * essentially 2 independent simplex channels). 2298 */ 2299 void 2300 vsw_process_ctrl_ver_pkt(vsw_ldc_t *ldcp, void *pkt) 2301 { 2302 vio_ver_msg_t *ver_pkt; 2303 vsw_t *vswp = ldcp->ldc_vswp; 2304 2305 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 2306 2307 /* 2308 * We know this is a ctrl/version packet so 2309 * cast it into the correct structure. 2310 */ 2311 ver_pkt = (vio_ver_msg_t *)pkt; 2312 2313 switch (ver_pkt->tag.vio_subtype) { 2314 case VIO_SUBTYPE_INFO: 2315 D2(vswp, "vsw_process_ctrl_ver_pkt: VIO_SUBTYPE_INFO\n"); 2316 2317 /* 2318 * Record the session id, which we will use from now 2319 * until we see another VER_INFO msg. Even then the 2320 * session id in most cases will be unchanged, execpt 2321 * if channel was reset. 
2322 */ 2323 if ((ldcp->session_status & VSW_PEER_SESSION) && 2324 (ldcp->peer_session != ver_pkt->tag.vio_sid)) { 2325 DERR(vswp, "%s: updating session id for chan %lld " 2326 "from %llx to %llx", __func__, ldcp->ldc_id, 2327 ldcp->peer_session, ver_pkt->tag.vio_sid); 2328 } 2329 2330 ldcp->peer_session = ver_pkt->tag.vio_sid; 2331 ldcp->session_status |= VSW_PEER_SESSION; 2332 2333 /* Legal message at this time ? */ 2334 if (vsw_check_flag(ldcp, INBOUND, VSW_VER_INFO_RECV)) 2335 return; 2336 2337 /* 2338 * First check the device class. Currently only expect 2339 * to be talking to a network device. In the future may 2340 * also talk to another switch. 2341 */ 2342 if (ver_pkt->dev_class != VDEV_NETWORK) { 2343 DERR(vswp, "%s: illegal device class %d", __func__, 2344 ver_pkt->dev_class); 2345 2346 ver_pkt->tag.vio_sid = ldcp->local_session; 2347 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2348 2349 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 2350 2351 (void) vsw_send_msg(ldcp, (void *)ver_pkt, 2352 sizeof (vio_ver_msg_t), B_TRUE); 2353 2354 ldcp->lane_in.lstate |= VSW_VER_NACK_SENT; 2355 vsw_next_milestone(ldcp); 2356 return; 2357 } else { 2358 ldcp->dev_class = ver_pkt->dev_class; 2359 } 2360 2361 /* 2362 * Now check the version. 2363 */ 2364 if (vsw_supported_version(ver_pkt) == 0) { 2365 /* 2366 * Support this major version and possibly 2367 * adjusted minor version. 2368 */ 2369 2370 D2(vswp, "%s: accepted ver %d:%d", __func__, 2371 ver_pkt->ver_major, ver_pkt->ver_minor); 2372 2373 /* Store accepted values */ 2374 ldcp->lane_in.ver_major = ver_pkt->ver_major; 2375 ldcp->lane_in.ver_minor = ver_pkt->ver_minor; 2376 2377 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 2378 2379 ldcp->lane_in.lstate |= VSW_VER_ACK_SENT; 2380 2381 if (vsw_obp_ver_proto_workaround == B_TRUE) { 2382 /* 2383 * Send a version info message 2384 * using the accepted version that 2385 * we are about to ack. Also note that 2386 * we send our ver info before we ack. 2387 * Otherwise, as soon as receiving the 2388 * ack, obp sends attr info msg, which 2389 * breaks vsw_check_flag() invoked 2390 * from vsw_process_ctrl_attr_pkt(); 2391 * as we also need VSW_VER_ACK_RECV to 2392 * be set in lane_out.lstate, before 2393 * we can receive attr info. 2394 */ 2395 vsw_send_ver(ldcp); 2396 } 2397 } else { 2398 /* 2399 * NACK back with the next lower major/minor 2400 * pairing we support (if don't suuport any more 2401 * versions then they will be set to zero. 
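 * Illustrative exchange (version numbers hypothetical): the peer sends
 * VER_INFO 2.0 but the highest entry in vsw_versions[] is 1.3, so we
 * rewrite the message to 1.3 and NACK it; the peer can then re-send
 * VER_INFO at 1.3 (or lower), which we ACK, or NACK back 0.0 to end
 * negotiation altogether.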
2402 */ 2403 2404 D2(vswp, "%s: replying with ver %d:%d", __func__, 2405 ver_pkt->ver_major, ver_pkt->ver_minor); 2406 2407 /* Store updated values */ 2408 ldcp->lane_in.ver_major = ver_pkt->ver_major; 2409 ldcp->lane_in.ver_minor = ver_pkt->ver_minor; 2410 2411 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2412 2413 ldcp->lane_in.lstate |= VSW_VER_NACK_SENT; 2414 } 2415 2416 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 2417 ver_pkt->tag.vio_sid = ldcp->local_session; 2418 (void) vsw_send_msg(ldcp, (void *)ver_pkt, 2419 sizeof (vio_ver_msg_t), B_TRUE); 2420 2421 vsw_next_milestone(ldcp); 2422 break; 2423 2424 case VIO_SUBTYPE_ACK: 2425 D2(vswp, "%s: VIO_SUBTYPE_ACK\n", __func__); 2426 2427 if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_ACK_RECV)) 2428 return; 2429 2430 /* Store updated values */ 2431 ldcp->lane_out.ver_major = ver_pkt->ver_major; 2432 ldcp->lane_out.ver_minor = ver_pkt->ver_minor; 2433 2434 ldcp->lane_out.lstate |= VSW_VER_ACK_RECV; 2435 vsw_next_milestone(ldcp); 2436 2437 break; 2438 2439 case VIO_SUBTYPE_NACK: 2440 D2(vswp, "%s: VIO_SUBTYPE_NACK\n", __func__); 2441 2442 if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_NACK_RECV)) 2443 return; 2444 2445 /* 2446 * If our peer sent us a NACK with the ver fields set to 2447 * zero then there is nothing more we can do. Otherwise see 2448 * if we support either the version suggested, or a lesser 2449 * one. 2450 */ 2451 if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) { 2452 DERR(vswp, "%s: peer unable to negotiate any " 2453 "further.", __func__); 2454 ldcp->lane_out.lstate |= VSW_VER_NACK_RECV; 2455 vsw_next_milestone(ldcp); 2456 return; 2457 } 2458 2459 /* 2460 * Check to see if we support this major version or 2461 * a lower one. If we don't then maj/min will be set 2462 * to zero. 2463 */ 2464 (void) vsw_supported_version(ver_pkt); 2465 if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) { 2466 /* Nothing more we can do */ 2467 DERR(vswp, "%s: version negotiation failed.\n", 2468 __func__); 2469 ldcp->lane_out.lstate |= VSW_VER_NACK_RECV; 2470 vsw_next_milestone(ldcp); 2471 } else { 2472 /* found a supported major version */ 2473 ldcp->lane_out.ver_major = ver_pkt->ver_major; 2474 ldcp->lane_out.ver_minor = ver_pkt->ver_minor; 2475 2476 D2(vswp, "%s: resending with updated values (%x, %x)", 2477 __func__, ver_pkt->ver_major, ver_pkt->ver_minor); 2478 2479 ldcp->lane_out.lstate |= VSW_VER_INFO_SENT; 2480 ver_pkt->tag.vio_sid = ldcp->local_session; 2481 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO; 2482 2483 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 2484 2485 (void) vsw_send_msg(ldcp, (void *)ver_pkt, 2486 sizeof (vio_ver_msg_t), B_TRUE); 2487 2488 vsw_next_milestone(ldcp); 2489 2490 } 2491 break; 2492 2493 default: 2494 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 2495 ver_pkt->tag.vio_subtype); 2496 } 2497 2498 D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id); 2499 } 2500 2501 /* 2502 * Process an attribute packet. We can end up here either because our peer 2503 * has ACK/NACK'ed back to an earlier ATTR msg we had sent it, or our 2504 * peer has sent us an attribute INFO message 2505 * 2506 * If its an ACK we then move to the next stage of the handshake which 2507 * is to send our descriptor ring info to our peer. If its a NACK then 2508 * there is nothing more we can (currently) do. 2509 * 2510 * If we get a valid/acceptable INFO packet (and we have already negotiated 2511 * a version) we ACK back and set channel state to ATTR_RECV, otherwise we 2512 * NACK back and reset channel state to INACTIV. 
2513 * 2514 * FUTURE: in time we will probably negotiate over attributes, but for 2515 * the moment unacceptable attributes are regarded as a fatal error. 2516 * 2517 */ 2518 void 2519 vsw_process_ctrl_attr_pkt(vsw_ldc_t *ldcp, void *pkt) 2520 { 2521 vnet_attr_msg_t *attr_pkt; 2522 vsw_t *vswp = ldcp->ldc_vswp; 2523 vsw_port_t *port = ldcp->ldc_port; 2524 uint64_t macaddr = 0; 2525 int i; 2526 2527 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 2528 2529 /* 2530 * We know this is a ctrl/attr packet so 2531 * cast it into the correct structure. 2532 */ 2533 attr_pkt = (vnet_attr_msg_t *)pkt; 2534 2535 switch (attr_pkt->tag.vio_subtype) { 2536 case VIO_SUBTYPE_INFO: 2537 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 2538 2539 if (vsw_check_flag(ldcp, INBOUND, VSW_ATTR_INFO_RECV)) 2540 return; 2541 2542 /* 2543 * If the attributes are unacceptable then we NACK back. 2544 */ 2545 if (vsw_check_attr(attr_pkt, ldcp)) { 2546 2547 DERR(vswp, "%s (chan %d): invalid attributes", 2548 __func__, ldcp->ldc_id); 2549 2550 vsw_free_lane_resources(ldcp, INBOUND); 2551 2552 attr_pkt->tag.vio_sid = ldcp->local_session; 2553 attr_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2554 2555 DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt); 2556 ldcp->lane_in.lstate |= VSW_ATTR_NACK_SENT; 2557 (void) vsw_send_msg(ldcp, (void *)attr_pkt, 2558 sizeof (vnet_attr_msg_t), B_TRUE); 2559 2560 vsw_next_milestone(ldcp); 2561 return; 2562 } 2563 2564 /* 2565 * Otherwise store attributes for this lane and update 2566 * lane state. 2567 */ 2568 ldcp->lane_in.mtu = attr_pkt->mtu; 2569 ldcp->lane_in.addr = attr_pkt->addr; 2570 ldcp->lane_in.addr_type = attr_pkt->addr_type; 2571 ldcp->lane_in.xfer_mode = attr_pkt->xfer_mode; 2572 ldcp->lane_in.ack_freq = attr_pkt->ack_freq; 2573 2574 macaddr = ldcp->lane_in.addr; 2575 for (i = ETHERADDRL - 1; i >= 0; i--) { 2576 port->p_macaddr.ether_addr_octet[i] = macaddr & 0xFF; 2577 macaddr >>= 8; 2578 } 2579 2580 /* create the fdb entry for this port/mac address */ 2581 vsw_fdbe_add(vswp, port); 2582 2583 /* add the port to the specified vlans */ 2584 vsw_vlan_add_ids(port, VSW_VNETPORT); 2585 2586 /* setup device specifc xmit routines */ 2587 mutex_enter(&port->tx_lock); 2588 if ((VSW_VER_GTEQ(ldcp, 1, 2) && 2589 (ldcp->lane_in.xfer_mode & VIO_DRING_MODE_V1_2)) || 2590 (VSW_VER_LT(ldcp, 1, 2) && 2591 (ldcp->lane_in.xfer_mode == VIO_DRING_MODE_V1_0))) { 2592 D2(vswp, "%s: mode = VIO_DRING_MODE", __func__); 2593 port->transmit = vsw_dringsend; 2594 } else if (ldcp->lane_in.xfer_mode == VIO_DESC_MODE) { 2595 D2(vswp, "%s: mode = VIO_DESC_MODE", __func__); 2596 vsw_create_privring(ldcp); 2597 port->transmit = vsw_descrsend; 2598 ldcp->lane_out.xfer_mode = VIO_DESC_MODE; 2599 } 2600 2601 /* 2602 * HybridIO is supported only vnet, not by OBP. 2603 * So, set hio_capable to true only when in DRING mode. 
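 * Summarising the selection above (informal, for reference only):
 *
 *   dring mode negotiated               port->transmit = vsw_dringsend
 *   VIO_DESC_MODE (in-band, e.g. OBP)   private ring + vsw_descrsend
 *   ver >= 1.3 and dring mode           p_hio_capable = B_TRUE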
2604 */ 2605 if (VSW_VER_GTEQ(ldcp, 1, 3) && 2606 (ldcp->lane_in.xfer_mode != VIO_DESC_MODE)) { 2607 (void) atomic_swap_32(&port->p_hio_capable, B_TRUE); 2608 } else { 2609 (void) atomic_swap_32(&port->p_hio_capable, B_FALSE); 2610 } 2611 2612 mutex_exit(&port->tx_lock); 2613 2614 attr_pkt->tag.vio_sid = ldcp->local_session; 2615 attr_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 2616 2617 DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt); 2618 2619 ldcp->lane_in.lstate |= VSW_ATTR_ACK_SENT; 2620 2621 (void) vsw_send_msg(ldcp, (void *)attr_pkt, 2622 sizeof (vnet_attr_msg_t), B_TRUE); 2623 2624 vsw_next_milestone(ldcp); 2625 break; 2626 2627 case VIO_SUBTYPE_ACK: 2628 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 2629 2630 if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_ACK_RECV)) 2631 return; 2632 2633 ldcp->lane_out.lstate |= VSW_ATTR_ACK_RECV; 2634 vsw_next_milestone(ldcp); 2635 break; 2636 2637 case VIO_SUBTYPE_NACK: 2638 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 2639 2640 if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_NACK_RECV)) 2641 return; 2642 2643 ldcp->lane_out.lstate |= VSW_ATTR_NACK_RECV; 2644 vsw_next_milestone(ldcp); 2645 break; 2646 2647 default: 2648 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 2649 attr_pkt->tag.vio_subtype); 2650 } 2651 2652 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 2653 } 2654 2655 /* 2656 * Process a dring info packet. We can end up here either because our peer 2657 * has ACK/NACK'ed back to an earlier DRING msg we had sent it, or our 2658 * peer has sent us a dring INFO message. 2659 * 2660 * If we get a valid/acceptable INFO packet (and we have already negotiated 2661 * a version) we ACK back and update the lane state, otherwise we NACK back. 2662 * 2663 * FUTURE: nothing to stop client from sending us info on multiple dring's 2664 * but for the moment we will just use the first one we are given. 2665 * 2666 */ 2667 void 2668 vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *ldcp, void *pkt) 2669 { 2670 vio_dring_reg_msg_t *dring_pkt; 2671 vsw_t *vswp = ldcp->ldc_vswp; 2672 ldc_mem_info_t minfo; 2673 dring_info_t *dp, *dbp; 2674 int dring_found = 0; 2675 2676 /* 2677 * We know this is a ctrl/dring packet so 2678 * cast it into the correct structure. 2679 */ 2680 dring_pkt = (vio_dring_reg_msg_t *)pkt; 2681 2682 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 2683 2684 switch (dring_pkt->tag.vio_subtype) { 2685 case VIO_SUBTYPE_INFO: 2686 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 2687 2688 if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV)) 2689 return; 2690 2691 /* 2692 * If the dring params are unacceptable then we NACK back. 2693 */ 2694 if (vsw_check_dring_info(dring_pkt)) { 2695 2696 DERR(vswp, "%s (%lld): invalid dring info", 2697 __func__, ldcp->ldc_id); 2698 2699 vsw_free_lane_resources(ldcp, INBOUND); 2700 2701 dring_pkt->tag.vio_sid = ldcp->local_session; 2702 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2703 2704 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 2705 2706 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 2707 2708 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 2709 sizeof (vio_dring_reg_msg_t), B_TRUE); 2710 2711 vsw_next_milestone(ldcp); 2712 return; 2713 } 2714 2715 /* 2716 * Otherwise, attempt to map in the dring using the 2717 * cookie. If that succeeds we send back a unique dring 2718 * identifier that the sending side will use in future 2719 * to refer to this descriptor ring. 
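 * A hypothetical exchange (sizes made up for illustration):
 *
 *   peer: DRING_REG INFO with 1 cookie, 512 descriptors of 64 bytes
 *   vsw : ldc_mem_dring_map() the cookie, assign ident N,
 *         reply DRING_REG ACK carrying dring_ident = N
 *   peer: quotes dring_ident N in subsequent DRING_DATA messages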
2720 */ 2721 dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 2722 2723 dp->num_descriptors = dring_pkt->num_descriptors; 2724 dp->descriptor_size = dring_pkt->descriptor_size; 2725 dp->options = dring_pkt->options; 2726 dp->ncookies = dring_pkt->ncookies; 2727 2728 /* 2729 * Note: should only get one cookie. Enforced in 2730 * the ldc layer. 2731 */ 2732 bcopy(&dring_pkt->cookie[0], &dp->cookie[0], 2733 sizeof (ldc_mem_cookie_t)); 2734 2735 D2(vswp, "%s: num_desc %ld : desc_size %ld", __func__, 2736 dp->num_descriptors, dp->descriptor_size); 2737 D2(vswp, "%s: options 0x%lx: ncookies %ld", __func__, 2738 dp->options, dp->ncookies); 2739 2740 if ((ldc_mem_dring_map(ldcp->ldc_handle, &dp->cookie[0], 2741 dp->ncookies, dp->num_descriptors, dp->descriptor_size, 2742 LDC_DIRECT_MAP, &(dp->handle))) != 0) { 2743 2744 DERR(vswp, "%s: dring_map failed\n", __func__); 2745 2746 kmem_free(dp, sizeof (dring_info_t)); 2747 vsw_free_lane_resources(ldcp, INBOUND); 2748 2749 dring_pkt->tag.vio_sid = ldcp->local_session; 2750 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2751 2752 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 2753 2754 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 2755 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 2756 sizeof (vio_dring_reg_msg_t), B_TRUE); 2757 2758 vsw_next_milestone(ldcp); 2759 return; 2760 } 2761 2762 if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) { 2763 2764 DERR(vswp, "%s: dring_addr failed\n", __func__); 2765 2766 kmem_free(dp, sizeof (dring_info_t)); 2767 vsw_free_lane_resources(ldcp, INBOUND); 2768 2769 dring_pkt->tag.vio_sid = ldcp->local_session; 2770 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2771 2772 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 2773 2774 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 2775 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 2776 sizeof (vio_dring_reg_msg_t), B_TRUE); 2777 2778 vsw_next_milestone(ldcp); 2779 return; 2780 } else { 2781 /* store the address of the pub part of ring */ 2782 dp->pub_addr = minfo.vaddr; 2783 2784 /* cache the dring mtype */ 2785 dp->dring_mtype = minfo.mtype; 2786 } 2787 2788 /* no private section as we are importing */ 2789 dp->priv_addr = NULL; 2790 2791 /* 2792 * Using simple mono increasing int for ident at 2793 * the moment. 2794 */ 2795 dp->ident = ldcp->next_ident; 2796 ldcp->next_ident++; 2797 2798 dp->end_idx = 0; 2799 dp->next = NULL; 2800 2801 /* 2802 * Link it onto the end of the list of drings 2803 * for this lane. 2804 */ 2805 if (ldcp->lane_in.dringp == NULL) { 2806 D2(vswp, "%s: adding first INBOUND dring", __func__); 2807 ldcp->lane_in.dringp = dp; 2808 } else { 2809 dbp = ldcp->lane_in.dringp; 2810 2811 while (dbp->next != NULL) 2812 dbp = dbp->next; 2813 2814 dbp->next = dp; 2815 } 2816 2817 /* acknowledge it */ 2818 dring_pkt->tag.vio_sid = ldcp->local_session; 2819 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 2820 dring_pkt->dring_ident = dp->ident; 2821 2822 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 2823 sizeof (vio_dring_reg_msg_t), B_TRUE); 2824 2825 ldcp->lane_in.lstate |= VSW_DRING_ACK_SENT; 2826 vsw_next_milestone(ldcp); 2827 break; 2828 2829 case VIO_SUBTYPE_ACK: 2830 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 2831 2832 if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_ACK_RECV)) 2833 return; 2834 2835 /* 2836 * Peer is acknowledging our dring info and will have 2837 * sent us a dring identifier which we will use to 2838 * refer to this ring w.r.t. our peer. 
2839 */ 2840 dp = ldcp->lane_out.dringp; 2841 if (dp != NULL) { 2842 /* 2843 * Find the ring this ident should be associated 2844 * with. 2845 */ 2846 if (vsw_dring_match(dp, dring_pkt)) { 2847 dring_found = 1; 2848 2849 } else while (dp != NULL) { 2850 if (vsw_dring_match(dp, dring_pkt)) { 2851 dring_found = 1; 2852 break; 2853 } 2854 dp = dp->next; 2855 } 2856 2857 if (dring_found == 0) { 2858 DERR(NULL, "%s: unrecognised ring cookie", 2859 __func__); 2860 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2861 return; 2862 } 2863 2864 } else { 2865 DERR(vswp, "%s: DRING ACK received but no drings " 2866 "allocated", __func__); 2867 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2868 return; 2869 } 2870 2871 /* store ident */ 2872 dp->ident = dring_pkt->dring_ident; 2873 ldcp->lane_out.lstate |= VSW_DRING_ACK_RECV; 2874 vsw_next_milestone(ldcp); 2875 break; 2876 2877 case VIO_SUBTYPE_NACK: 2878 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 2879 2880 if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_NACK_RECV)) 2881 return; 2882 2883 ldcp->lane_out.lstate |= VSW_DRING_NACK_RECV; 2884 vsw_next_milestone(ldcp); 2885 break; 2886 2887 default: 2888 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 2889 dring_pkt->tag.vio_subtype); 2890 } 2891 2892 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 2893 } 2894 2895 /* 2896 * Process a request from peer to unregister a dring. 2897 * 2898 * For the moment we just restart the handshake if our 2899 * peer endpoint attempts to unregister a dring. 2900 */ 2901 void 2902 vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *ldcp, void *pkt) 2903 { 2904 vsw_t *vswp = ldcp->ldc_vswp; 2905 vio_dring_unreg_msg_t *dring_pkt; 2906 2907 /* 2908 * We know this is a ctrl/dring packet so 2909 * cast it into the correct structure. 2910 */ 2911 dring_pkt = (vio_dring_unreg_msg_t *)pkt; 2912 2913 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 2914 2915 switch (dring_pkt->tag.vio_subtype) { 2916 case VIO_SUBTYPE_INFO: 2917 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 2918 2919 DWARN(vswp, "%s: restarting handshake..", __func__); 2920 break; 2921 2922 case VIO_SUBTYPE_ACK: 2923 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 2924 2925 DWARN(vswp, "%s: restarting handshake..", __func__); 2926 break; 2927 2928 case VIO_SUBTYPE_NACK: 2929 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 2930 2931 DWARN(vswp, "%s: restarting handshake..", __func__); 2932 break; 2933 2934 default: 2935 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 2936 dring_pkt->tag.vio_subtype); 2937 } 2938 2939 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2940 2941 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 2942 } 2943 2944 #define SND_MCST_NACK(ldcp, pkt) \ 2945 pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \ 2946 pkt->tag.vio_sid = ldcp->local_session; \ 2947 (void) vsw_send_msg(ldcp, (void *)pkt, \ 2948 sizeof (vnet_mcast_msg_t), B_TRUE); 2949 2950 /* 2951 * Process a multicast request from a vnet. 2952 * 2953 * Vnet's specify a multicast address that they are interested in. This 2954 * address is used as a key into the hash table which forms the multicast 2955 * forwarding database (mFDB). 2956 * 2957 * The table keys are the multicast addresses, while the table entries 2958 * are pointers to lists of ports which wish to receive packets for the 2959 * specified multicast address. 2960 * 2961 * When a multicast packet is being switched we use the address as a key 2962 * into the hash table, and then walk the appropriate port list forwarding 2963 * the pkt to each port in turn. 
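 *
 * For example (addresses hypothetical): if ports 1 and 3 have both
 * registered interest in 01:00:5E:00:00:01, the mFDB entry keyed by
 * that address holds the list { port1, port3 }, and a frame switched
 * to that address is replicated to both ports.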
2964 * 2965 * If a vnet is no longer interested in a particular multicast grouping 2966 * we simply find the correct location in the hash table and then delete 2967 * the relevant port from the port list. 2968 * 2969 * To deal with the case whereby a port is being deleted without first 2970 * removing itself from the lists in the hash table, we maintain a list 2971 * of multicast addresses the port has registered an interest in, within 2972 * the port structure itself. We then simply walk that list of addresses 2973 * using them as keys into the hash table and remove the port from the 2974 * appropriate lists. 2975 */ 2976 static void 2977 vsw_process_ctrl_mcst_pkt(vsw_ldc_t *ldcp, void *pkt) 2978 { 2979 vnet_mcast_msg_t *mcst_pkt; 2980 vsw_port_t *port = ldcp->ldc_port; 2981 vsw_t *vswp = ldcp->ldc_vswp; 2982 int i; 2983 2984 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 2985 2986 /* 2987 * We know this is a ctrl/mcast packet so 2988 * cast it into the correct structure. 2989 */ 2990 mcst_pkt = (vnet_mcast_msg_t *)pkt; 2991 2992 switch (mcst_pkt->tag.vio_subtype) { 2993 case VIO_SUBTYPE_INFO: 2994 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 2995 2996 /* 2997 * Check if in correct state to receive a multicast 2998 * message (i.e. handshake complete). If not reset 2999 * the handshake. 3000 */ 3001 if (vsw_check_flag(ldcp, INBOUND, VSW_MCST_INFO_RECV)) 3002 return; 3003 3004 /* 3005 * Before attempting to add or remove address check 3006 * that they are valid multicast addresses. 3007 * If not, then NACK back. 3008 */ 3009 for (i = 0; i < mcst_pkt->count; i++) { 3010 if ((mcst_pkt->mca[i].ether_addr_octet[0] & 01) != 1) { 3011 DERR(vswp, "%s: invalid multicast address", 3012 __func__); 3013 SND_MCST_NACK(ldcp, mcst_pkt); 3014 return; 3015 } 3016 } 3017 3018 /* 3019 * Now add/remove the addresses. If this fails we 3020 * NACK back. 3021 */ 3022 if (vsw_add_rem_mcst(mcst_pkt, port) != 0) { 3023 SND_MCST_NACK(ldcp, mcst_pkt); 3024 return; 3025 } 3026 3027 mcst_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3028 mcst_pkt->tag.vio_sid = ldcp->local_session; 3029 3030 DUMP_TAG_PTR((vio_msg_tag_t *)mcst_pkt); 3031 3032 (void) vsw_send_msg(ldcp, (void *)mcst_pkt, 3033 sizeof (vnet_mcast_msg_t), B_TRUE); 3034 break; 3035 3036 case VIO_SUBTYPE_ACK: 3037 DWARN(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3038 3039 /* 3040 * We shouldn't ever get a multicast ACK message as 3041 * at the moment we never request multicast addresses 3042 * to be set on some other device. This may change in 3043 * the future if we have cascading switches. 3044 */ 3045 if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_ACK_RECV)) 3046 return; 3047 3048 /* Do nothing */ 3049 break; 3050 3051 case VIO_SUBTYPE_NACK: 3052 DWARN(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3053 3054 /* 3055 * We shouldn't get a multicast NACK packet for the 3056 * same reasons as we shouldn't get a ACK packet. 3057 */ 3058 if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_NACK_RECV)) 3059 return; 3060 3061 /* Do nothing */ 3062 break; 3063 3064 default: 3065 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 3066 mcst_pkt->tag.vio_subtype); 3067 } 3068 3069 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3070 } 3071 3072 static void 3073 vsw_process_ctrl_rdx_pkt(vsw_ldc_t *ldcp, void *pkt) 3074 { 3075 vio_rdx_msg_t *rdx_pkt; 3076 vsw_t *vswp = ldcp->ldc_vswp; 3077 3078 /* 3079 * We know this is a ctrl/rdx packet so 3080 * cast it into the correct structure. 
3081 */ 3082 rdx_pkt = (vio_rdx_msg_t *)pkt; 3083 3084 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 3085 3086 switch (rdx_pkt->tag.vio_subtype) { 3087 case VIO_SUBTYPE_INFO: 3088 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3089 3090 if (vsw_check_flag(ldcp, OUTBOUND, VSW_RDX_INFO_RECV)) 3091 return; 3092 3093 rdx_pkt->tag.vio_sid = ldcp->local_session; 3094 rdx_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3095 3096 DUMP_TAG_PTR((vio_msg_tag_t *)rdx_pkt); 3097 3098 ldcp->lane_out.lstate |= VSW_RDX_ACK_SENT; 3099 3100 (void) vsw_send_msg(ldcp, (void *)rdx_pkt, 3101 sizeof (vio_rdx_msg_t), B_TRUE); 3102 3103 vsw_next_milestone(ldcp); 3104 break; 3105 3106 case VIO_SUBTYPE_ACK: 3107 /* 3108 * Should be handled in-band by callback handler. 3109 */ 3110 DERR(vswp, "%s: Unexpected VIO_SUBTYPE_ACK", __func__); 3111 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3112 break; 3113 3114 case VIO_SUBTYPE_NACK: 3115 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3116 3117 if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_NACK_RECV)) 3118 return; 3119 3120 ldcp->lane_in.lstate |= VSW_RDX_NACK_RECV; 3121 vsw_next_milestone(ldcp); 3122 break; 3123 3124 default: 3125 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 3126 rdx_pkt->tag.vio_subtype); 3127 } 3128 3129 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3130 } 3131 3132 static void 3133 vsw_process_data_pkt(vsw_ldc_t *ldcp, void *dpkt, vio_msg_tag_t *tagp, 3134 uint32_t msglen) 3135 { 3136 uint16_t env = tagp->vio_subtype_env; 3137 vsw_t *vswp = ldcp->ldc_vswp; 3138 3139 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3140 3141 /* session id check */ 3142 if (ldcp->session_status & VSW_PEER_SESSION) { 3143 if (ldcp->peer_session != tagp->vio_sid) { 3144 DERR(vswp, "%s (chan %d): invalid session id (%llx)", 3145 __func__, ldcp->ldc_id, tagp->vio_sid); 3146 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3147 return; 3148 } 3149 } 3150 3151 /* 3152 * It is an error for us to be getting data packets 3153 * before the handshake has completed. 3154 */ 3155 if (ldcp->hphase != VSW_MILESTONE4) { 3156 DERR(vswp, "%s: got data packet before handshake complete " 3157 "hphase %d (%x: %x)", __func__, ldcp->hphase, 3158 ldcp->lane_in.lstate, ldcp->lane_out.lstate); 3159 DUMP_FLAGS(ldcp->lane_in.lstate); 3160 DUMP_FLAGS(ldcp->lane_out.lstate); 3161 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3162 return; 3163 } 3164 3165 /* 3166 * To reduce the locking contention, release the 3167 * ldc_cblock here and re-acquire it once we are done 3168 * receiving packets. 3169 */ 3170 mutex_exit(&ldcp->ldc_cblock); 3171 mutex_enter(&ldcp->ldc_rxlock); 3172 3173 /* 3174 * Switch on vio_subtype envelope, then let lower routines 3175 * decide if its an INFO, ACK or NACK packet. 
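 * For reference, the three data envelopes handled below are:
 *
 *   VIO_DRING_DATA  descriptor ring mode (the normal data path)
 *   VIO_PKT_DATA    frame carried within the message itself
 *                   (priority-eth-type frames)
 *   VIO_DESC_DATA   in-band descriptors (typically from OBP)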
3176 */ 3177 if (env == VIO_DRING_DATA) { 3178 vsw_process_data_dring_pkt(ldcp, dpkt); 3179 } else if (env == VIO_PKT_DATA) { 3180 ldcp->rx_pktdata(ldcp, dpkt, msglen); 3181 } else if (env == VIO_DESC_DATA) { 3182 vsw_process_data_ibnd_pkt(ldcp, dpkt); 3183 } else { 3184 DERR(vswp, "%s: unknown vio_subtype_env (%x)\n", __func__, env); 3185 } 3186 3187 mutex_exit(&ldcp->ldc_rxlock); 3188 mutex_enter(&ldcp->ldc_cblock); 3189 3190 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3191 } 3192 3193 #define SND_DRING_NACK(ldcp, pkt) \ 3194 pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \ 3195 pkt->tag.vio_sid = ldcp->local_session; \ 3196 (void) vsw_send_msg(ldcp, (void *)pkt, \ 3197 sizeof (vio_dring_msg_t), B_TRUE); 3198 3199 static void 3200 vsw_process_data_dring_pkt(vsw_ldc_t *ldcp, void *dpkt) 3201 { 3202 vio_dring_msg_t *dring_pkt; 3203 vnet_public_desc_t desc, *pub_addr = NULL; 3204 vsw_private_desc_t *priv_addr = NULL; 3205 dring_info_t *dp = NULL; 3206 vsw_t *vswp = ldcp->ldc_vswp; 3207 mblk_t *mp = NULL; 3208 mblk_t *bp = NULL; 3209 mblk_t *bpt = NULL; 3210 size_t nbytes = 0; 3211 uint64_t chain = 0; 3212 uint64_t len; 3213 uint32_t pos, start; 3214 uint32_t range_start, range_end; 3215 int32_t end, num, cnt = 0; 3216 int i, rv, rng_rv = 0, msg_rv = 0; 3217 boolean_t prev_desc_ack = B_FALSE; 3218 int read_attempts = 0; 3219 struct ether_header *ehp; 3220 3221 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3222 3223 /* 3224 * We know this is a data/dring packet so 3225 * cast it into the correct structure. 3226 */ 3227 dring_pkt = (vio_dring_msg_t *)dpkt; 3228 3229 /* 3230 * Switch on the vio_subtype. If its INFO then we need to 3231 * process the data. If its an ACK we need to make sure 3232 * it makes sense (i.e did we send an earlier data/info), 3233 * and if its a NACK then we maybe attempt a retry. 3234 */ 3235 switch (dring_pkt->tag.vio_subtype) { 3236 case VIO_SUBTYPE_INFO: 3237 D2(vswp, "%s(%lld): VIO_SUBTYPE_INFO", __func__, ldcp->ldc_id); 3238 3239 READ_ENTER(&ldcp->lane_in.dlistrw); 3240 if ((dp = vsw_ident2dring(&ldcp->lane_in, 3241 dring_pkt->dring_ident)) == NULL) { 3242 RW_EXIT(&ldcp->lane_in.dlistrw); 3243 3244 DERR(vswp, "%s(%lld): unable to find dring from " 3245 "ident 0x%llx", __func__, ldcp->ldc_id, 3246 dring_pkt->dring_ident); 3247 3248 SND_DRING_NACK(ldcp, dring_pkt); 3249 return; 3250 } 3251 3252 start = pos = dring_pkt->start_idx; 3253 end = dring_pkt->end_idx; 3254 len = dp->num_descriptors; 3255 3256 range_start = range_end = pos; 3257 3258 D2(vswp, "%s(%lld): start index %ld : end %ld\n", 3259 __func__, ldcp->ldc_id, start, end); 3260 3261 if (end == -1) { 3262 num = -1; 3263 } else if (end >= 0) { 3264 num = end >= pos ? 
end - pos + 1: (len - pos + 1) + end; 3265 3266 /* basic sanity check */ 3267 if (end > len) { 3268 RW_EXIT(&ldcp->lane_in.dlistrw); 3269 DERR(vswp, "%s(%lld): endpoint %lld outside " 3270 "ring length %lld", __func__, 3271 ldcp->ldc_id, end, len); 3272 3273 SND_DRING_NACK(ldcp, dring_pkt); 3274 return; 3275 } 3276 } else { 3277 RW_EXIT(&ldcp->lane_in.dlistrw); 3278 DERR(vswp, "%s(%lld): invalid endpoint %lld", 3279 __func__, ldcp->ldc_id, end); 3280 SND_DRING_NACK(ldcp, dring_pkt); 3281 return; 3282 } 3283 3284 while (cnt != num) { 3285 vsw_recheck_desc: 3286 pub_addr = (vnet_public_desc_t *)dp->pub_addr + pos; 3287 3288 if ((rng_rv = vnet_dring_entry_copy(pub_addr, 3289 &desc, dp->dring_mtype, dp->handle, 3290 pos, pos)) != 0) { 3291 DERR(vswp, "%s(%lld): unable to copy " 3292 "descriptor at pos %d: err %d", 3293 __func__, pos, ldcp->ldc_id, rng_rv); 3294 ldcp->ldc_stats.ierrors++; 3295 break; 3296 } 3297 3298 /* 3299 * When given a bounded range of descriptors 3300 * to process, its an error to hit a descriptor 3301 * which is not ready. In the non-bounded case 3302 * (end_idx == -1) this simply indicates we have 3303 * reached the end of the current active range. 3304 */ 3305 if (desc.hdr.dstate != VIO_DESC_READY) { 3306 /* unbound - no error */ 3307 if (end == -1) { 3308 if (read_attempts == vsw_read_attempts) 3309 break; 3310 3311 delay(drv_usectohz(vsw_desc_delay)); 3312 read_attempts++; 3313 goto vsw_recheck_desc; 3314 } 3315 3316 /* bounded - error - so NACK back */ 3317 RW_EXIT(&ldcp->lane_in.dlistrw); 3318 DERR(vswp, "%s(%lld): descriptor not READY " 3319 "(%d)", __func__, ldcp->ldc_id, 3320 desc.hdr.dstate); 3321 SND_DRING_NACK(ldcp, dring_pkt); 3322 return; 3323 } 3324 3325 DTRACE_PROBE1(read_attempts, int, read_attempts); 3326 3327 range_end = pos; 3328 3329 /* 3330 * If we ACK'd the previous descriptor then now 3331 * record the new range start position for later 3332 * ACK's. 3333 */ 3334 if (prev_desc_ack) { 3335 range_start = pos; 3336 3337 D2(vswp, "%s(%lld): updating range start to be " 3338 "%d", __func__, ldcp->ldc_id, range_start); 3339 3340 prev_desc_ack = B_FALSE; 3341 } 3342 3343 D2(vswp, "%s(%lld): processing desc %lld at pos" 3344 " 0x%llx : dstate 0x%lx : datalen 0x%lx", 3345 __func__, ldcp->ldc_id, pos, &desc, 3346 desc.hdr.dstate, desc.nbytes); 3347 3348 /* 3349 * Ensure that we ask ldc for an aligned 3350 * number of bytes. Data is padded to align on 8 3351 * byte boundary, desc.nbytes is actual data length, 3352 * i.e. minus that padding. 3353 */ 3354 nbytes = (desc.nbytes + VNET_IPALIGN + 7) & ~7; 3355 3356 mp = vio_multipool_allocb(&ldcp->vmp, nbytes); 3357 if (mp == NULL) { 3358 ldcp->ldc_stats.rx_vio_allocb_fail++; 3359 /* 3360 * No free receive buffers available, so 3361 * fallback onto allocb(9F). Make sure that 3362 * we get a data buffer which is a multiple 3363 * of 8 as this is required by ldc_mem_copy. 
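 * For illustration (assuming VNET_IPALIGN is 6): a 60 byte frame is
 * requested as (60 + 6 + 7) & ~7 = 72 bytes, and the fallback allocb()
 * below similarly over-allocates by VNET_IPALIGN + 8 so that
 * ldc_mem_copy() always sees an 8 byte multiple.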
3364 */ 3365 DTRACE_PROBE(allocb); 3366 if ((mp = allocb(desc.nbytes + VNET_IPALIGN + 8, 3367 BPRI_MED)) == NULL) { 3368 DERR(vswp, "%s(%ld): allocb failed", 3369 __func__, ldcp->ldc_id); 3370 rng_rv = vnet_dring_entry_set_dstate( 3371 pub_addr, dp->dring_mtype, 3372 dp->handle, pos, pos, 3373 VIO_DESC_DONE); 3374 ldcp->ldc_stats.ierrors++; 3375 ldcp->ldc_stats.rx_allocb_fail++; 3376 break; 3377 } 3378 } 3379 3380 rv = ldc_mem_copy(ldcp->ldc_handle, 3381 (caddr_t)mp->b_rptr, 0, &nbytes, 3382 desc.memcookie, desc.ncookies, LDC_COPY_IN); 3383 if (rv != 0) { 3384 DERR(vswp, "%s(%d): unable to copy in data " 3385 "from %d cookies in desc %d (rv %d)", 3386 __func__, ldcp->ldc_id, desc.ncookies, 3387 pos, rv); 3388 freemsg(mp); 3389 3390 rng_rv = vnet_dring_entry_set_dstate(pub_addr, 3391 dp->dring_mtype, dp->handle, pos, pos, 3392 VIO_DESC_DONE); 3393 ldcp->ldc_stats.ierrors++; 3394 break; 3395 } else { 3396 D2(vswp, "%s(%d): copied in %ld bytes" 3397 " using %d cookies", __func__, 3398 ldcp->ldc_id, nbytes, desc.ncookies); 3399 } 3400 3401 /* adjust the read pointer to skip over the padding */ 3402 mp->b_rptr += VNET_IPALIGN; 3403 3404 /* point to the actual end of data */ 3405 mp->b_wptr = mp->b_rptr + desc.nbytes; 3406 3407 /* update statistics */ 3408 ehp = (struct ether_header *)mp->b_rptr; 3409 if (IS_BROADCAST(ehp)) 3410 ldcp->ldc_stats.brdcstrcv++; 3411 else if (IS_MULTICAST(ehp)) 3412 ldcp->ldc_stats.multircv++; 3413 3414 ldcp->ldc_stats.ipackets++; 3415 ldcp->ldc_stats.rbytes += desc.nbytes; 3416 3417 /* 3418 * IPALIGN space can be used for VLAN_TAG 3419 */ 3420 (void) vsw_vlan_frame_pretag(ldcp->ldc_port, 3421 VSW_VNETPORT, mp); 3422 3423 /* build a chain of received packets */ 3424 if (bp == NULL) { 3425 /* first pkt */ 3426 bp = mp; 3427 bp->b_next = bp->b_prev = NULL; 3428 bpt = bp; 3429 chain = 1; 3430 } else { 3431 mp->b_next = mp->b_prev = NULL; 3432 bpt->b_next = mp; 3433 bpt = mp; 3434 chain++; 3435 } 3436 3437 /* mark we are finished with this descriptor */ 3438 if ((rng_rv = vnet_dring_entry_set_dstate(pub_addr, 3439 dp->dring_mtype, dp->handle, pos, pos, 3440 VIO_DESC_DONE)) != 0) { 3441 DERR(vswp, "%s(%lld): unable to update " 3442 "dstate at pos %d: err %d", 3443 __func__, pos, ldcp->ldc_id, rng_rv); 3444 ldcp->ldc_stats.ierrors++; 3445 break; 3446 } 3447 3448 /* 3449 * Send an ACK back to peer if requested. 3450 */ 3451 if (desc.hdr.ack) { 3452 dring_pkt->start_idx = range_start; 3453 dring_pkt->end_idx = range_end; 3454 3455 DERR(vswp, "%s(%lld): processed %d %d, ACK" 3456 " requested", __func__, ldcp->ldc_id, 3457 dring_pkt->start_idx, dring_pkt->end_idx); 3458 3459 dring_pkt->dring_process_state = VIO_DP_ACTIVE; 3460 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3461 dring_pkt->tag.vio_sid = ldcp->local_session; 3462 3463 msg_rv = vsw_send_msg(ldcp, (void *)dring_pkt, 3464 sizeof (vio_dring_msg_t), B_FALSE); 3465 3466 /* 3467 * Check if ACK was successfully sent. If not 3468 * we break and deal with that below. 3469 */ 3470 if (msg_rv != 0) 3471 break; 3472 3473 prev_desc_ack = B_TRUE; 3474 range_start = pos; 3475 } 3476 3477 /* next descriptor */ 3478 pos = (pos + 1) % len; 3479 cnt++; 3480 3481 /* 3482 * Break out of loop here and stop processing to 3483 * allow some other network device (or disk) to 3484 * get access to the cpu. 
3485 */ 3486 if (chain > vsw_chain_len) { 3487 D3(vswp, "%s(%lld): switching chain of %d " 3488 "msgs", __func__, ldcp->ldc_id, chain); 3489 break; 3490 } 3491 } 3492 RW_EXIT(&ldcp->lane_in.dlistrw); 3493 3494 /* send the chain of packets to be switched */ 3495 if (bp != NULL) { 3496 DTRACE_PROBE1(vsw_rcv_msgs, int, chain); 3497 D3(vswp, "%s(%lld): switching chain of %d msgs", 3498 __func__, ldcp->ldc_id, chain); 3499 vswp->vsw_switch_frame(vswp, bp, VSW_VNETPORT, 3500 ldcp->ldc_port, NULL); 3501 } 3502 3503 /* 3504 * If when we encountered an error when attempting to 3505 * access an imported dring, initiate a connection reset. 3506 */ 3507 if (rng_rv != 0) { 3508 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3509 break; 3510 } 3511 3512 /* 3513 * If when we attempted to send the ACK we found that the 3514 * channel had been reset then now handle this. We deal with 3515 * it here as we cannot reset the channel while holding the 3516 * dlistrw lock, and we don't want to acquire/release it 3517 * continuously in the above loop, as a channel reset should 3518 * be a rare event. 3519 */ 3520 if (msg_rv == ECONNRESET) { 3521 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 3522 break; 3523 } 3524 3525 DTRACE_PROBE1(msg_cnt, int, cnt); 3526 3527 /* 3528 * We are now finished so ACK back with the state 3529 * set to STOPPING so our peer knows we are finished 3530 */ 3531 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3532 dring_pkt->tag.vio_sid = ldcp->local_session; 3533 3534 dring_pkt->dring_process_state = VIO_DP_STOPPED; 3535 3536 DTRACE_PROBE(stop_process_sent); 3537 3538 /* 3539 * We have not processed any more descriptors beyond 3540 * the last one we ACK'd. 3541 */ 3542 if (prev_desc_ack) 3543 range_start = range_end; 3544 3545 dring_pkt->start_idx = range_start; 3546 dring_pkt->end_idx = range_end; 3547 3548 D2(vswp, "%s(%lld) processed : %d : %d, now stopping", 3549 __func__, ldcp->ldc_id, dring_pkt->start_idx, 3550 dring_pkt->end_idx); 3551 3552 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 3553 sizeof (vio_dring_msg_t), B_TRUE); 3554 break; 3555 3556 case VIO_SUBTYPE_ACK: 3557 D2(vswp, "%s(%lld): VIO_SUBTYPE_ACK", __func__, ldcp->ldc_id); 3558 /* 3559 * Verify that the relevant descriptors are all 3560 * marked as DONE 3561 */ 3562 READ_ENTER(&ldcp->lane_out.dlistrw); 3563 if ((dp = vsw_ident2dring(&ldcp->lane_out, 3564 dring_pkt->dring_ident)) == NULL) { 3565 RW_EXIT(&ldcp->lane_out.dlistrw); 3566 DERR(vswp, "%s: unknown ident in ACK", __func__); 3567 return; 3568 } 3569 3570 start = end = 0; 3571 start = dring_pkt->start_idx; 3572 end = dring_pkt->end_idx; 3573 len = dp->num_descriptors; 3574 3575 3576 mutex_enter(&dp->dlock); 3577 dp->last_ack_recv = end; 3578 ldcp->ldc_stats.dring_data_acks++; 3579 mutex_exit(&dp->dlock); 3580 3581 (void) vsw_reclaim_dring(dp, start); 3582 3583 /* 3584 * If our peer is stopping processing descriptors then 3585 * we check to make sure it has processed all the descriptors 3586 * we have updated. If not then we send it a new message 3587 * to prompt it to restart. 3588 */ 3589 if (dring_pkt->dring_process_state == VIO_DP_STOPPED) { 3590 DTRACE_PROBE(stop_process_recv); 3591 D2(vswp, "%s(%lld): got stopping msg : %d : %d", 3592 __func__, ldcp->ldc_id, dring_pkt->start_idx, 3593 dring_pkt->end_idx); 3594 3595 /* 3596 * Check next descriptor in public section of ring. 3597 * If its marked as READY then we need to prompt our 3598 * peer to start processing the ring again. 
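 * Illustrative sequence (indices hypothetical): the peer ACKs with
 * dring_process_state == VIO_DP_STOPPED after handling descriptors
 * 10..20; we reclaim those, then look at descriptor 21: if it is
 * READY we send a DRING_DATA INFO with start_idx = 21, end_idx = -1
 * to restart the peer, otherwise we just note restart_reqd for the
 * next transmit to deal with.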
3599 */ 3600 i = (end + 1) % len; 3601 pub_addr = (vnet_public_desc_t *)dp->pub_addr + i; 3602 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i; 3603 3604 /* 3605 * Hold the restart lock across all of this to 3606 * make sure that its not possible for us to 3607 * decide that a msg needs to be sent in the future 3608 * but the sending code having already checked is 3609 * about to exit. 3610 */ 3611 mutex_enter(&dp->restart_lock); 3612 ldcp->ldc_stats.dring_stopped_acks++; 3613 mutex_enter(&priv_addr->dstate_lock); 3614 if (pub_addr->hdr.dstate == VIO_DESC_READY) { 3615 3616 mutex_exit(&priv_addr->dstate_lock); 3617 3618 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO; 3619 dring_pkt->tag.vio_sid = ldcp->local_session; 3620 3621 dring_pkt->start_idx = (end + 1) % len; 3622 dring_pkt->end_idx = -1; 3623 3624 D2(vswp, "%s(%lld) : sending restart msg:" 3625 " %d : %d", __func__, ldcp->ldc_id, 3626 dring_pkt->start_idx, dring_pkt->end_idx); 3627 3628 msg_rv = vsw_send_msg(ldcp, (void *)dring_pkt, 3629 sizeof (vio_dring_msg_t), B_FALSE); 3630 ldcp->ldc_stats.dring_data_msgs++; 3631 3632 } else { 3633 mutex_exit(&priv_addr->dstate_lock); 3634 dp->restart_reqd = B_TRUE; 3635 } 3636 mutex_exit(&dp->restart_lock); 3637 } 3638 RW_EXIT(&ldcp->lane_out.dlistrw); 3639 3640 /* only do channel reset after dropping dlistrw lock */ 3641 if (msg_rv == ECONNRESET) 3642 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 3643 3644 break; 3645 3646 case VIO_SUBTYPE_NACK: 3647 DWARN(vswp, "%s(%lld): VIO_SUBTYPE_NACK", 3648 __func__, ldcp->ldc_id); 3649 /* 3650 * Something is badly wrong if we are getting NACK's 3651 * for our data pkts. So reset the channel. 3652 */ 3653 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3654 3655 break; 3656 3657 default: 3658 DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__, 3659 ldcp->ldc_id, dring_pkt->tag.vio_subtype); 3660 } 3661 3662 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 3663 } 3664 3665 /* 3666 * dummy pkt data handler function for vnet protocol version 1.0 3667 */ 3668 static void 3669 vsw_process_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen) 3670 { 3671 _NOTE(ARGUNUSED(arg1, arg2, msglen)) 3672 } 3673 3674 /* 3675 * This function handles raw pkt data messages received over the channel. 3676 * Currently, only priority-eth-type frames are received through this mechanism. 3677 * In this case, the frame(data) is present within the message itself which 3678 * is copied into an mblk before switching it. 
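 * Conceptually such a message looks like:
 *
 *   +---------------------+-------------------------------------+
 *   | vio_raw_data_msg_t  | frame data                          |
 *   | header              | (msglen - VIO_PKT_DATA_HDRSIZE)     |
 *   +---------------------+-------------------------------------+
 *
 * and the frame portion must lie within [ETHERMIN, lane mtu] to be
 * accepted below.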
3679 */ 3680 static void 3681 vsw_process_pkt_data(void *arg1, void *arg2, uint32_t msglen) 3682 { 3683 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg1; 3684 vio_raw_data_msg_t *dpkt = (vio_raw_data_msg_t *)arg2; 3685 uint32_t size; 3686 mblk_t *mp; 3687 vsw_t *vswp = ldcp->ldc_vswp; 3688 vgen_stats_t *statsp = &ldcp->ldc_stats; 3689 lane_t *lp = &ldcp->lane_out; 3690 3691 size = msglen - VIO_PKT_DATA_HDRSIZE; 3692 if (size < ETHERMIN || size > lp->mtu) { 3693 (void) atomic_inc_32(&statsp->rx_pri_fail); 3694 DWARN(vswp, "%s(%lld) invalid size(%d)\n", __func__, 3695 ldcp->ldc_id, size); 3696 return; 3697 } 3698 3699 mp = vio_multipool_allocb(&ldcp->vmp, size + VLAN_TAGSZ); 3700 if (mp == NULL) { 3701 mp = allocb(size + VLAN_TAGSZ, BPRI_MED); 3702 if (mp == NULL) { 3703 (void) atomic_inc_32(&statsp->rx_pri_fail); 3704 DWARN(vswp, "%s(%lld) allocb failure, " 3705 "unable to process priority frame\n", __func__, 3706 ldcp->ldc_id); 3707 return; 3708 } 3709 } 3710 3711 /* skip over the extra space for vlan tag */ 3712 mp->b_rptr += VLAN_TAGSZ; 3713 3714 /* copy the frame from the payload of raw data msg into the mblk */ 3715 bcopy(dpkt->data, mp->b_rptr, size); 3716 mp->b_wptr = mp->b_rptr + size; 3717 3718 /* update stats */ 3719 (void) atomic_inc_64(&statsp->rx_pri_packets); 3720 (void) atomic_add_64(&statsp->rx_pri_bytes, size); 3721 3722 /* 3723 * VLAN_TAGSZ of extra space has been pre-alloc'd if tag is needed. 3724 */ 3725 (void) vsw_vlan_frame_pretag(ldcp->ldc_port, VSW_VNETPORT, mp); 3726 3727 /* switch the frame to destination */ 3728 vswp->vsw_switch_frame(vswp, mp, VSW_VNETPORT, ldcp->ldc_port, NULL); 3729 } 3730 3731 /* 3732 * Process an in-band descriptor message (most likely from 3733 * OBP). 3734 */ 3735 static void 3736 vsw_process_data_ibnd_pkt(vsw_ldc_t *ldcp, void *pkt) 3737 { 3738 vnet_ibnd_desc_t *ibnd_desc; 3739 dring_info_t *dp = NULL; 3740 vsw_private_desc_t *priv_addr = NULL; 3741 vsw_t *vswp = ldcp->ldc_vswp; 3742 mblk_t *mp = NULL; 3743 size_t nbytes = 0; 3744 size_t off = 0; 3745 uint64_t idx = 0; 3746 uint32_t num = 1, len, datalen = 0; 3747 uint64_t ncookies = 0; 3748 int i, rv; 3749 int j = 0; 3750 3751 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3752 3753 ibnd_desc = (vnet_ibnd_desc_t *)pkt; 3754 3755 switch (ibnd_desc->hdr.tag.vio_subtype) { 3756 case VIO_SUBTYPE_INFO: 3757 D1(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3758 3759 if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV)) 3760 return; 3761 3762 /* 3763 * Data is padded to align on a 8 byte boundary, 3764 * nbytes is actual data length, i.e. minus that 3765 * padding. 3766 */ 3767 datalen = ibnd_desc->nbytes; 3768 3769 D2(vswp, "%s(%lld): processing inband desc : " 3770 ": datalen 0x%lx", __func__, ldcp->ldc_id, datalen); 3771 3772 ncookies = ibnd_desc->ncookies; 3773 3774 /* 3775 * allocb(9F) returns an aligned data block. We 3776 * need to ensure that we ask ldc for an aligned 3777 * number of bytes also. 
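 * e.g. (illustrative) a datalen of 60 (0x3c) is rounded up to an
 * nbytes of 64 for the ldc_mem_copy() below, while b_wptr is later set
 * from datalen so the padding is never exposed to the stack.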
3778 */ 3779 nbytes = datalen; 3780 if (nbytes & 0x7) { 3781 off = 8 - (nbytes & 0x7); 3782 nbytes += off; 3783 } 3784 3785 /* alloc extra space for VLAN_TAG */ 3786 mp = allocb(datalen + 8, BPRI_MED); 3787 if (mp == NULL) { 3788 DERR(vswp, "%s(%lld): allocb failed", 3789 __func__, ldcp->ldc_id); 3790 ldcp->ldc_stats.rx_allocb_fail++; 3791 return; 3792 } 3793 3794 /* skip over the extra space for VLAN_TAG */ 3795 mp->b_rptr += 8; 3796 3797 rv = ldc_mem_copy(ldcp->ldc_handle, (caddr_t)mp->b_rptr, 3798 0, &nbytes, ibnd_desc->memcookie, (uint64_t)ncookies, 3799 LDC_COPY_IN); 3800 3801 if (rv != 0) { 3802 DERR(vswp, "%s(%d): unable to copy in data from " 3803 "%d cookie(s)", __func__, ldcp->ldc_id, ncookies); 3804 freemsg(mp); 3805 ldcp->ldc_stats.ierrors++; 3806 return; 3807 } 3808 3809 D2(vswp, "%s(%d): copied in %ld bytes using %d cookies", 3810 __func__, ldcp->ldc_id, nbytes, ncookies); 3811 3812 /* point to the actual end of data */ 3813 mp->b_wptr = mp->b_rptr + datalen; 3814 ldcp->ldc_stats.ipackets++; 3815 ldcp->ldc_stats.rbytes += datalen; 3816 3817 /* 3818 * We ACK back every in-band descriptor message we process 3819 */ 3820 ibnd_desc->hdr.tag.vio_subtype = VIO_SUBTYPE_ACK; 3821 ibnd_desc->hdr.tag.vio_sid = ldcp->local_session; 3822 (void) vsw_send_msg(ldcp, (void *)ibnd_desc, 3823 sizeof (vnet_ibnd_desc_t), B_TRUE); 3824 3825 /* 3826 * there is extra space alloc'd for VLAN_TAG 3827 */ 3828 (void) vsw_vlan_frame_pretag(ldcp->ldc_port, VSW_VNETPORT, mp); 3829 3830 /* send the packet to be switched */ 3831 vswp->vsw_switch_frame(vswp, mp, VSW_VNETPORT, 3832 ldcp->ldc_port, NULL); 3833 3834 break; 3835 3836 case VIO_SUBTYPE_ACK: 3837 D1(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3838 3839 /* Verify the ACK is valid */ 3840 idx = ibnd_desc->hdr.desc_handle; 3841 3842 if (idx >= vsw_ntxds) { 3843 cmn_err(CE_WARN, "!vsw%d: corrupted ACK received " 3844 "(idx %ld)", vswp->instance, idx); 3845 return; 3846 } 3847 3848 if ((dp = ldcp->lane_out.dringp) == NULL) { 3849 DERR(vswp, "%s: no dring found", __func__); 3850 return; 3851 } 3852 3853 len = dp->num_descriptors; 3854 /* 3855 * If the descriptor we are being ACK'ed for is not the 3856 * one we expected, then pkts were lost somwhere, either 3857 * when we tried to send a msg, or a previous ACK msg from 3858 * our peer. In either case we now reclaim the descriptors 3859 * in the range from the last ACK we received up to the 3860 * current ACK. 3861 */ 3862 if (idx != dp->last_ack_recv) { 3863 DWARN(vswp, "%s: dropped pkts detected, (%ld, %ld)", 3864 __func__, dp->last_ack_recv, idx); 3865 num = idx >= dp->last_ack_recv ? 3866 idx - dp->last_ack_recv + 1: 3867 (len - dp->last_ack_recv + 1) + idx; 3868 } 3869 3870 /* 3871 * When we sent the in-band message to our peer we 3872 * marked the copy in our private ring as READY. We now 3873 * check that the descriptor we are being ACK'ed for is in 3874 * fact READY, i.e. it is one we have shared with our peer. 3875 * 3876 * If its not we flag an error, but still reset the descr 3877 * back to FREE. 
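 * Worked example (values hypothetical): with len = 512,
 * last_ack_recv = 510 and an ACK arriving for idx = 1, num is
 * (512 - 510 + 1) + 1 = 4, so the loop below walks descriptors
 * 510, 511, 0 and 1, marking each VIO_DESC_FREE again.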
3878 */ 3879 for (i = dp->last_ack_recv; j < num; i = (i + 1) % len, j++) { 3880 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i; 3881 mutex_enter(&priv_addr->dstate_lock); 3882 if (priv_addr->dstate != VIO_DESC_READY) { 3883 DERR(vswp, "%s: (%ld) desc at index %ld not " 3884 "READY (0x%lx)", __func__, 3885 ldcp->ldc_id, idx, priv_addr->dstate); 3886 DERR(vswp, "%s: bound %d: ncookies %ld : " 3887 "datalen %ld", __func__, 3888 priv_addr->bound, priv_addr->ncookies, 3889 priv_addr->datalen); 3890 } 3891 D2(vswp, "%s: (%lld) freeing descp at %lld", __func__, 3892 ldcp->ldc_id, idx); 3893 /* release resources associated with sent msg */ 3894 priv_addr->datalen = 0; 3895 priv_addr->dstate = VIO_DESC_FREE; 3896 mutex_exit(&priv_addr->dstate_lock); 3897 } 3898 /* update to next expected value */ 3899 dp->last_ack_recv = (idx + 1) % dp->num_descriptors; 3900 3901 break; 3902 3903 case VIO_SUBTYPE_NACK: 3904 DERR(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3905 3906 /* 3907 * We should only get a NACK if our peer doesn't like 3908 * something about a message we have sent it. If this 3909 * happens we just release the resources associated with 3910 * the message. (We are relying on higher layers to decide 3911 * whether or not to resend. 3912 */ 3913 3914 /* limit check */ 3915 idx = ibnd_desc->hdr.desc_handle; 3916 3917 if (idx >= vsw_ntxds) { 3918 DERR(vswp, "%s: corrupted NACK received (idx %lld)", 3919 __func__, idx); 3920 return; 3921 } 3922 3923 if ((dp = ldcp->lane_out.dringp) == NULL) { 3924 DERR(vswp, "%s: no dring found", __func__); 3925 return; 3926 } 3927 3928 priv_addr = (vsw_private_desc_t *)dp->priv_addr; 3929 3930 /* move to correct location in ring */ 3931 priv_addr += idx; 3932 3933 /* release resources associated with sent msg */ 3934 mutex_enter(&priv_addr->dstate_lock); 3935 priv_addr->datalen = 0; 3936 priv_addr->dstate = VIO_DESC_FREE; 3937 mutex_exit(&priv_addr->dstate_lock); 3938 3939 break; 3940 3941 default: 3942 DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__, 3943 ldcp->ldc_id, ibnd_desc->hdr.tag.vio_subtype); 3944 } 3945 3946 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 3947 } 3948 3949 static void 3950 vsw_process_err_pkt(vsw_ldc_t *ldcp, void *epkt, vio_msg_tag_t *tagp) 3951 { 3952 _NOTE(ARGUNUSED(epkt)) 3953 3954 vsw_t *vswp = ldcp->ldc_vswp; 3955 uint16_t env = tagp->vio_subtype_env; 3956 3957 D1(vswp, "%s (%lld): enter\n", __func__, ldcp->ldc_id); 3958 3959 /* 3960 * Error vio_subtypes have yet to be defined. So for 3961 * the moment we can't do anything. 3962 */ 3963 D2(vswp, "%s: (%x) vio_subtype env", __func__, env); 3964 3965 D1(vswp, "%s (%lld): exit\n", __func__, ldcp->ldc_id); 3966 } 3967 3968 /* transmit the packet over the given port */ 3969 int 3970 vsw_portsend(vsw_port_t *port, mblk_t *mp, mblk_t *mpt, uint32_t count) 3971 { 3972 vsw_ldc_list_t *ldcl = &port->p_ldclist; 3973 vsw_ldc_t *ldcp; 3974 int status = 0; 3975 uint32_t n; 3976 3977 READ_ENTER(&ldcl->lockrw); 3978 /* 3979 * Note for now, we have a single channel. 
3980 */ 3981 ldcp = ldcl->head; 3982 if (ldcp == NULL) { 3983 DERR(port->p_vswp, "vsw_portsend: no ldc: dropping packet\n"); 3984 freemsgchain(mp); 3985 RW_EXIT(&ldcl->lockrw); 3986 return (1); 3987 } 3988 3989 n = vsw_vlan_frame_untag(port, VSW_VNETPORT, &mp, &mpt); 3990 3991 count -= n; 3992 if (count == 0) { 3993 goto vsw_portsend_exit; 3994 } 3995 3996 status = ldcp->tx(ldcp, mp, mpt, count); 3997 3998 vsw_portsend_exit: 3999 RW_EXIT(&ldcl->lockrw); 4000 4001 return (status); 4002 } 4003 4004 /* 4005 * Break up frames into 2 seperate chains: normal and 4006 * priority, based on the frame type. The number of 4007 * priority frames is also counted and returned. 4008 * 4009 * Params: 4010 * vswp: pointer to the instance of vsw 4011 * np: head of packet chain to be broken 4012 * npt: tail of packet chain to be broken 4013 * 4014 * Returns: 4015 * np: head of normal data packets 4016 * npt: tail of normal data packets 4017 * hp: head of high priority packets 4018 * hpt: tail of high priority packets 4019 */ 4020 static uint32_t 4021 vsw_get_pri_packets(vsw_t *vswp, mblk_t **np, mblk_t **npt, 4022 mblk_t **hp, mblk_t **hpt) 4023 { 4024 mblk_t *tmp = NULL; 4025 mblk_t *smp = NULL; 4026 mblk_t *hmp = NULL; /* high prio pkts head */ 4027 mblk_t *hmpt = NULL; /* high prio pkts tail */ 4028 mblk_t *nmp = NULL; /* normal pkts head */ 4029 mblk_t *nmpt = NULL; /* normal pkts tail */ 4030 uint32_t count = 0; 4031 int i; 4032 struct ether_header *ehp; 4033 uint32_t num_types; 4034 uint16_t *types; 4035 4036 tmp = *np; 4037 while (tmp != NULL) { 4038 4039 smp = tmp; 4040 tmp = tmp->b_next; 4041 smp->b_next = NULL; 4042 smp->b_prev = NULL; 4043 4044 ehp = (struct ether_header *)smp->b_rptr; 4045 num_types = vswp->pri_num_types; 4046 types = vswp->pri_types; 4047 for (i = 0; i < num_types; i++) { 4048 if (ehp->ether_type == types[i]) { 4049 /* high priority frame */ 4050 4051 if (hmp != NULL) { 4052 hmpt->b_next = smp; 4053 hmpt = smp; 4054 } else { 4055 hmp = hmpt = smp; 4056 } 4057 count++; 4058 break; 4059 } 4060 } 4061 if (i == num_types) { 4062 /* normal data frame */ 4063 4064 if (nmp != NULL) { 4065 nmpt->b_next = smp; 4066 nmpt = smp; 4067 } else { 4068 nmp = nmpt = smp; 4069 } 4070 } 4071 } 4072 4073 *hp = hmp; 4074 *hpt = hmpt; 4075 *np = nmp; 4076 *npt = nmpt; 4077 4078 return (count); 4079 } 4080 4081 /* 4082 * Wrapper function to transmit normal and/or priority frames over the channel. 4083 */ 4084 static int 4085 vsw_ldctx_pri(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count) 4086 { 4087 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 4088 mblk_t *tmp; 4089 mblk_t *smp; 4090 mblk_t *hmp; /* high prio pkts head */ 4091 mblk_t *hmpt; /* high prio pkts tail */ 4092 mblk_t *nmp; /* normal pkts head */ 4093 mblk_t *nmpt; /* normal pkts tail */ 4094 uint32_t n = 0; 4095 vsw_t *vswp = ldcp->ldc_vswp; 4096 4097 ASSERT(VSW_PRI_ETH_DEFINED(vswp)); 4098 ASSERT(count != 0); 4099 4100 nmp = mp; 4101 nmpt = mpt; 4102 4103 /* gather any priority frames from the chain of packets */ 4104 n = vsw_get_pri_packets(vswp, &nmp, &nmpt, &hmp, &hmpt); 4105 4106 /* transmit priority frames */ 4107 tmp = hmp; 4108 while (tmp != NULL) { 4109 smp = tmp; 4110 tmp = tmp->b_next; 4111 smp->b_next = NULL; 4112 vsw_ldcsend_pkt(ldcp, smp); 4113 } 4114 4115 count -= n; 4116 4117 if (count == 0) { 4118 /* no normal data frames to process */ 4119 return (0); 4120 } 4121 4122 return (vsw_ldctx(ldcp, nmp, nmpt, count)); 4123 } 4124 4125 /* 4126 * Wrapper function to transmit normal frames over the channel. 
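 * If a dedicated tx thread exists for the channel, the frames are simply queued (up to vsw_max_tx_qcount) and the thread is signalled; otherwise they are sent inline via vsw_ldcsend().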
4127 */ 4128 static int 4129 vsw_ldctx(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count) 4130 { 4131 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 4132 mblk_t *tmp = NULL; 4133 4134 ASSERT(count != 0); 4135 /* 4136 * If the TX thread is enabled, then queue the 4137 * ordinary frames and signal the tx thread. 4138 */ 4139 if (ldcp->tx_thread != NULL) { 4140 4141 mutex_enter(&ldcp->tx_thr_lock); 4142 4143 if ((ldcp->tx_cnt + count) >= vsw_max_tx_qcount) { 4144 /* 4145 * If we reached queue limit, 4146 * do not queue new packets, 4147 * drop them. 4148 */ 4149 ldcp->ldc_stats.tx_qfull += count; 4150 mutex_exit(&ldcp->tx_thr_lock); 4151 freemsgchain(mp); 4152 goto exit; 4153 } 4154 if (ldcp->tx_mhead == NULL) { 4155 ldcp->tx_mhead = mp; 4156 ldcp->tx_mtail = mpt; 4157 cv_signal(&ldcp->tx_thr_cv); 4158 } else { 4159 ldcp->tx_mtail->b_next = mp; 4160 ldcp->tx_mtail = mpt; 4161 } 4162 ldcp->tx_cnt += count; 4163 mutex_exit(&ldcp->tx_thr_lock); 4164 } else { 4165 while (mp != NULL) { 4166 tmp = mp->b_next; 4167 mp->b_next = mp->b_prev = NULL; 4168 (void) vsw_ldcsend(ldcp, mp, 1); 4169 mp = tmp; 4170 } 4171 } 4172 4173 exit: 4174 return (0); 4175 } 4176 4177 /* 4178 * This function transmits the frame in the payload of a raw data 4179 * (VIO_PKT_DATA) message. Thus, it provides an Out-Of-Band path to 4180 * send special frames with high priorities, without going through 4181 * the normal data path which uses descriptor ring mechanism. 4182 */ 4183 static void 4184 vsw_ldcsend_pkt(vsw_ldc_t *ldcp, mblk_t *mp) 4185 { 4186 vio_raw_data_msg_t *pkt; 4187 mblk_t *bp; 4188 mblk_t *nmp = NULL; 4189 caddr_t dst; 4190 uint32_t mblksz; 4191 uint32_t size; 4192 uint32_t nbytes; 4193 int rv; 4194 vsw_t *vswp = ldcp->ldc_vswp; 4195 vgen_stats_t *statsp = &ldcp->ldc_stats; 4196 4197 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 4198 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 4199 (void) atomic_inc_32(&statsp->tx_pri_fail); 4200 DWARN(vswp, "%s(%lld) status(%d) lstate(0x%llx), dropping " 4201 "packet\n", __func__, ldcp->ldc_id, ldcp->ldc_status, 4202 ldcp->lane_out.lstate); 4203 goto send_pkt_exit; 4204 } 4205 4206 size = msgsize(mp); 4207 4208 /* frame size bigger than available payload len of raw data msg ? 
*/ 4209 if (size > (size_t)(ldcp->msglen - VIO_PKT_DATA_HDRSIZE)) { 4210 (void) atomic_inc_32(&statsp->tx_pri_fail); 4211 DWARN(vswp, "%s(%lld) invalid size(%d)\n", __func__, 4212 ldcp->ldc_id, size); 4213 goto send_pkt_exit; 4214 } 4215 4216 if (size < ETHERMIN) 4217 size = ETHERMIN; 4218 4219 /* alloc space for a raw data message */ 4220 nmp = vio_allocb(vswp->pri_tx_vmp); 4221 if (nmp == NULL) { 4222 (void) atomic_inc_32(&statsp->tx_pri_fail); 4223 DWARN(vswp, "vio_allocb failed\n"); 4224 goto send_pkt_exit; 4225 } 4226 pkt = (vio_raw_data_msg_t *)nmp->b_rptr; 4227 4228 /* copy frame into the payload of raw data message */ 4229 dst = (caddr_t)pkt->data; 4230 for (bp = mp; bp != NULL; bp = bp->b_cont) { 4231 mblksz = MBLKL(bp); 4232 bcopy(bp->b_rptr, dst, mblksz); 4233 dst += mblksz; 4234 } 4235 4236 /* setup the raw data msg */ 4237 pkt->tag.vio_msgtype = VIO_TYPE_DATA; 4238 pkt->tag.vio_subtype = VIO_SUBTYPE_INFO; 4239 pkt->tag.vio_subtype_env = VIO_PKT_DATA; 4240 pkt->tag.vio_sid = ldcp->local_session; 4241 nbytes = VIO_PKT_DATA_HDRSIZE + size; 4242 4243 /* send the msg over ldc */ 4244 rv = vsw_send_msg(ldcp, (void *)pkt, nbytes, B_TRUE); 4245 if (rv != 0) { 4246 (void) atomic_inc_32(&statsp->tx_pri_fail); 4247 DWARN(vswp, "%s(%lld) Error sending priority frame\n", __func__, 4248 ldcp->ldc_id); 4249 goto send_pkt_exit; 4250 } 4251 4252 /* update stats */ 4253 (void) atomic_inc_64(&statsp->tx_pri_packets); 4254 (void) atomic_add_64(&statsp->tx_pri_bytes, size); 4255 4256 send_pkt_exit: 4257 if (nmp != NULL) 4258 freemsg(nmp); 4259 freemsg(mp); 4260 } 4261 4262 /* 4263 * Transmit the packet over the given LDC channel. 4264 * 4265 * The 'retries' argument indicates how many times a packet 4266 * is retried before it is dropped. Note, the retry is done 4267 * only for a resource related failure, for all other failures 4268 * the packet is dropped immediately. 4269 */ 4270 static int 4271 vsw_ldcsend(vsw_ldc_t *ldcp, mblk_t *mp, uint32_t retries) 4272 { 4273 int i; 4274 int rc; 4275 int status = 0; 4276 vsw_port_t *port = ldcp->ldc_port; 4277 dring_info_t *dp = NULL; 4278 4279 4280 for (i = 0; i < retries; ) { 4281 /* 4282 * Send the message out using the appropriate 4283 * transmit function which will free the mblk when it 4284 * is finished with it. 4285 */ 4286 mutex_enter(&port->tx_lock); 4287 if (port->transmit != NULL) { 4288 status = (*port->transmit)(ldcp, mp); 4289 } 4290 if (status == LDC_TX_SUCCESS) { 4291 mutex_exit(&port->tx_lock); 4292 break; 4293 } 4294 i++; /* increment the counter here */ 4295 4296 /* If it's the last retry, then update oerrors */ 4297 if ((i == retries) && (status == LDC_TX_NORESOURCES)) { 4298 ldcp->ldc_stats.oerrors++; 4299 } 4300 mutex_exit(&port->tx_lock); 4301 4302 if (status != LDC_TX_NORESOURCES) { 4303 /* 4304 * No retrying required for errors unrelated 4305 * to resources. 4306 */ 4307 break; 4308 } 4309 READ_ENTER(&ldcp->lane_out.dlistrw); 4310 if (((dp = ldcp->lane_out.dringp) != NULL) && 4311 ((VSW_VER_GTEQ(ldcp, 1, 2) && 4312 (ldcp->lane_out.xfer_mode & VIO_DRING_MODE_V1_2)) || 4313 ((VSW_VER_LT(ldcp, 1, 2) && 4314 (ldcp->lane_out.xfer_mode == VIO_DRING_MODE_V1_0))))) { 4315 rc = vsw_reclaim_dring(dp, dp->end_idx); 4316 } else { 4317 /* 4318 * If there is no dring or the xfer_mode is 4319 * set to DESC_MODE (i.e., OBP), then simply break here. 4320 */ 4321 RW_EXIT(&ldcp->lane_out.dlistrw); 4322 break; 4323 } 4324 RW_EXIT(&ldcp->lane_out.dlistrw); 4325 4326 /* 4327 * Delay only if none were reclaimed 4328 * and it's not the last retry.
4329 */ 4330 if ((rc == 0) && (i < retries)) { 4331 delay(drv_usectohz(vsw_ldc_tx_delay)); 4332 } 4333 } 4334 freemsg(mp); 4335 return (status); 4336 } 4337 4338 /* 4339 * Send packet out via descriptor ring to a logical device. 4340 */ 4341 static int 4342 vsw_dringsend(vsw_ldc_t *ldcp, mblk_t *mp) 4343 { 4344 vio_dring_msg_t dring_pkt; 4345 dring_info_t *dp = NULL; 4346 vsw_private_desc_t *priv_desc = NULL; 4347 vnet_public_desc_t *pub = NULL; 4348 vsw_t *vswp = ldcp->ldc_vswp; 4349 mblk_t *bp; 4350 size_t n, size; 4351 caddr_t bufp; 4352 int idx; 4353 int status = LDC_TX_SUCCESS; 4354 struct ether_header *ehp = (struct ether_header *)mp->b_rptr; 4355 lane_t *lp = &ldcp->lane_out; 4356 4357 D1(vswp, "%s(%lld): enter\n", __func__, ldcp->ldc_id); 4358 4359 /* TODO: make test a macro */ 4360 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 4361 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 4362 DWARN(vswp, "%s(%lld) status(%d) lstate(0x%llx), dropping " 4363 "packet\n", __func__, ldcp->ldc_id, ldcp->ldc_status, 4364 ldcp->lane_out.lstate); 4365 ldcp->ldc_stats.oerrors++; 4366 return (LDC_TX_FAILURE); 4367 } 4368 4369 /* 4370 * Note - using first ring only, this may change 4371 * in the future. 4372 */ 4373 READ_ENTER(&ldcp->lane_out.dlistrw); 4374 if ((dp = ldcp->lane_out.dringp) == NULL) { 4375 RW_EXIT(&ldcp->lane_out.dlistrw); 4376 DERR(vswp, "%s(%lld): no dring for outbound lane on" 4377 " channel %d", __func__, ldcp->ldc_id, ldcp->ldc_id); 4378 ldcp->ldc_stats.oerrors++; 4379 return (LDC_TX_FAILURE); 4380 } 4381 4382 size = msgsize(mp); 4383 if (size > (size_t)lp->mtu) { 4384 RW_EXIT(&ldcp->lane_out.dlistrw); 4385 DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__, 4386 ldcp->ldc_id, size); 4387 ldcp->ldc_stats.oerrors++; 4388 return (LDC_TX_FAILURE); 4389 } 4390 4391 /* 4392 * Find a free descriptor 4393 * 4394 * Note: for the moment we are assuming that we will only 4395 * have one dring going from the switch to each of its 4396 * peers. This may change in the future. 4397 */ 4398 if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) { 4399 D2(vswp, "%s(%lld): no descriptor available for ring " 4400 "at 0x%llx", __func__, ldcp->ldc_id, dp); 4401 4402 /* nothing more we can do */ 4403 status = LDC_TX_NORESOURCES; 4404 ldcp->ldc_stats.tx_no_desc++; 4405 goto vsw_dringsend_free_exit; 4406 } else { 4407 D2(vswp, "%s(%lld): free private descriptor found at pos %ld " 4408 "addr 0x%llx\n", __func__, ldcp->ldc_id, idx, priv_desc); 4409 } 4410 4411 /* copy data into the descriptor */ 4412 bufp = priv_desc->datap; 4413 bufp += VNET_IPALIGN; 4414 for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) { 4415 n = MBLKL(bp); 4416 bcopy(bp->b_rptr, bufp, n); 4417 bufp += n; 4418 } 4419 4420 priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size; 4421 4422 pub = priv_desc->descp; 4423 pub->nbytes = priv_desc->datalen; 4424 4425 /* update statistics */ 4426 if (IS_BROADCAST(ehp)) 4427 ldcp->ldc_stats.brdcstxmt++; 4428 else if (IS_MULTICAST(ehp)) 4429 ldcp->ldc_stats.multixmt++; 4430 ldcp->ldc_stats.opackets++; 4431 ldcp->ldc_stats.obytes += priv_desc->datalen; 4432 4433 mutex_enter(&priv_desc->dstate_lock); 4434 pub->hdr.dstate = VIO_DESC_READY; 4435 mutex_exit(&priv_desc->dstate_lock); 4436 4437 /* 4438 * Determine whether or not we need to send a message to our 4439 * peer prompting them to read our newly updated descriptor(s). 
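 * A dring data message is only sent when restart_reqd is set (it starts out B_TRUE and is presumably set again elsewhere once the peer indicates it has stopped processing the ring); while the peer is still actively polling the descriptors no further prompt is needed.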
4440 */ 4441 mutex_enter(&dp->restart_lock); 4442 if (dp->restart_reqd) { 4443 dp->restart_reqd = B_FALSE; 4444 ldcp->ldc_stats.dring_data_msgs++; 4445 mutex_exit(&dp->restart_lock); 4446 4447 /* 4448 * Send a vio_dring_msg to peer to prompt them to read 4449 * the updated descriptor ring. 4450 */ 4451 dring_pkt.tag.vio_msgtype = VIO_TYPE_DATA; 4452 dring_pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 4453 dring_pkt.tag.vio_subtype_env = VIO_DRING_DATA; 4454 dring_pkt.tag.vio_sid = ldcp->local_session; 4455 4456 /* Note - for now using first ring */ 4457 dring_pkt.dring_ident = dp->ident; 4458 4459 /* 4460 * If last_ack_recv is -1 then we know we've not 4461 * received any ack's yet, so this must be the first 4462 * msg sent, so set the start to the begining of the ring. 4463 */ 4464 mutex_enter(&dp->dlock); 4465 if (dp->last_ack_recv == -1) { 4466 dring_pkt.start_idx = 0; 4467 } else { 4468 dring_pkt.start_idx = 4469 (dp->last_ack_recv + 1) % dp->num_descriptors; 4470 } 4471 dring_pkt.end_idx = -1; 4472 mutex_exit(&dp->dlock); 4473 4474 D3(vswp, "%s(%lld): dring 0x%llx : ident 0x%llx\n", __func__, 4475 ldcp->ldc_id, dp, dring_pkt.dring_ident); 4476 D3(vswp, "%s(%lld): start %lld : end %lld :\n", 4477 __func__, ldcp->ldc_id, dring_pkt.start_idx, 4478 dring_pkt.end_idx); 4479 4480 RW_EXIT(&ldcp->lane_out.dlistrw); 4481 4482 (void) vsw_send_msg(ldcp, (void *)&dring_pkt, 4483 sizeof (vio_dring_msg_t), B_TRUE); 4484 4485 return (status); 4486 4487 } else { 4488 mutex_exit(&dp->restart_lock); 4489 D2(vswp, "%s(%lld): updating descp %d", __func__, 4490 ldcp->ldc_id, idx); 4491 } 4492 4493 vsw_dringsend_free_exit: 4494 4495 RW_EXIT(&ldcp->lane_out.dlistrw); 4496 4497 D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id); 4498 return (status); 4499 } 4500 4501 /* 4502 * Send an in-band descriptor message over ldc. 4503 */ 4504 static int 4505 vsw_descrsend(vsw_ldc_t *ldcp, mblk_t *mp) 4506 { 4507 vsw_t *vswp = ldcp->ldc_vswp; 4508 vnet_ibnd_desc_t ibnd_msg; 4509 vsw_private_desc_t *priv_desc = NULL; 4510 dring_info_t *dp = NULL; 4511 size_t n, size = 0; 4512 caddr_t bufp; 4513 mblk_t *bp; 4514 int idx, i; 4515 int status = LDC_TX_SUCCESS; 4516 static int warn_msg = 1; 4517 lane_t *lp = &ldcp->lane_out; 4518 4519 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 4520 4521 ASSERT(mp != NULL); 4522 4523 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 4524 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 4525 DERR(vswp, "%s(%lld) status(%d) state (0x%llx), dropping pkt", 4526 __func__, ldcp->ldc_id, ldcp->ldc_status, 4527 ldcp->lane_out.lstate); 4528 ldcp->ldc_stats.oerrors++; 4529 return (LDC_TX_FAILURE); 4530 } 4531 4532 /* 4533 * only expect single dring to exist, which we use 4534 * as an internal buffer, rather than a transfer channel. 
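 * In this in-band (descriptor) mode the exported ring is never read by the peer directly; the private descriptors simply provide pre-bound LDC memory whose cookies are handed to the peer in the in-band descriptor message built below.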
4535 */ 4536 READ_ENTER(&ldcp->lane_out.dlistrw); 4537 if ((dp = ldcp->lane_out.dringp) == NULL) { 4538 DERR(vswp, "%s(%lld): no dring for outbound lane", 4539 __func__, ldcp->ldc_id); 4540 DERR(vswp, "%s(%lld) status(%d) state (0x%llx)", __func__, 4541 ldcp->ldc_id, ldcp->ldc_status, ldcp->lane_out.lstate); 4542 RW_EXIT(&ldcp->lane_out.dlistrw); 4543 ldcp->ldc_stats.oerrors++; 4544 return (LDC_TX_FAILURE); 4545 } 4546 4547 size = msgsize(mp); 4548 if (size > (size_t)lp->mtu) { 4549 RW_EXIT(&ldcp->lane_out.dlistrw); 4550 DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__, 4551 ldcp->ldc_id, size); 4552 ldcp->ldc_stats.oerrors++; 4553 return (LDC_TX_FAILURE); 4554 } 4555 4556 /* 4557 * Find a free descriptor in our buffer ring 4558 */ 4559 if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) { 4560 RW_EXIT(&ldcp->lane_out.dlistrw); 4561 if (warn_msg) { 4562 DERR(vswp, "%s(%lld): no descriptor available for ring " 4563 "at 0x%llx", __func__, ldcp->ldc_id, dp); 4564 warn_msg = 0; 4565 } 4566 4567 /* nothing more we can do */ 4568 status = LDC_TX_NORESOURCES; 4569 goto vsw_descrsend_free_exit; 4570 } else { 4571 D2(vswp, "%s(%lld): free private descriptor found at pos " 4572 "%ld addr 0x%x\n", __func__, ldcp->ldc_id, idx, priv_desc); 4573 warn_msg = 1; 4574 } 4575 4576 /* copy data into the descriptor */ 4577 bufp = priv_desc->datap; 4578 for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) { 4579 n = MBLKL(bp); 4580 bcopy(bp->b_rptr, bufp, n); 4581 bufp += n; 4582 } 4583 4584 priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size; 4585 4586 /* create and send the in-band descp msg */ 4587 ibnd_msg.hdr.tag.vio_msgtype = VIO_TYPE_DATA; 4588 ibnd_msg.hdr.tag.vio_subtype = VIO_SUBTYPE_INFO; 4589 ibnd_msg.hdr.tag.vio_subtype_env = VIO_DESC_DATA; 4590 ibnd_msg.hdr.tag.vio_sid = ldcp->local_session; 4591 4592 /* 4593 * Copy the mem cookies describing the data from the 4594 * private region of the descriptor ring into the inband 4595 * descriptor. 
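 * The peer uses these cookies to pull the payload across the channel with ldc_mem_copy() (see the VIO_SUBTYPE_INFO handling in the in-band receive path above).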
4596 */ 4597 for (i = 0; i < priv_desc->ncookies; i++) { 4598 bcopy(&priv_desc->memcookie[i], &ibnd_msg.memcookie[i], 4599 sizeof (ldc_mem_cookie_t)); 4600 } 4601 4602 ibnd_msg.hdr.desc_handle = idx; 4603 ibnd_msg.ncookies = priv_desc->ncookies; 4604 ibnd_msg.nbytes = size; 4605 4606 ldcp->ldc_stats.opackets++; 4607 ldcp->ldc_stats.obytes += size; 4608 4609 RW_EXIT(&ldcp->lane_out.dlistrw); 4610 4611 (void) vsw_send_msg(ldcp, (void *)&ibnd_msg, 4612 sizeof (vnet_ibnd_desc_t), B_TRUE); 4613 4614 vsw_descrsend_free_exit: 4615 4616 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 4617 return (status); 4618 } 4619 4620 static void 4621 vsw_send_ver(void *arg) 4622 { 4623 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 4624 vsw_t *vswp = ldcp->ldc_vswp; 4625 lane_t *lp = &ldcp->lane_out; 4626 vio_ver_msg_t ver_msg; 4627 4628 D1(vswp, "%s enter", __func__); 4629 4630 ver_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 4631 ver_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 4632 ver_msg.tag.vio_subtype_env = VIO_VER_INFO; 4633 ver_msg.tag.vio_sid = ldcp->local_session; 4634 4635 if (vsw_obp_ver_proto_workaround == B_FALSE) { 4636 ver_msg.ver_major = vsw_versions[0].ver_major; 4637 ver_msg.ver_minor = vsw_versions[0].ver_minor; 4638 } else { 4639 /* use the major,minor that we've ack'd */ 4640 lane_t *lpi = &ldcp->lane_in; 4641 ver_msg.ver_major = lpi->ver_major; 4642 ver_msg.ver_minor = lpi->ver_minor; 4643 } 4644 ver_msg.dev_class = VDEV_NETWORK_SWITCH; 4645 4646 lp->lstate |= VSW_VER_INFO_SENT; 4647 lp->ver_major = ver_msg.ver_major; 4648 lp->ver_minor = ver_msg.ver_minor; 4649 4650 DUMP_TAG(ver_msg.tag); 4651 4652 (void) vsw_send_msg(ldcp, &ver_msg, sizeof (vio_ver_msg_t), B_TRUE); 4653 4654 D1(vswp, "%s (%d): exit", __func__, ldcp->ldc_id); 4655 } 4656 4657 static void 4658 vsw_send_attr(vsw_ldc_t *ldcp) 4659 { 4660 vsw_t *vswp = ldcp->ldc_vswp; 4661 lane_t *lp = &ldcp->lane_out; 4662 vnet_attr_msg_t attr_msg; 4663 4664 D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id); 4665 4666 /* 4667 * Subtype is set to INFO by default 4668 */ 4669 attr_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 4670 attr_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 4671 attr_msg.tag.vio_subtype_env = VIO_ATTR_INFO; 4672 attr_msg.tag.vio_sid = ldcp->local_session; 4673 4674 /* payload copied from default settings for lane */ 4675 attr_msg.mtu = lp->mtu; 4676 attr_msg.addr_type = lp->addr_type; 4677 attr_msg.xfer_mode = lp->xfer_mode; 4678 attr_msg.ack_freq = lp->ack_freq; 4679 4680 READ_ENTER(&vswp->if_lockrw); 4681 attr_msg.addr = vnet_macaddr_strtoul((vswp->if_addr).ether_addr_octet); 4682 RW_EXIT(&vswp->if_lockrw); 4683 4684 ldcp->lane_out.lstate |= VSW_ATTR_INFO_SENT; 4685 4686 DUMP_TAG(attr_msg.tag); 4687 4688 (void) vsw_send_msg(ldcp, &attr_msg, sizeof (vnet_attr_msg_t), B_TRUE); 4689 4690 D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id); 4691 } 4692 4693 /* 4694 * Create dring info msg (which also results in the creation of 4695 * a dring). 4696 */ 4697 static vio_dring_reg_msg_t * 4698 vsw_create_dring_info_pkt(vsw_ldc_t *ldcp) 4699 { 4700 vio_dring_reg_msg_t *mp; 4701 dring_info_t *dp; 4702 vsw_t *vswp = ldcp->ldc_vswp; 4703 4704 D1(vswp, "vsw_create_dring_info_pkt enter\n"); 4705 4706 /* 4707 * If we can't create a dring, obviously no point sending 4708 * a message.
4709 */ 4710 if ((dp = vsw_create_dring(ldcp)) == NULL) 4711 return (NULL); 4712 4713 mp = kmem_zalloc(sizeof (vio_dring_reg_msg_t), KM_SLEEP); 4714 4715 mp->tag.vio_msgtype = VIO_TYPE_CTRL; 4716 mp->tag.vio_subtype = VIO_SUBTYPE_INFO; 4717 mp->tag.vio_subtype_env = VIO_DRING_REG; 4718 mp->tag.vio_sid = ldcp->local_session; 4719 4720 /* payload */ 4721 mp->num_descriptors = dp->num_descriptors; 4722 mp->descriptor_size = dp->descriptor_size; 4723 mp->options = dp->options; 4724 mp->ncookies = dp->ncookies; 4725 bcopy(&dp->cookie[0], &mp->cookie[0], sizeof (ldc_mem_cookie_t)); 4726 4727 mp->dring_ident = 0; 4728 4729 D1(vswp, "vsw_create_dring_info_pkt exit\n"); 4730 4731 return (mp); 4732 } 4733 4734 static void 4735 vsw_send_dring_info(vsw_ldc_t *ldcp) 4736 { 4737 vio_dring_reg_msg_t *dring_msg; 4738 vsw_t *vswp = ldcp->ldc_vswp; 4739 4740 D1(vswp, "%s: (%ld) enter", __func__, ldcp->ldc_id); 4741 4742 dring_msg = vsw_create_dring_info_pkt(ldcp); 4743 if (dring_msg == NULL) { 4744 cmn_err(CE_WARN, "!vsw%d: %s: error creating msg", 4745 vswp->instance, __func__); 4746 return; 4747 } 4748 4749 ldcp->lane_out.lstate |= VSW_DRING_INFO_SENT; 4750 4751 DUMP_TAG_PTR((vio_msg_tag_t *)dring_msg); 4752 4753 (void) vsw_send_msg(ldcp, dring_msg, 4754 sizeof (vio_dring_reg_msg_t), B_TRUE); 4755 4756 kmem_free(dring_msg, sizeof (vio_dring_reg_msg_t)); 4757 4758 D1(vswp, "%s: (%ld) exit", __func__, ldcp->ldc_id); 4759 } 4760 4761 static void 4762 vsw_send_rdx(vsw_ldc_t *ldcp) 4763 { 4764 vsw_t *vswp = ldcp->ldc_vswp; 4765 vio_rdx_msg_t rdx_msg; 4766 4767 D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id); 4768 4769 rdx_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 4770 rdx_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 4771 rdx_msg.tag.vio_subtype_env = VIO_RDX; 4772 rdx_msg.tag.vio_sid = ldcp->local_session; 4773 4774 ldcp->lane_in.lstate |= VSW_RDX_INFO_SENT; 4775 4776 DUMP_TAG(rdx_msg.tag); 4777 4778 (void) vsw_send_msg(ldcp, &rdx_msg, sizeof (vio_rdx_msg_t), B_TRUE); 4779 4780 D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id); 4781 } 4782 4783 /* 4784 * Generic routine to send message out over ldc channel. 4785 * 4786 * It is possible that when we attempt to write over the ldc channel 4787 * that we get notified that it has been reset. Depending on the value 4788 * of the handle_reset flag we either handle that event here or simply 4789 * notify the caller that the channel was reset. 
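 * Callers that cannot tolerate the reset being processed in this context (for example while holding the dlistrw lock) pass handle_reset == B_FALSE and deal with the ECONNRESET return themselves. A typical (hypothetical) invocation looks like: (void) vsw_send_msg(ldcp, &msg, sizeof (msg), B_TRUE);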
4790 */ 4791 int 4792 vsw_send_msg(vsw_ldc_t *ldcp, void *msgp, int size, boolean_t handle_reset) 4793 { 4794 int rv; 4795 size_t msglen = size; 4796 vio_msg_tag_t *tag = (vio_msg_tag_t *)msgp; 4797 vsw_t *vswp = ldcp->ldc_vswp; 4798 vio_dring_msg_t *dmsg; 4799 vio_raw_data_msg_t *rmsg; 4800 vnet_ibnd_desc_t *imsg; 4801 boolean_t data_msg = B_FALSE; 4802 4803 D1(vswp, "vsw_send_msg (%lld) enter : sending %d bytes", 4804 ldcp->ldc_id, size); 4805 4806 D2(vswp, "send_msg: type 0x%llx", tag->vio_msgtype); 4807 D2(vswp, "send_msg: stype 0x%llx", tag->vio_subtype); 4808 D2(vswp, "send_msg: senv 0x%llx", tag->vio_subtype_env); 4809 4810 mutex_enter(&ldcp->ldc_txlock); 4811 4812 if (tag->vio_subtype == VIO_SUBTYPE_INFO) { 4813 if (tag->vio_subtype_env == VIO_DRING_DATA) { 4814 dmsg = (vio_dring_msg_t *)tag; 4815 dmsg->seq_num = ldcp->lane_out.seq_num; 4816 data_msg = B_TRUE; 4817 } else if (tag->vio_subtype_env == VIO_PKT_DATA) { 4818 rmsg = (vio_raw_data_msg_t *)tag; 4819 rmsg->seq_num = ldcp->lane_out.seq_num; 4820 data_msg = B_TRUE; 4821 } else if (tag->vio_subtype_env == VIO_DESC_DATA) { 4822 imsg = (vnet_ibnd_desc_t *)tag; 4823 imsg->hdr.seq_num = ldcp->lane_out.seq_num; 4824 data_msg = B_TRUE; 4825 } 4826 } 4827 4828 do { 4829 msglen = size; 4830 rv = ldc_write(ldcp->ldc_handle, (caddr_t)msgp, &msglen); 4831 } while (rv == EWOULDBLOCK && --vsw_wretries > 0); 4832 4833 if (rv == 0 && data_msg == B_TRUE) { 4834 ldcp->lane_out.seq_num++; 4835 } 4836 4837 if ((rv != 0) || (msglen != size)) { 4838 DERR(vswp, "vsw_send_msg:ldc_write failed: chan(%lld) rv(%d) " 4839 "size (%d) msglen(%d)\n", ldcp->ldc_id, rv, size, msglen); 4840 ldcp->ldc_stats.oerrors++; 4841 } 4842 4843 mutex_exit(&ldcp->ldc_txlock); 4844 4845 /* 4846 * If channel has been reset we either handle it here or 4847 * simply report back that it has been reset and let caller 4848 * decide what to do. 4849 */ 4850 if (rv == ECONNRESET) { 4851 DWARN(vswp, "%s (%lld) channel reset", __func__, ldcp->ldc_id); 4852 4853 /* 4854 * N.B - must never be holding the dlistrw lock when 4855 * we do a reset of the channel. 4856 */ 4857 if (handle_reset) { 4858 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 4859 } 4860 } 4861 4862 return (rv); 4863 } 4864 4865 /* 4866 * Remove the specified address from the list of address maintained 4867 * in this port node. 
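 * The matching entry is unlinked and returned (NULL if no match was found); freeing the returned entry is presumably left to the caller.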
4868 */ 4869 mcst_addr_t * 4870 vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr) 4871 { 4872 vsw_t *vswp = NULL; 4873 vsw_port_t *port = NULL; 4874 mcst_addr_t *prev_p = NULL; 4875 mcst_addr_t *curr_p = NULL; 4876 4877 D1(NULL, "%s: enter : devtype %d : addr 0x%llx", 4878 __func__, devtype, addr); 4879 4880 if (devtype == VSW_VNETPORT) { 4881 port = (vsw_port_t *)arg; 4882 mutex_enter(&port->mca_lock); 4883 prev_p = curr_p = port->mcap; 4884 } else { 4885 vswp = (vsw_t *)arg; 4886 mutex_enter(&vswp->mca_lock); 4887 prev_p = curr_p = vswp->mcap; 4888 } 4889 4890 while (curr_p != NULL) { 4891 if (curr_p->addr == addr) { 4892 D2(NULL, "%s: address found", __func__); 4893 /* match found */ 4894 if (prev_p == curr_p) { 4895 /* list head */ 4896 if (devtype == VSW_VNETPORT) 4897 port->mcap = curr_p->nextp; 4898 else 4899 vswp->mcap = curr_p->nextp; 4900 } else { 4901 prev_p->nextp = curr_p->nextp; 4902 } 4903 break; 4904 } else { 4905 prev_p = curr_p; 4906 curr_p = curr_p->nextp; 4907 } 4908 } 4909 4910 if (devtype == VSW_VNETPORT) 4911 mutex_exit(&port->mca_lock); 4912 else 4913 mutex_exit(&vswp->mca_lock); 4914 4915 D1(NULL, "%s: exit", __func__); 4916 4917 return (curr_p); 4918 } 4919 4920 /* 4921 * Creates a descriptor ring (dring) and links it into the 4922 * link of outbound drings for this channel. 4923 * 4924 * Returns NULL if creation failed. 4925 */ 4926 static dring_info_t * 4927 vsw_create_dring(vsw_ldc_t *ldcp) 4928 { 4929 vsw_private_desc_t *priv_addr = NULL; 4930 vsw_t *vswp = ldcp->ldc_vswp; 4931 ldc_mem_info_t minfo; 4932 dring_info_t *dp, *tp; 4933 int i; 4934 4935 dp = (dring_info_t *)kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 4936 4937 mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL); 4938 4939 /* create public section of ring */ 4940 if ((ldc_mem_dring_create(vsw_ntxds, 4941 VSW_PUB_SIZE, &dp->handle)) != 0) { 4942 4943 DERR(vswp, "vsw_create_dring(%lld): ldc dring create " 4944 "failed", ldcp->ldc_id); 4945 goto create_fail_exit; 4946 } 4947 4948 ASSERT(dp->handle != NULL); 4949 4950 /* 4951 * Get the base address of the public section of the ring. 4952 */ 4953 if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) { 4954 DERR(vswp, "vsw_create_dring(%lld): dring info failed\n", 4955 ldcp->ldc_id); 4956 goto dring_fail_exit; 4957 } else { 4958 ASSERT(minfo.vaddr != 0); 4959 dp->pub_addr = minfo.vaddr; 4960 } 4961 4962 dp->num_descriptors = vsw_ntxds; 4963 dp->descriptor_size = VSW_PUB_SIZE; 4964 dp->options = VIO_TX_DRING; 4965 dp->ncookies = 1; /* guaranteed by ldc */ 4966 4967 /* 4968 * create private portion of ring 4969 */ 4970 dp->priv_addr = (vsw_private_desc_t *)kmem_zalloc( 4971 (sizeof (vsw_private_desc_t) * vsw_ntxds), KM_SLEEP); 4972 4973 if (vsw_setup_ring(ldcp, dp)) { 4974 DERR(vswp, "%s: unable to setup ring", __func__); 4975 goto dring_fail_exit; 4976 } 4977 4978 /* haven't used any descriptors yet */ 4979 dp->end_idx = 0; 4980 dp->last_ack_recv = -1; 4981 4982 /* bind dring to the channel */ 4983 if ((ldc_mem_dring_bind(ldcp->ldc_handle, dp->handle, 4984 LDC_DIRECT_MAP | LDC_SHADOW_MAP, LDC_MEM_RW, 4985 &dp->cookie[0], &dp->ncookies)) != 0) { 4986 DERR(vswp, "vsw_create_dring: unable to bind to channel " 4987 "%lld", ldcp->ldc_id); 4988 goto dring_fail_exit; 4989 } 4990 4991 mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL); 4992 dp->restart_reqd = B_TRUE; 4993 4994 /* 4995 * Only ever create rings for outgoing lane. Link it onto 4996 * end of list. 
4997 */ 4998 WRITE_ENTER(&ldcp->lane_out.dlistrw); 4999 if (ldcp->lane_out.dringp == NULL) { 5000 D2(vswp, "vsw_create_dring: adding first outbound ring"); 5001 ldcp->lane_out.dringp = dp; 5002 } else { 5003 tp = ldcp->lane_out.dringp; 5004 while (tp->next != NULL) 5005 tp = tp->next; 5006 5007 tp->next = dp; 5008 } 5009 RW_EXIT(&ldcp->lane_out.dlistrw); 5010 5011 return (dp); 5012 5013 dring_fail_exit: 5014 (void) ldc_mem_dring_destroy(dp->handle); 5015 5016 create_fail_exit: 5017 if (dp->priv_addr != NULL) { 5018 priv_addr = dp->priv_addr; 5019 for (i = 0; i < vsw_ntxds; i++) { 5020 if (priv_addr->memhandle != NULL) 5021 (void) ldc_mem_free_handle( 5022 priv_addr->memhandle); 5023 priv_addr++; 5024 } 5025 kmem_free(dp->priv_addr, 5026 (sizeof (vsw_private_desc_t) * vsw_ntxds)); 5027 } 5028 mutex_destroy(&dp->dlock); 5029 5030 kmem_free(dp, sizeof (dring_info_t)); 5031 return (NULL); 5032 } 5033 5034 /* 5035 * Create a ring consisting of just a private portion and link 5036 * it into the list of rings for the outbound lane. 5037 * 5038 * These type of rings are used primarily for temporary data 5039 * storage (i.e. as data buffers). 5040 */ 5041 void 5042 vsw_create_privring(vsw_ldc_t *ldcp) 5043 { 5044 dring_info_t *dp, *tp; 5045 vsw_t *vswp = ldcp->ldc_vswp; 5046 5047 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 5048 5049 dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 5050 5051 mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL); 5052 5053 /* no public section */ 5054 dp->pub_addr = NULL; 5055 5056 dp->priv_addr = kmem_zalloc( 5057 (sizeof (vsw_private_desc_t) * vsw_ntxds), KM_SLEEP); 5058 5059 dp->num_descriptors = vsw_ntxds; 5060 5061 if (vsw_setup_ring(ldcp, dp)) { 5062 DERR(vswp, "%s: setup of ring failed", __func__); 5063 kmem_free(dp->priv_addr, 5064 (sizeof (vsw_private_desc_t) * vsw_ntxds)); 5065 mutex_destroy(&dp->dlock); 5066 kmem_free(dp, sizeof (dring_info_t)); 5067 return; 5068 } 5069 5070 /* haven't used any descriptors yet */ 5071 dp->end_idx = 0; 5072 5073 mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL); 5074 dp->restart_reqd = B_TRUE; 5075 5076 /* 5077 * Only ever create rings for outgoing lane. Link it onto 5078 * end of list. 5079 */ 5080 WRITE_ENTER(&ldcp->lane_out.dlistrw); 5081 if (ldcp->lane_out.dringp == NULL) { 5082 D2(vswp, "%s: adding first outbound privring", __func__); 5083 ldcp->lane_out.dringp = dp; 5084 } else { 5085 tp = ldcp->lane_out.dringp; 5086 while (tp->next != NULL) 5087 tp = tp->next; 5088 5089 tp->next = dp; 5090 } 5091 RW_EXIT(&ldcp->lane_out.dlistrw); 5092 5093 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 5094 } 5095 5096 /* 5097 * Setup the descriptors in the dring. Returns 0 on success, 1 on 5098 * failure. 5099 */ 5100 int 5101 vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp) 5102 { 5103 vnet_public_desc_t *pub_addr = NULL; 5104 vsw_private_desc_t *priv_addr = NULL; 5105 vsw_t *vswp = ldcp->ldc_vswp; 5106 uint64_t *tmpp; 5107 uint64_t offset = 0; 5108 uint32_t ncookies = 0; 5109 static char *name = "vsw_setup_ring"; 5110 int i, j, nc, rv; 5111 size_t data_sz; 5112 5113 priv_addr = dp->priv_addr; 5114 pub_addr = dp->pub_addr; 5115 5116 /* public section may be null but private should never be */ 5117 ASSERT(priv_addr != NULL); 5118 5119 /* 5120 * Allocate the region of memory which will be used to hold 5121 * the data the descriptors will refer to. 
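 * The buffer is allocated as a single contiguous region and carved into vsw_ntxds slots of desc_data_sz bytes; each private descriptor below is bound to its slot and the resulting LDC memory cookies are copied into the matching public descriptor (when one exists).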
5122 */ 5123 data_sz = vswp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN; 5124 data_sz = VNET_ROUNDUP_2K(data_sz); 5125 dp->desc_data_sz = data_sz; 5126 dp->data_sz = vsw_ntxds * data_sz; 5127 dp->data_addr = kmem_alloc(dp->data_sz, KM_SLEEP); 5128 5129 D2(vswp, "%s: allocated %lld bytes at 0x%llx\n", name, 5130 dp->data_sz, dp->data_addr); 5131 5132 tmpp = (uint64_t *)dp->data_addr; 5133 offset = dp->desc_data_sz/sizeof (tmpp); 5134 5135 /* 5136 * Initialise some of the private and public (if they exist) 5137 * descriptor fields. 5138 */ 5139 for (i = 0; i < vsw_ntxds; i++) { 5140 mutex_init(&priv_addr->dstate_lock, NULL, MUTEX_DRIVER, NULL); 5141 5142 if ((ldc_mem_alloc_handle(ldcp->ldc_handle, 5143 &priv_addr->memhandle)) != 0) { 5144 DERR(vswp, "%s: alloc mem handle failed", name); 5145 goto setup_ring_cleanup; 5146 } 5147 5148 priv_addr->datap = (void *)tmpp; 5149 5150 rv = ldc_mem_bind_handle(priv_addr->memhandle, 5151 (caddr_t)priv_addr->datap, dp->desc_data_sz, 5152 LDC_SHADOW_MAP, LDC_MEM_R|LDC_MEM_W, 5153 &(priv_addr->memcookie[0]), &ncookies); 5154 if (rv != 0) { 5155 DERR(vswp, "%s(%lld): ldc_mem_bind_handle failed " 5156 "(rv %d)", name, ldcp->ldc_id, rv); 5157 goto setup_ring_cleanup; 5158 } 5159 priv_addr->bound = 1; 5160 5161 D2(vswp, "%s: %d: memcookie 0 : addr 0x%llx : size 0x%llx", 5162 name, i, priv_addr->memcookie[0].addr, 5163 priv_addr->memcookie[0].size); 5164 5165 if (ncookies >= (uint32_t)(VSW_MAX_COOKIES + 1)) { 5166 DERR(vswp, "%s(%lld) ldc_mem_bind_handle returned " 5167 "invalid num of cookies (%d) for size 0x%llx", 5168 name, ldcp->ldc_id, ncookies, VSW_RING_EL_DATA_SZ); 5169 5170 goto setup_ring_cleanup; 5171 } else { 5172 for (j = 1; j < ncookies; j++) { 5173 rv = ldc_mem_nextcookie(priv_addr->memhandle, 5174 &(priv_addr->memcookie[j])); 5175 if (rv != 0) { 5176 DERR(vswp, "%s: ldc_mem_nextcookie " 5177 "failed rv (%d)", name, rv); 5178 goto setup_ring_cleanup; 5179 } 5180 D3(vswp, "%s: memcookie %d : addr 0x%llx : " 5181 "size 0x%llx", name, j, 5182 priv_addr->memcookie[j].addr, 5183 priv_addr->memcookie[j].size); 5184 } 5185 5186 } 5187 priv_addr->ncookies = ncookies; 5188 priv_addr->dstate = VIO_DESC_FREE; 5189 5190 if (pub_addr != NULL) { 5191 5192 /* link pub and private sides */ 5193 priv_addr->descp = pub_addr; 5194 5195 pub_addr->ncookies = priv_addr->ncookies; 5196 5197 for (nc = 0; nc < pub_addr->ncookies; nc++) { 5198 bcopy(&priv_addr->memcookie[nc], 5199 &pub_addr->memcookie[nc], 5200 sizeof (ldc_mem_cookie_t)); 5201 } 5202 5203 pub_addr->hdr.dstate = VIO_DESC_FREE; 5204 pub_addr++; 5205 } 5206 5207 /* 5208 * move to next element in the dring and the next 5209 * position in the data buffer. 5210 */ 5211 priv_addr++; 5212 tmpp += offset; 5213 } 5214 5215 return (0); 5216 5217 setup_ring_cleanup: 5218 priv_addr = dp->priv_addr; 5219 5220 for (j = 0; j < i; j++) { 5221 (void) ldc_mem_unbind_handle(priv_addr->memhandle); 5222 (void) ldc_mem_free_handle(priv_addr->memhandle); 5223 5224 mutex_destroy(&priv_addr->dstate_lock); 5225 5226 priv_addr++; 5227 } 5228 kmem_free(dp->data_addr, dp->data_sz); 5229 5230 return (1); 5231 } 5232 5233 /* 5234 * Searches the private section of a ring for a free descriptor, 5235 * starting at the location of the last free descriptor found 5236 * previously. 5237 * 5238 * Returns 0 if free descriptor is available, and updates state 5239 * of private descriptor to VIO_DESC_READY, otherwise returns 1. 
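 * Note that only the descriptor at end_idx is examined; no scan of the ring is performed, so a single in-use descriptor at that position makes the ring appear full to the caller.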
5240 * 5241 * FUTURE: might need to return contiguous range of descriptors 5242 * as dring info msg assumes all will be contiguous. 5243 */ 5244 static int 5245 vsw_dring_find_free_desc(dring_info_t *dringp, 5246 vsw_private_desc_t **priv_p, int *idx) 5247 { 5248 vsw_private_desc_t *addr = NULL; 5249 int num = vsw_ntxds; 5250 int ret = 1; 5251 5252 D1(NULL, "%s enter\n", __func__); 5253 5254 ASSERT(dringp->priv_addr != NULL); 5255 5256 D2(NULL, "%s: searching ring, dringp 0x%llx : start pos %lld", 5257 __func__, dringp, dringp->end_idx); 5258 5259 addr = (vsw_private_desc_t *)dringp->priv_addr + dringp->end_idx; 5260 5261 mutex_enter(&addr->dstate_lock); 5262 if (addr->dstate == VIO_DESC_FREE) { 5263 addr->dstate = VIO_DESC_READY; 5264 *priv_p = addr; 5265 *idx = dringp->end_idx; 5266 dringp->end_idx = (dringp->end_idx + 1) % num; 5267 ret = 0; 5268 5269 } 5270 mutex_exit(&addr->dstate_lock); 5271 5272 /* ring full */ 5273 if (ret == 1) { 5274 D2(NULL, "%s: no desp free: started at %d", __func__, 5275 dringp->end_idx); 5276 } 5277 5278 D1(NULL, "%s: exit\n", __func__); 5279 5280 return (ret); 5281 } 5282 5283 /* 5284 * Map from a dring identifier to the ring itself. Returns 5285 * pointer to ring or NULL if no match found. 5286 * 5287 * Should be called with dlistrw rwlock held as reader. 5288 */ 5289 static dring_info_t * 5290 vsw_ident2dring(lane_t *lane, uint64_t ident) 5291 { 5292 dring_info_t *dp = NULL; 5293 5294 if ((dp = lane->dringp) == NULL) { 5295 return (NULL); 5296 } else { 5297 if (dp->ident == ident) 5298 return (dp); 5299 5300 while (dp != NULL) { 5301 if (dp->ident == ident) 5302 break; 5303 dp = dp->next; 5304 } 5305 } 5306 5307 return (dp); 5308 } 5309 5310 /* 5311 * Set the default lane attributes. These are copied into 5312 * the attr msg we send to our peer. If they are not acceptable 5313 * then (currently) the handshake ends. 5314 */ 5315 static void 5316 vsw_set_lane_attr(vsw_t *vswp, lane_t *lp) 5317 { 5318 bzero(lp, sizeof (lane_t)); 5319 5320 READ_ENTER(&vswp->if_lockrw); 5321 ether_copy(&(vswp->if_addr), &(lp->addr)); 5322 RW_EXIT(&vswp->if_lockrw); 5323 5324 lp->mtu = vswp->max_frame_size; 5325 lp->addr_type = ADDR_TYPE_MAC; 5326 lp->xfer_mode = VIO_DRING_MODE_V1_0; 5327 lp->ack_freq = 0; /* for shared mode */ 5328 lp->seq_num = VNET_ISS; 5329 } 5330 5331 /* 5332 * Verify that the attributes are acceptable. 5333 * 5334 * FUTURE: If some attributes are not acceptable, change them 5335 * our desired values. 5336 */ 5337 static int 5338 vsw_check_attr(vnet_attr_msg_t *pkt, vsw_ldc_t *ldcp) 5339 { 5340 int ret = 0; 5341 struct ether_addr ea; 5342 vsw_port_t *port = ldcp->ldc_port; 5343 lane_t *lp = &ldcp->lane_out; 5344 5345 D1(NULL, "vsw_check_attr enter\n"); 5346 5347 if ((pkt->xfer_mode != VIO_DESC_MODE) && 5348 (pkt->xfer_mode != lp->xfer_mode)) { 5349 D2(NULL, "vsw_check_attr: unknown mode %x\n", pkt->xfer_mode); 5350 ret = 1; 5351 } 5352 5353 /* Only support MAC addresses at moment. */ 5354 if ((pkt->addr_type != ADDR_TYPE_MAC) || (pkt->addr == 0)) { 5355 D2(NULL, "vsw_check_attr: invalid addr_type %x, " 5356 "or address 0x%llx\n", pkt->addr_type, pkt->addr); 5357 ret = 1; 5358 } 5359 5360 /* 5361 * MAC address supplied by device should match that stored 5362 * in the vsw-port OBP node. Need to decide what to do if they 5363 * don't match, for the moment just warn but don't fail. 
5364 */ 5365 vnet_macaddr_ultostr(pkt->addr, ea.ether_addr_octet); 5366 if (ether_cmp(&ea, &port->p_macaddr) != 0) { 5367 DERR(NULL, "vsw_check_attr: device supplied address " 5368 "0x%llx doesn't match node address 0x%llx\n", 5369 pkt->addr, port->p_macaddr); 5370 } 5371 5372 /* 5373 * Ack freq only makes sense in pkt mode, in shared 5374 * mode the ring descriptors say whether or not to 5375 * send back an ACK. 5376 */ 5377 if ((VSW_VER_GTEQ(ldcp, 1, 2) && 5378 (pkt->xfer_mode & VIO_DRING_MODE_V1_2)) || 5379 (VSW_VER_LT(ldcp, 1, 2) && 5380 (pkt->xfer_mode == VIO_DRING_MODE_V1_0))) { 5381 if (pkt->ack_freq > 0) { 5382 D2(NULL, "vsw_check_attr: non zero ack freq " 5383 " in SHM mode\n"); 5384 ret = 1; 5385 } 5386 } 5387 5388 /* 5389 * Note: for the moment we only support ETHER 5390 * frames. This may change in the future. 5391 */ 5392 if ((pkt->mtu > lp->mtu) || (pkt->mtu <= 0)) { 5393 D2(NULL, "vsw_check_attr: invalid MTU (0x%llx)\n", 5394 pkt->mtu); 5395 ret = 1; 5396 } 5397 5398 D1(NULL, "vsw_check_attr exit\n"); 5399 5400 return (ret); 5401 } 5402 5403 /* 5404 * Returns 1 if there is a problem, 0 otherwise. 5405 */ 5406 static int 5407 vsw_check_dring_info(vio_dring_reg_msg_t *pkt) 5408 { 5409 5410 5411 int ret = 0; 5412 5413 D1(NULL, "vsw_check_dring_info enter\n"); 5414 5415 if ((pkt->num_descriptors == 0) || 5416 (pkt->descriptor_size == 0) || 5417 (pkt->ncookies != 1)) { 5418 DERR(NULL, "vsw_check_dring_info: invalid dring msg"); 5419 ret = 1; 5420 } 5421 5422 D1(NULL, "vsw_check_dring_info exit\n"); 5423 5424 return (ret); 5425 } 5426 5427 /* 5428 * Returns 1 if two memory cookies match. Otherwise returns 0. 5429 */ 5430 static int 5431 vsw_mem_cookie_match(ldc_mem_cookie_t *m1, ldc_mem_cookie_t *m2) 5432 { 5433 if ((m1->addr != m2->addr) || 5434 (m1->size != m2->size)) { 5435 return (0); 5436 } else { 5437 return (1); 5438 } 5439 } 5440 5441 /* 5442 * Returns 1 if ring described in reg message matches that 5443 * described by dring_info structure. Otherwise returns 0. 5444 */ 5445 static int 5446 vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg) 5447 { 5448 if ((msg->descriptor_size != dp->descriptor_size) || 5449 (msg->num_descriptors != dp->num_descriptors) || 5450 (msg->ncookies != dp->ncookies) || 5451 !(vsw_mem_cookie_match(&msg->cookie[0], &dp->cookie[0]))) { 5452 return (0); 5453 } else { 5454 return (1); 5455 } 5456 5457 } 5458 5459 static caddr_t 5460 vsw_print_ethaddr(uint8_t *a, char *ebuf) 5461 { 5462 (void) sprintf(ebuf, "%x:%x:%x:%x:%x:%x", 5463 a[0], a[1], a[2], a[3], a[4], a[5]); 5464 return (ebuf); 5465 } 5466 5467 /* 5468 * Reset and free all the resources associated with 5469 * the channel.
5470 */ 5471 static void 5472 vsw_free_lane_resources(vsw_ldc_t *ldcp, uint64_t dir) 5473 { 5474 dring_info_t *dp, *dpp; 5475 lane_t *lp = NULL; 5476 int rv = 0; 5477 5478 ASSERT(ldcp != NULL); 5479 5480 D1(ldcp->ldc_vswp, "%s (%lld): enter", __func__, ldcp->ldc_id); 5481 5482 if (dir == INBOUND) { 5483 D2(ldcp->ldc_vswp, "%s: freeing INBOUND lane" 5484 " of channel %lld", __func__, ldcp->ldc_id); 5485 lp = &ldcp->lane_in; 5486 } else { 5487 D2(ldcp->ldc_vswp, "%s: freeing OUTBOUND lane" 5488 " of channel %lld", __func__, ldcp->ldc_id); 5489 lp = &ldcp->lane_out; 5490 } 5491 5492 lp->lstate = VSW_LANE_INACTIV; 5493 lp->seq_num = VNET_ISS; 5494 5495 if (lp->dringp) { 5496 if (dir == INBOUND) { 5497 WRITE_ENTER(&lp->dlistrw); 5498 dp = lp->dringp; 5499 while (dp != NULL) { 5500 dpp = dp->next; 5501 if (dp->handle != NULL) 5502 (void) ldc_mem_dring_unmap(dp->handle); 5503 kmem_free(dp, sizeof (dring_info_t)); 5504 dp = dpp; 5505 } 5506 RW_EXIT(&lp->dlistrw); 5507 } else { 5508 /* 5509 * unbind, destroy exported dring, free dring struct 5510 */ 5511 WRITE_ENTER(&lp->dlistrw); 5512 dp = lp->dringp; 5513 rv = vsw_free_ring(dp); 5514 RW_EXIT(&lp->dlistrw); 5515 } 5516 if (rv == 0) { 5517 lp->dringp = NULL; 5518 } 5519 } 5520 5521 D1(ldcp->ldc_vswp, "%s (%lld): exit", __func__, ldcp->ldc_id); 5522 } 5523 5524 /* 5525 * Free ring and all associated resources. 5526 * 5527 * Should be called with dlistrw rwlock held as writer. 5528 */ 5529 static int 5530 vsw_free_ring(dring_info_t *dp) 5531 { 5532 vsw_private_desc_t *paddr = NULL; 5533 dring_info_t *dpp; 5534 int i, rv = 1; 5535 5536 while (dp != NULL) { 5537 mutex_enter(&dp->dlock); 5538 dpp = dp->next; 5539 if (dp->priv_addr != NULL) { 5540 /* 5541 * First unbind and free the memory handles 5542 * stored in each descriptor within the ring. 5543 */ 5544 for (i = 0; i < vsw_ntxds; i++) { 5545 paddr = (vsw_private_desc_t *) 5546 dp->priv_addr + i; 5547 if (paddr->memhandle != NULL) { 5548 if (paddr->bound == 1) { 5549 rv = ldc_mem_unbind_handle( 5550 paddr->memhandle); 5551 5552 if (rv != 0) { 5553 DERR(NULL, "error " 5554 "unbinding handle for " 5555 "ring 0x%llx at pos %d", 5556 dp, i); 5557 mutex_exit(&dp->dlock); 5558 return (rv); 5559 } 5560 paddr->bound = 0; 5561 } 5562 5563 rv = ldc_mem_free_handle( 5564 paddr->memhandle); 5565 if (rv != 0) { 5566 DERR(NULL, "error freeing " 5567 "handle for ring 0x%llx " 5568 "at pos %d", dp, i); 5569 mutex_exit(&dp->dlock); 5570 return (rv); 5571 } 5572 paddr->memhandle = NULL; 5573 } 5574 mutex_destroy(&paddr->dstate_lock); 5575 } 5576 kmem_free(dp->priv_addr, 5577 (sizeof (vsw_private_desc_t) * vsw_ntxds)); 5578 } 5579 5580 /* 5581 * Now unbind and destroy the ring itself. 5582 */ 5583 if (dp->handle != NULL) { 5584 (void) ldc_mem_dring_unbind(dp->handle); 5585 (void) ldc_mem_dring_destroy(dp->handle); 5586 } 5587 5588 if (dp->data_addr != NULL) { 5589 kmem_free(dp->data_addr, dp->data_sz); 5590 } 5591 5592 mutex_exit(&dp->dlock); 5593 mutex_destroy(&dp->dlock); 5594 mutex_destroy(&dp->restart_lock); 5595 kmem_free(dp, sizeof (dring_info_t)); 5596 5597 dp = dpp; 5598 } 5599 return (0); 5600 } 5601 5602 /* 5603 * vsw_ldc_rx_worker -- A per LDC worker thread to receive data. 5604 * This thread is woken up by the LDC interrupt handler to process 5605 * LDC packets and receive data. 
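 * Doing the processing here (under ldc_cblock) rather than in the interrupt callback keeps the callback itself short; the callback presumably just sets VSW_WTHR_DATARCVD and signals rx_thr_cv.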
5606 */ 5607 static void 5608 vsw_ldc_rx_worker(void *arg) 5609 { 5610 callb_cpr_t cprinfo; 5611 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 5612 vsw_t *vswp = ldcp->ldc_vswp; 5613 5614 D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id); 5615 CALLB_CPR_INIT(&cprinfo, &ldcp->rx_thr_lock, callb_generic_cpr, 5616 "vsw_rx_thread"); 5617 mutex_enter(&ldcp->rx_thr_lock); 5618 ldcp->rx_thr_flags |= VSW_WTHR_RUNNING; 5619 while (!(ldcp->rx_thr_flags & VSW_WTHR_STOP)) { 5620 5621 CALLB_CPR_SAFE_BEGIN(&cprinfo); 5622 /* 5623 * Wait until the data is received or a stop 5624 * request is received. 5625 */ 5626 while (!(ldcp->rx_thr_flags & 5627 (VSW_WTHR_DATARCVD | VSW_WTHR_STOP))) { 5628 cv_wait(&ldcp->rx_thr_cv, &ldcp->rx_thr_lock); 5629 } 5630 CALLB_CPR_SAFE_END(&cprinfo, &ldcp->rx_thr_lock) 5631 5632 /* 5633 * First process the stop request. 5634 */ 5635 if (ldcp->rx_thr_flags & VSW_WTHR_STOP) { 5636 D2(vswp, "%s(%lld):Rx thread stopped\n", 5637 __func__, ldcp->ldc_id); 5638 break; 5639 } 5640 ldcp->rx_thr_flags &= ~VSW_WTHR_DATARCVD; 5641 mutex_exit(&ldcp->rx_thr_lock); 5642 D1(vswp, "%s(%lld):calling vsw_process_pkt\n", 5643 __func__, ldcp->ldc_id); 5644 mutex_enter(&ldcp->ldc_cblock); 5645 vsw_process_pkt(ldcp); 5646 mutex_exit(&ldcp->ldc_cblock); 5647 mutex_enter(&ldcp->rx_thr_lock); 5648 } 5649 5650 /* 5651 * Update the run status and wakeup the thread that 5652 * has sent the stop request. 5653 */ 5654 ldcp->rx_thr_flags &= ~VSW_WTHR_RUNNING; 5655 cv_signal(&ldcp->rx_thr_cv); 5656 CALLB_CPR_EXIT(&cprinfo); 5657 D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id); 5658 thread_exit(); 5659 } 5660 5661 /* vsw_stop_rx_thread -- Co-ordinate with receive thread to stop it */ 5662 static void 5663 vsw_stop_rx_thread(vsw_ldc_t *ldcp) 5664 { 5665 vsw_t *vswp = ldcp->ldc_vswp; 5666 5667 D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id); 5668 /* 5669 * Send a stop request by setting the stop flag and 5670 * wait until the receive thread stops. 5671 */ 5672 mutex_enter(&ldcp->rx_thr_lock); 5673 if (ldcp->rx_thr_flags & VSW_WTHR_RUNNING) { 5674 ldcp->rx_thr_flags |= VSW_WTHR_STOP; 5675 cv_signal(&ldcp->rx_thr_cv); 5676 while (ldcp->rx_thr_flags & VSW_WTHR_RUNNING) { 5677 cv_wait(&ldcp->rx_thr_cv, &ldcp->rx_thr_lock); 5678 } 5679 } 5680 mutex_exit(&ldcp->rx_thr_lock); 5681 ldcp->rx_thread = NULL; 5682 D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id); 5683 } 5684 5685 /* 5686 * vsw_ldc_tx_worker -- A per LDC worker thread to transmit data. 5687 * This thread is woken up by the vsw_portsend to transmit 5688 * packets. 5689 */ 5690 static void 5691 vsw_ldc_tx_worker(void *arg) 5692 { 5693 callb_cpr_t cprinfo; 5694 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 5695 vsw_t *vswp = ldcp->ldc_vswp; 5696 mblk_t *mp; 5697 mblk_t *tmp; 5698 5699 D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id); 5700 CALLB_CPR_INIT(&cprinfo, &ldcp->tx_thr_lock, callb_generic_cpr, 5701 "vnet_tx_thread"); 5702 mutex_enter(&ldcp->tx_thr_lock); 5703 ldcp->tx_thr_flags |= VSW_WTHR_RUNNING; 5704 while (!(ldcp->tx_thr_flags & VSW_WTHR_STOP)) { 5705 5706 CALLB_CPR_SAFE_BEGIN(&cprinfo); 5707 /* 5708 * Wait until the data is received or a stop 5709 * request is received. 5710 */ 5711 while (!(ldcp->tx_thr_flags & VSW_WTHR_STOP) && 5712 (ldcp->tx_mhead == NULL)) { 5713 cv_wait(&ldcp->tx_thr_cv, &ldcp->tx_thr_lock); 5714 } 5715 CALLB_CPR_SAFE_END(&cprinfo, &ldcp->tx_thr_lock) 5716 5717 /* 5718 * First process the stop request. 
5719 */ 5720 if (ldcp->tx_thr_flags & VSW_WTHR_STOP) { 5721 D2(vswp, "%s(%lld):tx thread stopped\n", 5722 __func__, ldcp->ldc_id); 5723 break; 5724 } 5725 mp = ldcp->tx_mhead; 5726 ldcp->tx_mhead = ldcp->tx_mtail = NULL; 5727 ldcp->tx_cnt = 0; 5728 mutex_exit(&ldcp->tx_thr_lock); 5729 D2(vswp, "%s(%lld):calling vsw_ldcsend\n", 5730 __func__, ldcp->ldc_id); 5731 while (mp != NULL) { 5732 tmp = mp->b_next; 5733 mp->b_next = mp->b_prev = NULL; 5734 (void) vsw_ldcsend(ldcp, mp, vsw_ldc_tx_retries); 5735 mp = tmp; 5736 } 5737 mutex_enter(&ldcp->tx_thr_lock); 5738 } 5739 5740 /* 5741 * Update the run status and wakeup the thread that 5742 * has sent the stop request. 5743 */ 5744 ldcp->tx_thr_flags &= ~VSW_WTHR_RUNNING; 5745 cv_signal(&ldcp->tx_thr_cv); 5746 CALLB_CPR_EXIT(&cprinfo); 5747 D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id); 5748 thread_exit(); 5749 } 5750 5751 /* vsw_stop_tx_thread -- Co-ordinate with transmit thread to stop it */ 5752 static void 5753 vsw_stop_tx_thread(vsw_ldc_t *ldcp) 5754 { 5755 vsw_t *vswp = ldcp->ldc_vswp; 5756 5757 D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id); 5758 /* 5759 * Send a stop request by setting the stop flag and 5760 * wait until the transmit thread stops. 5761 */ 5762 mutex_enter(&ldcp->tx_thr_lock); 5763 if (ldcp->tx_thr_flags & VSW_WTHR_RUNNING) { 5764 ldcp->tx_thr_flags |= VSW_WTHR_STOP; 5765 cv_signal(&ldcp->tx_thr_cv); 5766 while (ldcp->tx_thr_flags & VSW_WTHR_RUNNING) { 5767 cv_wait(&ldcp->tx_thr_cv, &ldcp->tx_thr_lock); 5768 } 5769 } 5770 mutex_exit(&ldcp->tx_thr_lock); 5771 ldcp->tx_thread = NULL; 5772 D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id); 5773 } 5774 5775 /* vsw_reclaim_dring -- reclaim descriptors */ 5776 static int 5777 vsw_reclaim_dring(dring_info_t *dp, int start) 5778 { 5779 int i, j, len; 5780 vsw_private_desc_t *priv_addr; 5781 vnet_public_desc_t *pub_addr; 5782 5783 pub_addr = (vnet_public_desc_t *)dp->pub_addr; 5784 priv_addr = (vsw_private_desc_t *)dp->priv_addr; 5785 len = dp->num_descriptors; 5786 5787 D2(NULL, "%s: start index %ld\n", __func__, start); 5788 5789 j = 0; 5790 for (i = start; j < len; i = (i + 1) % len, j++) { 5791 pub_addr = (vnet_public_desc_t *)dp->pub_addr + i; 5792 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i; 5793 5794 mutex_enter(&priv_addr->dstate_lock); 5795 if (pub_addr->hdr.dstate != VIO_DESC_DONE) { 5796 mutex_exit(&priv_addr->dstate_lock); 5797 break; 5798 } 5799 pub_addr->hdr.dstate = VIO_DESC_FREE; 5800 priv_addr->dstate = VIO_DESC_FREE; 5801 /* clear all the fields */ 5802 priv_addr->datalen = 0; 5803 pub_addr->hdr.ack = 0; 5804 mutex_exit(&priv_addr->dstate_lock); 5805 5806 D3(NULL, "claiming descp:%d pub state:0x%llx priv state 0x%llx", 5807 i, pub_addr->hdr.dstate, priv_addr->dstate); 5808 } 5809 return (j); 5810 } 5811 5812 /* 5813 * Debugging routines 5814 */ 5815 static void 5816 display_state(void) 5817 { 5818 vsw_t *vswp; 5819 vsw_port_list_t *plist; 5820 vsw_port_t *port; 5821 vsw_ldc_list_t *ldcl; 5822 vsw_ldc_t *ldcp; 5823 extern vsw_t *vsw_head; 5824 5825 cmn_err(CE_NOTE, "***** system state *****"); 5826 5827 for (vswp = vsw_head; vswp; vswp = vswp->next) { 5828 plist = &vswp->plist; 5829 READ_ENTER(&plist->lockrw); 5830 cmn_err(CE_CONT, "vsw instance %d has %d ports attached\n", 5831 vswp->instance, plist->num_ports); 5832 5833 for (port = plist->head; port != NULL; port = port->p_next) { 5834 ldcl = &port->p_ldclist; 5835 cmn_err(CE_CONT, "port %d : %d ldcs attached\n", 5836 port->p_instance, port->num_ldcs); 5837 READ_ENTER(&ldcl->lockrw); 5838 ldcp
= ldcl->head; 5839 for (; ldcp != NULL; ldcp = ldcp->ldc_next) { 5840 cmn_err(CE_CONT, "chan %lu : dev %d : " 5841 "status %d : phase %u\n", 5842 ldcp->ldc_id, ldcp->dev_class, 5843 ldcp->ldc_status, ldcp->hphase); 5844 cmn_err(CE_CONT, "chan %lu : lsession %lu : " 5845 "psession %lu\n", ldcp->ldc_id, 5846 ldcp->local_session, ldcp->peer_session); 5847 5848 cmn_err(CE_CONT, "Inbound lane:\n"); 5849 display_lane(&ldcp->lane_in); 5850 cmn_err(CE_CONT, "Outbound lane:\n"); 5851 display_lane(&ldcp->lane_out); 5852 } 5853 RW_EXIT(&ldcl->lockrw); 5854 } 5855 RW_EXIT(&plist->lockrw); 5856 } 5857 cmn_err(CE_NOTE, "***** system state *****"); 5858 } 5859 5860 static void 5861 display_lane(lane_t *lp) 5862 { 5863 dring_info_t *drp; 5864 5865 cmn_err(CE_CONT, "ver 0x%x:0x%x : state %lx : mtu 0x%lx\n", 5866 lp->ver_major, lp->ver_minor, lp->lstate, lp->mtu); 5867 cmn_err(CE_CONT, "addr_type %d : addr 0x%lx : xmode %d\n", 5868 lp->addr_type, lp->addr, lp->xfer_mode); 5869 cmn_err(CE_CONT, "dringp 0x%lx\n", (uint64_t)lp->dringp); 5870 5871 cmn_err(CE_CONT, "Dring info:\n"); 5872 for (drp = lp->dringp; drp != NULL; drp = drp->next) { 5873 cmn_err(CE_CONT, "\tnum_desc %u : dsize %u\n", 5874 drp->num_descriptors, drp->descriptor_size); 5875 cmn_err(CE_CONT, "\thandle 0x%lx\n", drp->handle); 5876 cmn_err(CE_CONT, "\tpub_addr 0x%lx : priv_addr 0x%lx\n", 5877 (uint64_t)drp->pub_addr, (uint64_t)drp->priv_addr); 5878 cmn_err(CE_CONT, "\tident 0x%lx : end_idx %lu\n", 5879 drp->ident, drp->end_idx); 5880 display_ring(drp); 5881 } 5882 } 5883 5884 static void 5885 display_ring(dring_info_t *dringp) 5886 { 5887 uint64_t i; 5888 uint64_t priv_count = 0; 5889 uint64_t pub_count = 0; 5890 vnet_public_desc_t *pub_addr = NULL; 5891 vsw_private_desc_t *priv_addr = NULL; 5892 5893 for (i = 0; i < vsw_ntxds; i++) { 5894 if (dringp->pub_addr != NULL) { 5895 pub_addr = (vnet_public_desc_t *)dringp->pub_addr + i; 5896 5897 if (pub_addr->hdr.dstate == VIO_DESC_FREE) 5898 pub_count++; 5899 } 5900 5901 if (dringp->priv_addr != NULL) { 5902 priv_addr = (vsw_private_desc_t *)dringp->priv_addr + i; 5903 5904 if (priv_addr->dstate == VIO_DESC_FREE) 5905 priv_count++; 5906 } 5907 } 5908 cmn_err(CE_CONT, "\t%lu elements: %lu priv free: %lu pub free\n", 5909 i, priv_count, pub_count); 5910 } 5911 5912 static void 5913 dump_flags(uint64_t state) 5914 { 5915 int i; 5916 5917 typedef struct flag_name { 5918 int flag_val; 5919 char *flag_name; 5920 } flag_name_t; 5921 5922 flag_name_t flags[] = { 5923 VSW_VER_INFO_SENT, "VSW_VER_INFO_SENT", 5924 VSW_VER_INFO_RECV, "VSW_VER_INFO_RECV", 5925 VSW_VER_ACK_RECV, "VSW_VER_ACK_RECV", 5926 VSW_VER_ACK_SENT, "VSW_VER_ACK_SENT", 5927 VSW_VER_NACK_RECV, "VSW_VER_NACK_RECV", 5928 VSW_VER_NACK_SENT, "VSW_VER_NACK_SENT", 5929 VSW_ATTR_INFO_SENT, "VSW_ATTR_INFO_SENT", 5930 VSW_ATTR_INFO_RECV, "VSW_ATTR_INFO_RECV", 5931 VSW_ATTR_ACK_SENT, "VSW_ATTR_ACK_SENT", 5932 VSW_ATTR_ACK_RECV, "VSW_ATTR_ACK_RECV", 5933 VSW_ATTR_NACK_SENT, "VSW_ATTR_NACK_SENT", 5934 VSW_ATTR_NACK_RECV, "VSW_ATTR_NACK_RECV", 5935 VSW_DRING_INFO_SENT, "VSW_DRING_INFO_SENT", 5936 VSW_DRING_INFO_RECV, "VSW_DRING_INFO_RECV", 5937 VSW_DRING_ACK_SENT, "VSW_DRING_ACK_SENT", 5938 VSW_DRING_ACK_RECV, "VSW_DRING_ACK_RECV", 5939 VSW_DRING_NACK_SENT, "VSW_DRING_NACK_SENT", 5940 VSW_DRING_NACK_RECV, "VSW_DRING_NACK_RECV", 5941 VSW_RDX_INFO_SENT, "VSW_RDX_INFO_SENT", 5942 VSW_RDX_INFO_RECV, "VSW_RDX_INFO_RECV", 5943 VSW_RDX_ACK_SENT, "VSW_RDX_ACK_SENT", 5944 VSW_RDX_ACK_RECV, "VSW_RDX_ACK_RECV", 5945 VSW_RDX_NACK_SENT, "VSW_RDX_NACK_SENT", 5946 
VSW_RDX_NACK_RECV, "VSW_RDX_NACK_RECV", 5947 VSW_MCST_INFO_SENT, "VSW_MCST_INFO_SENT", 5948 VSW_MCST_INFO_RECV, "VSW_MCST_INFO_RECV", 5949 VSW_MCST_ACK_SENT, "VSW_MCST_ACK_SENT", 5950 VSW_MCST_ACK_RECV, "VSW_MCST_ACK_RECV", 5951 VSW_MCST_NACK_SENT, "VSW_MCST_NACK_SENT", 5952 VSW_MCST_NACK_RECV, "VSW_MCST_NACK_RECV", 5953 VSW_LANE_ACTIVE, "VSW_LANE_ACTIVE"}; 5954 5955 DERR(NULL, "DUMP_FLAGS: %llx\n", state); 5956 for (i = 0; i < sizeof (flags)/sizeof (flag_name_t); i++) { 5957 if (state & flags[i].flag_val) 5958 DERR(NULL, "DUMP_FLAGS %s", flags[i].flag_name); 5959 } 5960 } 5961