/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/debug.h>
#include <sys/time.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/user.h>
#include <sys/stropts.h>
#include <sys/stream.h>
#include <sys/strlog.h>
#include <sys/strsubr.h>
#include <sys/cmn_err.h>
#include <sys/cpu.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ksynch.h>
#include <sys/stat.h>
#include <sys/kstat.h>
#include <sys/vtrace.h>
#include <sys/strsun.h>
#include <sys/dlpi.h>
#include <sys/ethernet.h>
#include <net/if.h>
#include <sys/varargs.h>
#include <sys/machsystm.h>
#include <sys/modctl.h>
#include <sys/modhash.h>
#include <sys/mac.h>
#include <sys/mac_ether.h>
#include <sys/taskq.h>
#include <sys/note.h>
#include <sys/mach_descrip.h>
#include <sys/mdeg.h>
#include <sys/ldc.h>
#include <sys/vsw_fdb.h>
#include <sys/vsw.h>
#include <sys/vio_mailbox.h>
#include <sys/vnet_mailbox.h>
#include <sys/vnet_common.h>
#include <sys/vio_util.h>
#include <sys/sdt.h>
#include <sys/atomic.h>
#include <sys/callb.h>
#include <sys/vlan.h>

/* Port add/deletion/etc routines */
static int vsw_port_delete(vsw_port_t *port);
static int vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id);
static int vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id);
static int vsw_init_ldcs(vsw_port_t *port);
static int vsw_uninit_ldcs(vsw_port_t *port);
static int vsw_ldc_init(vsw_ldc_t *ldcp);
static int vsw_ldc_uninit(vsw_ldc_t *ldcp);
static int vsw_drain_ldcs(vsw_port_t *port);
static int vsw_drain_port_taskq(vsw_port_t *port);
static void vsw_marker_task(void *);
static int vsw_plist_del_node(vsw_t *, vsw_port_t *port);
int vsw_detach_ports(vsw_t *vswp);
int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node);
mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr);
int vsw_port_detach(vsw_t *vswp, int p_instance);
int vsw_portsend(vsw_port_t *port, mblk_t *mp, mblk_t *mpt, uint32_t count);
int vsw_port_attach(vsw_port_t *portp);
vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance);
void vsw_vlan_unaware_port_reset(vsw_port_t *portp);

/* Interrupt routines */
static uint_t vsw_ldc_cb(uint64_t cb, caddr_t arg);

/* Handshake routines */
static void vsw_ldc_reinit(vsw_ldc_t *);
static void vsw_process_conn_evt(vsw_ldc_t *, uint16_t);
static void vsw_conn_task(void *);
static int vsw_check_flag(vsw_ldc_t *, int, uint64_t);
static void vsw_next_milestone(vsw_ldc_t *);
static int vsw_supported_version(vio_ver_msg_t *);
static void vsw_set_vnet_proto_ops(vsw_ldc_t *ldcp);
static void vsw_reset_vnet_proto_ops(vsw_ldc_t *ldcp);

/* Data processing routines */
static void vsw_process_pkt(void *);
static void vsw_dispatch_ctrl_task(vsw_ldc_t *, void *, vio_msg_tag_t *);
static void vsw_process_ctrl_pkt(void *);
static void vsw_process_ctrl_ver_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_attr_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_mcst_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_rdx_pkt(vsw_ldc_t *, void *);
static void vsw_process_data_pkt(vsw_ldc_t *, void *, vio_msg_tag_t *,
    uint32_t);
static void vsw_process_data_dring_pkt(vsw_ldc_t *, void *);
static void vsw_process_pkt_data_nop(void *, void *, uint32_t);
static void vsw_process_pkt_data(void *, void *, uint32_t);
static void vsw_process_data_ibnd_pkt(vsw_ldc_t *, void *);
static void vsw_process_err_pkt(vsw_ldc_t *, void *, vio_msg_tag_t *);

/* Switching/data transmit routines */
static int vsw_dringsend(vsw_ldc_t *, mblk_t *);
static int vsw_descrsend(vsw_ldc_t *, mblk_t *);
static void vsw_ldcsend_pkt(vsw_ldc_t *ldcp, mblk_t *mp);
static int vsw_ldcsend(vsw_ldc_t *ldcp, mblk_t *mp, uint32_t retries);
static int vsw_ldctx_pri(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count);
static int vsw_ldctx(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count);

/* Packet creation routines */
static void vsw_send_ver(void *);
static void vsw_send_attr(vsw_ldc_t *);
static vio_dring_reg_msg_t *vsw_create_dring_info_pkt(vsw_ldc_t *);
static void vsw_send_dring_info(vsw_ldc_t *);
static void vsw_send_rdx(vsw_ldc_t *);
static int vsw_send_msg(vsw_ldc_t *, void *, int, boolean_t);

/* Dring routines */
static dring_info_t *vsw_create_dring(vsw_ldc_t *);
static void vsw_create_privring(vsw_ldc_t *);
static int vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp);
static int vsw_dring_find_free_desc(dring_info_t *, vsw_private_desc_t **,
    int *);
static dring_info_t *vsw_ident2dring(lane_t *, uint64_t);
static int vsw_reclaim_dring(dring_info_t *dp, int start);

static void vsw_set_lane_attr(vsw_t *, lane_t *);
static int vsw_check_attr(vnet_attr_msg_t *, vsw_ldc_t *);
static int vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg);
static int vsw_mem_cookie_match(ldc_mem_cookie_t *, ldc_mem_cookie_t *);
static int vsw_check_dring_info(vio_dring_reg_msg_t *);

/* Rcv/Tx thread routines */
static void vsw_stop_tx_thread(vsw_ldc_t *ldcp);
static void vsw_ldc_tx_worker(void *arg);
static void vsw_stop_rx_thread(vsw_ldc_t *ldcp);
static void vsw_ldc_rx_worker(void *arg);

/* Misc support routines */
static caddr_t vsw_print_ethaddr(uint8_t *addr, char *ebuf);
static void vsw_free_lane_resources(vsw_ldc_t *, uint64_t);
static int vsw_free_ring(dring_info_t *);
static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr);
static int vsw_get_same_dest_list(struct ether_header *ehp,
    mblk_t **rhead, mblk_t **rtail, mblk_t **mpp);
static mblk_t *vsw_dupmsgchain(mblk_t *mp);

/* Debugging routines */
static void dump_flags(uint64_t);
static void display_state(void);
static void display_lane(lane_t *);
static void display_ring(dring_info_t *);

/*
 * Functions imported from other files.
 */
extern int vsw_set_hw(vsw_t *, vsw_port_t *, int);
extern int vsw_unset_hw(vsw_t *, vsw_port_t *, int);
extern void vsw_reconfig_hw(vsw_t *);
extern int vsw_add_rem_mcst(vnet_mcast_msg_t *mcst_pkt, vsw_port_t *port);
extern void vsw_del_mcst_port(vsw_port_t *port);
extern int vsw_add_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg);
extern int vsw_del_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg);
extern void vsw_fdbe_add(vsw_t *vswp, void *port);
extern void vsw_fdbe_del(vsw_t *vswp, struct ether_addr *eaddr);
extern void vsw_create_vlans(void *arg, int type);
extern void vsw_destroy_vlans(void *arg, int type);
extern void vsw_vlan_add_ids(void *arg, int type);
extern void vsw_vlan_remove_ids(void *arg, int type);
extern boolean_t vsw_frame_lookup_vid(void *arg, int caller,
    struct ether_header *ehp, uint16_t *vidp);
extern mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp);
extern uint32_t vsw_vlan_frame_untag(void *arg, int type, mblk_t **np,
    mblk_t **npt);
extern boolean_t vsw_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid);

#define VSW_NUM_VMPOOLS 3 /* number of vio mblk pools */

/*
 * Tunables used in this file.
 */
extern int vsw_num_handshakes;
extern int vsw_wretries;
extern int vsw_desc_delay;
extern int vsw_read_attempts;
extern int vsw_ldc_tx_delay;
extern int vsw_ldc_tx_retries;
extern boolean_t vsw_ldc_rxthr_enabled;
extern boolean_t vsw_ldc_txthr_enabled;
extern uint32_t vsw_ntxds;
extern uint32_t vsw_max_tx_qcount;
extern uint32_t vsw_chain_len;
extern uint32_t vsw_mblk_size1;
extern uint32_t vsw_mblk_size2;
extern uint32_t vsw_mblk_size3;
extern uint32_t vsw_num_mblks1;
extern uint32_t vsw_num_mblks2;
extern uint32_t vsw_num_mblks3;
extern boolean_t vsw_obp_ver_proto_workaround;

#define LDC_ENTER_LOCK(ldcp) \
    mutex_enter(&((ldcp)->ldc_cblock));\
    mutex_enter(&((ldcp)->ldc_rxlock));\
    mutex_enter(&((ldcp)->ldc_txlock));
#define LDC_EXIT_LOCK(ldcp) \
    mutex_exit(&((ldcp)->ldc_txlock));\
    mutex_exit(&((ldcp)->ldc_rxlock));\
    mutex_exit(&((ldcp)->ldc_cblock));

#define VSW_VER_EQ(ldcp, major, minor) \
    ((ldcp)->lane_out.ver_major == (major) && \
    (ldcp)->lane_out.ver_minor == (minor))

#define VSW_VER_LT(ldcp, major, minor) \
    (((ldcp)->lane_out.ver_major < (major)) || \
    ((ldcp)->lane_out.ver_major == (major) && \
    (ldcp)->lane_out.ver_minor < (minor)))

#define VSW_VER_GTEQ(ldcp, major, minor) \
    (((ldcp)->lane_out.ver_major > (major)) || \
    ((ldcp)->lane_out.ver_major == (major) && \
    (ldcp)->lane_out.ver_minor >= (minor)))

/* supported versions */
static ver_sup_t vsw_versions[] = { {1, 3} };
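
/*
 * Worked example (illustrative only): with a negotiated version of 1.3
 * recorded in lane_out, VSW_VER_EQ(ldcp, 1, 3) and VSW_VER_GTEQ(ldcp, 1, 2)
 * are both true while VSW_VER_LT(ldcp, 1, 3) is false; a peer that
 * negotiated 1.2 would instead satisfy VSW_VER_LT(ldcp, 1, 3).
 */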

/*
 * For the moment the state dump routines have their own
 * private flag.
 */
#define DUMP_STATE 0

#if DUMP_STATE

#define DUMP_TAG(tag) \
{ \
    D1(NULL, "DUMP_TAG: type 0x%llx", (tag).vio_msgtype); \
    D1(NULL, "DUMP_TAG: stype 0x%llx", (tag).vio_subtype); \
    D1(NULL, "DUMP_TAG: senv 0x%llx", (tag).vio_subtype_env); \
}

#define DUMP_TAG_PTR(tag) \
{ \
    D1(NULL, "DUMP_TAG: type 0x%llx", (tag)->vio_msgtype); \
    D1(NULL, "DUMP_TAG: stype 0x%llx", (tag)->vio_subtype); \
    D1(NULL, "DUMP_TAG: senv 0x%llx", (tag)->vio_subtype_env); \
}

#define DUMP_FLAGS(flags) dump_flags(flags);
#define DISPLAY_STATE() display_state()

#else

#define DUMP_TAG(tag)
#define DUMP_TAG_PTR(tag)
#define DUMP_FLAGS(state)
#define DISPLAY_STATE()

#endif /* DUMP_STATE */

/*
 * Attach the specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
int
vsw_port_attach(vsw_port_t *port)
{
    vsw_t *vswp = port->p_vswp;
    vsw_port_list_t *plist = &vswp->plist;
    vsw_port_t *p, **pp;
    int i;
    int nids = port->num_ldcs;
    uint64_t *ldcids;

    D1(vswp, "%s: enter : port %d", __func__, port->p_instance);

    /* port already exists? */
    READ_ENTER(&plist->lockrw);
    for (p = plist->head; p != NULL; p = p->p_next) {
        if (p->p_instance == port->p_instance) {
            DWARN(vswp, "%s: port instance %d already attached",
                __func__, p->p_instance);
            RW_EXIT(&plist->lockrw);
            return (1);
        }
    }
    RW_EXIT(&plist->lockrw);

    rw_init(&port->p_ldclist.lockrw, NULL, RW_DRIVER, NULL);

    mutex_init(&port->tx_lock, NULL, MUTEX_DRIVER, NULL);
    mutex_init(&port->mca_lock, NULL, MUTEX_DRIVER, NULL);

    mutex_init(&port->state_lock, NULL, MUTEX_DRIVER, NULL);
    cv_init(&port->state_cv, NULL, CV_DRIVER, NULL);
    port->state = VSW_PORT_INIT;

    D2(vswp, "%s: %d nids", __func__, nids);
    ldcids = port->ldc_ids;
    for (i = 0; i < nids; i++) {
        D2(vswp, "%s: ldcid (%llx)", __func__, (uint64_t)ldcids[i]);
        if (vsw_ldc_attach(port, (uint64_t)ldcids[i]) != 0) {
            DERR(vswp, "%s: ldc_attach failed", __func__);

            rw_destroy(&port->p_ldclist.lockrw);

            cv_destroy(&port->state_cv);
            mutex_destroy(&port->state_lock);

            mutex_destroy(&port->tx_lock);
            mutex_destroy(&port->mca_lock);
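            /*
             * Note that this error path frees the port structure
             * itself, so the caller must not reference the port
             * after a non-zero return.
             */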
            kmem_free(port, sizeof (vsw_port_t));
            return (1);
        }
    }

    if (vswp->switching_setup_done == B_TRUE) {
        /*
         * If the underlying physical device has been setup,
         * program the mac address of this port in it.
         * Otherwise, the port macaddr will be set after the physical
         * device is successfully setup by the timeout handler.
         */
        mutex_enter(&vswp->hw_lock);
        (void) vsw_set_hw(vswp, port, VSW_VNETPORT);
        mutex_exit(&vswp->hw_lock);
    }

    /* create the fdb entry for this port/mac address */
    vsw_fdbe_add(vswp, port);

    vsw_create_vlans(port, VSW_VNETPORT);

    WRITE_ENTER(&plist->lockrw);

    /* link it into the list of ports for this vsw instance */
    pp = (vsw_port_t **)(&plist->head);
    port->p_next = *pp;
    *pp = port;
    plist->num_ports++;

    RW_EXIT(&plist->lockrw);

    /*
     * Initialise the port and any ldc's under it.
     */
    (void) vsw_init_ldcs(port);

    D1(vswp, "%s: exit", __func__);
    return (0);
}

/*
 * Detach the specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
int
vsw_port_detach(vsw_t *vswp, int p_instance)
{
    vsw_port_t *port = NULL;
    vsw_port_list_t *plist = &vswp->plist;

    D1(vswp, "%s: enter: port id %d", __func__, p_instance);

    WRITE_ENTER(&plist->lockrw);

    if ((port = vsw_lookup_port(vswp, p_instance)) == NULL) {
        RW_EXIT(&plist->lockrw);
        return (1);
    }

    if (vsw_plist_del_node(vswp, port)) {
        RW_EXIT(&plist->lockrw);
        return (1);
    }

    /*
     * No longer need to hold the writer lock on the port list now
     * that we have unlinked the target port from the list.
     */
    RW_EXIT(&plist->lockrw);

    /* Remove the fdb entry for this port/mac address */
    vsw_fdbe_del(vswp, &(port->p_macaddr));
    vsw_destroy_vlans(port, VSW_VNETPORT);

    /* Remove any multicast addresses. */
    vsw_del_mcst_port(port);

    /* Remove the address if it was programmed into HW. */
    mutex_enter(&vswp->hw_lock);

    /*
     * The port's address may not have been set in hardware. This could
     * happen if the underlying physical device is not yet available and
     * vsw_setup_switching_timeout() may be in progress.
     * We remove its addr from hardware only if it has been set before.
     */
    if (port->addr_set != VSW_ADDR_UNSET)
        (void) vsw_unset_hw(vswp, port, VSW_VNETPORT);

    if (vswp->recfg_reqd)
        vsw_reconfig_hw(vswp);

    mutex_exit(&vswp->hw_lock);

    if (vsw_port_delete(port)) {
        return (1);
    }

    D1(vswp, "%s: exit: p_instance(%d)", __func__, p_instance);
    return (0);
}

/*
 * Detach all active ports.
 *
 * Returns 0 on success, 1 on failure.
 */
int
vsw_detach_ports(vsw_t *vswp)
{
    vsw_port_list_t *plist = &vswp->plist;
    vsw_port_t *port = NULL;

    D1(vswp, "%s: enter", __func__);

    WRITE_ENTER(&plist->lockrw);

    while ((port = plist->head) != NULL) {
        if (vsw_plist_del_node(vswp, port)) {
            DERR(vswp, "%s: Error deleting port %d"
                " from port list", __func__, port->p_instance);
            RW_EXIT(&plist->lockrw);
            return (1);
        }

        /* Remove the address if it was programmed into HW. */
        mutex_enter(&vswp->hw_lock);
        (void) vsw_unset_hw(vswp, port, VSW_VNETPORT);
        mutex_exit(&vswp->hw_lock);

        /* Remove the fdb entry for this port/mac address */
        vsw_fdbe_del(vswp, &(port->p_macaddr));
        vsw_destroy_vlans(port, VSW_VNETPORT);

        /* Remove any multicast addresses. */
        vsw_del_mcst_port(port);

        /*
         * No longer need to hold the lock on the port list
         * now that we have unlinked the target port from the
         * list.
         */
        RW_EXIT(&plist->lockrw);
        if (vsw_port_delete(port)) {
            DERR(vswp, "%s: Error deleting port %d",
                __func__, port->p_instance);
            return (1);
        }
        WRITE_ENTER(&plist->lockrw);
    }
    RW_EXIT(&plist->lockrw);

    D1(vswp, "%s: exit", __func__);

    return (0);
}

/*
 * Delete the specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_port_delete(vsw_port_t *port)
{
    vsw_ldc_list_t *ldcl;
    vsw_t *vswp = port->p_vswp;
    int num_ldcs;

    D1(vswp, "%s: enter : port id %d", __func__, port->p_instance);

    (void) vsw_uninit_ldcs(port);

    /*
     * Wait for any pending ctrl msg tasks which reference this
     * port to finish.
     */
    if (vsw_drain_port_taskq(port))
        return (1);

    /*
     * Wait for any active callbacks to finish.
     */
    if (vsw_drain_ldcs(port))
        return (1);

    ldcl = &port->p_ldclist;
    num_ldcs = port->num_ldcs;
    WRITE_ENTER(&ldcl->lockrw);
    while (num_ldcs > 0) {
        if (vsw_ldc_detach(port, ldcl->head->ldc_id) != 0) {
            cmn_err(CE_WARN, "!vsw%d: unable to detach ldc %ld",
                vswp->instance, ldcl->head->ldc_id);
            RW_EXIT(&ldcl->lockrw);
            port->num_ldcs = num_ldcs;
            return (1);
        }
        num_ldcs--;
    }
    RW_EXIT(&ldcl->lockrw);

    rw_destroy(&port->p_ldclist.lockrw);

    mutex_destroy(&port->mca_lock);
    mutex_destroy(&port->tx_lock);

    cv_destroy(&port->state_cv);
    mutex_destroy(&port->state_lock);

    if (port->num_ldcs != 0) {
        kmem_free(port->ldc_ids, port->num_ldcs * sizeof (uint64_t));
        port->num_ldcs = 0;
    }
    kmem_free(port, sizeof (vsw_port_t));

    D1(vswp, "%s: exit", __func__);

    return (0);
}

/*
 * Attach a logical domain channel (ldc) under a specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id)
{
    vsw_t *vswp = port->p_vswp;
    vsw_ldc_list_t *ldcl = &port->p_ldclist;
    vsw_ldc_t *ldcp = NULL;
    ldc_attr_t attr;
    ldc_status_t istatus;
    int status = DDI_FAILURE;
    int rv;
    char kname[MAXNAMELEN];
    enum { PROG_init = 0x0, PROG_mblks = 0x1,
        PROG_callback = 0x2, PROG_rx_thread = 0x4,
        PROG_tx_thread = 0x8}
        progress;

    progress = PROG_init;
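
    /*
     * Each PROG_* bit is set as the corresponding resource is acquired,
     * so the ldc_attach_fail path below releases exactly what has been
     * set up so far and nothing more.
     */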

    D1(vswp, "%s: enter", __func__);

    ldcp = kmem_zalloc(sizeof (vsw_ldc_t), KM_NOSLEEP);
    if (ldcp == NULL) {
        DERR(vswp, "%s: kmem_zalloc failed", __func__);
        return (1);
    }
    ldcp->ldc_id = ldc_id;

    /* Allocate pools of receive mblks */
    rv = vio_init_multipools(&ldcp->vmp, VSW_NUM_VMPOOLS,
        vsw_mblk_size1, vsw_mblk_size2, vsw_mblk_size3,
        vsw_num_mblks1, vsw_num_mblks2, vsw_num_mblks3);
    if (rv) {
        DWARN(vswp, "%s: unable to create free mblk pools for"
            " channel %ld (rv %d)", __func__, ldc_id, rv);
        kmem_free(ldcp, sizeof (vsw_ldc_t));
        return (1);
    }

    progress |= PROG_mblks;

    mutex_init(&ldcp->ldc_txlock, NULL, MUTEX_DRIVER, NULL);
    mutex_init(&ldcp->ldc_rxlock, NULL, MUTEX_DRIVER, NULL);
    mutex_init(&ldcp->ldc_cblock, NULL, MUTEX_DRIVER, NULL);
    mutex_init(&ldcp->drain_cv_lock, NULL, MUTEX_DRIVER, NULL);
    cv_init(&ldcp->drain_cv, NULL, CV_DRIVER, NULL);
    rw_init(&ldcp->lane_in.dlistrw, NULL, RW_DRIVER, NULL);
    rw_init(&ldcp->lane_out.dlistrw, NULL, RW_DRIVER, NULL);

    /* required for handshake with peer */
    ldcp->local_session = (uint64_t)ddi_get_lbolt();
    ldcp->peer_session = 0;
    ldcp->session_status = 0;
    ldcp->hss_id = 1; /* Initial handshake session id */

    /* only set for outbound lane, inbound set by peer */
    vsw_set_lane_attr(vswp, &ldcp->lane_out);

    attr.devclass = LDC_DEV_NT_SVC;
    attr.instance = ddi_get_instance(vswp->dip);
    attr.mode = LDC_MODE_UNRELIABLE;
    attr.mtu = VSW_LDC_MTU;
    status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
    if (status != 0) {
        DERR(vswp, "%s(%lld): ldc_init failed, rv (%d)",
            __func__, ldc_id, status);
        goto ldc_attach_fail;
    }

    if (vsw_ldc_rxthr_enabled) {
        ldcp->rx_thr_flags = 0;

        mutex_init(&ldcp->rx_thr_lock, NULL, MUTEX_DRIVER, NULL);
        cv_init(&ldcp->rx_thr_cv, NULL, CV_DRIVER, NULL);
        ldcp->rx_thread = thread_create(NULL, 2 * DEFAULTSTKSZ,
            vsw_ldc_rx_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri);

        progress |= PROG_rx_thread;
        if (ldcp->rx_thread == NULL) {
            DWARN(vswp, "%s(%lld): Failed to create worker thread",
                __func__, ldc_id);
            goto ldc_attach_fail;
        }
    }

    if (vsw_ldc_txthr_enabled) {
        ldcp->tx_thr_flags = 0;
        ldcp->tx_mhead = ldcp->tx_mtail = NULL;

        mutex_init(&ldcp->tx_thr_lock, NULL, MUTEX_DRIVER, NULL);
        cv_init(&ldcp->tx_thr_cv, NULL, CV_DRIVER, NULL);
        ldcp->tx_thread = thread_create(NULL, 2 * DEFAULTSTKSZ,
            vsw_ldc_tx_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri);

        progress |= PROG_tx_thread;
        if (ldcp->tx_thread == NULL) {
            DWARN(vswp, "%s(%lld): Failed to create worker thread",
                __func__, ldc_id);
            goto ldc_attach_fail;
        }
    }

    status = ldc_reg_callback(ldcp->ldc_handle, vsw_ldc_cb, (caddr_t)ldcp);
    if (status != 0) {
        DERR(vswp, "%s(%lld): ldc_reg_callback failed, rv (%d)",
            __func__, ldc_id, status);
        (void) ldc_fini(ldcp->ldc_handle);
        goto ldc_attach_fail;
    }
    /*
     * Allocate a message for ldc_read()s, big enough to hold ctrl and
     * data msgs, including raw data msgs used to recv priority frames.
     */
    ldcp->msglen = VIO_PKT_DATA_HDRSIZE + vswp->max_frame_size;
    ldcp->ldcmsg = kmem_alloc(ldcp->msglen, KM_SLEEP);

    progress |= PROG_callback;

    mutex_init(&ldcp->status_lock, NULL, MUTEX_DRIVER, NULL);

    if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
        DERR(vswp, "%s: ldc_status failed", __func__);
        mutex_destroy(&ldcp->status_lock);
        goto ldc_attach_fail;
    }

    ldcp->ldc_status = istatus;
    ldcp->ldc_port = port;
    ldcp->ldc_vswp = vswp;

    vsw_reset_vnet_proto_ops(ldcp);

    (void) sprintf(kname, "%sldc0x%lx", DRV_NAME, ldcp->ldc_id);
    ldcp->ksp = vgen_setup_kstats(DRV_NAME, vswp->instance,
        kname, &ldcp->ldc_stats);
    if (ldcp->ksp == NULL) {
        DERR(vswp, "%s: kstats setup failed", __func__);
        goto ldc_attach_fail;
    }

    /* link it into the list of channels for this port */
    WRITE_ENTER(&ldcl->lockrw);
    ldcp->ldc_next = ldcl->head;
    ldcl->head = ldcp;
    RW_EXIT(&ldcl->lockrw);

    D1(vswp, "%s: exit", __func__);
    return (0);

ldc_attach_fail:

    if (progress & PROG_callback) {
        (void) ldc_unreg_callback(ldcp->ldc_handle);
        kmem_free(ldcp->ldcmsg, ldcp->msglen);
    }

    if (progress & PROG_rx_thread) {
        if (ldcp->rx_thread != NULL) {
            vsw_stop_rx_thread(ldcp);
        }
        mutex_destroy(&ldcp->rx_thr_lock);
        cv_destroy(&ldcp->rx_thr_cv);
    }

    if (progress & PROG_tx_thread) {
        if (ldcp->tx_thread != NULL) {
            vsw_stop_tx_thread(ldcp);
        }
        mutex_destroy(&ldcp->tx_thr_lock);
        cv_destroy(&ldcp->tx_thr_cv);
    }
    if (ldcp->ksp != NULL) {
        vgen_destroy_kstats(ldcp->ksp);
    }
    mutex_destroy(&ldcp->ldc_txlock);
    mutex_destroy(&ldcp->ldc_rxlock);
    mutex_destroy(&ldcp->ldc_cblock);
    mutex_destroy(&ldcp->drain_cv_lock);

    cv_destroy(&ldcp->drain_cv);

    rw_destroy(&ldcp->lane_in.dlistrw);
    rw_destroy(&ldcp->lane_out.dlistrw);

    if (progress & PROG_mblks) {
        vio_destroy_multipools(&ldcp->vmp, &vswp->rxh);
    }
    kmem_free(ldcp, sizeof (vsw_ldc_t));

    return (1);
}

/*
 * Detach a logical domain channel (ldc) belonging to a
 * particular port.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id)
{
    vsw_t *vswp = port->p_vswp;
    vsw_ldc_t *ldcp;
    vsw_ldc_t **prev_ldcp;
    vsw_ldc_list_t *ldcl = &port->p_ldclist;
    int rv;

    prev_ldcp = &ldcl->head;
    for (; (ldcp = *prev_ldcp) != NULL; prev_ldcp = &ldcp->ldc_next) {
        if (ldcp->ldc_id == ldc_id) {
            break;
        }
    }

    /* specified ldc id not found */
    if (ldcp == NULL) {
        DERR(vswp, "%s: ldcp = NULL", __func__);
        return (1);
    }

    D2(vswp, "%s: detaching channel %lld", __func__, ldcp->ldc_id);

    /* Stop the receive thread */
    if (ldcp->rx_thread != NULL) {
        vsw_stop_rx_thread(ldcp);
        mutex_destroy(&ldcp->rx_thr_lock);
        cv_destroy(&ldcp->rx_thr_cv);
    }
    kmem_free(ldcp->ldcmsg, ldcp->msglen);

    /* Stop the tx thread */
    if (ldcp->tx_thread != NULL) {
        vsw_stop_tx_thread(ldcp);
        mutex_destroy(&ldcp->tx_thr_lock);
        cv_destroy(&ldcp->tx_thr_cv);
        if (ldcp->tx_mhead != NULL) {
            freemsgchain(ldcp->tx_mhead);
            ldcp->tx_mhead = ldcp->tx_mtail = NULL;
            ldcp->tx_cnt = 0;
        }
    }

    /* Destroy kstats */
    vgen_destroy_kstats(ldcp->ksp);

    /*
     * Before we can close the channel we must release any mapped
     * resources (e.g. drings).
     */
    vsw_free_lane_resources(ldcp, INBOUND);
    vsw_free_lane_resources(ldcp, OUTBOUND);

    /*
     * If the close fails we are in serious trouble, as we won't
     * be able to delete the parent port.
     */
    if ((rv = ldc_close(ldcp->ldc_handle)) != 0) {
        DERR(vswp, "%s: error %d closing channel %lld",
            __func__, rv, ldcp->ldc_id);
        return (1);
    }

    (void) ldc_fini(ldcp->ldc_handle);

    ldcp->ldc_status = LDC_INIT;
    ldcp->ldc_handle = NULL;
    ldcp->ldc_vswp = NULL;

    /*
     * Most likely some mblks are still in use and
     * have not been returned to the pool. These mblks are
     * added to the pool that is maintained in the device instance.
     * Another attempt will be made to destroy the pool
     * when the device detaches.
     */
    vio_destroy_multipools(&ldcp->vmp, &vswp->rxh);

    /* unlink it from the list */
    *prev_ldcp = ldcp->ldc_next;

    mutex_destroy(&ldcp->ldc_txlock);
    mutex_destroy(&ldcp->ldc_rxlock);
    mutex_destroy(&ldcp->ldc_cblock);
    cv_destroy(&ldcp->drain_cv);
    mutex_destroy(&ldcp->drain_cv_lock);
    mutex_destroy(&ldcp->status_lock);
    rw_destroy(&ldcp->lane_in.dlistrw);
    rw_destroy(&ldcp->lane_out.dlistrw);

    kmem_free(ldcp, sizeof (vsw_ldc_t));

    return (0);
}

/*
 * Open and attempt to bring up the channel. Note that the channel
 * can only be brought up if the peer has also opened the channel.
 *
 * Returns 0 if it can open and bring up the channel; otherwise
 * returns 1.
 */
static int
vsw_ldc_init(vsw_ldc_t *ldcp)
{
    vsw_t *vswp = ldcp->ldc_vswp;
    ldc_status_t istatus = 0;
    int rv;

    D1(vswp, "%s: enter", __func__);

    LDC_ENTER_LOCK(ldcp);

    /* don't start at 0 in case clients don't like that */
    ldcp->next_ident = 1;

    rv = ldc_open(ldcp->ldc_handle);
    if (rv != 0) {
        DERR(vswp, "%s: ldc_open failed: id(%lld) rv(%d)",
            __func__, ldcp->ldc_id, rv);
        LDC_EXIT_LOCK(ldcp);
        return (1);
    }

    if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
        DERR(vswp, "%s: unable to get status", __func__);
        LDC_EXIT_LOCK(ldcp);
        return (1);

    } else if (istatus != LDC_OPEN && istatus != LDC_READY) {
        DERR(vswp, "%s: id (%lld) status(%d) is not OPEN/READY",
            __func__, ldcp->ldc_id, istatus);
        LDC_EXIT_LOCK(ldcp);
        return (1);
    }

    mutex_enter(&ldcp->status_lock);
    ldcp->ldc_status = istatus;
    mutex_exit(&ldcp->status_lock);

    rv = ldc_up(ldcp->ldc_handle);
    if (rv != 0) {
        /*
         * Not a fatal error for ldc_up() to fail, as the peer
         * end point may simply not be ready yet.
         */
        D2(vswp, "%s: ldc_up err id(%lld) rv(%d)", __func__,
            ldcp->ldc_id, rv);
        LDC_EXIT_LOCK(ldcp);
        return (1);
    }

    /*
     * The ldc_up() call is non-blocking so we need to explicitly
     * check the channel status to see if in fact the channel
     * is UP.
     */
    mutex_enter(&ldcp->status_lock);
    if (ldc_status(ldcp->ldc_handle, &ldcp->ldc_status) != 0) {
        DERR(vswp, "%s: unable to get status", __func__);
        mutex_exit(&ldcp->status_lock);
        LDC_EXIT_LOCK(ldcp);
        return (1);

    }

    if (ldcp->ldc_status == LDC_UP) {
        D2(vswp, "%s: channel %ld now UP (%ld)", __func__,
            ldcp->ldc_id, istatus);
        mutex_exit(&ldcp->status_lock);
        LDC_EXIT_LOCK(ldcp);

        vsw_process_conn_evt(ldcp, VSW_CONN_UP);
        return (0);
    }

    mutex_exit(&ldcp->status_lock);
    LDC_EXIT_LOCK(ldcp);

    D1(vswp, "%s: exit", __func__);
    return (0);
}

/* disable callbacks on the channel */
static int
vsw_ldc_uninit(vsw_ldc_t *ldcp)
{
    vsw_t *vswp = ldcp->ldc_vswp;
    int rv;

    D1(vswp, "vsw_ldc_uninit: enter: id(%lx)\n", ldcp->ldc_id);

    LDC_ENTER_LOCK(ldcp);

    rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
    if (rv != 0) {
        DERR(vswp, "vsw_ldc_uninit(%lld): error disabling "
            "interrupts (rv = %d)\n", ldcp->ldc_id, rv);
        LDC_EXIT_LOCK(ldcp);
        return (1);
    }

    mutex_enter(&ldcp->status_lock);
    ldcp->ldc_status = LDC_INIT;
    mutex_exit(&ldcp->status_lock);

    LDC_EXIT_LOCK(ldcp);

    D1(vswp, "vsw_ldc_uninit: exit: id(%lx)", ldcp->ldc_id);

    return (0);
}

static int
vsw_init_ldcs(vsw_port_t *port)
{
    vsw_ldc_list_t *ldcl = &port->p_ldclist;
    vsw_ldc_t *ldcp;

    READ_ENTER(&ldcl->lockrw);
    ldcp = ldcl->head;
    for (; ldcp != NULL; ldcp = ldcp->ldc_next) {
        (void) vsw_ldc_init(ldcp);
    }
    RW_EXIT(&ldcl->lockrw);

    return (0);
}

static int
vsw_uninit_ldcs(vsw_port_t *port)
{
    vsw_ldc_list_t *ldcl = &port->p_ldclist;
    vsw_ldc_t *ldcp;

    D1(NULL, "vsw_uninit_ldcs: enter\n");

    READ_ENTER(&ldcl->lockrw);
    ldcp = ldcl->head;
    for (; ldcp != NULL; ldcp = ldcp->ldc_next) {
        (void) vsw_ldc_uninit(ldcp);
    }
    RW_EXIT(&ldcl->lockrw);

    D1(NULL, "vsw_uninit_ldcs: exit\n");

    return (0);
}

/*
 * Wait until the callback(s) associated with the ldcs under the specified
 * port have completed.
 *
 * Prior to this function being invoked each channel under this port
 * should have been quiesced via ldc_set_cb_mode(DISABLE).
 *
 * A short explanation of what we are doing below:
 *
 * The simplest approach would be to have a reference counter in
 * the ldc structure which is incremented/decremented by the callbacks as
 * they use the channel. The drain function could then simply disable any
 * further callbacks and do a cv_wait for the ref to hit zero. Unfortunately
 * there is a tiny window here - before the callback is able to get the lock
 * on the channel it is interrupted and this function gets to execute. It
 * sees that the ref count is zero and believes it is free to delete the
 * associated data structures.
 *
 * We get around this by taking advantage of the fact that before the ldc
 * framework invokes a callback it sets a flag to indicate that there is a
 * callback active (or about to become active). If we attempt to
 * unregister a callback while this active flag is set then the unregister
 * will fail with EWOULDBLOCK.
 *
 * If the unregister fails we do a cv_timedwait. We will either be signaled
 * by the callback as it is exiting (note we have to wait a short period to
 * allow the callback to return fully to the ldc framework and it to clear
 * the active flag), or by the timer expiring. In either case we again attempt
 * the unregister. We repeat this until we can successfully unregister the
 * callback.
 *
 * The reason we use a cv_timedwait rather than a simple cv_wait is to catch
 * the case where the callback has finished but the ldc framework has not yet
 * cleared the active flag. In this case we would never get a cv_signal.
 */
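/*
 * Illustrative timeline of the drain handshake described above:
 *
 *	drain thread				ldc callback
 *	------------				------------
 *	set drain_state = VSW_LDC_DRAINING
 *	ldc_unreg_callback() -> EWOULDBLOCK	(callback active flag set)
 *	cv_timedwait(drain_cv, ~1 sec)		callback finishes and
 *						cv_signal()s drain_cv
 *	ldc_unreg_callback() -> 0		(active flag now clear)
 */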
static int
vsw_drain_ldcs(vsw_port_t *port)
{
    vsw_ldc_list_t *ldcl = &port->p_ldclist;
    vsw_ldc_t *ldcp;
    vsw_t *vswp = port->p_vswp;

    D1(vswp, "%s: enter", __func__);

    READ_ENTER(&ldcl->lockrw);

    ldcp = ldcl->head;

    for (; ldcp != NULL; ldcp = ldcp->ldc_next) {
        /*
         * If we can unregister the channel callback then we
         * know that there is no callback either running or
         * scheduled to run for this channel so move on to next
         * channel in the list.
         */
        mutex_enter(&ldcp->drain_cv_lock);

        /* prompt active callbacks to quit */
        ldcp->drain_state = VSW_LDC_DRAINING;

        if ((ldc_unreg_callback(ldcp->ldc_handle)) == 0) {
            D2(vswp, "%s: unreg callback for chan %ld", __func__,
                ldcp->ldc_id);
            mutex_exit(&ldcp->drain_cv_lock);
            continue;
        } else {
            /*
             * If we end up here we know that either 1) a callback
             * is currently executing, 2) is about to start (i.e.
             * the ldc framework has set the active flag but
             * has not actually invoked the callback yet), or 3)
             * has finished and has returned to the ldc framework
             * but the ldc framework has not yet cleared the
             * active bit.
             *
             * Wait for it to finish.
             */
            while (ldc_unreg_callback(ldcp->ldc_handle)
                == EWOULDBLOCK)
                (void) cv_timedwait(&ldcp->drain_cv,
                    &ldcp->drain_cv_lock, lbolt + hz);

            mutex_exit(&ldcp->drain_cv_lock);
            D2(vswp, "%s: unreg callback for chan %ld after "
                "timeout", __func__, ldcp->ldc_id);
        }
    }
    RW_EXIT(&ldcl->lockrw);

    D1(vswp, "%s: exit", __func__);
    return (0);
}

/*
 * Wait until all tasks which reference this port have completed.
 *
 * Prior to this function being invoked each channel under this port
 * should have been quiesced via ldc_set_cb_mode(DISABLE).
 */
static int
vsw_drain_port_taskq(vsw_port_t *port)
{
    vsw_t *vswp = port->p_vswp;

    D1(vswp, "%s: enter", __func__);

    /*
     * Mark the port as in the process of being detached, and
     * dispatch a marker task to the queue so we know when all
     * relevant tasks have completed.
     */
    mutex_enter(&port->state_lock);
    port->state = VSW_PORT_DETACHING;

    if ((vswp->taskq_p == NULL) ||
        (ddi_taskq_dispatch(vswp->taskq_p, vsw_marker_task,
        port, DDI_NOSLEEP) != DDI_SUCCESS)) {
        DERR(vswp, "%s: unable to dispatch marker task",
            __func__);
        mutex_exit(&port->state_lock);
        return (1);
    }

    /*
     * Wait for the marker task to finish.
     */
    while (port->state != VSW_PORT_DETACHABLE)
        cv_wait(&port->state_cv, &port->state_lock);

    mutex_exit(&port->state_lock);

    D1(vswp, "%s: exit", __func__);

    return (0);
}

static void
vsw_marker_task(void *arg)
{
    vsw_port_t *port = arg;
    vsw_t *vswp = port->p_vswp;

    D1(vswp, "%s: enter", __func__);

    mutex_enter(&port->state_lock);

    /*
     * No further tasks should be dispatched which reference
     * this port so it is ok to mark it as safe to detach.
     */
    port->state = VSW_PORT_DETACHABLE;

    cv_signal(&port->state_cv);

    mutex_exit(&port->state_lock);

    D1(vswp, "%s: exit", __func__);
}

vsw_port_t *
vsw_lookup_port(vsw_t *vswp, int p_instance)
{
    vsw_port_list_t *plist = &vswp->plist;
    vsw_port_t *port;

    for (port = plist->head; port != NULL; port = port->p_next) {
        if (port->p_instance == p_instance) {
            D2(vswp, "vsw_lookup_port: found p_instance\n");
            return (port);
        }
    }

    return (NULL);
}

void
vsw_vlan_unaware_port_reset(vsw_port_t *portp)
{
    vsw_ldc_list_t *ldclp;
    vsw_ldc_t *ldcp;

    ldclp = &portp->p_ldclist;

    READ_ENTER(&ldclp->lockrw);

    /*
     * NOTE: for now, we will assume we have a single channel.
     */
    if (ldclp->head == NULL) {
        RW_EXIT(&ldclp->lockrw);
        return;
    }
    ldcp = ldclp->head;

    mutex_enter(&ldcp->ldc_cblock);

    /*
     * If the peer is vlan unaware (ver < 1.3), reset the channel and
     * terminate the connection. See comments in
     * vsw_set_vnet_proto_ops().
     */
    if (ldcp->hphase == VSW_MILESTONE4 && VSW_VER_LT(ldcp, 1, 3) &&
        portp->nvids != 0) {
        vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
    }

    mutex_exit(&ldcp->ldc_cblock);

    RW_EXIT(&ldclp->lockrw);
}

/*
 * Search for and remove the specified port from the port
 * list. Returns 0 if able to locate and remove the port; otherwise
 * returns 1.
 */
static int
vsw_plist_del_node(vsw_t *vswp, vsw_port_t *port)
{
    vsw_port_list_t *plist = &vswp->plist;
    vsw_port_t *curr_p, *prev_p;

    if (plist->head == NULL)
        return (1);

    curr_p = prev_p = plist->head;

    while (curr_p != NULL) {
        if (curr_p == port) {
            if (prev_p == curr_p) {
                plist->head = curr_p->p_next;
            } else {
                prev_p->p_next = curr_p->p_next;
            }
            plist->num_ports--;
            return (0);
        } else {
            prev_p = curr_p;
            curr_p = curr_p->p_next;
        }
    }

    /* specified port was not found in the list */
    return (1);
}

/*
 * Interrupt handler for ldc messages.
 */
static uint_t
vsw_ldc_cb(uint64_t event, caddr_t arg)
{
    vsw_ldc_t *ldcp = (vsw_ldc_t *)arg;
    vsw_t *vswp = ldcp->ldc_vswp;

    D1(vswp, "%s: enter: ldcid (%lld)\n", __func__, ldcp->ldc_id);

    mutex_enter(&ldcp->ldc_cblock);
    ldcp->ldc_stats.callbacks++;

    mutex_enter(&ldcp->status_lock);
    if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) {
        mutex_exit(&ldcp->status_lock);
        mutex_exit(&ldcp->ldc_cblock);
        return (LDC_SUCCESS);
    }
    mutex_exit(&ldcp->status_lock);

    if (event & LDC_EVT_UP) {
        /*
         * Channel has come up.
         */
        D2(vswp, "%s: id(%ld) event(%llx) UP: status(%ld)",
            __func__, ldcp->ldc_id, event, ldcp->ldc_status);

        vsw_process_conn_evt(ldcp, VSW_CONN_UP);

        ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
    }

    if (event & LDC_EVT_READ) {
        /*
         * Data available for reading.
         */
        D2(vswp, "%s: id(%ld) event(%llx) data READ",
            __func__, ldcp->ldc_id, event);

        if (ldcp->rx_thread != NULL) {
            /*
             * If the receive thread is enabled, then
             * wake up the receive thread to process the
             * LDC messages.
             */
            mutex_exit(&ldcp->ldc_cblock);
            mutex_enter(&ldcp->rx_thr_lock);
            if (!(ldcp->rx_thr_flags & VSW_WTHR_DATARCVD)) {
                ldcp->rx_thr_flags |= VSW_WTHR_DATARCVD;
                cv_signal(&ldcp->rx_thr_cv);
            }
            mutex_exit(&ldcp->rx_thr_lock);
            mutex_enter(&ldcp->ldc_cblock);
        } else {
            vsw_process_pkt(ldcp);
        }

        ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);

        goto vsw_cb_exit;
    }

    if (event & (LDC_EVT_DOWN | LDC_EVT_RESET)) {
        D2(vswp, "%s: id(%ld) event (%lx) DOWN/RESET: status(%ld)",
            __func__, ldcp->ldc_id, event, ldcp->ldc_status);

        vsw_process_conn_evt(ldcp, VSW_CONN_RESET);
    }

    /*
     * Catch either LDC_EVT_WRITE which we don't support or any
     * unknown event.
     */
    if (event &
        ~(LDC_EVT_UP | LDC_EVT_RESET | LDC_EVT_DOWN | LDC_EVT_READ)) {
        DERR(vswp, "%s: id(%ld) Unexpected event=(%llx) status(%ld)",
            __func__, ldcp->ldc_id, event, ldcp->ldc_status);
    }

vsw_cb_exit:
    mutex_exit(&ldcp->ldc_cblock);

    /*
     * Let the drain function know we are finishing if it
     * is waiting.
     */
    mutex_enter(&ldcp->drain_cv_lock);
    if (ldcp->drain_state == VSW_LDC_DRAINING)
        cv_signal(&ldcp->drain_cv);
    mutex_exit(&ldcp->drain_cv_lock);

    return (LDC_SUCCESS);
}

/*
 * Reinitialise data structures associated with the channel.
 */
static void
vsw_ldc_reinit(vsw_ldc_t *ldcp)
{
    vsw_t *vswp = ldcp->ldc_vswp;
    vsw_port_t *port;
    vsw_ldc_list_t *ldcl;

    D1(vswp, "%s: enter", __func__);

    port = ldcp->ldc_port;
    ldcl = &port->p_ldclist;

    READ_ENTER(&ldcl->lockrw);

    D2(vswp, "%s: in 0x%llx : out 0x%llx", __func__,
        ldcp->lane_in.lstate, ldcp->lane_out.lstate);

    vsw_free_lane_resources(ldcp, INBOUND);
    vsw_free_lane_resources(ldcp, OUTBOUND);
    RW_EXIT(&ldcl->lockrw);

    ldcp->lane_in.lstate = 0;
    ldcp->lane_out.lstate = 0;

    /* Remove the fdb entry for this port/mac address */
    vsw_fdbe_del(vswp, &(port->p_macaddr));

    /* remove the port from vlans it has been assigned to */
    vsw_vlan_remove_ids(port, VSW_VNETPORT);

    /*
     * Remove parent port from any multicast groups
     * it may have registered with. Client must resend
     * multicast add command after handshake completes.
     */
    vsw_del_mcst_port(port);

    ldcp->peer_session = 0;
    ldcp->session_status = 0;
    ldcp->hcnt = 0;
    ldcp->hphase = VSW_MILESTONE0;

    vsw_reset_vnet_proto_ops(ldcp);

    D1(vswp, "%s: exit", __func__);
}

/*
 * Process a connection event.
 *
 * Note - care must be taken to ensure that this function is
 * not called with the dlistrw lock held.
 */
static void
vsw_process_conn_evt(vsw_ldc_t *ldcp, uint16_t evt)
{
    vsw_t *vswp = ldcp->ldc_vswp;
    vsw_conn_evt_t *conn = NULL;

    D1(vswp, "%s: enter", __func__);

    /*
     * Check if either a reset or restart event is pending
     * or in progress. If so just return.
     *
     * A VSW_CONN_RESET event originates either with a LDC_RESET_EVT
     * being received by the callback handler, or an ECONNRESET error
     * code being returned from a ldc_read() or ldc_write() call.
     *
     * A VSW_CONN_RESTART event occurs when some error checking code
     * decides that there is a problem with data from the channel,
     * and that the handshake should be restarted.
     */
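    /*
     * ldstub() atomically sets the byte to all ones and returns its
     * previous value, so only the first of any concurrent reset/restart
     * requests sees zero here and proceeds; the rest simply return
     * until reset_active is cleared again.
     */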
    if (((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART)) &&
        (ldstub((uint8_t *)&ldcp->reset_active)))
        return;

    /*
     * If it is an LDC_UP event we first check the recorded
     * state of the channel. If this is UP then we know that
     * the channel moving to the UP state has already been dealt
     * with and don't need to dispatch a new task.
     *
     * The reason for this check is that when we do a ldc_up(),
     * depending on the state of the peer, we may or may not get
     * a LDC_UP event. As we can't depend on getting a LDC_UP evt
     * every time we do ldc_up() we explicitly check the channel
     * status to see whether it has in fact come up (ldc_up() is
     * asynchronous and will complete at some undefined time), and
     * take the appropriate action.
     *
     * The flip side of this is that we may get a LDC_UP event
     * when we have already seen that the channel is up and have
     * dealt with that.
     */
    mutex_enter(&ldcp->status_lock);
    if (evt == VSW_CONN_UP) {
        if ((ldcp->ldc_status == LDC_UP) || (ldcp->reset_active != 0)) {
            mutex_exit(&ldcp->status_lock);
            return;
        }
    }
    mutex_exit(&ldcp->status_lock);

    /*
     * The transaction group id allows us to identify and discard
     * any tasks which are still pending on the taskq and refer
     * to the handshake session we are about to restart or reset.
     * These stale messages no longer have any real meaning.
     */
    (void) atomic_inc_32(&ldcp->hss_id);

    ASSERT(vswp->taskq_p != NULL);

    if ((conn = kmem_zalloc(sizeof (vsw_conn_evt_t), KM_NOSLEEP)) == NULL) {
        cmn_err(CE_WARN, "!vsw%d: unable to allocate memory for"
            " connection event", vswp->instance);
        goto err_exit;
    }

    conn->evt = evt;
    conn->ldcp = ldcp;

    if (ddi_taskq_dispatch(vswp->taskq_p, vsw_conn_task, conn,
        DDI_NOSLEEP) != DDI_SUCCESS) {
        cmn_err(CE_WARN, "!vsw%d: Can't dispatch connection task",
            vswp->instance);

        kmem_free(conn, sizeof (vsw_conn_evt_t));
        goto err_exit;
    }

    D1(vswp, "%s: exit", __func__);
    return;

err_exit:
    /*
     * We have most likely failed due to a memory shortage. Clear the
     * flag so that future requests will at least be attempted and will
     * hopefully succeed.
     */
    if ((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART))
        ldcp->reset_active = 0;
}

/*
 * Deal with events relating to a connection. Invoked from a taskq.
 */
static void
vsw_conn_task(void *arg)
{
    vsw_conn_evt_t *conn = (vsw_conn_evt_t *)arg;
    vsw_ldc_t *ldcp = NULL;
    vsw_t *vswp = NULL;
    uint16_t evt;
    ldc_status_t curr_status;

    ldcp = conn->ldcp;
    evt = conn->evt;
    vswp = ldcp->ldc_vswp;

    D1(vswp, "%s: enter", __func__);

    /* can safely free now that we have copied out the data */
    kmem_free(conn, sizeof (vsw_conn_evt_t));

    mutex_enter(&ldcp->status_lock);
    if (ldc_status(ldcp->ldc_handle, &curr_status) != 0) {
        cmn_err(CE_WARN, "!vsw%d: Unable to read status of "
            "channel %ld", vswp->instance, ldcp->ldc_id);
        mutex_exit(&ldcp->status_lock);
        return;
    }

    /*
     * If we wish to restart the handshake on this channel, then if
     * the channel is UP we bring it DOWN to flush the underlying
     * ldc queue.
     */
    if ((evt == VSW_CONN_RESTART) && (curr_status == LDC_UP))
        (void) ldc_down(ldcp->ldc_handle);

    /*
     * re-init all the associated data structures.
     */
    vsw_ldc_reinit(ldcp);

    /*
     * Bring the channel back up (note it does no harm to
     * do this even if the channel is already UP, it just
     * becomes effectively a no-op).
     */
    (void) ldc_up(ldcp->ldc_handle);

    /*
     * Check if channel is now UP. This will only happen if
     * the peer has also done a ldc_up().
     */
    if (ldc_status(ldcp->ldc_handle, &curr_status) != 0) {
        cmn_err(CE_WARN, "!vsw%d: Unable to read status of "
            "channel %ld", vswp->instance, ldcp->ldc_id);
        mutex_exit(&ldcp->status_lock);
        return;
    }

    ldcp->ldc_status = curr_status;

    /* channel UP so restart handshake by sending version info */
    if (curr_status == LDC_UP) {
        if (ldcp->hcnt++ > vsw_num_handshakes) {
            cmn_err(CE_WARN, "!vsw%d: exceeded number of permitted"
                " handshake attempts (%d) on channel %ld",
                vswp->instance, ldcp->hcnt, ldcp->ldc_id);
            mutex_exit(&ldcp->status_lock);
            return;
        }

        if (vsw_obp_ver_proto_workaround == B_FALSE &&
            (ddi_taskq_dispatch(vswp->taskq_p, vsw_send_ver, ldcp,
            DDI_NOSLEEP) != DDI_SUCCESS)) {
            cmn_err(CE_WARN, "!vsw%d: Can't dispatch version task",
                vswp->instance);

            /*
             * Don't count this as a valid restart attempt if we
             * couldn't send the version msg.
             */
            if (ldcp->hcnt > 0)
                ldcp->hcnt--;
        }
    }

    /*
     * Mark that the process is complete by clearing the flag.
     *
     * Note it is possible that the taskq dispatch above may have failed,
     * most likely due to memory shortage. We still clear the flag so
     * future attempts will at least be attempted and will hopefully
     * succeed.
     */
    if ((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART))
        ldcp->reset_active = 0;

    mutex_exit(&ldcp->status_lock);

    D1(vswp, "%s: exit", __func__);
}

/*
 * Returns 0 if it was legal for the event signified by the flag to have
 * occurred at the time it did; otherwise returns 1.
 */
static int
vsw_check_flag(vsw_ldc_t *ldcp, int dir, uint64_t flag)
{
    vsw_t *vswp = ldcp->ldc_vswp;
    uint64_t state;
    uint64_t phase;

    if (dir == INBOUND)
        state = ldcp->lane_in.lstate;
    else
        state = ldcp->lane_out.lstate;

    phase = ldcp->hphase;

    switch (flag) {
    case VSW_VER_INFO_RECV:
        if (phase > VSW_MILESTONE0) {
            DERR(vswp, "vsw_check_flag (%d): VER_INFO_RECV"
                " when in state %d\n", ldcp->ldc_id, phase);
            vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
            return (1);
        }
        break;

    case VSW_VER_ACK_RECV:
    case VSW_VER_NACK_RECV:
        if (!(state & VSW_VER_INFO_SENT)) {
            DERR(vswp, "vsw_check_flag (%d): spurious VER_ACK or "
                "VER_NACK when in state %d\n", ldcp->ldc_id, phase);
            vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
            return (1);
        } else
            state &= ~VSW_VER_INFO_SENT;
        break;

    case VSW_ATTR_INFO_RECV:
        if ((phase < VSW_MILESTONE1) || (phase >= VSW_MILESTONE2)) {
            DERR(vswp, "vsw_check_flag (%d): ATTR_INFO_RECV"
                " when in state %d\n", ldcp->ldc_id, phase);
            vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
            return (1);
        }
        break;

    case VSW_ATTR_ACK_RECV:
    case VSW_ATTR_NACK_RECV:
        if (!(state & VSW_ATTR_INFO_SENT)) {
            DERR(vswp, "vsw_check_flag (%d): spurious ATTR_ACK"
                " or ATTR_NACK when in state %d\n",
                ldcp->ldc_id, phase);
            vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
            return (1);
        } else
            state &= ~VSW_ATTR_INFO_SENT;
        break;

    case VSW_DRING_INFO_RECV:
        if (phase < VSW_MILESTONE1) {
            DERR(vswp, "vsw_check_flag (%d): DRING_INFO_RECV"
                " when in state %d\n", ldcp->ldc_id, phase);
            vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
            return (1);
        }
        break;

    case VSW_DRING_ACK_RECV:
    case VSW_DRING_NACK_RECV:
        if (!(state & VSW_DRING_INFO_SENT)) {
            DERR(vswp, "vsw_check_flag (%d): spurious DRING_ACK "
                " or DRING_NACK when in state %d\n",
                ldcp->ldc_id, phase);
            vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
            return (1);
        } else
            state &= ~VSW_DRING_INFO_SENT;
        break;

    case VSW_RDX_INFO_RECV:
        if (phase < VSW_MILESTONE3) {
            DERR(vswp, "vsw_check_flag (%d): RDX_INFO_RECV"
                " when in state %d\n", ldcp->ldc_id, phase);
            vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
            return (1);
        }
        break;

    case VSW_RDX_ACK_RECV:
    case VSW_RDX_NACK_RECV:
        if (!(state & VSW_RDX_INFO_SENT)) {
            DERR(vswp, "vsw_check_flag (%d): spurious RDX_ACK or "
                "RDX_NACK when in state %d\n", ldcp->ldc_id, phase);
            vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
            return (1);
        } else
            state &= ~VSW_RDX_INFO_SENT;
        break;

    case VSW_MCST_INFO_RECV:
        if (phase < VSW_MILESTONE3) {
            DERR(vswp, "vsw_check_flag (%d): VSW_MCST_INFO_RECV"
                " when in state %d\n", ldcp->ldc_id, phase);
            vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
            return (1);
        }
        break;

    default:
        DERR(vswp, "vsw_check_flag (%lld): unknown flag (%llx)",
            ldcp->ldc_id, flag);
        return (1);
    }

    if (dir == INBOUND)
        ldcp->lane_in.lstate = state;
    else
        ldcp->lane_out.lstate = state;

    D1(vswp, "vsw_check_flag (chan %lld): exit", ldcp->ldc_id);

    return (0);
}
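
/*
 * Summary of the handshake milestones below (for reference):
 *
 *	MILESTONE0 - version negotiated (VER_ACK sent and received)
 *	MILESTONE1 - attribute information exchanged
 *	MILESTONE2 - dring information exchanged (dring mode only)
 *	MILESTONE3 - RDX exchanged in both directions
 *	MILESTONE4 - handshake complete, outbound lane marked active
 */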
static void
vsw_next_milestone(vsw_ldc_t *ldcp)
{
    vsw_t *vswp = ldcp->ldc_vswp;

    D1(vswp, "%s (chan %lld): enter (phase %ld)", __func__,
        ldcp->ldc_id, ldcp->hphase);

    DUMP_FLAGS(ldcp->lane_in.lstate);
    DUMP_FLAGS(ldcp->lane_out.lstate);

    switch (ldcp->hphase) {

    case VSW_MILESTONE0:
        /*
         * If we haven't started to handshake with our peer,
         * start to do so now.
         */
        if (ldcp->lane_out.lstate == 0) {
            D2(vswp, "%s: (chan %lld) starting handshake "
                "with peer", __func__, ldcp->ldc_id);
            vsw_process_conn_evt(ldcp, VSW_CONN_UP);
        }

        /*
         * Only way to pass this milestone is to have successfully
         * negotiated version info.
         */
        if ((ldcp->lane_in.lstate & VSW_VER_ACK_SENT) &&
            (ldcp->lane_out.lstate & VSW_VER_ACK_RECV)) {

            D2(vswp, "%s: (chan %lld) leaving milestone 0",
                __func__, ldcp->ldc_id);

            vsw_set_vnet_proto_ops(ldcp);

            /*
             * Next milestone is passed when attribute
             * information has been successfully exchanged.
             */
            ldcp->hphase = VSW_MILESTONE1;
            vsw_send_attr(ldcp);

        }
        break;

    case VSW_MILESTONE1:
        /*
         * Only way to pass this milestone is to have successfully
         * negotiated attribute information.
         */
        if (ldcp->lane_in.lstate & VSW_ATTR_ACK_SENT) {

            ldcp->hphase = VSW_MILESTONE2;

            /*
             * If the peer device has said it wishes to
             * use descriptor rings then we send it our ring
             * info, otherwise we just set up a private ring
             * which uses an internal buffer.
             */
            if ((VSW_VER_GTEQ(ldcp, 1, 2) &&
                (ldcp->lane_in.xfer_mode & VIO_DRING_MODE_V1_2)) ||
                (VSW_VER_LT(ldcp, 1, 2) &&
                (ldcp->lane_in.xfer_mode ==
                VIO_DRING_MODE_V1_0))) {
                vsw_send_dring_info(ldcp);
            }
        }
        break;

    case VSW_MILESTONE2:
        /*
         * If the peer has indicated in its attribute message that
         * it wishes to use descriptor rings then the only way
         * to pass this milestone is for us to have received
         * valid dring info.
         *
         * If the peer is not using descriptor rings then just fall
         * through.
         */
        if ((VSW_VER_GTEQ(ldcp, 1, 2) &&
            (ldcp->lane_in.xfer_mode & VIO_DRING_MODE_V1_2)) ||
            (VSW_VER_LT(ldcp, 1, 2) &&
            (ldcp->lane_in.xfer_mode ==
            VIO_DRING_MODE_V1_0))) {
            if (!(ldcp->lane_in.lstate & VSW_DRING_ACK_SENT))
                break;
        }

        D2(vswp, "%s: (chan %lld) leaving milestone 2",
            __func__, ldcp->ldc_id);

        ldcp->hphase = VSW_MILESTONE3;
        vsw_send_rdx(ldcp);
        break;

    case VSW_MILESTONE3:
        /*
         * Pass this milestone when all parameters have been
         * successfully exchanged and RDX sent in both directions.
         *
         * Mark the outbound lane as available to transmit data.
         */
        if ((ldcp->lane_out.lstate & VSW_RDX_ACK_SENT) &&
            (ldcp->lane_in.lstate & VSW_RDX_ACK_RECV)) {

            D2(vswp, "%s: (chan %lld) leaving milestone 3",
                __func__, ldcp->ldc_id);
            D2(vswp, "%s: ** handshake complete (0x%llx : "
                "0x%llx) **", __func__, ldcp->lane_in.lstate,
                ldcp->lane_out.lstate);
            ldcp->lane_out.lstate |= VSW_LANE_ACTIVE;
            ldcp->hphase = VSW_MILESTONE4;
            ldcp->hcnt = 0;
            DISPLAY_STATE();
        } else {
            D2(vswp, "%s: still in milestone 3 (0x%llx : 0x%llx)",
                __func__, ldcp->lane_in.lstate,
                ldcp->lane_out.lstate);
        }
        break;

    case VSW_MILESTONE4:
        D2(vswp, "%s: (chan %lld) in milestone 4", __func__,
            ldcp->ldc_id);
        break;

    default:
        DERR(vswp, "%s: (chan %lld) Unknown Phase %x", __func__,
            ldcp->ldc_id, ldcp->hphase);
    }

    D1(vswp, "%s (chan %lld): exit (phase %ld)", __func__, ldcp->ldc_id,
        ldcp->hphase);
}

/*
 * Check if major version is supported.
 *
 * Returns 0 if it finds a supported major number, and if necessary
 * adjusts the minor field.
 *
 * Returns 1 if it can't match the major number exactly. Sets major/minor
 * to the next lowest supported values, or to zero if no other values are
 * possible.
 */
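/*
 * Worked example (illustrative only), given vsw_versions[] = { {1, 3} }:
 * a peer proposing 1.5 has its minor clamped to 3 and 0 is returned; a
 * peer proposing 2.0 has the message rewritten to 1.3 and 1 is returned
 * (so the message is resent with those values); a peer proposing 0.9
 * gets both fields zeroed and 1 is returned.
 */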
Note that pvid of the 1950 * peer can be different, as vsw has to serve the vnet in that 1951 * vlan even if it is not itself assigned to that vlan. 1952 */ 1953 if ((vswp->max_frame_size == ETHERMAX + VLAN_TAGSZ) && 1954 portp->nvids == 0) { 1955 lp->mtu = ETHERMAX; 1956 } 1957 } 1958 1959 if (VSW_VER_GTEQ(ldcp, 1, 2)) { 1960 /* Versions >= 1.2 */ 1961 1962 if (VSW_PRI_ETH_DEFINED(vswp)) { 1963 /* 1964 * Enable priority routines and pkt mode only if 1965 * at least one pri-eth-type is specified in MD. 1966 */ 1967 ldcp->tx = vsw_ldctx_pri; 1968 ldcp->rx_pktdata = vsw_process_pkt_data; 1969 1970 /* set xfer mode for vsw_send_attr() */ 1971 lp->xfer_mode = VIO_PKT_MODE | VIO_DRING_MODE_V1_2; 1972 } else { 1973 /* no priority eth types defined in MD */ 1974 1975 ldcp->tx = vsw_ldctx; 1976 ldcp->rx_pktdata = vsw_process_pkt_data_nop; 1977 1978 /* set xfer mode for vsw_send_attr() */ 1979 lp->xfer_mode = VIO_DRING_MODE_V1_2; 1980 } 1981 1982 } else { 1983 /* Versions prior to 1.2 */ 1984 1985 vsw_reset_vnet_proto_ops(ldcp); 1986 } 1987 } 1988 1989 /* 1990 * Reset vnet-protocol-version dependent functions to v1.0. 1991 */ 1992 static void 1993 vsw_reset_vnet_proto_ops(vsw_ldc_t *ldcp) 1994 { 1995 lane_t *lp = &ldcp->lane_out; 1996 1997 ldcp->tx = vsw_ldctx; 1998 ldcp->rx_pktdata = vsw_process_pkt_data_nop; 1999 2000 /* set xfer mode for vsw_send_attr() */ 2001 lp->xfer_mode = VIO_DRING_MODE_V1_0; 2002 } 2003 2004 /* 2005 * Main routine for processing messages received over LDC. 2006 */ 2007 static void 2008 vsw_process_pkt(void *arg) 2009 { 2010 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 2011 vsw_t *vswp = ldcp->ldc_vswp; 2012 size_t msglen; 2013 vio_msg_tag_t *tagp; 2014 uint64_t *ldcmsg; 2015 int rv = 0; 2016 2017 2018 D1(vswp, "%s enter: ldcid (%lld)\n", __func__, ldcp->ldc_id); 2019 2020 ASSERT(MUTEX_HELD(&ldcp->ldc_cblock)); 2021 2022 ldcmsg = ldcp->ldcmsg; 2023 /* 2024 * If channel is up read messages until channel is empty. 2025 */ 2026 do { 2027 msglen = ldcp->msglen; 2028 rv = ldc_read(ldcp->ldc_handle, (caddr_t)ldcmsg, &msglen); 2029 2030 if (rv != 0) { 2031 DERR(vswp, "%s :ldc_read err id(%lld) rv(%d) len(%d)\n", 2032 __func__, ldcp->ldc_id, rv, msglen); 2033 } 2034 2035 /* channel has been reset */ 2036 if (rv == ECONNRESET) { 2037 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 2038 break; 2039 } 2040 2041 if (msglen == 0) { 2042 D2(vswp, "%s: ldc_read id(%lld) NODATA", __func__, 2043 ldcp->ldc_id); 2044 break; 2045 } 2046 2047 D2(vswp, "%s: ldc_read id(%lld): msglen(%d)", __func__, 2048 ldcp->ldc_id, msglen); 2049 2050 /* 2051 * Figure out what sort of packet we have received by 2052 * examining the msg tag, and then dispatch it appropriately. 2053 */ 2054 tagp = (vio_msg_tag_t *)ldcmsg; 2055 2056 switch (tagp->vio_msgtype) { 2057 case VIO_TYPE_CTRL: 2058 vsw_dispatch_ctrl_task(ldcp, ldcmsg, tagp); 2059 break; 2060 case VIO_TYPE_DATA: 2061 vsw_process_data_pkt(ldcp, ldcmsg, tagp, msglen); 2062 break; 2063 case VIO_TYPE_ERR: 2064 vsw_process_err_pkt(ldcp, ldcmsg, tagp); 2065 break; 2066 default: 2067 DERR(vswp, "%s: Unknown tag(%lx) id(%lx)\n", __func__, 2068 tagp->vio_msgtype, ldcp->ldc_id); 2069 break; 2070 } 2071 } while (msglen); 2072 2073 D1(vswp, "%s exit: ldcid (%lld)\n", __func__, ldcp->ldc_id); 2074 } 2075 2076 /* 2077 * Dispatch a task to process a VIO control message.
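 *
 * For reference, every VIO message begins with a common tag (defined
 * in sys/vio_mailbox.h) and all dispatch in this file keys off it; a
 * short sketch of the fields consulted here:
 *
 *	tag.vio_msgtype		- VIO_TYPE_CTRL / VIO_TYPE_DATA / VIO_TYPE_ERR
 *	tag.vio_subtype		- VIO_SUBTYPE_INFO / _ACK / _NACK
 *	tag.vio_subtype_env	- specific envelope, e.g. VIO_VER_INFO,
 *				  VIO_RDX or VIO_DRING_DATA
 *	tag.vio_sid		- sender's session id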
2078 */ 2079 static void 2080 vsw_dispatch_ctrl_task(vsw_ldc_t *ldcp, void *cpkt, vio_msg_tag_t *tagp) 2081 { 2082 vsw_ctrl_task_t *ctaskp = NULL; 2083 vsw_port_t *port = ldcp->ldc_port; 2084 vsw_t *vswp = port->p_vswp; 2085 2086 D1(vswp, "%s: enter", __func__); 2087 2088 /* 2089 * We need to handle RDX ACK messages in-band as once they 2090 * are exchanged it is possible that we will get an 2091 * immediate (legitimate) data packet. 2092 */ 2093 if ((tagp->vio_subtype_env == VIO_RDX) && 2094 (tagp->vio_subtype == VIO_SUBTYPE_ACK)) { 2095 2096 if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_ACK_RECV)) 2097 return; 2098 2099 ldcp->lane_in.lstate |= VSW_RDX_ACK_RECV; 2100 D2(vswp, "%s (%ld) handling RDX_ACK in place " 2101 "(ostate 0x%llx : hphase %d)", __func__, 2102 ldcp->ldc_id, ldcp->lane_in.lstate, ldcp->hphase); 2103 vsw_next_milestone(ldcp); 2104 return; 2105 } 2106 2107 ctaskp = kmem_alloc(sizeof (vsw_ctrl_task_t), KM_NOSLEEP); 2108 2109 if (ctaskp == NULL) { 2110 DERR(vswp, "%s: unable to alloc space for ctrl msg", __func__); 2111 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2112 return; 2113 } 2114 2115 ctaskp->ldcp = ldcp; 2116 bcopy((def_msg_t *)cpkt, &ctaskp->pktp, sizeof (def_msg_t)); 2117 ctaskp->hss_id = ldcp->hss_id; 2118 2119 /* 2120 * Dispatch task to processing taskq if port is not in 2121 * the process of being detached. 2122 */ 2123 mutex_enter(&port->state_lock); 2124 if (port->state == VSW_PORT_INIT) { 2125 if ((vswp->taskq_p == NULL) || 2126 (ddi_taskq_dispatch(vswp->taskq_p, vsw_process_ctrl_pkt, 2127 ctaskp, DDI_NOSLEEP) != DDI_SUCCESS)) { 2128 DERR(vswp, "%s: unable to dispatch task to taskq", 2129 __func__); 2130 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2131 mutex_exit(&port->state_lock); 2132 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2133 return; 2134 } 2135 } else { 2136 DWARN(vswp, "%s: port %d detaching, not dispatching " 2137 "task", __func__, port->p_instance); /* free the task we will never dispatch */ kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2138 } 2139 2140 mutex_exit(&port->state_lock); 2141 2142 D2(vswp, "%s: dispatched task to taskq for chan %d", __func__, 2143 ldcp->ldc_id); 2144 D1(vswp, "%s: exit", __func__); 2145 } 2146 2147 /* 2148 * Process a VIO ctrl message. Invoked from taskq. 2149 */ 2150 static void 2151 vsw_process_ctrl_pkt(void *arg) 2152 { 2153 vsw_ctrl_task_t *ctaskp = (vsw_ctrl_task_t *)arg; 2154 vsw_ldc_t *ldcp = ctaskp->ldcp; 2155 vsw_t *vswp = ldcp->ldc_vswp; 2156 vio_msg_tag_t tag; 2157 uint16_t env; 2158 2159 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 2160 2161 bcopy(&ctaskp->pktp, &tag, sizeof (vio_msg_tag_t)); 2162 env = tag.vio_subtype_env; 2163 2164 /* stale pkt check */ 2165 if (ctaskp->hss_id < ldcp->hss_id) { 2166 DWARN(vswp, "%s: discarding stale packet belonging to earlier" 2167 " (%ld) handshake session", __func__, ctaskp->hss_id); kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2168 return; 2169 } 2170 2171 /* session id check */ 2172 if (ldcp->session_status & VSW_PEER_SESSION) { 2173 if (ldcp->peer_session != tag.vio_sid) { 2174 DERR(vswp, "%s (chan %d): invalid session id (%llx)", 2175 __func__, ldcp->ldc_id, tag.vio_sid); 2176 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2177 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2178 return; 2179 } 2180 } 2181 2182 /* 2183 * Switch on vio_subtype envelope, then let lower routines 2184 * decide whether it is an INFO, ACK or NACK packet.
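 *
 * As a concrete illustration of the flow: a VIO_VER_INFO envelope
 * carrying subtype VIO_SUBTYPE_INFO is our peer opening version
 * negotiation, while the same envelope carrying VIO_SUBTYPE_ACK is
 * the peer accepting a version we proposed earlier; both are routed
 * to vsw_process_ctrl_ver_pkt() below.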
2185 */ 2186 switch (env) { 2187 case VIO_VER_INFO: 2188 vsw_process_ctrl_ver_pkt(ldcp, &ctaskp->pktp); 2189 break; 2190 case VIO_DRING_REG: 2191 vsw_process_ctrl_dring_reg_pkt(ldcp, &ctaskp->pktp); 2192 break; 2193 case VIO_DRING_UNREG: 2194 vsw_process_ctrl_dring_unreg_pkt(ldcp, &ctaskp->pktp); 2195 break; 2196 case VIO_ATTR_INFO: 2197 vsw_process_ctrl_attr_pkt(ldcp, &ctaskp->pktp); 2198 break; 2199 case VNET_MCAST_INFO: 2200 vsw_process_ctrl_mcst_pkt(ldcp, &ctaskp->pktp); 2201 break; 2202 case VIO_RDX: 2203 vsw_process_ctrl_rdx_pkt(ldcp, &ctaskp->pktp); 2204 break; 2205 default: 2206 DERR(vswp, "%s: unknown vio_subtype_env (%x)\n", __func__, env); 2207 } 2208 2209 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2210 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 2211 } 2212 2213 /* 2214 * Version negotiation. We can end up here either because our peer 2215 * has responded to a handshake message we have sent it, or our peer 2216 * has initiated a handshake with us. If it's the former then it can 2217 * only be an ACK or NACK; if it's the latter it can only be an INFO. 2218 * 2219 * If it's an ACK we move to the next stage of the handshake, namely 2220 * attribute exchange. If it's a NACK we see if we can specify another 2221 * version; if we can't we stop. 2222 * 2223 * If it is an INFO we reset all params associated with communication 2224 * in that direction over this channel (remember connection is 2225 * essentially 2 independent simplex channels). 2226 */ 2227 void 2228 vsw_process_ctrl_ver_pkt(vsw_ldc_t *ldcp, void *pkt) 2229 { 2230 vio_ver_msg_t *ver_pkt; 2231 vsw_t *vswp = ldcp->ldc_vswp; 2232 2233 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 2234 2235 /* 2236 * We know this is a ctrl/version packet so 2237 * cast it into the correct structure. 2238 */ 2239 ver_pkt = (vio_ver_msg_t *)pkt; 2240 2241 switch (ver_pkt->tag.vio_subtype) { 2242 case VIO_SUBTYPE_INFO: 2243 D2(vswp, "vsw_process_ctrl_ver_pkt: VIO_SUBTYPE_INFO\n"); 2244 2245 /* 2246 * Record the session id, which we will use from now 2247 * until we see another VER_INFO msg. Even then the 2248 * session id in most cases will be unchanged, except 2249 * if channel was reset. 2250 */ 2251 if ((ldcp->session_status & VSW_PEER_SESSION) && 2252 (ldcp->peer_session != ver_pkt->tag.vio_sid)) { 2253 DERR(vswp, "%s: updating session id for chan %lld " 2254 "from %llx to %llx", __func__, ldcp->ldc_id, 2255 ldcp->peer_session, ver_pkt->tag.vio_sid); 2256 } 2257 2258 ldcp->peer_session = ver_pkt->tag.vio_sid; 2259 ldcp->session_status |= VSW_PEER_SESSION; 2260 2261 /* Legal message at this time ? */ 2262 if (vsw_check_flag(ldcp, INBOUND, VSW_VER_INFO_RECV)) 2263 return; 2264 2265 /* 2266 * First check the device class. Currently only expect 2267 * to be talking to a network device. In the future may 2268 * also talk to another switch. 2269 */ 2270 if (ver_pkt->dev_class != VDEV_NETWORK) { 2271 DERR(vswp, "%s: illegal device class %d", __func__, 2272 ver_pkt->dev_class); 2273 2274 ver_pkt->tag.vio_sid = ldcp->local_session; 2275 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2276 2277 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 2278 2279 (void) vsw_send_msg(ldcp, (void *)ver_pkt, 2280 sizeof (vio_ver_msg_t), B_TRUE); 2281 2282 ldcp->lane_in.lstate |= VSW_VER_NACK_SENT; 2283 vsw_next_milestone(ldcp); 2284 return; 2285 } else { 2286 ldcp->dev_class = ver_pkt->dev_class; 2287 } 2288 2289 /* 2290 * Now check the version.
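 *
 * To sketch vsw_supported_version()'s contract, assuming a
 * hypothetical vsw_versions[] table whose highest entry is { 1, 3 }:
 *
 *	peer proposes 1.4 -> returns 0, minor clipped to 1.3 (ACKed below)
 *	peer proposes 2.0 -> returns 1, msg rewritten to 1.3 (NACKed below)
 *	peer proposes 0.9 -> returns 1, msg rewritten to 0.0 (NACKed below)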
2291 */ 2292 if (vsw_supported_version(ver_pkt) == 0) { 2293 /* 2294 * We support this major version and possibly an 2295 * adjusted minor version. 2296 */ 2297 2298 D2(vswp, "%s: accepted ver %d:%d", __func__, 2299 ver_pkt->ver_major, ver_pkt->ver_minor); 2300 2301 /* Store accepted values */ 2302 ldcp->lane_in.ver_major = ver_pkt->ver_major; 2303 ldcp->lane_in.ver_minor = ver_pkt->ver_minor; 2304 2305 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 2306 2307 ldcp->lane_in.lstate |= VSW_VER_ACK_SENT; 2308 2309 if (vsw_obp_ver_proto_workaround == B_TRUE) { 2310 /* 2311 * Send a version info message 2312 * using the accepted version that 2313 * we are about to ack. Also note that 2314 * we send our ver info before we ack. 2315 * Otherwise, as soon as receiving the 2316 * ack, obp sends attr info msg, which 2317 * breaks vsw_check_flag() invoked 2318 * from vsw_process_ctrl_attr_pkt(); 2319 * as we also need VSW_VER_ACK_RECV to 2320 * be set in lane_out.lstate, before 2321 * we can receive attr info. 2322 */ 2323 vsw_send_ver(ldcp); 2324 } 2325 } else { 2326 /* 2327 * NACK back with the next lower major/minor 2328 * pairing we support (if we don't support any more 2329 * versions then they will be set to zero). 2330 */ 2331 2332 D2(vswp, "%s: replying with ver %d:%d", __func__, 2333 ver_pkt->ver_major, ver_pkt->ver_minor); 2334 2335 /* Store updated values */ 2336 ldcp->lane_in.ver_major = ver_pkt->ver_major; 2337 ldcp->lane_in.ver_minor = ver_pkt->ver_minor; 2338 2339 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2340 2341 ldcp->lane_in.lstate |= VSW_VER_NACK_SENT; 2342 } 2343 2344 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 2345 ver_pkt->tag.vio_sid = ldcp->local_session; 2346 (void) vsw_send_msg(ldcp, (void *)ver_pkt, 2347 sizeof (vio_ver_msg_t), B_TRUE); 2348 2349 vsw_next_milestone(ldcp); 2350 break; 2351 2352 case VIO_SUBTYPE_ACK: 2353 D2(vswp, "%s: VIO_SUBTYPE_ACK\n", __func__); 2354 2355 if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_ACK_RECV)) 2356 return; 2357 2358 /* Store updated values */ 2359 ldcp->lane_out.ver_major = ver_pkt->ver_major; 2360 ldcp->lane_out.ver_minor = ver_pkt->ver_minor; 2361 2362 ldcp->lane_out.lstate |= VSW_VER_ACK_RECV; 2363 vsw_next_milestone(ldcp); 2364 2365 break; 2366 2367 case VIO_SUBTYPE_NACK: 2368 D2(vswp, "%s: VIO_SUBTYPE_NACK\n", __func__); 2369 2370 if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_NACK_RECV)) 2371 return; 2372 2373 /* 2374 * If our peer sent us a NACK with the ver fields set to 2375 * zero then there is nothing more we can do. Otherwise see 2376 * if we support either the version suggested, or a lesser 2377 * one. 2378 */ 2379 if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) { 2380 DERR(vswp, "%s: peer unable to negotiate any " 2381 "further.", __func__); 2382 ldcp->lane_out.lstate |= VSW_VER_NACK_RECV; 2383 vsw_next_milestone(ldcp); 2384 return; 2385 } 2386 2387 /* 2388 * Check to see if we support this major version or 2389 * a lower one. If we don't then maj/min will be set 2390 * to zero.
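 *
 * A hypothetical exchange, assuming we support up to 1.2 while our
 * peer supports only 1.1:
 *
 *	us   -> VER_INFO 1.2
 *	peer -> VER_NACK 1.1
 *	us   -> VER_INFO 1.1	(1.1 passes the check below)
 *	peer -> VER_ACK  1.1	(handshake proceeds at 1.1)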
2391 */ 2392 (void) vsw_supported_version(ver_pkt); 2393 if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) { 2394 /* Nothing more we can do */ 2395 DERR(vswp, "%s: version negotiation failed.\n", 2396 __func__); 2397 ldcp->lane_out.lstate |= VSW_VER_NACK_RECV; 2398 vsw_next_milestone(ldcp); 2399 } else { 2400 /* found a supported major version */ 2401 ldcp->lane_out.ver_major = ver_pkt->ver_major; 2402 ldcp->lane_out.ver_minor = ver_pkt->ver_minor; 2403 2404 D2(vswp, "%s: resending with updated values (%x, %x)", 2405 __func__, ver_pkt->ver_major, ver_pkt->ver_minor); 2406 2407 ldcp->lane_out.lstate |= VSW_VER_INFO_SENT; 2408 ver_pkt->tag.vio_sid = ldcp->local_session; 2409 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO; 2410 2411 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 2412 2413 (void) vsw_send_msg(ldcp, (void *)ver_pkt, 2414 sizeof (vio_ver_msg_t), B_TRUE); 2415 2416 vsw_next_milestone(ldcp); 2417 2418 } 2419 break; 2420 2421 default: 2422 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 2423 ver_pkt->tag.vio_subtype); 2424 } 2425 2426 D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id); 2427 } 2428 2429 /* 2430 * Process an attribute packet. We can end up here either because our peer 2431 * has ACK/NACK'ed back to an earlier ATTR msg we had sent it, or our 2432 * peer has sent us an attribute INFO message. 2433 * 2434 * If it's an ACK we then move to the next stage of the handshake which 2435 * is to send our descriptor ring info to our peer. If it's a NACK then 2436 * there is nothing more we can (currently) do. 2437 * 2438 * If we get a valid/acceptable INFO packet (and we have already negotiated 2439 * a version) we ACK back and set channel state to ATTR_RECV, otherwise we 2440 * NACK back and reset channel state to INACTIV. 2441 * 2442 * FUTURE: in time we will probably negotiate over attributes, but for 2443 * the moment unacceptable attributes are regarded as a fatal error. 2444 * 2445 */ 2446 void 2447 vsw_process_ctrl_attr_pkt(vsw_ldc_t *ldcp, void *pkt) 2448 { 2449 vnet_attr_msg_t *attr_pkt; 2450 vsw_t *vswp = ldcp->ldc_vswp; 2451 vsw_port_t *port = ldcp->ldc_port; 2452 uint64_t macaddr = 0; 2453 int i; 2454 2455 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 2456 2457 /* 2458 * We know this is a ctrl/attr packet so 2459 * cast it into the correct structure. 2460 */ 2461 attr_pkt = (vnet_attr_msg_t *)pkt; 2462 2463 switch (attr_pkt->tag.vio_subtype) { 2464 case VIO_SUBTYPE_INFO: 2465 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 2466 2467 if (vsw_check_flag(ldcp, INBOUND, VSW_ATTR_INFO_RECV)) 2468 return; 2469 2470 /* 2471 * If the attributes are unacceptable then we NACK back. 2472 */ 2473 if (vsw_check_attr(attr_pkt, ldcp)) { 2474 2475 DERR(vswp, "%s (chan %d): invalid attributes", 2476 __func__, ldcp->ldc_id); 2477 2478 vsw_free_lane_resources(ldcp, INBOUND); 2479 2480 attr_pkt->tag.vio_sid = ldcp->local_session; 2481 attr_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2482 2483 DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt); 2484 ldcp->lane_in.lstate |= VSW_ATTR_NACK_SENT; 2485 (void) vsw_send_msg(ldcp, (void *)attr_pkt, 2486 sizeof (vnet_attr_msg_t), B_TRUE); 2487 2488 vsw_next_milestone(ldcp); 2489 return; 2490 } 2491 2492 /* 2493 * Otherwise store attributes for this lane and update 2494 * lane state.
2495 */ 2496 ldcp->lane_in.mtu = attr_pkt->mtu; 2497 ldcp->lane_in.addr = attr_pkt->addr; 2498 ldcp->lane_in.addr_type = attr_pkt->addr_type; 2499 ldcp->lane_in.xfer_mode = attr_pkt->xfer_mode; 2500 ldcp->lane_in.ack_freq = attr_pkt->ack_freq; 2501 2502 macaddr = ldcp->lane_in.addr; 2503 for (i = ETHERADDRL - 1; i >= 0; i--) { 2504 port->p_macaddr.ether_addr_octet[i] = macaddr & 0xFF; 2505 macaddr >>= 8; 2506 } 2507 2508 /* create the fdb entry for this port/mac address */ 2509 vsw_fdbe_add(vswp, port); 2510 2511 /* add the port to the specified vlans */ 2512 vsw_vlan_add_ids(port, VSW_VNETPORT); 2513 2514 /* set up device-specific xmit routines */ 2515 mutex_enter(&port->tx_lock); 2516 if ((VSW_VER_GTEQ(ldcp, 1, 2) && 2517 (ldcp->lane_in.xfer_mode & VIO_DRING_MODE_V1_2)) || 2518 (VSW_VER_LT(ldcp, 1, 2) && 2519 (ldcp->lane_in.xfer_mode == VIO_DRING_MODE_V1_0))) { 2520 D2(vswp, "%s: mode = VIO_DRING_MODE", __func__); 2521 port->transmit = vsw_dringsend; 2522 } else if (ldcp->lane_in.xfer_mode == VIO_DESC_MODE) { 2523 D2(vswp, "%s: mode = VIO_DESC_MODE", __func__); 2524 vsw_create_privring(ldcp); 2525 port->transmit = vsw_descrsend; 2526 ldcp->lane_out.xfer_mode = VIO_DESC_MODE; 2527 } 2528 mutex_exit(&port->tx_lock); 2529 2530 attr_pkt->tag.vio_sid = ldcp->local_session; 2531 attr_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 2532 2533 DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt); 2534 2535 ldcp->lane_in.lstate |= VSW_ATTR_ACK_SENT; 2536 2537 (void) vsw_send_msg(ldcp, (void *)attr_pkt, 2538 sizeof (vnet_attr_msg_t), B_TRUE); 2539 2540 vsw_next_milestone(ldcp); 2541 break; 2542 2543 case VIO_SUBTYPE_ACK: 2544 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 2545 2546 if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_ACK_RECV)) 2547 return; 2548 2549 ldcp->lane_out.lstate |= VSW_ATTR_ACK_RECV; 2550 vsw_next_milestone(ldcp); 2551 break; 2552 2553 case VIO_SUBTYPE_NACK: 2554 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 2555 2556 if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_NACK_RECV)) 2557 return; 2558 2559 ldcp->lane_out.lstate |= VSW_ATTR_NACK_RECV; 2560 vsw_next_milestone(ldcp); 2561 break; 2562 2563 default: 2564 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 2565 attr_pkt->tag.vio_subtype); 2566 } 2567 2568 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 2569 } 2570 2571 /* 2572 * Process a dring info packet. We can end up here either because our peer 2573 * has ACK/NACK'ed back to an earlier DRING msg we had sent it, or our 2574 * peer has sent us a dring INFO message. 2575 * 2576 * If we get a valid/acceptable INFO packet (and we have already negotiated 2577 * a version) we ACK back and update the lane state, otherwise we NACK back. 2578 * 2579 * FUTURE: nothing to stop client from sending us info on multiple drings 2580 * but for the moment we will just use the first one we are given. 2581 * 2582 */ 2583 void 2584 vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *ldcp, void *pkt) 2585 { 2586 vio_dring_reg_msg_t *dring_pkt; 2587 vsw_t *vswp = ldcp->ldc_vswp; 2588 ldc_mem_info_t minfo; 2589 dring_info_t *dp, *dbp; 2590 int dring_found = 0; 2591 2592 /* 2593 * We know this is a ctrl/dring packet so 2594 * cast it into the correct structure.
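 *
 * For reference, the INFO path below reduces to this sequence of
 * calls (an outline of the code that follows, not new behaviour):
 *
 *	vsw_check_dring_info(pkt)		sanity-check ring geometry
 *	ldc_mem_dring_map(...)			map the peer's exported ring
 *	ldc_mem_dring_info(handle, &minfo)	get mapped (public) address
 *	dp->ident = ldcp->next_ident++		local handle for this ring
 *	ACK sent back to the peer carrying dp->ident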
2595 */ 2596 dring_pkt = (vio_dring_reg_msg_t *)pkt; 2597 2598 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 2599 2600 switch (dring_pkt->tag.vio_subtype) { 2601 case VIO_SUBTYPE_INFO: 2602 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 2603 2604 if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV)) 2605 return; 2606 2607 /* 2608 * If the dring params are unacceptable then we NACK back. 2609 */ 2610 if (vsw_check_dring_info(dring_pkt)) { 2611 2612 DERR(vswp, "%s (%lld): invalid dring info", 2613 __func__, ldcp->ldc_id); 2614 2615 vsw_free_lane_resources(ldcp, INBOUND); 2616 2617 dring_pkt->tag.vio_sid = ldcp->local_session; 2618 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2619 2620 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 2621 2622 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 2623 2624 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 2625 sizeof (vio_dring_reg_msg_t), B_TRUE); 2626 2627 vsw_next_milestone(ldcp); 2628 return; 2629 } 2630 2631 /* 2632 * Otherwise, attempt to map in the dring using the 2633 * cookie. If that succeeds we send back a unique dring 2634 * identifier that the sending side will use in future 2635 * to refer to this descriptor ring. 2636 */ 2637 dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 2638 2639 dp->num_descriptors = dring_pkt->num_descriptors; 2640 dp->descriptor_size = dring_pkt->descriptor_size; 2641 dp->options = dring_pkt->options; 2642 dp->ncookies = dring_pkt->ncookies; 2643 2644 /* 2645 * Note: should only get one cookie. Enforced in 2646 * the ldc layer. 2647 */ 2648 bcopy(&dring_pkt->cookie[0], &dp->cookie[0], 2649 sizeof (ldc_mem_cookie_t)); 2650 2651 D2(vswp, "%s: num_desc %ld : desc_size %ld", __func__, 2652 dp->num_descriptors, dp->descriptor_size); 2653 D2(vswp, "%s: options 0x%lx: ncookies %ld", __func__, 2654 dp->options, dp->ncookies); 2655 2656 if ((ldc_mem_dring_map(ldcp->ldc_handle, &dp->cookie[0], 2657 dp->ncookies, dp->num_descriptors, dp->descriptor_size, 2658 LDC_SHADOW_MAP, &(dp->handle))) != 0) { 2659 2660 DERR(vswp, "%s: dring_map failed\n", __func__); 2661 2662 kmem_free(dp, sizeof (dring_info_t)); 2663 vsw_free_lane_resources(ldcp, INBOUND); 2664 2665 dring_pkt->tag.vio_sid = ldcp->local_session; 2666 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2667 2668 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 2669 2670 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 2671 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 2672 sizeof (vio_dring_reg_msg_t), B_TRUE); 2673 2674 vsw_next_milestone(ldcp); 2675 return; 2676 } 2677 2678 if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) { 2679 2680 DERR(vswp, "%s: dring_addr failed\n", __func__); 2681 2682 kmem_free(dp, sizeof (dring_info_t)); 2683 vsw_free_lane_resources(ldcp, INBOUND); 2684 2685 dring_pkt->tag.vio_sid = ldcp->local_session; 2686 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2687 2688 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 2689 2690 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 2691 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 2692 sizeof (vio_dring_reg_msg_t), B_TRUE); 2693 2694 vsw_next_milestone(ldcp); 2695 return; 2696 } else { 2697 /* store the address of the pub part of ring */ 2698 dp->pub_addr = minfo.vaddr; 2699 } 2700 2701 /* no private section as we are importing */ 2702 dp->priv_addr = NULL; 2703 2704 /* 2705 * Using simple mono increasing int for ident at 2706 * the moment. 
2707 */ 2708 dp->ident = ldcp->next_ident; 2709 ldcp->next_ident++; 2710 2711 dp->end_idx = 0; 2712 dp->next = NULL; 2713 2714 /* 2715 * Link it onto the end of the list of drings 2716 * for this lane. 2717 */ 2718 if (ldcp->lane_in.dringp == NULL) { 2719 D2(vswp, "%s: adding first INBOUND dring", __func__); 2720 ldcp->lane_in.dringp = dp; 2721 } else { 2722 dbp = ldcp->lane_in.dringp; 2723 2724 while (dbp->next != NULL) 2725 dbp = dbp->next; 2726 2727 dbp->next = dp; 2728 } 2729 2730 /* acknowledge it */ 2731 dring_pkt->tag.vio_sid = ldcp->local_session; 2732 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 2733 dring_pkt->dring_ident = dp->ident; 2734 2735 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 2736 sizeof (vio_dring_reg_msg_t), B_TRUE); 2737 2738 ldcp->lane_in.lstate |= VSW_DRING_ACK_SENT; 2739 vsw_next_milestone(ldcp); 2740 break; 2741 2742 case VIO_SUBTYPE_ACK: 2743 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 2744 2745 if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_ACK_RECV)) 2746 return; 2747 2748 /* 2749 * Peer is acknowledging our dring info and will have 2750 * sent us a dring identifier which we will use to 2751 * refer to this ring w.r.t. our peer. 2752 */ 2753 dp = ldcp->lane_out.dringp; 2754 if (dp != NULL) { 2755 /* 2756 * Find the ring this ident should be associated 2757 * with. 2758 */ 2759 if (vsw_dring_match(dp, dring_pkt)) { 2760 dring_found = 1; 2761 2762 } else while (dp != NULL) { 2763 if (vsw_dring_match(dp, dring_pkt)) { 2764 dring_found = 1; 2765 break; 2766 } 2767 dp = dp->next; 2768 } 2769 2770 if (dring_found == 0) { 2771 DERR(NULL, "%s: unrecognised ring cookie", 2772 __func__); 2773 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2774 return; 2775 } 2776 2777 } else { 2778 DERR(vswp, "%s: DRING ACK received but no drings " 2779 "allocated", __func__); 2780 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2781 return; 2782 } 2783 2784 /* store ident */ 2785 dp->ident = dring_pkt->dring_ident; 2786 ldcp->lane_out.lstate |= VSW_DRING_ACK_RECV; 2787 vsw_next_milestone(ldcp); 2788 break; 2789 2790 case VIO_SUBTYPE_NACK: 2791 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 2792 2793 if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_NACK_RECV)) 2794 return; 2795 2796 ldcp->lane_out.lstate |= VSW_DRING_NACK_RECV; 2797 vsw_next_milestone(ldcp); 2798 break; 2799 2800 default: 2801 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 2802 dring_pkt->tag.vio_subtype); 2803 } 2804 2805 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 2806 } 2807 2808 /* 2809 * Process a request from peer to unregister a dring. 2810 * 2811 * For the moment we just restart the handshake if our 2812 * peer endpoint attempts to unregister a dring. 2813 */ 2814 void 2815 vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *ldcp, void *pkt) 2816 { 2817 vsw_t *vswp = ldcp->ldc_vswp; 2818 vio_dring_unreg_msg_t *dring_pkt; 2819 2820 /* 2821 * We know this is a ctrl/dring packet so 2822 * cast it into the correct structure. 
2823 */ 2824 dring_pkt = (vio_dring_unreg_msg_t *)pkt; 2825 2826 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 2827 2828 switch (dring_pkt->tag.vio_subtype) { 2829 case VIO_SUBTYPE_INFO: 2830 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 2831 2832 DWARN(vswp, "%s: restarting handshake..", __func__); 2833 break; 2834 2835 case VIO_SUBTYPE_ACK: 2836 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 2837 2838 DWARN(vswp, "%s: restarting handshake..", __func__); 2839 break; 2840 2841 case VIO_SUBTYPE_NACK: 2842 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 2843 2844 DWARN(vswp, "%s: restarting handshake..", __func__); 2845 break; 2846 2847 default: 2848 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 2849 dring_pkt->tag.vio_subtype); 2850 } 2851 2852 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2853 2854 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 2855 } 2856 2857 #define SND_MCST_NACK(ldcp, pkt) \ 2858 pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \ 2859 pkt->tag.vio_sid = ldcp->local_session; \ 2860 (void) vsw_send_msg(ldcp, (void *)pkt, \ 2861 sizeof (vnet_mcast_msg_t), B_TRUE); 2862 2863 /* 2864 * Process a multicast request from a vnet. 2865 * 2866 * Vnet's specify a multicast address that they are interested in. This 2867 * address is used as a key into the hash table which forms the multicast 2868 * forwarding database (mFDB). 2869 * 2870 * The table keys are the multicast addresses, while the table entries 2871 * are pointers to lists of ports which wish to receive packets for the 2872 * specified multicast address. 2873 * 2874 * When a multicast packet is being switched we use the address as a key 2875 * into the hash table, and then walk the appropriate port list forwarding 2876 * the pkt to each port in turn. 2877 * 2878 * If a vnet is no longer interested in a particular multicast grouping 2879 * we simply find the correct location in the hash table and then delete 2880 * the relevant port from the port list. 2881 * 2882 * To deal with the case whereby a port is being deleted without first 2883 * removing itself from the lists in the hash table, we maintain a list 2884 * of multicast addresses the port has registered an interest in, within 2885 * the port structure itself. We then simply walk that list of addresses 2886 * using them as keys into the hash table and remove the port from the 2887 * appropriate lists. 2888 */ 2889 static void 2890 vsw_process_ctrl_mcst_pkt(vsw_ldc_t *ldcp, void *pkt) 2891 { 2892 vnet_mcast_msg_t *mcst_pkt; 2893 vsw_port_t *port = ldcp->ldc_port; 2894 vsw_t *vswp = ldcp->ldc_vswp; 2895 int i; 2896 2897 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 2898 2899 /* 2900 * We know this is a ctrl/mcast packet so 2901 * cast it into the correct structure. 2902 */ 2903 mcst_pkt = (vnet_mcast_msg_t *)pkt; 2904 2905 switch (mcst_pkt->tag.vio_subtype) { 2906 case VIO_SUBTYPE_INFO: 2907 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 2908 2909 /* 2910 * Check if in correct state to receive a multicast 2911 * message (i.e. handshake complete). If not reset 2912 * the handshake. 2913 */ 2914 if (vsw_check_flag(ldcp, INBOUND, VSW_MCST_INFO_RECV)) 2915 return; 2916 2917 /* 2918 * Before attempting to add or remove address check 2919 * that they are valid multicast addresses. 2920 * If not, then NACK back. 
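 *
 * The loop below tests the group bit (the least significant bit of
 * the first octet of the address). For example 01:00:5e:00:00:01
 * passes, while a unicast address such as 00:14:4f:f8:00:01 would
 * be NACKed back to the vnet.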
2921 */ 2922 for (i = 0; i < mcst_pkt->count; i++) { 2923 if ((mcst_pkt->mca[i].ether_addr_octet[0] & 01) != 1) { 2924 DERR(vswp, "%s: invalid multicast address", 2925 __func__); 2926 SND_MCST_NACK(ldcp, mcst_pkt); 2927 return; 2928 } 2929 } 2930 2931 /* 2932 * Now add/remove the addresses. If this fails we 2933 * NACK back. 2934 */ 2935 if (vsw_add_rem_mcst(mcst_pkt, port) != 0) { 2936 SND_MCST_NACK(ldcp, mcst_pkt); 2937 return; 2938 } 2939 2940 mcst_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 2941 mcst_pkt->tag.vio_sid = ldcp->local_session; 2942 2943 DUMP_TAG_PTR((vio_msg_tag_t *)mcst_pkt); 2944 2945 (void) vsw_send_msg(ldcp, (void *)mcst_pkt, 2946 sizeof (vnet_mcast_msg_t), B_TRUE); 2947 break; 2948 2949 case VIO_SUBTYPE_ACK: 2950 DWARN(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 2951 2952 /* 2953 * We shouldn't ever get a multicast ACK message as 2954 * at the moment we never request multicast addresses 2955 * to be set on some other device. This may change in 2956 * the future if we have cascading switches. 2957 */ 2958 if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_ACK_RECV)) 2959 return; 2960 2961 /* Do nothing */ 2962 break; 2963 2964 case VIO_SUBTYPE_NACK: 2965 DWARN(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 2966 2967 /* 2968 * We shouldn't get a multicast NACK packet for the 2969 * same reasons as we shouldn't get a ACK packet. 2970 */ 2971 if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_NACK_RECV)) 2972 return; 2973 2974 /* Do nothing */ 2975 break; 2976 2977 default: 2978 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 2979 mcst_pkt->tag.vio_subtype); 2980 } 2981 2982 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 2983 } 2984 2985 static void 2986 vsw_process_ctrl_rdx_pkt(vsw_ldc_t *ldcp, void *pkt) 2987 { 2988 vio_rdx_msg_t *rdx_pkt; 2989 vsw_t *vswp = ldcp->ldc_vswp; 2990 2991 /* 2992 * We know this is a ctrl/rdx packet so 2993 * cast it into the correct structure. 2994 */ 2995 rdx_pkt = (vio_rdx_msg_t *)pkt; 2996 2997 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 2998 2999 switch (rdx_pkt->tag.vio_subtype) { 3000 case VIO_SUBTYPE_INFO: 3001 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3002 3003 if (vsw_check_flag(ldcp, OUTBOUND, VSW_RDX_INFO_RECV)) 3004 return; 3005 3006 rdx_pkt->tag.vio_sid = ldcp->local_session; 3007 rdx_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3008 3009 DUMP_TAG_PTR((vio_msg_tag_t *)rdx_pkt); 3010 3011 ldcp->lane_out.lstate |= VSW_RDX_ACK_SENT; 3012 3013 (void) vsw_send_msg(ldcp, (void *)rdx_pkt, 3014 sizeof (vio_rdx_msg_t), B_TRUE); 3015 3016 vsw_next_milestone(ldcp); 3017 break; 3018 3019 case VIO_SUBTYPE_ACK: 3020 /* 3021 * Should be handled in-band by callback handler. 
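 * (vsw_dispatch_ctrl_task() consumes RDX ACKs before they are ever
 * queued to the taskq, so reaching this case means the in-band
 * handling was somehow bypassed.)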
3022 */ 3023 DERR(vswp, "%s: Unexpected VIO_SUBTYPE_ACK", __func__); 3024 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3025 break; 3026 3027 case VIO_SUBTYPE_NACK: 3028 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3029 3030 if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_NACK_RECV)) 3031 return; 3032 3033 ldcp->lane_in.lstate |= VSW_RDX_NACK_RECV; 3034 vsw_next_milestone(ldcp); 3035 break; 3036 3037 default: 3038 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 3039 rdx_pkt->tag.vio_subtype); 3040 } 3041 3042 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3043 } 3044 3045 static void 3046 vsw_process_data_pkt(vsw_ldc_t *ldcp, void *dpkt, vio_msg_tag_t *tagp, 3047 uint32_t msglen) 3048 { 3049 uint16_t env = tagp->vio_subtype_env; 3050 vsw_t *vswp = ldcp->ldc_vswp; 3051 3052 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3053 3054 /* session id check */ 3055 if (ldcp->session_status & VSW_PEER_SESSION) { 3056 if (ldcp->peer_session != tagp->vio_sid) { 3057 DERR(vswp, "%s (chan %d): invalid session id (%llx)", 3058 __func__, ldcp->ldc_id, tagp->vio_sid); 3059 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3060 return; 3061 } 3062 } 3063 3064 /* 3065 * It is an error for us to be getting data packets 3066 * before the handshake has completed. 3067 */ 3068 if (ldcp->hphase != VSW_MILESTONE4) { 3069 DERR(vswp, "%s: got data packet before handshake complete " 3070 "hphase %d (%x: %x)", __func__, ldcp->hphase, 3071 ldcp->lane_in.lstate, ldcp->lane_out.lstate); 3072 DUMP_FLAGS(ldcp->lane_in.lstate); 3073 DUMP_FLAGS(ldcp->lane_out.lstate); 3074 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3075 return; 3076 } 3077 3078 /* 3079 * To reduce the locking contention, release the 3080 * ldc_cblock here and re-acquire it once we are done 3081 * receiving packets. 3082 */ 3083 mutex_exit(&ldcp->ldc_cblock); 3084 mutex_enter(&ldcp->ldc_rxlock); 3085 3086 /* 3087 * Switch on vio_subtype envelope, then let lower routines 3088 * decide whether it is an INFO, ACK or NACK packet.
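 *
 * In outline, the dispatch below is:
 *
 *	VIO_DRING_DATA	-> vsw_process_data_dring_pkt()
 *	VIO_PKT_DATA	-> ldcp->rx_pktdata()	(raw/priority frames)
 *	VIO_DESC_DATA	-> vsw_process_data_ibnd_pkt()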
3089 */ 3090 if (env == VIO_DRING_DATA) { 3091 vsw_process_data_dring_pkt(ldcp, dpkt); 3092 } else if (env == VIO_PKT_DATA) { 3093 ldcp->rx_pktdata(ldcp, dpkt, msglen); 3094 } else if (env == VIO_DESC_DATA) { 3095 vsw_process_data_ibnd_pkt(ldcp, dpkt); 3096 } else { 3097 DERR(vswp, "%s: unknown vio_subtype_env (%x)\n", __func__, env); 3098 } 3099 3100 mutex_exit(&ldcp->ldc_rxlock); 3101 mutex_enter(&ldcp->ldc_cblock); 3102 3103 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3104 } 3105 3106 #define SND_DRING_NACK(ldcp, pkt) \ 3107 pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \ 3108 pkt->tag.vio_sid = ldcp->local_session; \ 3109 (void) vsw_send_msg(ldcp, (void *)pkt, \ 3110 sizeof (vio_dring_msg_t), B_TRUE); 3111 3112 static void 3113 vsw_process_data_dring_pkt(vsw_ldc_t *ldcp, void *dpkt) 3114 { 3115 vio_dring_msg_t *dring_pkt; 3116 vnet_public_desc_t *pub_addr = NULL; 3117 vsw_private_desc_t *priv_addr = NULL; 3118 dring_info_t *dp = NULL; 3119 vsw_t *vswp = ldcp->ldc_vswp; 3120 mblk_t *mp = NULL; 3121 mblk_t *bp = NULL; 3122 mblk_t *bpt = NULL; 3123 size_t nbytes = 0; 3124 uint64_t ncookies = 0; 3125 uint64_t chain = 0; 3126 uint64_t len; 3127 uint32_t pos, start, datalen; 3128 uint32_t range_start, range_end; 3129 int32_t end, num, cnt = 0; 3130 int i, rv, msg_rv = 0; 3131 boolean_t ack_needed = B_FALSE; 3132 boolean_t prev_desc_ack = B_FALSE; 3133 int read_attempts = 0; 3134 struct ether_header *ehp; 3135 3136 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3137 3138 /* 3139 * We know this is a data/dring packet so 3140 * cast it into the correct structure. 3141 */ 3142 dring_pkt = (vio_dring_msg_t *)dpkt; 3143 3144 /* 3145 * Switch on the vio_subtype. If its INFO then we need to 3146 * process the data. If its an ACK we need to make sure 3147 * it makes sense (i.e did we send an earlier data/info), 3148 * and if its a NACK then we maybe attempt a retry. 3149 */ 3150 switch (dring_pkt->tag.vio_subtype) { 3151 case VIO_SUBTYPE_INFO: 3152 D2(vswp, "%s(%lld): VIO_SUBTYPE_INFO", __func__, ldcp->ldc_id); 3153 3154 READ_ENTER(&ldcp->lane_in.dlistrw); 3155 if ((dp = vsw_ident2dring(&ldcp->lane_in, 3156 dring_pkt->dring_ident)) == NULL) { 3157 RW_EXIT(&ldcp->lane_in.dlistrw); 3158 3159 DERR(vswp, "%s(%lld): unable to find dring from " 3160 "ident 0x%llx", __func__, ldcp->ldc_id, 3161 dring_pkt->dring_ident); 3162 3163 SND_DRING_NACK(ldcp, dring_pkt); 3164 return; 3165 } 3166 3167 start = pos = dring_pkt->start_idx; 3168 end = dring_pkt->end_idx; 3169 len = dp->num_descriptors; 3170 3171 range_start = range_end = pos; 3172 3173 D2(vswp, "%s(%lld): start index %ld : end %ld\n", 3174 __func__, ldcp->ldc_id, start, end); 3175 3176 if (end == -1) { 3177 num = -1; 3178 } else if (end >= 0) { 3179 num = end >= pos ? 
end - pos + 1: (len - pos + 1) + end; 3180 3181 /* basic sanity check */ 3182 if (end > len) { 3183 RW_EXIT(&ldcp->lane_in.dlistrw); 3184 DERR(vswp, "%s(%lld): endpoint %lld outside " 3185 "ring length %lld", __func__, 3186 ldcp->ldc_id, end, len); 3187 3188 SND_DRING_NACK(ldcp, dring_pkt); 3189 return; 3190 } 3191 } else { 3192 RW_EXIT(&ldcp->lane_in.dlistrw); 3193 DERR(vswp, "%s(%lld): invalid endpoint %lld", 3194 __func__, ldcp->ldc_id, end); 3195 SND_DRING_NACK(ldcp, dring_pkt); 3196 return; 3197 } 3198 3199 while (cnt != num) { 3200 vsw_recheck_desc: 3201 if ((rv = ldc_mem_dring_acquire(dp->handle, 3202 pos, pos)) != 0) { 3203 RW_EXIT(&ldcp->lane_in.dlistrw); 3204 DERR(vswp, "%s(%lld): unable to acquire " 3205 "descriptor at pos %d: err %d", 3206 __func__, ldcp->ldc_id, pos, rv); 3207 SND_DRING_NACK(ldcp, dring_pkt); 3208 ldcp->ldc_stats.ierrors++; 3209 return; 3210 } 3211 3212 pub_addr = (vnet_public_desc_t *)dp->pub_addr + pos; 3213 3214 /* 3215 * When given a bounded range of descriptors 3216 * to process, it's an error to hit a descriptor 3217 * which is not ready. In the non-bounded case 3218 * (end_idx == -1) this simply indicates we have 3219 * reached the end of the current active range. 3220 */ 3221 if (pub_addr->hdr.dstate != VIO_DESC_READY) { 3222 /* unbound - no error */ 3223 if (end == -1) { 3224 if (read_attempts == vsw_read_attempts) 3225 break; 3226 3227 delay(drv_usectohz(vsw_desc_delay)); 3228 read_attempts++; 3229 goto vsw_recheck_desc; 3230 } 3231 3232 /* bounded - error - so NACK back */ 3233 RW_EXIT(&ldcp->lane_in.dlistrw); 3234 DERR(vswp, "%s(%lld): descriptor not READY " 3235 "(%d)", __func__, ldcp->ldc_id, 3236 pub_addr->hdr.dstate); 3237 SND_DRING_NACK(ldcp, dring_pkt); 3238 return; 3239 } 3240 3241 DTRACE_PROBE1(read_attempts, int, read_attempts); 3242 3243 range_end = pos; 3244 3245 /* 3246 * If we ACK'd the previous descriptor then now 3247 * record the new range start position for later 3248 * ACK's. 3249 */ 3250 if (prev_desc_ack) { 3251 range_start = pos; 3252 3253 D2(vswp, "%s(%lld): updating range start to be " 3254 "%d", __func__, ldcp->ldc_id, range_start); 3255 3256 prev_desc_ack = B_FALSE; 3257 } 3258 3259 /* 3260 * Data is padded to align on 8 byte boundary, 3261 * datalen is actual data length, i.e. minus that 3262 * padding. 3263 */ 3264 datalen = pub_addr->nbytes; 3265 3266 /* 3267 * Does peer wish us to ACK when we have finished 3268 * with this descriptor ? 3269 */ 3270 if (pub_addr->hdr.ack) 3271 ack_needed = B_TRUE; 3272 3273 D2(vswp, "%s(%lld): processing desc %lld at pos" 3274 " 0x%llx : dstate 0x%lx : datalen 0x%lx", 3275 __func__, ldcp->ldc_id, pos, pub_addr, 3276 pub_addr->hdr.dstate, datalen); 3277 3278 /* 3279 * Mark that we are starting to process descriptor. 3280 */ 3281 pub_addr->hdr.dstate = VIO_DESC_ACCEPTED; 3282 3283 /* 3284 * Ensure that we ask ldc for an aligned 3285 * number of bytes. 3286 */ 3287 nbytes = (datalen + VNET_IPALIGN + 7) & ~7; 3288 3289 mp = vio_multipool_allocb(&ldcp->vmp, nbytes); 3290 if (mp == NULL) { 3291 ldcp->ldc_stats.rx_vio_allocb_fail++; 3292 /* 3293 * No free receive buffers available, so 3294 * fallback onto allocb(9F). Make sure that 3295 * we get a data buffer which is a multiple 3296 * of 8 as this is required by ldc_mem_copy.
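 * (For example, assuming VNET_IPALIGN is 6, a 60 byte frame rounds
 * up as nbytes = (60 + 6 + 7) & ~7 = 72 above, and the allocb()
 * fallback below requests datalen + VNET_IPALIGN + 8 to preserve
 * the same headroom.)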
3297 */ 3298 DTRACE_PROBE(allocb); 3299 if ((mp = allocb(datalen + VNET_IPALIGN + 8, 3300 BPRI_MED)) == NULL) { 3301 DERR(vswp, "%s(%ld): allocb failed", 3302 __func__, ldcp->ldc_id); 3303 pub_addr->hdr.dstate = VIO_DESC_DONE; 3304 (void) ldc_mem_dring_release(dp->handle, 3305 pos, pos); 3306 ldcp->ldc_stats.ierrors++; 3307 ldcp->ldc_stats.rx_allocb_fail++; 3308 break; 3309 } 3310 } 3311 3312 ncookies = pub_addr->ncookies; 3313 rv = ldc_mem_copy(ldcp->ldc_handle, 3314 (caddr_t)mp->b_rptr, 0, &nbytes, 3315 pub_addr->memcookie, ncookies, LDC_COPY_IN); 3316 3317 if (rv != 0) { 3318 DERR(vswp, "%s(%d): unable to copy in data " 3319 "from %d cookies in desc %d (rv %d)", 3320 __func__, ldcp->ldc_id, ncookies, pos, rv); 3321 freemsg(mp); 3322 3323 pub_addr->hdr.dstate = VIO_DESC_DONE; 3324 (void) ldc_mem_dring_release(dp->handle, 3325 pos, pos); 3326 ldcp->ldc_stats.ierrors++; 3327 break; 3328 } else { 3329 D2(vswp, "%s(%d): copied in %ld bytes" 3330 " using %d cookies", __func__, 3331 ldcp->ldc_id, nbytes, ncookies); 3332 } 3333 3334 /* adjust the read pointer to skip over the padding */ 3335 mp->b_rptr += VNET_IPALIGN; 3336 3337 /* point to the actual end of data */ 3338 mp->b_wptr = mp->b_rptr + datalen; 3339 3340 /* update statistics */ 3341 ehp = (struct ether_header *)mp->b_rptr; 3342 if (IS_BROADCAST(ehp)) 3343 ldcp->ldc_stats.brdcstrcv++; 3344 else if (IS_MULTICAST(ehp)) 3345 ldcp->ldc_stats.multircv++; 3346 3347 ldcp->ldc_stats.ipackets++; 3348 ldcp->ldc_stats.rbytes += datalen; 3349 3350 /* 3351 * IPALIGN space can be used for VLAN_TAG 3352 */ 3353 (void) vsw_vlan_frame_pretag(ldcp->ldc_port, 3354 VSW_VNETPORT, mp); 3355 3356 /* build a chain of received packets */ 3357 if (bp == NULL) { 3358 /* first pkt */ 3359 bp = mp; 3360 bp->b_next = bp->b_prev = NULL; 3361 bpt = bp; 3362 chain = 1; 3363 } else { 3364 mp->b_next = mp->b_prev = NULL; 3365 bpt->b_next = mp; 3366 bpt = mp; 3367 chain++; 3368 } 3369 3370 /* mark we are finished with this descriptor */ 3371 pub_addr->hdr.dstate = VIO_DESC_DONE; 3372 3373 (void) ldc_mem_dring_release(dp->handle, pos, pos); 3374 3375 /* 3376 * Send an ACK back to peer if requested. 3377 */ 3378 if (ack_needed) { 3379 ack_needed = B_FALSE; 3380 3381 dring_pkt->start_idx = range_start; 3382 dring_pkt->end_idx = range_end; 3383 3384 DERR(vswp, "%s(%lld): processed %d %d, ACK" 3385 " requested", __func__, ldcp->ldc_id, 3386 dring_pkt->start_idx, dring_pkt->end_idx); 3387 3388 dring_pkt->dring_process_state = VIO_DP_ACTIVE; 3389 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3390 dring_pkt->tag.vio_sid = ldcp->local_session; 3391 3392 msg_rv = vsw_send_msg(ldcp, (void *)dring_pkt, 3393 sizeof (vio_dring_msg_t), B_FALSE); 3394 3395 /* 3396 * Check if ACK was successfully sent. If not 3397 * we break and deal with that below. 3398 */ 3399 if (msg_rv != 0) 3400 break; 3401 3402 prev_desc_ack = B_TRUE; 3403 range_start = pos; 3404 } 3405 3406 /* next descriptor */ 3407 pos = (pos + 1) % len; 3408 cnt++; 3409 3410 /* 3411 * Break out of loop here and stop processing to 3412 * allow some other network device (or disk) to 3413 * get access to the cpu. 3414 */ 3415 if (chain > vsw_chain_len) { 3416 D3(vswp, "%s(%lld): switching chain of %d " 3417 "msgs", __func__, ldcp->ldc_id, chain); 3418 break; 3419 } 3420 } 3421 RW_EXIT(&ldcp->lane_in.dlistrw); 3422 3423 /* 3424 * If when we attempted to send the ACK we found that the 3425 * channel had been reset then now handle this. 
We deal with 3426 * it here as we cannot reset the channel while holding the 3427 * dlistrw lock, and we don't want to acquire/release it 3428 * continuously in the above loop, as a channel reset should 3429 * be a rare event. 3430 */ 3431 if (msg_rv == ECONNRESET) { 3432 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 3433 break; 3434 } 3435 3436 /* send the chain of packets to be switched */ 3437 if (bp != NULL) { 3438 DTRACE_PROBE1(vsw_rcv_msgs, int, chain); 3439 D3(vswp, "%s(%lld): switching chain of %d msgs", 3440 __func__, ldcp->ldc_id, chain); 3441 vswp->vsw_switch_frame(vswp, bp, VSW_VNETPORT, 3442 ldcp->ldc_port, NULL); 3443 } 3444 3445 DTRACE_PROBE1(msg_cnt, int, cnt); 3446 3447 /* 3448 * We are now finished so ACK back with the state 3449 * set to STOPPING so our peer knows we are finished 3450 */ 3451 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3452 dring_pkt->tag.vio_sid = ldcp->local_session; 3453 3454 dring_pkt->dring_process_state = VIO_DP_STOPPED; 3455 3456 DTRACE_PROBE(stop_process_sent); 3457 3458 /* 3459 * We have not processed any more descriptors beyond 3460 * the last one we ACK'd. 3461 */ 3462 if (prev_desc_ack) 3463 range_start = range_end; 3464 3465 dring_pkt->start_idx = range_start; 3466 dring_pkt->end_idx = range_end; 3467 3468 D2(vswp, "%s(%lld) processed : %d : %d, now stopping", 3469 __func__, ldcp->ldc_id, dring_pkt->start_idx, 3470 dring_pkt->end_idx); 3471 3472 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 3473 sizeof (vio_dring_msg_t), B_TRUE); 3474 break; 3475 3476 case VIO_SUBTYPE_ACK: 3477 D2(vswp, "%s(%lld): VIO_SUBTYPE_ACK", __func__, ldcp->ldc_id); 3478 /* 3479 * Verify that the relevant descriptors are all 3480 * marked as DONE 3481 */ 3482 READ_ENTER(&ldcp->lane_out.dlistrw); 3483 if ((dp = vsw_ident2dring(&ldcp->lane_out, 3484 dring_pkt->dring_ident)) == NULL) { 3485 RW_EXIT(&ldcp->lane_out.dlistrw); 3486 DERR(vswp, "%s: unknown ident in ACK", __func__); 3487 return; 3488 } 3489 3490 start = end = 0; 3491 start = dring_pkt->start_idx; 3492 end = dring_pkt->end_idx; 3493 len = dp->num_descriptors; 3494 3495 3496 mutex_enter(&dp->dlock); 3497 dp->last_ack_recv = end; 3498 ldcp->ldc_stats.dring_data_acks++; 3499 mutex_exit(&dp->dlock); 3500 3501 (void) vsw_reclaim_dring(dp, start); 3502 3503 /* 3504 * If our peer is stopping processing descriptors then 3505 * we check to make sure it has processed all the descriptors 3506 * we have updated. If not then we send it a new message 3507 * to prompt it to restart. 3508 */ 3509 if (dring_pkt->dring_process_state == VIO_DP_STOPPED) { 3510 DTRACE_PROBE(stop_process_recv); 3511 D2(vswp, "%s(%lld): got stopping msg : %d : %d", 3512 __func__, ldcp->ldc_id, dring_pkt->start_idx, 3513 dring_pkt->end_idx); 3514 3515 /* 3516 * Check next descriptor in public section of ring. 3517 * If its marked as READY then we need to prompt our 3518 * peer to start processing the ring again. 3519 */ 3520 i = (end + 1) % len; 3521 pub_addr = (vnet_public_desc_t *)dp->pub_addr + i; 3522 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i; 3523 3524 /* 3525 * Hold the restart lock across all of this to 3526 * make sure that its not possible for us to 3527 * decide that a msg needs to be sent in the future 3528 * but the sending code having already checked is 3529 * about to exit. 
3530 */ 3531 mutex_enter(&dp->restart_lock); 3532 ldcp->ldc_stats.dring_stopped_acks++; 3533 mutex_enter(&priv_addr->dstate_lock); 3534 if (pub_addr->hdr.dstate == VIO_DESC_READY) { 3535 3536 mutex_exit(&priv_addr->dstate_lock); 3537 3538 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO; 3539 dring_pkt->tag.vio_sid = ldcp->local_session; 3540 3541 dring_pkt->start_idx = (end + 1) % len; 3542 dring_pkt->end_idx = -1; 3543 3544 D2(vswp, "%s(%lld) : sending restart msg:" 3545 " %d : %d", __func__, ldcp->ldc_id, 3546 dring_pkt->start_idx, dring_pkt->end_idx); 3547 3548 msg_rv = vsw_send_msg(ldcp, (void *)dring_pkt, 3549 sizeof (vio_dring_msg_t), B_FALSE); 3550 ldcp->ldc_stats.dring_data_msgs++; 3551 3552 } else { 3553 mutex_exit(&priv_addr->dstate_lock); 3554 dp->restart_reqd = B_TRUE; 3555 } 3556 mutex_exit(&dp->restart_lock); 3557 } 3558 RW_EXIT(&ldcp->lane_out.dlistrw); 3559 3560 /* only do channel reset after dropping dlistrw lock */ 3561 if (msg_rv == ECONNRESET) 3562 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 3563 3564 break; 3565 3566 case VIO_SUBTYPE_NACK: 3567 DWARN(vswp, "%s(%lld): VIO_SUBTYPE_NACK", 3568 __func__, ldcp->ldc_id); 3569 /* 3570 * Something is badly wrong if we are getting NACK's 3571 * for our data pkts. So reset the channel. 3572 */ 3573 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3574 3575 break; 3576 3577 default: 3578 DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__, 3579 ldcp->ldc_id, dring_pkt->tag.vio_subtype); 3580 } 3581 3582 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 3583 } 3584 3585 /* 3586 * dummy pkt data handler function for vnet protocol version 1.0 3587 */ 3588 static void 3589 vsw_process_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen) 3590 { 3591 _NOTE(ARGUNUSED(arg1, arg2, msglen)) 3592 } 3593 3594 /* 3595 * This function handles raw pkt data messages received over the channel. 3596 * Currently, only priority-eth-type frames are received through this mechanism. 3597 * In this case, the frame(data) is present within the message itself which 3598 * is copied into an mblk before switching it. 3599 */ 3600 static void 3601 vsw_process_pkt_data(void *arg1, void *arg2, uint32_t msglen) 3602 { 3603 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg1; 3604 vio_raw_data_msg_t *dpkt = (vio_raw_data_msg_t *)arg2; 3605 uint32_t size; 3606 mblk_t *mp; 3607 vsw_t *vswp = ldcp->ldc_vswp; 3608 vgen_stats_t *statsp = &ldcp->ldc_stats; 3609 lane_t *lp = &ldcp->lane_out; 3610 3611 size = msglen - VIO_PKT_DATA_HDRSIZE; 3612 if (size < ETHERMIN || size > lp->mtu) { 3613 (void) atomic_inc_32(&statsp->rx_pri_fail); 3614 DWARN(vswp, "%s(%lld) invalid size(%d)\n", __func__, 3615 ldcp->ldc_id, size); 3616 return; 3617 } 3618 3619 mp = vio_multipool_allocb(&ldcp->vmp, size + VLAN_TAGSZ); 3620 if (mp == NULL) { 3621 mp = allocb(size + VLAN_TAGSZ, BPRI_MED); 3622 if (mp == NULL) { 3623 (void) atomic_inc_32(&statsp->rx_pri_fail); 3624 DWARN(vswp, "%s(%lld) allocb failure, " 3625 "unable to process priority frame\n", __func__, 3626 ldcp->ldc_id); 3627 return; 3628 } 3629 } 3630 3631 /* skip over the extra space for vlan tag */ 3632 mp->b_rptr += VLAN_TAGSZ; 3633 3634 /* copy the frame from the payload of raw data msg into the mblk */ 3635 bcopy(dpkt->data, mp->b_rptr, size); 3636 mp->b_wptr = mp->b_rptr + size; 3637 3638 /* update stats */ 3639 (void) atomic_inc_64(&statsp->rx_pri_packets); 3640 (void) atomic_add_64(&statsp->rx_pri_bytes, size); 3641 3642 /* 3643 * VLAN_TAGSZ of extra space has been pre-alloc'd if tag is needed. 
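 * (b_rptr was advanced VLAN_TAGSZ (4) bytes above, so if this port
 * needs a tag vsw_vlan_frame_pretag() can grow the header in place,
 * giving the usual tagged layout: dst, src, TPID, TCI, then the
 * original ether_type and payload.)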
3644 */ 3645 (void) vsw_vlan_frame_pretag(ldcp->ldc_port, VSW_VNETPORT, mp); 3646 3647 /* switch the frame to destination */ 3648 vswp->vsw_switch_frame(vswp, mp, VSW_VNETPORT, ldcp->ldc_port, NULL); 3649 } 3650 3651 /* 3652 * Process an in-band descriptor message (most likely from 3653 * OBP). 3654 */ 3655 static void 3656 vsw_process_data_ibnd_pkt(vsw_ldc_t *ldcp, void *pkt) 3657 { 3658 vnet_ibnd_desc_t *ibnd_desc; 3659 dring_info_t *dp = NULL; 3660 vsw_private_desc_t *priv_addr = NULL; 3661 vsw_t *vswp = ldcp->ldc_vswp; 3662 mblk_t *mp = NULL; 3663 size_t nbytes = 0; 3664 size_t off = 0; 3665 uint64_t idx = 0; 3666 uint32_t num = 1, len, datalen = 0; 3667 uint64_t ncookies = 0; 3668 int i, rv; 3669 int j = 0; 3670 3671 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3672 3673 ibnd_desc = (vnet_ibnd_desc_t *)pkt; 3674 3675 switch (ibnd_desc->hdr.tag.vio_subtype) { 3676 case VIO_SUBTYPE_INFO: 3677 D1(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3678 3679 if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV)) 3680 return; 3681 3682 /* 3683 * Data is padded to align on a 8 byte boundary, 3684 * nbytes is actual data length, i.e. minus that 3685 * padding. 3686 */ 3687 datalen = ibnd_desc->nbytes; 3688 3689 D2(vswp, "%s(%lld): processing inband desc : " 3690 ": datalen 0x%lx", __func__, ldcp->ldc_id, datalen); 3691 3692 ncookies = ibnd_desc->ncookies; 3693 3694 /* 3695 * allocb(9F) returns an aligned data block. We 3696 * need to ensure that we ask ldc for an aligned 3697 * number of bytes also. 3698 */ 3699 nbytes = datalen; 3700 if (nbytes & 0x7) { 3701 off = 8 - (nbytes & 0x7); 3702 nbytes += off; 3703 } 3704 3705 /* alloc extra space for VLAN_TAG */ 3706 mp = allocb(datalen + 8, BPRI_MED); 3707 if (mp == NULL) { 3708 DERR(vswp, "%s(%lld): allocb failed", 3709 __func__, ldcp->ldc_id); 3710 ldcp->ldc_stats.rx_allocb_fail++; 3711 return; 3712 } 3713 3714 /* skip over the extra space for VLAN_TAG */ 3715 mp->b_rptr += 8; 3716 3717 rv = ldc_mem_copy(ldcp->ldc_handle, (caddr_t)mp->b_rptr, 3718 0, &nbytes, ibnd_desc->memcookie, (uint64_t)ncookies, 3719 LDC_COPY_IN); 3720 3721 if (rv != 0) { 3722 DERR(vswp, "%s(%d): unable to copy in data from " 3723 "%d cookie(s)", __func__, ldcp->ldc_id, ncookies); 3724 freemsg(mp); 3725 ldcp->ldc_stats.ierrors++; 3726 return; 3727 } 3728 3729 D2(vswp, "%s(%d): copied in %ld bytes using %d cookies", 3730 __func__, ldcp->ldc_id, nbytes, ncookies); 3731 3732 /* point to the actual end of data */ 3733 mp->b_wptr = mp->b_rptr + datalen; 3734 ldcp->ldc_stats.ipackets++; 3735 ldcp->ldc_stats.rbytes += datalen; 3736 3737 /* 3738 * We ACK back every in-band descriptor message we process 3739 */ 3740 ibnd_desc->hdr.tag.vio_subtype = VIO_SUBTYPE_ACK; 3741 ibnd_desc->hdr.tag.vio_sid = ldcp->local_session; 3742 (void) vsw_send_msg(ldcp, (void *)ibnd_desc, 3743 sizeof (vnet_ibnd_desc_t), B_TRUE); 3744 3745 /* 3746 * there is extra space alloc'd for VLAN_TAG 3747 */ 3748 (void) vsw_vlan_frame_pretag(ldcp->ldc_port, VSW_VNETPORT, mp); 3749 3750 /* send the packet to be switched */ 3751 vswp->vsw_switch_frame(vswp, mp, VSW_VNETPORT, 3752 ldcp->ldc_port, NULL); 3753 3754 break; 3755 3756 case VIO_SUBTYPE_ACK: 3757 D1(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3758 3759 /* Verify the ACK is valid */ 3760 idx = ibnd_desc->hdr.desc_handle; 3761 3762 if (idx >= vsw_ntxds) { 3763 cmn_err(CE_WARN, "!vsw%d: corrupted ACK received " 3764 "(idx %ld)", vswp->instance, idx); 3765 return; 3766 } 3767 3768 if ((dp = ldcp->lane_out.dringp) == NULL) { 3769 DERR(vswp, "%s: no dring found", 
__func__); 3770 return; 3771 } 3772 3773 len = dp->num_descriptors; 3774 /* 3775 * If the descriptor we are being ACK'ed for is not the 3776 * one we expected, then pkts were lost somewhere, either 3777 * when we tried to send a msg, or a previous ACK msg from 3778 * our peer. In either case we now reclaim the descriptors 3779 * in the range from the last ACK we received up to the 3780 * current ACK. 3781 */ 3782 if (idx != dp->last_ack_recv) { 3783 DWARN(vswp, "%s: dropped pkts detected, (%ld, %ld)", 3784 __func__, dp->last_ack_recv, idx); 3785 num = idx >= dp->last_ack_recv ? 3786 idx - dp->last_ack_recv + 1: 3787 (len - dp->last_ack_recv + 1) + idx; 3788 } 3789 3790 /* 3791 * When we sent the in-band message to our peer we 3792 * marked the copy in our private ring as READY. We now 3793 * check that the descriptor we are being ACK'ed for is in 3794 * fact READY, i.e. it is one we have shared with our peer. 3795 * 3796 * If it's not we flag an error, but still reset the descriptor 3797 * back to FREE. 3798 */ 3799 for (i = dp->last_ack_recv; j < num; i = (i + 1) % len, j++) { 3800 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i; 3801 mutex_enter(&priv_addr->dstate_lock); 3802 if (priv_addr->dstate != VIO_DESC_READY) { 3803 DERR(vswp, "%s: (%ld) desc at index %ld not " 3804 "READY (0x%lx)", __func__, 3805 ldcp->ldc_id, idx, priv_addr->dstate); 3806 DERR(vswp, "%s: bound %d: ncookies %ld : " 3807 "datalen %ld", __func__, 3808 priv_addr->bound, priv_addr->ncookies, 3809 priv_addr->datalen); 3810 } 3811 D2(vswp, "%s: (%lld) freeing descp at %lld", __func__, 3812 ldcp->ldc_id, idx); 3813 /* release resources associated with sent msg */ 3814 priv_addr->datalen = 0; 3815 priv_addr->dstate = VIO_DESC_FREE; 3816 mutex_exit(&priv_addr->dstate_lock); 3817 } 3818 /* update to next expected value */ 3819 dp->last_ack_recv = (idx + 1) % dp->num_descriptors; 3820 3821 break; 3822 3823 case VIO_SUBTYPE_NACK: 3824 DERR(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3825 3826 /* 3827 * We should only get a NACK if our peer doesn't like 3828 * something about a message we have sent it. If this 3829 * happens we just release the resources associated with 3830 * the message. (We are relying on higher layers to decide 3831 * whether or not to resend.) 3832 */ 3833 3834 /* limit check */ 3835 idx = ibnd_desc->hdr.desc_handle; 3836 3837 if (idx >= vsw_ntxds) { 3838 DERR(vswp, "%s: corrupted NACK received (idx %lld)", 3839 __func__, idx); 3840 return; 3841 } 3842 3843 if ((dp = ldcp->lane_out.dringp) == NULL) { 3844 DERR(vswp, "%s: no dring found", __func__); 3845 return; 3846 } 3847 3848 priv_addr = (vsw_private_desc_t *)dp->priv_addr; 3849 3850 /* move to correct location in ring */ 3851 priv_addr += idx; 3852 3853 /* release resources associated with sent msg */ 3854 mutex_enter(&priv_addr->dstate_lock); 3855 priv_addr->datalen = 0; 3856 priv_addr->dstate = VIO_DESC_FREE; 3857 mutex_exit(&priv_addr->dstate_lock); 3858 3859 break; 3860 3861 default: 3862 DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__, 3863 ldcp->ldc_id, ibnd_desc->hdr.tag.vio_subtype); 3864 } 3865 3866 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 3867 } 3868 3869 static void 3870 vsw_process_err_pkt(vsw_ldc_t *ldcp, void *epkt, vio_msg_tag_t *tagp) 3871 { 3872 _NOTE(ARGUNUSED(epkt)) 3873 3874 vsw_t *vswp = ldcp->ldc_vswp; 3875 uint16_t env = tagp->vio_subtype_env; 3876 3877 D1(vswp, "%s (%lld): enter\n", __func__, ldcp->ldc_id); 3878 3879 /* 3880 * Error vio_subtypes have yet to be defined. So for 3881 * the moment we can't do anything.
3882 */ 3883 D2(vswp, "%s: (%x) vio_subtype env", __func__, env); 3884 3885 D1(vswp, "%s (%lld): exit\n", __func__, ldcp->ldc_id); 3886 } 3887 3888 /* transmit the packet over the given port */ 3889 int 3890 vsw_portsend(vsw_port_t *port, mblk_t *mp, mblk_t *mpt, uint32_t count) 3891 { 3892 vsw_ldc_list_t *ldcl = &port->p_ldclist; 3893 vsw_ldc_t *ldcp; 3894 int status = 0; 3895 uint32_t n; 3896 3897 READ_ENTER(&ldcl->lockrw); 3898 /* 3899 * Note for now, we have a single channel. 3900 */ 3901 ldcp = ldcl->head; 3902 if (ldcp == NULL) { 3903 DERR(port->p_vswp, "vsw_portsend: no ldc: dropping packet\n"); 3904 freemsgchain(mp); 3905 RW_EXIT(&ldcl->lockrw); 3906 return (1); 3907 } 3908 3909 n = vsw_vlan_frame_untag(port, VSW_VNETPORT, &mp, &mpt); 3910 3911 count -= n; 3912 if (count == 0) { 3913 goto vsw_portsend_exit; 3914 } 3915 3916 status = ldcp->tx(ldcp, mp, mpt, count); 3917 3918 vsw_portsend_exit: 3919 RW_EXIT(&ldcl->lockrw); 3920 3921 return (status); 3922 } 3923 3924 /* 3925 * Break up frames into 2 seperate chains: normal and 3926 * priority, based on the frame type. The number of 3927 * priority frames is also counted and returned. 3928 * 3929 * Params: 3930 * vswp: pointer to the instance of vsw 3931 * np: head of packet chain to be broken 3932 * npt: tail of packet chain to be broken 3933 * 3934 * Returns: 3935 * np: head of normal data packets 3936 * npt: tail of normal data packets 3937 * hp: head of high priority packets 3938 * hpt: tail of high priority packets 3939 */ 3940 static uint32_t 3941 vsw_get_pri_packets(vsw_t *vswp, mblk_t **np, mblk_t **npt, 3942 mblk_t **hp, mblk_t **hpt) 3943 { 3944 mblk_t *tmp = NULL; 3945 mblk_t *smp = NULL; 3946 mblk_t *hmp = NULL; /* high prio pkts head */ 3947 mblk_t *hmpt = NULL; /* high prio pkts tail */ 3948 mblk_t *nmp = NULL; /* normal pkts head */ 3949 mblk_t *nmpt = NULL; /* normal pkts tail */ 3950 uint32_t count = 0; 3951 int i; 3952 struct ether_header *ehp; 3953 uint32_t num_types; 3954 uint16_t *types; 3955 3956 tmp = *np; 3957 while (tmp != NULL) { 3958 3959 smp = tmp; 3960 tmp = tmp->b_next; 3961 smp->b_next = NULL; 3962 smp->b_prev = NULL; 3963 3964 ehp = (struct ether_header *)smp->b_rptr; 3965 num_types = vswp->pri_num_types; 3966 types = vswp->pri_types; 3967 for (i = 0; i < num_types; i++) { 3968 if (ehp->ether_type == types[i]) { 3969 /* high priority frame */ 3970 3971 if (hmp != NULL) { 3972 hmpt->b_next = smp; 3973 hmpt = smp; 3974 } else { 3975 hmp = hmpt = smp; 3976 } 3977 count++; 3978 break; 3979 } 3980 } 3981 if (i == num_types) { 3982 /* normal data frame */ 3983 3984 if (nmp != NULL) { 3985 nmpt->b_next = smp; 3986 nmpt = smp; 3987 } else { 3988 nmp = nmpt = smp; 3989 } 3990 } 3991 } 3992 3993 *hp = hmp; 3994 *hpt = hmpt; 3995 *np = nmp; 3996 *npt = nmpt; 3997 3998 return (count); 3999 } 4000 4001 /* 4002 * Wrapper function to transmit normal and/or priority frames over the channel. 
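 *
 * For illustration, the flow below is roughly:
 *
 *	n = vsw_get_pri_packets(vswp, &nmp, &nmpt, &hmp, &hmpt);
 *	while (hmp != NULL)  vsw_ldcsend_pkt(ldcp, pkt);   -- raw (OOB) path
 *	vsw_ldctx(ldcp, nmp, nmpt, count - n);             -- dring path
 *
 * (this restates the code below; it is not a separate interface)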
4003 */ 4004 static int 4005 vsw_ldctx_pri(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count) 4006 { 4007 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 4008 mblk_t *tmp; 4009 mblk_t *smp; 4010 mblk_t *hmp; /* high prio pkts head */ 4011 mblk_t *hmpt; /* high prio pkts tail */ 4012 mblk_t *nmp; /* normal pkts head */ 4013 mblk_t *nmpt; /* normal pkts tail */ 4014 uint32_t n = 0; 4015 vsw_t *vswp = ldcp->ldc_vswp; 4016 4017 ASSERT(VSW_PRI_ETH_DEFINED(vswp)); 4018 ASSERT(count != 0); 4019 4020 nmp = mp; 4021 nmpt = mpt; 4022 4023 /* gather any priority frames from the chain of packets */ 4024 n = vsw_get_pri_packets(vswp, &nmp, &nmpt, &hmp, &hmpt); 4025 4026 /* transmit priority frames */ 4027 tmp = hmp; 4028 while (tmp != NULL) { 4029 smp = tmp; 4030 tmp = tmp->b_next; 4031 smp->b_next = NULL; 4032 vsw_ldcsend_pkt(ldcp, smp); 4033 } 4034 4035 count -= n; 4036 4037 if (count == 0) { 4038 /* no normal data frames to process */ 4039 return (0); 4040 } 4041 4042 return (vsw_ldctx(ldcp, nmp, nmpt, count)); 4043 } 4044 4045 /* 4046 * Wrapper function to transmit normal frames over the channel. 4047 */ 4048 static int 4049 vsw_ldctx(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count) 4050 { 4051 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 4052 mblk_t *tmp = NULL; 4053 4054 ASSERT(count != 0); 4055 /* 4056 * If the TX thread is enabled, then queue the 4057 * ordinary frames and signal the tx thread. 4058 */ 4059 if (ldcp->tx_thread != NULL) { 4060 4061 mutex_enter(&ldcp->tx_thr_lock); 4062 4063 if ((ldcp->tx_cnt + count) >= vsw_max_tx_qcount) { 4064 /* 4065 * If we reached queue limit, 4066 * do not queue new packets, 4067 * drop them. 4068 */ 4069 ldcp->ldc_stats.tx_qfull += count; 4070 mutex_exit(&ldcp->tx_thr_lock); 4071 freemsgchain(mp); 4072 goto exit; 4073 } 4074 if (ldcp->tx_mhead == NULL) { 4075 ldcp->tx_mhead = mp; 4076 ldcp->tx_mtail = mpt; 4077 cv_signal(&ldcp->tx_thr_cv); 4078 } else { 4079 ldcp->tx_mtail->b_next = mp; 4080 ldcp->tx_mtail = mpt; 4081 } 4082 ldcp->tx_cnt += count; 4083 mutex_exit(&ldcp->tx_thr_lock); 4084 } else { 4085 while (mp != NULL) { 4086 tmp = mp->b_next; 4087 mp->b_next = mp->b_prev = NULL; 4088 (void) vsw_ldcsend(ldcp, mp, 1); 4089 mp = tmp; 4090 } 4091 } 4092 4093 exit: 4094 return (0); 4095 } 4096 4097 /* 4098 * This function transmits the frame in the payload of a raw data 4099 * (VIO_PKT_DATA) message. Thus, it provides an Out-Of-Band path to 4100 * send special frames with high priorities, without going through 4101 * the normal data path which uses descriptor ring mechanism. 4102 */ 4103 static void 4104 vsw_ldcsend_pkt(vsw_ldc_t *ldcp, mblk_t *mp) 4105 { 4106 vio_raw_data_msg_t *pkt; 4107 mblk_t *bp; 4108 mblk_t *nmp = NULL; 4109 caddr_t dst; 4110 uint32_t mblksz; 4111 uint32_t size; 4112 uint32_t nbytes; 4113 int rv; 4114 vsw_t *vswp = ldcp->ldc_vswp; 4115 vgen_stats_t *statsp = &ldcp->ldc_stats; 4116 4117 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 4118 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 4119 (void) atomic_inc_32(&statsp->tx_pri_fail); 4120 DWARN(vswp, "%s(%lld) status(%d) lstate(0x%llx), dropping " 4121 "packet\n", __func__, ldcp->ldc_id, ldcp->ldc_status, 4122 ldcp->lane_out.lstate); 4123 goto send_pkt_exit; 4124 } 4125 4126 size = msgsize(mp); 4127 4128 /* frame size bigger than available payload len of raw data msg ? 
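(i.e. size may not exceed ldcp->msglen - VIO_PKT_DATA_HDRSIZE) 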
*/ 4129 if (size > (size_t)(ldcp->msglen - VIO_PKT_DATA_HDRSIZE)) { 4130 (void) atomic_inc_32(&statsp->tx_pri_fail); 4131 DWARN(vswp, "%s(%lld) invalid size(%d)\n", __func__, 4132 ldcp->ldc_id, size); 4133 goto send_pkt_exit; 4134 } 4135 4136 if (size < ETHERMIN) 4137 size = ETHERMIN; 4138 4139 /* alloc space for a raw data message */ 4140 nmp = vio_allocb(vswp->pri_tx_vmp); 4141 if (nmp == NULL) { 4142 (void) atomic_inc_32(&statsp->tx_pri_fail); 4143 DWARN(vswp, "vio_allocb failed\n"); 4144 goto send_pkt_exit; 4145 } 4146 pkt = (vio_raw_data_msg_t *)nmp->b_rptr; 4147 4148 /* copy frame into the payload of raw data message */ 4149 dst = (caddr_t)pkt->data; 4150 for (bp = mp; bp != NULL; bp = bp->b_cont) { 4151 mblksz = MBLKL(bp); 4152 bcopy(bp->b_rptr, dst, mblksz); 4153 dst += mblksz; 4154 } 4155 4156 /* setup the raw data msg */ 4157 pkt->tag.vio_msgtype = VIO_TYPE_DATA; 4158 pkt->tag.vio_subtype = VIO_SUBTYPE_INFO; 4159 pkt->tag.vio_subtype_env = VIO_PKT_DATA; 4160 pkt->tag.vio_sid = ldcp->local_session; 4161 nbytes = VIO_PKT_DATA_HDRSIZE + size; 4162 4163 /* send the msg over ldc */ 4164 rv = vsw_send_msg(ldcp, (void *)pkt, nbytes, B_TRUE); 4165 if (rv != 0) { 4166 (void) atomic_inc_32(&statsp->tx_pri_fail); 4167 DWARN(vswp, "%s(%lld) Error sending priority frame\n", __func__, 4168 ldcp->ldc_id); 4169 goto send_pkt_exit; 4170 } 4171 4172 /* update stats: one more priority packet, 'size' more bytes */ 4173 (void) atomic_inc_64(&statsp->tx_pri_packets); 4174 (void) atomic_add_64(&statsp->tx_pri_bytes, size); 4175 4176 send_pkt_exit: 4177 if (nmp != NULL) 4178 freemsg(nmp); 4179 freemsg(mp); 4180 } 4181 4182 /* 4183 * Transmit the packet over the given LDC channel. 4184 * 4185 * The 'retries' argument indicates how many times a packet 4186 * is retried before it is dropped. Note, the retry is done 4187 * only for a resource-related failure; for all other failures 4188 * the packet is dropped immediately. 4189 */ 4190 static int 4191 vsw_ldcsend(vsw_ldc_t *ldcp, mblk_t *mp, uint32_t retries) 4192 { 4193 int i; 4194 int rc; 4195 int status = 0; 4196 vsw_port_t *port = ldcp->ldc_port; 4197 dring_info_t *dp = NULL; 4198 4199 4200 for (i = 0; i < retries; ) { 4201 /* 4202 * Send the message out using the appropriate 4203 * transmit function which will free the mblk when it 4204 * is finished with it. 4205 */ 4206 mutex_enter(&port->tx_lock); 4207 if (port->transmit != NULL) { 4208 status = (*port->transmit)(ldcp, mp); 4209 } 4210 if (status == LDC_TX_SUCCESS) { 4211 mutex_exit(&port->tx_lock); 4212 break; 4213 } 4214 i++; /* increment the counter here */ 4215 4216 /* If it's the last retry, update the oerrors count */ 4217 if ((i == retries) && (status == LDC_TX_NORESOURCES)) { 4218 ldcp->ldc_stats.oerrors++; 4219 } 4220 mutex_exit(&port->tx_lock); 4221 4222 if (status != LDC_TX_NORESOURCES) { 4223 /* 4224 * No retrying required for errors unrelated 4225 * to resources. 4226 */ 4227 break; 4228 } 4229 READ_ENTER(&ldcp->lane_out.dlistrw); 4230 if (((dp = ldcp->lane_out.dringp) != NULL) && 4231 ((VSW_VER_GTEQ(ldcp, 1, 2) && 4232 (ldcp->lane_out.xfer_mode & VIO_DRING_MODE_V1_2)) || 4233 ((VSW_VER_LT(ldcp, 1, 2) && 4234 (ldcp->lane_out.xfer_mode == VIO_DRING_MODE_V1_0))))) { 4235 rc = vsw_reclaim_dring(dp, dp->end_idx); 4236 } else { 4237 /* 4238 * If there is no dring or the xfer_mode is 4239 * set to DESC_MODE (i.e., OBP), then simply break here. 4240 */ 4241 RW_EXIT(&ldcp->lane_out.dlistrw); 4242 break; 4243 } 4244 RW_EXIT(&ldcp->lane_out.dlistrw); 4245 4246 /* 4247 * Delay only if none were reclaimed 4248 * and it's not the last retry.
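 * (vsw_ldc_tx_delay is in microseconds; drv_usectohz() converts
 * it to the clock ticks that delay() expects)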
4249 */ 4250 if ((rc == 0) && (i < retries)) { 4251 delay(drv_usectohz(vsw_ldc_tx_delay)); 4252 } 4253 } 4254 freemsg(mp); 4255 return (status); 4256 } 4257 4258 /* 4259 * Send packet out via descriptor ring to a logical device. 4260 */ 4261 static int 4262 vsw_dringsend(vsw_ldc_t *ldcp, mblk_t *mp) 4263 { 4264 vio_dring_msg_t dring_pkt; 4265 dring_info_t *dp = NULL; 4266 vsw_private_desc_t *priv_desc = NULL; 4267 vnet_public_desc_t *pub = NULL; 4268 vsw_t *vswp = ldcp->ldc_vswp; 4269 mblk_t *bp; 4270 size_t n, size; 4271 caddr_t bufp; 4272 int idx; 4273 int status = LDC_TX_SUCCESS; 4274 struct ether_header *ehp = (struct ether_header *)mp->b_rptr; 4275 lane_t *lp = &ldcp->lane_out; 4276 4277 D1(vswp, "%s(%lld): enter\n", __func__, ldcp->ldc_id); 4278 4279 /* TODO: make test a macro */ 4280 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 4281 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 4282 DWARN(vswp, "%s(%lld) status(%d) lstate(0x%llx), dropping " 4283 "packet\n", __func__, ldcp->ldc_id, ldcp->ldc_status, 4284 ldcp->lane_out.lstate); 4285 ldcp->ldc_stats.oerrors++; 4286 return (LDC_TX_FAILURE); 4287 } 4288 4289 /* 4290 * Note - using first ring only, this may change 4291 * in the future. 4292 */ 4293 READ_ENTER(&ldcp->lane_out.dlistrw); 4294 if ((dp = ldcp->lane_out.dringp) == NULL) { 4295 RW_EXIT(&ldcp->lane_out.dlistrw); 4296 DERR(vswp, "%s(%lld): no dring for outbound lane on" 4297 " channel %d", __func__, ldcp->ldc_id, ldcp->ldc_id); 4298 ldcp->ldc_stats.oerrors++; 4299 return (LDC_TX_FAILURE); 4300 } 4301 4302 size = msgsize(mp); 4303 if (size > (size_t)lp->mtu) { 4304 RW_EXIT(&ldcp->lane_out.dlistrw); 4305 DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__, 4306 ldcp->ldc_id, size); 4307 ldcp->ldc_stats.oerrors++; 4308 return (LDC_TX_FAILURE); 4309 } 4310 4311 /* 4312 * Find a free descriptor 4313 * 4314 * Note: for the moment we are assuming that we will only 4315 * have one dring going from the switch to each of its 4316 * peers. This may change in the future. 4317 */ 4318 if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) { 4319 D2(vswp, "%s(%lld): no descriptor available for ring " 4320 "at 0x%llx", __func__, ldcp->ldc_id, dp); 4321 4322 /* nothing more we can do */ 4323 status = LDC_TX_NORESOURCES; 4324 ldcp->ldc_stats.tx_no_desc++; 4325 goto vsw_dringsend_free_exit; 4326 } else { 4327 D2(vswp, "%s(%lld): free private descriptor found at pos %ld " 4328 "addr 0x%llx\n", __func__, ldcp->ldc_id, idx, priv_desc); 4329 } 4330 4331 /* copy data into the descriptor */ 4332 bufp = priv_desc->datap; 4333 bufp += VNET_IPALIGN; 4334 for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) { 4335 n = MBLKL(bp); 4336 bcopy(bp->b_rptr, bufp, n); 4337 bufp += n; 4338 } 4339 4340 priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size; 4341 4342 pub = priv_desc->descp; 4343 pub->nbytes = priv_desc->datalen; 4344 4345 /* update statistics */ 4346 if (IS_BROADCAST(ehp)) 4347 ldcp->ldc_stats.brdcstxmt++; 4348 else if (IS_MULTICAST(ehp)) 4349 ldcp->ldc_stats.multixmt++; 4350 ldcp->ldc_stats.opackets++; 4351 ldcp->ldc_stats.obytes += priv_desc->datalen; 4352 4353 mutex_enter(&priv_desc->dstate_lock); 4354 pub->hdr.dstate = VIO_DESC_READY; 4355 mutex_exit(&priv_desc->dstate_lock); 4356 4357 /* 4358 * Determine whether or not we need to send a message to our 4359 * peer prompting them to read our newly updated descriptor(s). 
4360 */ 4361 mutex_enter(&dp->restart_lock); 4362 if (dp->restart_reqd) { 4363 dp->restart_reqd = B_FALSE; 4364 ldcp->ldc_stats.dring_data_msgs++; 4365 mutex_exit(&dp->restart_lock); 4366 4367 /* 4368 * Send a vio_dring_msg to peer to prompt them to read 4369 * the updated descriptor ring. 4370 */ 4371 dring_pkt.tag.vio_msgtype = VIO_TYPE_DATA; 4372 dring_pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 4373 dring_pkt.tag.vio_subtype_env = VIO_DRING_DATA; 4374 dring_pkt.tag.vio_sid = ldcp->local_session; 4375 4376 /* Note - for now using first ring */ 4377 dring_pkt.dring_ident = dp->ident; 4378 4379 /* 4380 * If last_ack_recv is -1 then we know we've not 4381 * received any ack's yet, so this must be the first 4382 * msg sent, so set the start to the begining of the ring. 4383 */ 4384 mutex_enter(&dp->dlock); 4385 if (dp->last_ack_recv == -1) { 4386 dring_pkt.start_idx = 0; 4387 } else { 4388 dring_pkt.start_idx = 4389 (dp->last_ack_recv + 1) % dp->num_descriptors; 4390 } 4391 dring_pkt.end_idx = -1; 4392 mutex_exit(&dp->dlock); 4393 4394 D3(vswp, "%s(%lld): dring 0x%llx : ident 0x%llx\n", __func__, 4395 ldcp->ldc_id, dp, dring_pkt.dring_ident); 4396 D3(vswp, "%s(%lld): start %lld : end %lld :\n", 4397 __func__, ldcp->ldc_id, dring_pkt.start_idx, 4398 dring_pkt.end_idx); 4399 4400 RW_EXIT(&ldcp->lane_out.dlistrw); 4401 4402 (void) vsw_send_msg(ldcp, (void *)&dring_pkt, 4403 sizeof (vio_dring_msg_t), B_TRUE); 4404 4405 return (status); 4406 4407 } else { 4408 mutex_exit(&dp->restart_lock); 4409 D2(vswp, "%s(%lld): updating descp %d", __func__, 4410 ldcp->ldc_id, idx); 4411 } 4412 4413 vsw_dringsend_free_exit: 4414 4415 RW_EXIT(&ldcp->lane_out.dlistrw); 4416 4417 D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id); 4418 return (status); 4419 } 4420 4421 /* 4422 * Send an in-band descriptor message over ldc. 4423 */ 4424 static int 4425 vsw_descrsend(vsw_ldc_t *ldcp, mblk_t *mp) 4426 { 4427 vsw_t *vswp = ldcp->ldc_vswp; 4428 vnet_ibnd_desc_t ibnd_msg; 4429 vsw_private_desc_t *priv_desc = NULL; 4430 dring_info_t *dp = NULL; 4431 size_t n, size = 0; 4432 caddr_t bufp; 4433 mblk_t *bp; 4434 int idx, i; 4435 int status = LDC_TX_SUCCESS; 4436 static int warn_msg = 1; 4437 lane_t *lp = &ldcp->lane_out; 4438 4439 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 4440 4441 ASSERT(mp != NULL); 4442 4443 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 4444 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 4445 DERR(vswp, "%s(%lld) status(%d) state (0x%llx), dropping pkt", 4446 __func__, ldcp->ldc_id, ldcp->ldc_status, 4447 ldcp->lane_out.lstate); 4448 ldcp->ldc_stats.oerrors++; 4449 return (LDC_TX_FAILURE); 4450 } 4451 4452 /* 4453 * only expect single dring to exist, which we use 4454 * as an internal buffer, rather than a transfer channel. 
4455 */ 4456 READ_ENTER(&ldcp->lane_out.dlistrw); 4457 if ((dp = ldcp->lane_out.dringp) == NULL) { 4458 DERR(vswp, "%s(%lld): no dring for outbound lane", 4459 __func__, ldcp->ldc_id); 4460 DERR(vswp, "%s(%lld) status(%d) state (0x%llx)", __func__, 4461 ldcp->ldc_id, ldcp->ldc_status, ldcp->lane_out.lstate); 4462 RW_EXIT(&ldcp->lane_out.dlistrw); 4463 ldcp->ldc_stats.oerrors++; 4464 return (LDC_TX_FAILURE); 4465 } 4466 4467 size = msgsize(mp); 4468 if (size > (size_t)lp->mtu) { 4469 RW_EXIT(&ldcp->lane_out.dlistrw); 4470 DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__, 4471 ldcp->ldc_id, size); 4472 ldcp->ldc_stats.oerrors++; 4473 return (LDC_TX_FAILURE); 4474 } 4475 4476 /* 4477 * Find a free descriptor in our buffer ring 4478 */ 4479 if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) { 4480 RW_EXIT(&ldcp->lane_out.dlistrw); 4481 if (warn_msg) { 4482 DERR(vswp, "%s(%lld): no descriptor available for ring " 4483 "at 0x%llx", __func__, ldcp->ldc_id, dp); 4484 warn_msg = 0; 4485 } 4486 4487 /* nothing more we can do */ 4488 status = LDC_TX_NORESOURCES; 4489 goto vsw_descrsend_free_exit; 4490 } else { 4491 D2(vswp, "%s(%lld): free private descriptor found at pos " 4492 "%ld addr 0x%x\n", __func__, ldcp->ldc_id, idx, priv_desc); 4493 warn_msg = 1; 4494 } 4495 4496 /* copy data into the descriptor */ 4497 bufp = priv_desc->datap; 4498 for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) { 4499 n = MBLKL(bp); 4500 bcopy(bp->b_rptr, bufp, n); 4501 bufp += n; 4502 } 4503 4504 priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size; 4505 4506 /* create and send the in-band descp msg */ 4507 ibnd_msg.hdr.tag.vio_msgtype = VIO_TYPE_DATA; 4508 ibnd_msg.hdr.tag.vio_subtype = VIO_SUBTYPE_INFO; 4509 ibnd_msg.hdr.tag.vio_subtype_env = VIO_DESC_DATA; 4510 ibnd_msg.hdr.tag.vio_sid = ldcp->local_session; 4511 4512 /* 4513 * Copy the mem cookies describing the data from the 4514 * private region of the descriptor ring into the inband 4515 * descriptor. 
4516 */ 4517 for (i = 0; i < priv_desc->ncookies; i++) { 4518 bcopy(&priv_desc->memcookie[i], &ibnd_msg.memcookie[i], 4519 sizeof (ldc_mem_cookie_t)); 4520 } 4521 4522 ibnd_msg.hdr.desc_handle = idx; 4523 ibnd_msg.ncookies = priv_desc->ncookies; 4524 ibnd_msg.nbytes = size; 4525 4526 ldcp->ldc_stats.opackets++; 4527 ldcp->ldc_stats.obytes += size; 4528 4529 RW_EXIT(&ldcp->lane_out.dlistrw); 4530 4531 (void) vsw_send_msg(ldcp, (void *)&ibnd_msg, 4532 sizeof (vnet_ibnd_desc_t), B_TRUE); 4533 4534 vsw_descrsend_free_exit: 4535 4536 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 4537 return (status); 4538 } 4539 4540 static void 4541 vsw_send_ver(void *arg) 4542 { 4543 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 4544 vsw_t *vswp = ldcp->ldc_vswp; 4545 lane_t *lp = &ldcp->lane_out; 4546 vio_ver_msg_t ver_msg; 4547 4548 D1(vswp, "%s enter", __func__); 4549 4550 ver_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 4551 ver_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 4552 ver_msg.tag.vio_subtype_env = VIO_VER_INFO; 4553 ver_msg.tag.vio_sid = ldcp->local_session; 4554 4555 if (vsw_obp_ver_proto_workaround == B_FALSE) { 4556 ver_msg.ver_major = vsw_versions[0].ver_major; 4557 ver_msg.ver_minor = vsw_versions[0].ver_minor; 4558 } else { 4559 /* use the major,minor that we've ack'd */ 4560 lane_t *lpi = &ldcp->lane_in; 4561 ver_msg.ver_major = lpi->ver_major; 4562 ver_msg.ver_minor = lpi->ver_minor; 4563 } 4564 ver_msg.dev_class = VDEV_NETWORK_SWITCH; 4565 4566 lp->lstate |= VSW_VER_INFO_SENT; 4567 lp->ver_major = ver_msg.ver_major; 4568 lp->ver_minor = ver_msg.ver_minor; 4569 4570 DUMP_TAG(ver_msg.tag); 4571 4572 (void) vsw_send_msg(ldcp, &ver_msg, sizeof (vio_ver_msg_t), B_TRUE); 4573 4574 D1(vswp, "%s (%d): exit", __func__, ldcp->ldc_id); 4575 } 4576 4577 static void 4578 vsw_send_attr(vsw_ldc_t *ldcp) 4579 { 4580 vsw_t *vswp = ldcp->ldc_vswp; 4581 lane_t *lp = &ldcp->lane_out; 4582 vnet_attr_msg_t attr_msg; 4583 4584 D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id); 4585 4586 /* 4587 * Subtype is set to INFO by default 4588 */ 4589 attr_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 4590 attr_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 4591 attr_msg.tag.vio_subtype_env = VIO_ATTR_INFO; 4592 attr_msg.tag.vio_sid = ldcp->local_session; 4593 4594 /* payload copied from default settings for lane */ 4595 attr_msg.mtu = lp->mtu; 4596 attr_msg.addr_type = lp->addr_type; 4597 attr_msg.xfer_mode = lp->xfer_mode; 4598 attr_msg.ack_freq = lp->xfer_mode; 4599 4600 READ_ENTER(&vswp->if_lockrw); 4601 attr_msg.addr = vnet_macaddr_strtoul((vswp->if_addr).ether_addr_octet); 4602 RW_EXIT(&vswp->if_lockrw); 4603 4604 ldcp->lane_out.lstate |= VSW_ATTR_INFO_SENT; 4605 4606 DUMP_TAG(attr_msg.tag); 4607 4608 (void) vsw_send_msg(ldcp, &attr_msg, sizeof (vnet_attr_msg_t), B_TRUE); 4609 4610 D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id); 4611 } 4612 4613 /* 4614 * Create dring info msg (which also results in the creation of 4615 * a dring). 4616 */ 4617 static vio_dring_reg_msg_t * 4618 vsw_create_dring_info_pkt(vsw_ldc_t *ldcp) 4619 { 4620 vio_dring_reg_msg_t *mp; 4621 dring_info_t *dp; 4622 vsw_t *vswp = ldcp->ldc_vswp; 4623 4624 D1(vswp, "vsw_create_dring_info_pkt enter\n"); 4625 4626 /* 4627 * If we can't create a dring, obviously no point sending 4628 * a message. 
4629 */ 4630 if ((dp = vsw_create_dring(ldcp)) == NULL) 4631 return (NULL); 4632 4633 mp = kmem_zalloc(sizeof (vio_dring_reg_msg_t), KM_SLEEP); 4634 4635 mp->tag.vio_msgtype = VIO_TYPE_CTRL; 4636 mp->tag.vio_subtype = VIO_SUBTYPE_INFO; 4637 mp->tag.vio_subtype_env = VIO_DRING_REG; 4638 mp->tag.vio_sid = ldcp->local_session; 4639 4640 /* payload */ 4641 mp->num_descriptors = dp->num_descriptors; 4642 mp->descriptor_size = dp->descriptor_size; 4643 mp->options = dp->options; 4644 mp->ncookies = dp->ncookies; 4645 bcopy(&dp->cookie[0], &mp->cookie[0], sizeof (ldc_mem_cookie_t)); 4646 4647 mp->dring_ident = 0; 4648 4649 D1(vswp, "vsw_create_dring_info_pkt exit\n"); 4650 4651 return (mp); 4652 } 4653 4654 static void 4655 vsw_send_dring_info(vsw_ldc_t *ldcp) 4656 { 4657 vio_dring_reg_msg_t *dring_msg; 4658 vsw_t *vswp = ldcp->ldc_vswp; 4659 4660 D1(vswp, "%s: (%ld) enter", __func__, ldcp->ldc_id); 4661 4662 dring_msg = vsw_create_dring_info_pkt(ldcp); 4663 if (dring_msg == NULL) { 4664 cmn_err(CE_WARN, "!vsw%d: %s: error creating msg", 4665 vswp->instance, __func__); 4666 return; 4667 } 4668 4669 ldcp->lane_out.lstate |= VSW_DRING_INFO_SENT; 4670 4671 DUMP_TAG_PTR((vio_msg_tag_t *)dring_msg); 4672 4673 (void) vsw_send_msg(ldcp, dring_msg, 4674 sizeof (vio_dring_reg_msg_t), B_TRUE); 4675 4676 kmem_free(dring_msg, sizeof (vio_dring_reg_msg_t)); 4677 4678 D1(vswp, "%s: (%ld) exit", __func__, ldcp->ldc_id); 4679 } 4680 4681 static void 4682 vsw_send_rdx(vsw_ldc_t *ldcp) 4683 { 4684 vsw_t *vswp = ldcp->ldc_vswp; 4685 vio_rdx_msg_t rdx_msg; 4686 4687 D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id); 4688 4689 rdx_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 4690 rdx_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 4691 rdx_msg.tag.vio_subtype_env = VIO_RDX; 4692 rdx_msg.tag.vio_sid = ldcp->local_session; 4693 4694 ldcp->lane_in.lstate |= VSW_RDX_INFO_SENT; 4695 4696 DUMP_TAG(rdx_msg.tag); 4697 4698 (void) vsw_send_msg(ldcp, &rdx_msg, sizeof (vio_rdx_msg_t), B_TRUE); 4699 4700 D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id); 4701 } 4702 4703 /* 4704 * Generic routine to send message out over ldc channel. 4705 * 4706 * It is possible that when we attempt to write over the ldc channel 4707 * that we get notified that it has been reset. Depending on the value 4708 * of the handle_reset flag we either handle that event here or simply 4709 * notify the caller that the channel was reset. 
4710 */ 4711 static int 4712 vsw_send_msg(vsw_ldc_t *ldcp, void *msgp, int size, boolean_t handle_reset) 4713 { 4714 int rv; 4715 size_t msglen = size; 4716 vio_msg_tag_t *tag = (vio_msg_tag_t *)msgp; 4717 vsw_t *vswp = ldcp->ldc_vswp; 4718 vio_dring_msg_t *dmsg; 4719 vio_raw_data_msg_t *rmsg; 4720 vnet_ibnd_desc_t *imsg; 4721 boolean_t data_msg = B_FALSE; 4722 4723 D1(vswp, "vsw_send_msg (%lld) enter : sending %d bytes", 4724 ldcp->ldc_id, size); 4725 4726 D2(vswp, "send_msg: type 0x%llx", tag->vio_msgtype); 4727 D2(vswp, "send_msg: stype 0x%llx", tag->vio_subtype); 4728 D2(vswp, "send_msg: senv 0x%llx", tag->vio_subtype_env); 4729 4730 mutex_enter(&ldcp->ldc_txlock); 4731 4732 if (tag->vio_subtype == VIO_SUBTYPE_INFO) { 4733 if (tag->vio_subtype_env == VIO_DRING_DATA) { 4734 dmsg = (vio_dring_msg_t *)tag; 4735 dmsg->seq_num = ldcp->lane_out.seq_num; 4736 data_msg = B_TRUE; 4737 } else if (tag->vio_subtype_env == VIO_PKT_DATA) { 4738 rmsg = (vio_raw_data_msg_t *)tag; 4739 rmsg->seq_num = ldcp->lane_out.seq_num; 4740 data_msg = B_TRUE; 4741 } else if (tag->vio_subtype_env == VIO_DESC_DATA) { 4742 imsg = (vnet_ibnd_desc_t *)tag; 4743 imsg->hdr.seq_num = ldcp->lane_out.seq_num; 4744 data_msg = B_TRUE; 4745 } 4746 } 4747 4748 do { 4749 msglen = size; 4750 rv = ldc_write(ldcp->ldc_handle, (caddr_t)msgp, &msglen); 4751 } while (rv == EWOULDBLOCK && --vsw_wretries > 0); 4752 4753 if (rv == 0 && data_msg == B_TRUE) { 4754 ldcp->lane_out.seq_num++; 4755 } 4756 4757 if ((rv != 0) || (msglen != size)) { 4758 DERR(vswp, "vsw_send_msg:ldc_write failed: chan(%lld) rv(%d) " 4759 "size (%d) msglen(%d)\n", ldcp->ldc_id, rv, size, msglen); 4760 ldcp->ldc_stats.oerrors++; 4761 } 4762 4763 mutex_exit(&ldcp->ldc_txlock); 4764 4765 /* 4766 * If channel has been reset we either handle it here or 4767 * simply report back that it has been reset and let caller 4768 * decide what to do. 4769 */ 4770 if (rv == ECONNRESET) { 4771 DWARN(vswp, "%s (%lld) channel reset", __func__, ldcp->ldc_id); 4772 4773 /* 4774 * N.B - must never be holding the dlistrw lock when 4775 * we do a reset of the channel. 4776 */ 4777 if (handle_reset) { 4778 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 4779 } 4780 } 4781 4782 return (rv); 4783 } 4784 4785 /* 4786 * Remove the specified address from the list of address maintained 4787 * in this port node. 
4788 */ 4789 mcst_addr_t * 4790 vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr) 4791 { 4792 vsw_t *vswp = NULL; 4793 vsw_port_t *port = NULL; 4794 mcst_addr_t *prev_p = NULL; 4795 mcst_addr_t *curr_p = NULL; 4796 4797 D1(NULL, "%s: enter : devtype %d : addr 0x%llx", 4798 __func__, devtype, addr); 4799 4800 if (devtype == VSW_VNETPORT) { 4801 port = (vsw_port_t *)arg; 4802 mutex_enter(&port->mca_lock); 4803 prev_p = curr_p = port->mcap; 4804 } else { 4805 vswp = (vsw_t *)arg; 4806 mutex_enter(&vswp->mca_lock); 4807 prev_p = curr_p = vswp->mcap; 4808 } 4809 4810 while (curr_p != NULL) { 4811 if (curr_p->addr == addr) { 4812 D2(NULL, "%s: address found", __func__); 4813 /* match found */ 4814 if (prev_p == curr_p) { 4815 /* list head */ 4816 if (devtype == VSW_VNETPORT) 4817 port->mcap = curr_p->nextp; 4818 else 4819 vswp->mcap = curr_p->nextp; 4820 } else { 4821 prev_p->nextp = curr_p->nextp; 4822 } 4823 break; 4824 } else { 4825 prev_p = curr_p; 4826 curr_p = curr_p->nextp; 4827 } 4828 } 4829 4830 if (devtype == VSW_VNETPORT) 4831 mutex_exit(&port->mca_lock); 4832 else 4833 mutex_exit(&vswp->mca_lock); 4834 4835 D1(NULL, "%s: exit", __func__); 4836 4837 return (curr_p); 4838 } 4839 4840 /* 4841 * Creates a descriptor ring (dring) and links it into the 4842 * link of outbound drings for this channel. 4843 * 4844 * Returns NULL if creation failed. 4845 */ 4846 static dring_info_t * 4847 vsw_create_dring(vsw_ldc_t *ldcp) 4848 { 4849 vsw_private_desc_t *priv_addr = NULL; 4850 vsw_t *vswp = ldcp->ldc_vswp; 4851 ldc_mem_info_t minfo; 4852 dring_info_t *dp, *tp; 4853 int i; 4854 4855 dp = (dring_info_t *)kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 4856 4857 mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL); 4858 4859 /* create public section of ring */ 4860 if ((ldc_mem_dring_create(vsw_ntxds, 4861 VSW_PUB_SIZE, &dp->handle)) != 0) { 4862 4863 DERR(vswp, "vsw_create_dring(%lld): ldc dring create " 4864 "failed", ldcp->ldc_id); 4865 goto create_fail_exit; 4866 } 4867 4868 ASSERT(dp->handle != NULL); 4869 4870 /* 4871 * Get the base address of the public section of the ring. 4872 */ 4873 if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) { 4874 DERR(vswp, "vsw_create_dring(%lld): dring info failed\n", 4875 ldcp->ldc_id); 4876 goto dring_fail_exit; 4877 } else { 4878 ASSERT(minfo.vaddr != 0); 4879 dp->pub_addr = minfo.vaddr; 4880 } 4881 4882 dp->num_descriptors = vsw_ntxds; 4883 dp->descriptor_size = VSW_PUB_SIZE; 4884 dp->options = VIO_TX_DRING; 4885 dp->ncookies = 1; /* guaranteed by ldc */ 4886 4887 /* 4888 * create private portion of ring 4889 */ 4890 dp->priv_addr = (vsw_private_desc_t *)kmem_zalloc( 4891 (sizeof (vsw_private_desc_t) * vsw_ntxds), KM_SLEEP); 4892 4893 if (vsw_setup_ring(ldcp, dp)) { 4894 DERR(vswp, "%s: unable to setup ring", __func__); 4895 goto dring_fail_exit; 4896 } 4897 4898 /* haven't used any descriptors yet */ 4899 dp->end_idx = 0; 4900 dp->last_ack_recv = -1; 4901 4902 /* bind dring to the channel */ 4903 if ((ldc_mem_dring_bind(ldcp->ldc_handle, dp->handle, 4904 LDC_SHADOW_MAP, LDC_MEM_RW, 4905 &dp->cookie[0], &dp->ncookies)) != 0) { 4906 DERR(vswp, "vsw_create_dring: unable to bind to channel " 4907 "%lld", ldcp->ldc_id); 4908 goto dring_fail_exit; 4909 } 4910 4911 mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL); 4912 dp->restart_reqd = B_TRUE; 4913 4914 /* 4915 * Only ever create rings for outgoing lane. Link it onto 4916 * end of list. 
4917 */ 4918 WRITE_ENTER(&ldcp->lane_out.dlistrw); 4919 if (ldcp->lane_out.dringp == NULL) { 4920 D2(vswp, "vsw_create_dring: adding first outbound ring"); 4921 ldcp->lane_out.dringp = dp; 4922 } else { 4923 tp = ldcp->lane_out.dringp; 4924 while (tp->next != NULL) 4925 tp = tp->next; 4926 4927 tp->next = dp; 4928 } 4929 RW_EXIT(&ldcp->lane_out.dlistrw); 4930 4931 return (dp); 4932 4933 dring_fail_exit: 4934 (void) ldc_mem_dring_destroy(dp->handle); 4935 4936 create_fail_exit: 4937 if (dp->priv_addr != NULL) { 4938 priv_addr = dp->priv_addr; 4939 for (i = 0; i < vsw_ntxds; i++) { 4940 if (priv_addr->memhandle != NULL) 4941 (void) ldc_mem_free_handle( 4942 priv_addr->memhandle); 4943 priv_addr++; 4944 } 4945 kmem_free(dp->priv_addr, 4946 (sizeof (vsw_private_desc_t) * vsw_ntxds)); 4947 } 4948 mutex_destroy(&dp->dlock); 4949 4950 kmem_free(dp, sizeof (dring_info_t)); 4951 return (NULL); 4952 } 4953 4954 /* 4955 * Create a ring consisting of just a private portion and link 4956 * it into the list of rings for the outbound lane. 4957 * 4958 * These type of rings are used primarily for temporary data 4959 * storage (i.e. as data buffers). 4960 */ 4961 void 4962 vsw_create_privring(vsw_ldc_t *ldcp) 4963 { 4964 dring_info_t *dp, *tp; 4965 vsw_t *vswp = ldcp->ldc_vswp; 4966 4967 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 4968 4969 dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 4970 4971 mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL); 4972 4973 /* no public section */ 4974 dp->pub_addr = NULL; 4975 4976 dp->priv_addr = kmem_zalloc( 4977 (sizeof (vsw_private_desc_t) * vsw_ntxds), KM_SLEEP); 4978 4979 dp->num_descriptors = vsw_ntxds; 4980 4981 if (vsw_setup_ring(ldcp, dp)) { 4982 DERR(vswp, "%s: setup of ring failed", __func__); 4983 kmem_free(dp->priv_addr, 4984 (sizeof (vsw_private_desc_t) * vsw_ntxds)); 4985 mutex_destroy(&dp->dlock); 4986 kmem_free(dp, sizeof (dring_info_t)); 4987 return; 4988 } 4989 4990 /* haven't used any descriptors yet */ 4991 dp->end_idx = 0; 4992 4993 mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL); 4994 dp->restart_reqd = B_TRUE; 4995 4996 /* 4997 * Only ever create rings for outgoing lane. Link it onto 4998 * end of list. 4999 */ 5000 WRITE_ENTER(&ldcp->lane_out.dlistrw); 5001 if (ldcp->lane_out.dringp == NULL) { 5002 D2(vswp, "%s: adding first outbound privring", __func__); 5003 ldcp->lane_out.dringp = dp; 5004 } else { 5005 tp = ldcp->lane_out.dringp; 5006 while (tp->next != NULL) 5007 tp = tp->next; 5008 5009 tp->next = dp; 5010 } 5011 RW_EXIT(&ldcp->lane_out.dlistrw); 5012 5013 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 5014 } 5015 5016 /* 5017 * Setup the descriptors in the dring. Returns 0 on success, 1 on 5018 * failure. 5019 */ 5020 int 5021 vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp) 5022 { 5023 vnet_public_desc_t *pub_addr = NULL; 5024 vsw_private_desc_t *priv_addr = NULL; 5025 vsw_t *vswp = ldcp->ldc_vswp; 5026 uint64_t *tmpp; 5027 uint64_t offset = 0; 5028 uint32_t ncookies = 0; 5029 static char *name = "vsw_setup_ring"; 5030 int i, j, nc, rv; 5031 size_t data_sz; 5032 5033 priv_addr = dp->priv_addr; 5034 pub_addr = dp->pub_addr; 5035 5036 /* public section may be null but private should never be */ 5037 ASSERT(priv_addr != NULL); 5038 5039 /* 5040 * Allocate the region of memory which will be used to hold 5041 * the data the descriptors will refer to. 
5042 */ 5043 data_sz = vswp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN; 5044 data_sz = VNET_ROUNDUP_2K(data_sz); 5045 dp->desc_data_sz = data_sz; 5046 dp->data_sz = vsw_ntxds * data_sz; 5047 dp->data_addr = kmem_alloc(dp->data_sz, KM_SLEEP); 5048 5049 D2(vswp, "%s: allocated %lld bytes at 0x%llx\n", name, 5050 dp->data_sz, dp->data_addr); 5051 5052 tmpp = (uint64_t *)dp->data_addr; 5053 offset = dp->desc_data_sz/sizeof (tmpp); 5054 5055 /* 5056 * Initialise some of the private and public (if they exist) 5057 * descriptor fields. 5058 */ 5059 for (i = 0; i < vsw_ntxds; i++) { 5060 mutex_init(&priv_addr->dstate_lock, NULL, MUTEX_DRIVER, NULL); 5061 5062 if ((ldc_mem_alloc_handle(ldcp->ldc_handle, 5063 &priv_addr->memhandle)) != 0) { 5064 DERR(vswp, "%s: alloc mem handle failed", name); 5065 goto setup_ring_cleanup; 5066 } 5067 5068 priv_addr->datap = (void *)tmpp; 5069 5070 rv = ldc_mem_bind_handle(priv_addr->memhandle, 5071 (caddr_t)priv_addr->datap, dp->desc_data_sz, 5072 LDC_SHADOW_MAP, LDC_MEM_R|LDC_MEM_W, 5073 &(priv_addr->memcookie[0]), &ncookies); 5074 if (rv != 0) { 5075 DERR(vswp, "%s(%lld): ldc_mem_bind_handle failed " 5076 "(rv %d)", name, ldcp->ldc_id, rv); 5077 goto setup_ring_cleanup; 5078 } 5079 priv_addr->bound = 1; 5080 5081 D2(vswp, "%s: %d: memcookie 0 : addr 0x%llx : size 0x%llx", 5082 name, i, priv_addr->memcookie[0].addr, 5083 priv_addr->memcookie[0].size); 5084 5085 if (ncookies >= (uint32_t)(VSW_MAX_COOKIES + 1)) { 5086 DERR(vswp, "%s(%lld) ldc_mem_bind_handle returned " 5087 "invalid num of cookies (%d) for size 0x%llx", 5088 name, ldcp->ldc_id, ncookies, VSW_RING_EL_DATA_SZ); 5089 5090 goto setup_ring_cleanup; 5091 } else { 5092 for (j = 1; j < ncookies; j++) { 5093 rv = ldc_mem_nextcookie(priv_addr->memhandle, 5094 &(priv_addr->memcookie[j])); 5095 if (rv != 0) { 5096 DERR(vswp, "%s: ldc_mem_nextcookie " 5097 "failed rv (%d)", name, rv); 5098 goto setup_ring_cleanup; 5099 } 5100 D3(vswp, "%s: memcookie %d : addr 0x%llx : " 5101 "size 0x%llx", name, j, 5102 priv_addr->memcookie[j].addr, 5103 priv_addr->memcookie[j].size); 5104 } 5105 5106 } 5107 priv_addr->ncookies = ncookies; 5108 priv_addr->dstate = VIO_DESC_FREE; 5109 5110 if (pub_addr != NULL) { 5111 5112 /* link pub and private sides */ 5113 priv_addr->descp = pub_addr; 5114 5115 pub_addr->ncookies = priv_addr->ncookies; 5116 5117 for (nc = 0; nc < pub_addr->ncookies; nc++) { 5118 bcopy(&priv_addr->memcookie[nc], 5119 &pub_addr->memcookie[nc], 5120 sizeof (ldc_mem_cookie_t)); 5121 } 5122 5123 pub_addr->hdr.dstate = VIO_DESC_FREE; 5124 pub_addr++; 5125 } 5126 5127 /* 5128 * move to next element in the dring and the next 5129 * position in the data buffer. 5130 */ 5131 priv_addr++; 5132 tmpp += offset; 5133 } 5134 5135 return (0); 5136 5137 setup_ring_cleanup: 5138 priv_addr = dp->priv_addr; 5139 5140 for (j = 0; j < i; j++) { 5141 (void) ldc_mem_unbind_handle(priv_addr->memhandle); 5142 (void) ldc_mem_free_handle(priv_addr->memhandle); 5143 5144 mutex_destroy(&priv_addr->dstate_lock); 5145 5146 priv_addr++; 5147 } 5148 kmem_free(dp->data_addr, dp->data_sz); 5149 5150 return (1); 5151 } 5152 5153 /* 5154 * Searches the private section of a ring for a free descriptor, 5155 * starting at the location of the last free descriptor found 5156 * previously. 5157 * 5158 * Returns 0 if free descriptor is available, and updates state 5159 * of private descriptor to VIO_DESC_READY, otherwise returns 1. 
5160 * 5161 * FUTURE: might need to return contiguous range of descriptors 5162 * as dring info msg assumes all will be contiguous. 5163 */ 5164 static int 5165 vsw_dring_find_free_desc(dring_info_t *dringp, 5166 vsw_private_desc_t **priv_p, int *idx) 5167 { 5168 vsw_private_desc_t *addr = NULL; 5169 int num = vsw_ntxds; 5170 int ret = 1; 5171 5172 D1(NULL, "%s enter\n", __func__); 5173 5174 ASSERT(dringp->priv_addr != NULL); 5175 5176 D2(NULL, "%s: searching ring, dringp 0x%llx : start pos %lld", 5177 __func__, dringp, dringp->end_idx); 5178 5179 addr = (vsw_private_desc_t *)dringp->priv_addr + dringp->end_idx; 5180 5181 mutex_enter(&addr->dstate_lock); 5182 if (addr->dstate == VIO_DESC_FREE) { 5183 addr->dstate = VIO_DESC_READY; 5184 *priv_p = addr; 5185 *idx = dringp->end_idx; 5186 dringp->end_idx = (dringp->end_idx + 1) % num; 5187 ret = 0; 5188 5189 } 5190 mutex_exit(&addr->dstate_lock); 5191 5192 /* ring full */ 5193 if (ret == 1) { 5194 D2(NULL, "%s: no desp free: started at %d", __func__, 5195 dringp->end_idx); 5196 } 5197 5198 D1(NULL, "%s: exit\n", __func__); 5199 5200 return (ret); 5201 } 5202 5203 /* 5204 * Map from a dring identifier to the ring itself. Returns 5205 * pointer to ring or NULL if no match found. 5206 * 5207 * Should be called with dlistrw rwlock held as reader. 5208 */ 5209 static dring_info_t * 5210 vsw_ident2dring(lane_t *lane, uint64_t ident) 5211 { 5212 dring_info_t *dp = NULL; 5213 5214 if ((dp = lane->dringp) == NULL) { 5215 return (NULL); 5216 } else { 5217 if (dp->ident == ident) 5218 return (dp); 5219 5220 while (dp != NULL) { 5221 if (dp->ident == ident) 5222 break; 5223 dp = dp->next; 5224 } 5225 } 5226 5227 return (dp); 5228 } 5229 5230 /* 5231 * Set the default lane attributes. These are copied into 5232 * the attr msg we send to our peer. If they are not acceptable 5233 * then (currently) the handshake ends. 5234 */ 5235 static void 5236 vsw_set_lane_attr(vsw_t *vswp, lane_t *lp) 5237 { 5238 bzero(lp, sizeof (lane_t)); 5239 5240 READ_ENTER(&vswp->if_lockrw); 5241 ether_copy(&(vswp->if_addr), &(lp->addr)); 5242 RW_EXIT(&vswp->if_lockrw); 5243 5244 lp->mtu = vswp->max_frame_size; 5245 lp->addr_type = ADDR_TYPE_MAC; 5246 lp->xfer_mode = VIO_DRING_MODE_V1_0; 5247 lp->ack_freq = 0; /* for shared mode */ 5248 lp->seq_num = VNET_ISS; 5249 } 5250 5251 /* 5252 * Verify that the attributes are acceptable. 5253 * 5254 * FUTURE: If some attributes are not acceptable, change them 5255 * our desired values. 5256 */ 5257 static int 5258 vsw_check_attr(vnet_attr_msg_t *pkt, vsw_ldc_t *ldcp) 5259 { 5260 int ret = 0; 5261 struct ether_addr ea; 5262 vsw_port_t *port = ldcp->ldc_port; 5263 lane_t *lp = &ldcp->lane_out; 5264 5265 D1(NULL, "vsw_check_attr enter\n"); 5266 5267 if ((pkt->xfer_mode != VIO_DESC_MODE) && 5268 (pkt->xfer_mode != lp->xfer_mode)) { 5269 D2(NULL, "vsw_check_attr: unknown mode %x\n", pkt->xfer_mode); 5270 ret = 1; 5271 } 5272 5273 /* Only support MAC addresses at moment. */ 5274 if ((pkt->addr_type != ADDR_TYPE_MAC) || (pkt->addr == 0)) { 5275 D2(NULL, "vsw_check_attr: invalid addr_type %x, " 5276 "or address 0x%llx\n", pkt->addr_type, pkt->addr); 5277 ret = 1; 5278 } 5279 5280 /* 5281 * MAC address supplied by device should match that stored 5282 * in the vsw-port OBP node. Need to decide what to do if they 5283 * don't match, for the moment just warn but don't fail. 
5284 */ 5285 vnet_macaddr_ultostr(pkt->addr, ea.ether_addr_octet); 5286 if (ether_cmp(&ea, &port->p_macaddr) != 0) { 5287 DERR(NULL, "vsw_check_attr: device supplied address " 5288 "0x%llx doesn't match node address 0x%llx\n", 5289 pkt->addr, port->p_macaddr); 5290 } 5291 5292 /* 5293 * Ack freq only makes sense in pkt mode, in shared 5294 * mode the ring descriptors say whether or not to 5295 * send back an ACK. 5296 */ 5297 if ((VSW_VER_GTEQ(ldcp, 1, 2) && 5298 (pkt->xfer_mode & VIO_DRING_MODE_V1_2)) || 5299 (VSW_VER_LT(ldcp, 1, 2) && 5300 (pkt->xfer_mode == VIO_DRING_MODE_V1_0))) { 5301 if (pkt->ack_freq > 0) { 5302 D2(NULL, "vsw_check_attr: non zero ack freq " 5303 " in SHM mode\n"); 5304 ret = 1; 5305 } 5306 } 5307 5308 /* 5309 * Note: for the moment we only support ETHER 5310 * frames. This may change in the future. 5311 */ 5312 if ((pkt->mtu > lp->mtu) || (pkt->mtu <= 0)) { 5313 D2(NULL, "vsw_check_attr: invalid MTU (0x%llx)\n", 5314 pkt->mtu); 5315 ret = 1; 5316 } 5317 5318 D1(NULL, "vsw_check_attr exit\n"); 5319 5320 return (ret); 5321 } 5322 5323 /* 5324 * Returns 1 if there is a problem, 0 otherwise. 5325 */ 5326 static int 5327 vsw_check_dring_info(vio_dring_reg_msg_t *pkt) 5328 { 5329 _NOTE(ARGUNUSED(pkt)) 5330 5331 int ret = 0; 5332 5333 D1(NULL, "vsw_check_dring_info enter\n"); 5334 5335 if ((pkt->num_descriptors == 0) || 5336 (pkt->descriptor_size == 0) || 5337 (pkt->ncookies != 1)) { 5338 DERR(NULL, "vsw_check_dring_info: invalid dring msg"); 5339 ret = 1; 5340 } 5341 5342 D1(NULL, "vsw_check_dring_info exit\n"); 5343 5344 return (ret); 5345 } 5346 5347 /* 5348 * Returns 1 if two memory cookies match. Otherwise returns 0. 5349 */ 5350 static int 5351 vsw_mem_cookie_match(ldc_mem_cookie_t *m1, ldc_mem_cookie_t *m2) 5352 { 5353 if ((m1->addr != m2->addr) || 5354 (m2->size != m2->size)) { 5355 return (0); 5356 } else { 5357 return (1); 5358 } 5359 } 5360 5361 /* 5362 * Returns 1 if ring described in reg message matches that 5363 * described by dring_info structure. Otherwise returns 0. 5364 */ 5365 static int 5366 vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg) 5367 { 5368 if ((msg->descriptor_size != dp->descriptor_size) || 5369 (msg->num_descriptors != dp->num_descriptors) || 5370 (msg->ncookies != dp->ncookies) || 5371 !(vsw_mem_cookie_match(&msg->cookie[0], &dp->cookie[0]))) { 5372 return (0); 5373 } else { 5374 return (1); 5375 } 5376 5377 } 5378 5379 static caddr_t 5380 vsw_print_ethaddr(uint8_t *a, char *ebuf) 5381 { 5382 (void) sprintf(ebuf, "%x:%x:%x:%x:%x:%x", 5383 a[0], a[1], a[2], a[3], a[4], a[5]); 5384 return (ebuf); 5385 } 5386 5387 /* 5388 * Reset and free all the resources associated with 5389 * the channel. 
5390 */ 5391 static void 5392 vsw_free_lane_resources(vsw_ldc_t *ldcp, uint64_t dir) 5393 { 5394 dring_info_t *dp, *dpp; 5395 lane_t *lp = NULL; 5396 int rv = 0; 5397 5398 ASSERT(ldcp != NULL); 5399 5400 D1(ldcp->ldc_vswp, "%s (%lld): enter", __func__, ldcp->ldc_id); 5401 5402 if (dir == INBOUND) { 5403 D2(ldcp->ldc_vswp, "%s: freeing INBOUND lane" 5404 " of channel %lld", __func__, ldcp->ldc_id); 5405 lp = &ldcp->lane_in; 5406 } else { 5407 D2(ldcp->ldc_vswp, "%s: freeing OUTBOUND lane" 5408 " of channel %lld", __func__, ldcp->ldc_id); 5409 lp = &ldcp->lane_out; 5410 } 5411 5412 lp->lstate = VSW_LANE_INACTIV; 5413 lp->seq_num = VNET_ISS; 5414 5415 if (lp->dringp) { 5416 if (dir == INBOUND) { 5417 WRITE_ENTER(&lp->dlistrw); 5418 dp = lp->dringp; 5419 while (dp != NULL) { 5420 dpp = dp->next; 5421 if (dp->handle != NULL) 5422 (void) ldc_mem_dring_unmap(dp->handle); 5423 kmem_free(dp, sizeof (dring_info_t)); 5424 dp = dpp; 5425 } 5426 RW_EXIT(&lp->dlistrw); 5427 } else { 5428 /* 5429 * unbind, destroy exported dring, free dring struct 5430 */ 5431 WRITE_ENTER(&lp->dlistrw); 5432 dp = lp->dringp; 5433 rv = vsw_free_ring(dp); 5434 RW_EXIT(&lp->dlistrw); 5435 } 5436 if (rv == 0) { 5437 lp->dringp = NULL; 5438 } 5439 } 5440 5441 D1(ldcp->ldc_vswp, "%s (%lld): exit", __func__, ldcp->ldc_id); 5442 } 5443 5444 /* 5445 * Free ring and all associated resources. 5446 * 5447 * Should be called with dlistrw rwlock held as writer. 5448 */ 5449 static int 5450 vsw_free_ring(dring_info_t *dp) 5451 { 5452 vsw_private_desc_t *paddr = NULL; 5453 dring_info_t *dpp; 5454 int i, rv = 1; 5455 5456 while (dp != NULL) { 5457 mutex_enter(&dp->dlock); 5458 dpp = dp->next; 5459 if (dp->priv_addr != NULL) { 5460 /* 5461 * First unbind and free the memory handles 5462 * stored in each descriptor within the ring. 5463 */ 5464 for (i = 0; i < vsw_ntxds; i++) { 5465 paddr = (vsw_private_desc_t *) 5466 dp->priv_addr + i; 5467 if (paddr->memhandle != NULL) { 5468 if (paddr->bound == 1) { 5469 rv = ldc_mem_unbind_handle( 5470 paddr->memhandle); 5471 5472 if (rv != 0) { 5473 DERR(NULL, "error " 5474 "unbinding handle for " 5475 "ring 0x%llx at pos %d", 5476 dp, i); 5477 mutex_exit(&dp->dlock); 5478 return (rv); 5479 } 5480 paddr->bound = 0; 5481 } 5482 5483 rv = ldc_mem_free_handle( 5484 paddr->memhandle); 5485 if (rv != 0) { 5486 DERR(NULL, "error freeing " 5487 "handle for ring 0x%llx " 5488 "at pos %d", dp, i); 5489 mutex_exit(&dp->dlock); 5490 return (rv); 5491 } 5492 paddr->memhandle = NULL; 5493 } 5494 mutex_destroy(&paddr->dstate_lock); 5495 } 5496 kmem_free(dp->priv_addr, 5497 (sizeof (vsw_private_desc_t) * vsw_ntxds)); 5498 } 5499 5500 /* 5501 * Now unbind and destroy the ring itself. 5502 */ 5503 if (dp->handle != NULL) { 5504 (void) ldc_mem_dring_unbind(dp->handle); 5505 (void) ldc_mem_dring_destroy(dp->handle); 5506 } 5507 5508 if (dp->data_addr != NULL) { 5509 kmem_free(dp->data_addr, dp->data_sz); 5510 } 5511 5512 mutex_exit(&dp->dlock); 5513 mutex_destroy(&dp->dlock); 5514 mutex_destroy(&dp->restart_lock); 5515 kmem_free(dp, sizeof (dring_info_t)); 5516 5517 dp = dpp; 5518 } 5519 return (0); 5520 } 5521 5522 /* 5523 * vsw_ldc_rx_worker -- A per LDC worker thread to receive data. 5524 * This thread is woken up by the LDC interrupt handler to process 5525 * LDC packets and receive data. 
5526 */ 5527 static void 5528 vsw_ldc_rx_worker(void *arg) 5529 { 5530 callb_cpr_t cprinfo; 5531 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 5532 vsw_t *vswp = ldcp->ldc_vswp; 5533 5534 D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id); 5535 CALLB_CPR_INIT(&cprinfo, &ldcp->rx_thr_lock, callb_generic_cpr, 5536 "vsw_rx_thread"); 5537 mutex_enter(&ldcp->rx_thr_lock); 5538 ldcp->rx_thr_flags |= VSW_WTHR_RUNNING; 5539 while (!(ldcp->rx_thr_flags & VSW_WTHR_STOP)) { 5540 5541 CALLB_CPR_SAFE_BEGIN(&cprinfo); 5542 /* 5543 * Wait until the data is received or a stop 5544 * request is received. 5545 */ 5546 while (!(ldcp->rx_thr_flags & 5547 (VSW_WTHR_DATARCVD | VSW_WTHR_STOP))) { 5548 cv_wait(&ldcp->rx_thr_cv, &ldcp->rx_thr_lock); 5549 } 5550 CALLB_CPR_SAFE_END(&cprinfo, &ldcp->rx_thr_lock) 5551 5552 /* 5553 * First process the stop request. 5554 */ 5555 if (ldcp->rx_thr_flags & VSW_WTHR_STOP) { 5556 D2(vswp, "%s(%lld):Rx thread stopped\n", 5557 __func__, ldcp->ldc_id); 5558 break; 5559 } 5560 ldcp->rx_thr_flags &= ~VSW_WTHR_DATARCVD; 5561 mutex_exit(&ldcp->rx_thr_lock); 5562 D1(vswp, "%s(%lld):calling vsw_process_pkt\n", 5563 __func__, ldcp->ldc_id); 5564 mutex_enter(&ldcp->ldc_cblock); 5565 vsw_process_pkt(ldcp); 5566 mutex_exit(&ldcp->ldc_cblock); 5567 mutex_enter(&ldcp->rx_thr_lock); 5568 } 5569 5570 /* 5571 * Update the run status and wakeup the thread that 5572 * has sent the stop request. 5573 */ 5574 ldcp->rx_thr_flags &= ~VSW_WTHR_RUNNING; 5575 cv_signal(&ldcp->rx_thr_cv); 5576 CALLB_CPR_EXIT(&cprinfo); 5577 D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id); 5578 thread_exit(); 5579 } 5580 5581 /* vsw_stop_rx_thread -- Co-ordinate with receive thread to stop it */ 5582 static void 5583 vsw_stop_rx_thread(vsw_ldc_t *ldcp) 5584 { 5585 vsw_t *vswp = ldcp->ldc_vswp; 5586 5587 D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id); 5588 /* 5589 * Send a stop request by setting the stop flag and 5590 * wait until the receive thread stops. 5591 */ 5592 mutex_enter(&ldcp->rx_thr_lock); 5593 if (ldcp->rx_thr_flags & VSW_WTHR_RUNNING) { 5594 ldcp->rx_thr_flags |= VSW_WTHR_STOP; 5595 cv_signal(&ldcp->rx_thr_cv); 5596 while (ldcp->rx_thr_flags & VSW_WTHR_RUNNING) { 5597 cv_wait(&ldcp->rx_thr_cv, &ldcp->rx_thr_lock); 5598 } 5599 } 5600 mutex_exit(&ldcp->rx_thr_lock); 5601 ldcp->rx_thread = NULL; 5602 D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id); 5603 } 5604 5605 /* 5606 * vsw_ldc_tx_worker -- A per LDC worker thread to transmit data. 5607 * This thread is woken up by the vsw_portsend to transmit 5608 * packets. 5609 */ 5610 static void 5611 vsw_ldc_tx_worker(void *arg) 5612 { 5613 callb_cpr_t cprinfo; 5614 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 5615 vsw_t *vswp = ldcp->ldc_vswp; 5616 mblk_t *mp; 5617 mblk_t *tmp; 5618 5619 D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id); 5620 CALLB_CPR_INIT(&cprinfo, &ldcp->tx_thr_lock, callb_generic_cpr, 5621 "vnet_tx_thread"); 5622 mutex_enter(&ldcp->tx_thr_lock); 5623 ldcp->tx_thr_flags |= VSW_WTHR_RUNNING; 5624 while (!(ldcp->tx_thr_flags & VSW_WTHR_STOP)) { 5625 5626 CALLB_CPR_SAFE_BEGIN(&cprinfo); 5627 /* 5628 * Wait until the data is received or a stop 5629 * request is received. 5630 */ 5631 while (!(ldcp->tx_thr_flags & VSW_WTHR_STOP) && 5632 (ldcp->tx_mhead == NULL)) { 5633 cv_wait(&ldcp->tx_thr_cv, &ldcp->tx_thr_lock); 5634 } 5635 CALLB_CPR_SAFE_END(&cprinfo, &ldcp->tx_thr_lock) 5636 5637 /* 5638 * First process the stop request. 
5639 */ 5640 if (ldcp->tx_thr_flags & VSW_WTHR_STOP) { 5641 D2(vswp, "%s(%lld):tx thread stopped\n", 5642 __func__, ldcp->ldc_id); 5643 break; 5644 } 5645 mp = ldcp->tx_mhead; 5646 ldcp->tx_mhead = ldcp->tx_mtail = NULL; 5647 ldcp->tx_cnt = 0; 5648 mutex_exit(&ldcp->tx_thr_lock); 5649 D2(vswp, "%s(%lld):calling vsw_ldcsend\n", 5650 __func__, ldcp->ldc_id); 5651 while (mp != NULL) { 5652 tmp = mp->b_next; 5653 mp->b_next = mp->b_prev = NULL; 5654 (void) vsw_ldcsend(ldcp, mp, vsw_ldc_tx_retries); 5655 mp = tmp; 5656 } 5657 mutex_enter(&ldcp->tx_thr_lock); 5658 } 5659 5660 /* 5661 * Update the run status and wakeup the thread that 5662 * has sent the stop request. 5663 */ 5664 ldcp->tx_thr_flags &= ~VSW_WTHR_RUNNING; 5665 cv_signal(&ldcp->tx_thr_cv); 5666 CALLB_CPR_EXIT(&cprinfo); 5667 D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id); 5668 thread_exit(); 5669 } 5670 5671 /* vsw_stop_tx_thread -- Co-ordinate with receive thread to stop it */ 5672 static void 5673 vsw_stop_tx_thread(vsw_ldc_t *ldcp) 5674 { 5675 vsw_t *vswp = ldcp->ldc_vswp; 5676 5677 D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id); 5678 /* 5679 * Send a stop request by setting the stop flag and 5680 * wait until the receive thread stops. 5681 */ 5682 mutex_enter(&ldcp->tx_thr_lock); 5683 if (ldcp->tx_thr_flags & VSW_WTHR_RUNNING) { 5684 ldcp->tx_thr_flags |= VSW_WTHR_STOP; 5685 cv_signal(&ldcp->tx_thr_cv); 5686 while (ldcp->tx_thr_flags & VSW_WTHR_RUNNING) { 5687 cv_wait(&ldcp->tx_thr_cv, &ldcp->tx_thr_lock); 5688 } 5689 } 5690 mutex_exit(&ldcp->tx_thr_lock); 5691 ldcp->tx_thread = NULL; 5692 D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id); 5693 } 5694 5695 /* vsw_reclaim_dring -- reclaim descriptors */ 5696 static int 5697 vsw_reclaim_dring(dring_info_t *dp, int start) 5698 { 5699 int i, j, len; 5700 vsw_private_desc_t *priv_addr; 5701 vnet_public_desc_t *pub_addr; 5702 5703 pub_addr = (vnet_public_desc_t *)dp->pub_addr; 5704 priv_addr = (vsw_private_desc_t *)dp->priv_addr; 5705 len = dp->num_descriptors; 5706 5707 D2(NULL, "%s: start index %ld\n", __func__, start); 5708 5709 j = 0; 5710 for (i = start; j < len; i = (i + 1) % len, j++) { 5711 pub_addr = (vnet_public_desc_t *)dp->pub_addr + i; 5712 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i; 5713 5714 mutex_enter(&priv_addr->dstate_lock); 5715 if (pub_addr->hdr.dstate != VIO_DESC_DONE) { 5716 mutex_exit(&priv_addr->dstate_lock); 5717 break; 5718 } 5719 pub_addr->hdr.dstate = VIO_DESC_FREE; 5720 priv_addr->dstate = VIO_DESC_FREE; 5721 /* clear all the fields */ 5722 priv_addr->datalen = 0; 5723 pub_addr->hdr.ack = 0; 5724 mutex_exit(&priv_addr->dstate_lock); 5725 5726 D3(NULL, "claiming descp:%d pub state:0x%llx priv state 0x%llx", 5727 i, pub_addr->hdr.dstate, priv_addr->dstate); 5728 } 5729 return (j); 5730 } 5731 5732 /* 5733 * Debugging routines 5734 */ 5735 static void 5736 display_state(void) 5737 { 5738 vsw_t *vswp; 5739 vsw_port_list_t *plist; 5740 vsw_port_t *port; 5741 vsw_ldc_list_t *ldcl; 5742 vsw_ldc_t *ldcp; 5743 extern vsw_t *vsw_head; 5744 5745 cmn_err(CE_NOTE, "***** system state *****"); 5746 5747 for (vswp = vsw_head; vswp; vswp = vswp->next) { 5748 plist = &vswp->plist; 5749 READ_ENTER(&plist->lockrw); 5750 cmn_err(CE_CONT, "vsw instance %d has %d ports attached\n", 5751 vswp->instance, plist->num_ports); 5752 5753 for (port = plist->head; port != NULL; port = port->p_next) { 5754 ldcl = &port->p_ldclist; 5755 cmn_err(CE_CONT, "port %d : %d ldcs attached\n", 5756 port->p_instance, port->num_ldcs); 5757 READ_ENTER(&ldcl->lockrw); 5758 ldcp 
= ldcl->head; 5759 for (; ldcp != NULL; ldcp = ldcp->ldc_next) { 5760 cmn_err(CE_CONT, "chan %lu : dev %d : " 5761 "status %d : phase %u\n", 5762 ldcp->ldc_id, ldcp->dev_class, 5763 ldcp->ldc_status, ldcp->hphase); 5764 cmn_err(CE_CONT, "chan %lu : lsession %lu : " 5765 "psession %lu\n", ldcp->ldc_id, 5766 ldcp->local_session, ldcp->peer_session); 5767 5768 cmn_err(CE_CONT, "Inbound lane:\n"); 5769 display_lane(&ldcp->lane_in); 5770 cmn_err(CE_CONT, "Outbound lane:\n"); 5771 display_lane(&ldcp->lane_out); 5772 } 5773 RW_EXIT(&ldcl->lockrw); 5774 } 5775 RW_EXIT(&plist->lockrw); 5776 } 5777 cmn_err(CE_NOTE, "***** system state *****"); 5778 } 5779 5780 static void 5781 display_lane(lane_t *lp) 5782 { 5783 dring_info_t *drp; 5784 5785 cmn_err(CE_CONT, "ver 0x%x:0x%x : state %lx : mtu 0x%lx\n", 5786 lp->ver_major, lp->ver_minor, lp->lstate, lp->mtu); 5787 cmn_err(CE_CONT, "addr_type %d : addr 0x%lx : xmode %d\n", 5788 lp->addr_type, lp->addr, lp->xfer_mode); 5789 cmn_err(CE_CONT, "dringp 0x%lx\n", (uint64_t)lp->dringp); 5790 5791 cmn_err(CE_CONT, "Dring info:\n"); 5792 for (drp = lp->dringp; drp != NULL; drp = drp->next) { 5793 cmn_err(CE_CONT, "\tnum_desc %u : dsize %u\n", 5794 drp->num_descriptors, drp->descriptor_size); 5795 cmn_err(CE_CONT, "\thandle 0x%lx\n", drp->handle); 5796 cmn_err(CE_CONT, "\tpub_addr 0x%lx : priv_addr 0x%lx\n", 5797 (uint64_t)drp->pub_addr, (uint64_t)drp->priv_addr); 5798 cmn_err(CE_CONT, "\tident 0x%lx : end_idx %lu\n", 5799 drp->ident, drp->end_idx); 5800 display_ring(drp); 5801 } 5802 } 5803 5804 static void 5805 display_ring(dring_info_t *dringp) 5806 { 5807 uint64_t i; 5808 uint64_t priv_count = 0; 5809 uint64_t pub_count = 0; 5810 vnet_public_desc_t *pub_addr = NULL; 5811 vsw_private_desc_t *priv_addr = NULL; 5812 5813 for (i = 0; i < vsw_ntxds; i++) { 5814 if (dringp->pub_addr != NULL) { 5815 pub_addr = (vnet_public_desc_t *)dringp->pub_addr + i; 5816 5817 if (pub_addr->hdr.dstate == VIO_DESC_FREE) 5818 pub_count++; 5819 } 5820 5821 if (dringp->priv_addr != NULL) { 5822 priv_addr = (vsw_private_desc_t *)dringp->priv_addr + i; 5823 5824 if (priv_addr->dstate == VIO_DESC_FREE) 5825 priv_count++; 5826 } 5827 } 5828 cmn_err(CE_CONT, "\t%lu elements: %lu priv free: %lu pub free\n", 5829 i, priv_count, pub_count); 5830 } 5831 5832 static void 5833 dump_flags(uint64_t state) 5834 { 5835 int i; 5836 5837 typedef struct flag_name { 5838 int flag_val; 5839 char *flag_name; 5840 } flag_name_t; 5841 5842 flag_name_t flags[] = { 5843 VSW_VER_INFO_SENT, "VSW_VER_INFO_SENT", 5844 VSW_VER_INFO_RECV, "VSW_VER_INFO_RECV", 5845 VSW_VER_ACK_RECV, "VSW_VER_ACK_RECV", 5846 VSW_VER_ACK_SENT, "VSW_VER_ACK_SENT", 5847 VSW_VER_NACK_RECV, "VSW_VER_NACK_RECV", 5848 VSW_VER_NACK_SENT, "VSW_VER_NACK_SENT", 5849 VSW_ATTR_INFO_SENT, "VSW_ATTR_INFO_SENT", 5850 VSW_ATTR_INFO_RECV, "VSW_ATTR_INFO_RECV", 5851 VSW_ATTR_ACK_SENT, "VSW_ATTR_ACK_SENT", 5852 VSW_ATTR_ACK_RECV, "VSW_ATTR_ACK_RECV", 5853 VSW_ATTR_NACK_SENT, "VSW_ATTR_NACK_SENT", 5854 VSW_ATTR_NACK_RECV, "VSW_ATTR_NACK_RECV", 5855 VSW_DRING_INFO_SENT, "VSW_DRING_INFO_SENT", 5856 VSW_DRING_INFO_RECV, "VSW_DRING_INFO_RECV", 5857 VSW_DRING_ACK_SENT, "VSW_DRING_ACK_SENT", 5858 VSW_DRING_ACK_RECV, "VSW_DRING_ACK_RECV", 5859 VSW_DRING_NACK_SENT, "VSW_DRING_NACK_SENT", 5860 VSW_DRING_NACK_RECV, "VSW_DRING_NACK_RECV", 5861 VSW_RDX_INFO_SENT, "VSW_RDX_INFO_SENT", 5862 VSW_RDX_INFO_RECV, "VSW_RDX_INFO_RECV", 5863 VSW_RDX_ACK_SENT, "VSW_RDX_ACK_SENT", 5864 VSW_RDX_ACK_RECV, "VSW_RDX_ACK_RECV", 5865 VSW_RDX_NACK_SENT, "VSW_RDX_NACK_SENT", 5866 
VSW_RDX_NACK_RECV, "VSW_RDX_NACK_RECV", 5867 VSW_MCST_INFO_SENT, "VSW_MCST_INFO_SENT", 5868 VSW_MCST_INFO_RECV, "VSW_MCST_INFO_RECV", 5869 VSW_MCST_ACK_SENT, "VSW_MCST_ACK_SENT", 5870 VSW_MCST_ACK_RECV, "VSW_MCST_ACK_RECV", 5871 VSW_MCST_NACK_SENT, "VSW_MCST_NACK_SENT", 5872 VSW_MCST_NACK_RECV, "VSW_MCST_NACK_RECV", 5873 VSW_LANE_ACTIVE, "VSW_LANE_ACTIVE"}; 5874 5875 DERR(NULL, "DUMP_FLAGS: %llx\n", state); 5876 for (i = 0; i < sizeof (flags)/sizeof (flag_name_t); i++) { 5877 if (state & flags[i].flag_val) 5878 DERR(NULL, "DUMP_FLAGS %s", flags[i].flag_name); 5879 } 5880 } 5881