/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/debug.h>
#include <sys/time.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/user.h>
#include <sys/stropts.h>
#include <sys/stream.h>
#include <sys/strlog.h>
#include <sys/strsubr.h>
#include <sys/cmn_err.h>
#include <sys/cpu.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ksynch.h>
#include <sys/stat.h>
#include <sys/kstat.h>
#include <sys/vtrace.h>
#include <sys/strsun.h>
#include <sys/dlpi.h>
#include <sys/ethernet.h>
#include <net/if.h>
#include <sys/varargs.h>
#include <sys/machsystm.h>
#include <sys/modctl.h>
#include <sys/modhash.h>
#include <sys/mac.h>
#include <sys/mac_ether.h>
#include <sys/taskq.h>
#include <sys/note.h>
#include <sys/mach_descrip.h>
#include <sys/mdeg.h>
#include <sys/ldc.h>
#include <sys/vsw_fdb.h>
#include <sys/vsw.h>
#include <sys/vio_mailbox.h>
#include <sys/vnet_mailbox.h>
#include <sys/vnet_common.h>
#include <sys/vio_util.h>
#include <sys/sdt.h>
#include <sys/atomic.h>
#include <sys/callb.h>
#include <sys/vlan.h>

/* Port add/deletion/etc routines */
static void vsw_port_delete(vsw_port_t *port);
static int vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id);
static void vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id);
static int vsw_init_ldcs(vsw_port_t *port);
static void vsw_uninit_ldcs(vsw_port_t *port);
static int vsw_ldc_init(vsw_ldc_t *ldcp);
static void vsw_ldc_uninit(vsw_ldc_t *ldcp);
static void vsw_drain_ldcs(vsw_port_t *port);
static void vsw_drain_port_taskq(vsw_port_t *port);
static void vsw_marker_task(void *);
static int vsw_plist_del_node(vsw_t *, vsw_port_t *port);
void vsw_detach_ports(vsw_t *vswp);
int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node);
mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr);
int vsw_port_detach(vsw_t *vswp, int p_instance);
int vsw_portsend(vsw_port_t *port, mblk_t *mp);
int vsw_port_attach(vsw_port_t *portp);
vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance);
void vsw_vlan_unaware_port_reset(vsw_port_t *portp);
int vsw_send_msg(vsw_ldc_t *, void *, int, boolean_t);
void vsw_hio_port_reset(vsw_port_t *portp, boolean_t immediate);
void vsw_reset_ports(vsw_t *vswp);
void vsw_port_reset(vsw_port_t *portp);
void vsw_physlink_update_ports(vsw_t *vswp);
static void vsw_port_physlink_update(vsw_port_t *portp);
/* Interrupt routines */
static uint_t vsw_ldc_cb(uint64_t cb, caddr_t arg);

/* Handshake routines */
static void vsw_ldc_reinit(vsw_ldc_t *);
static void vsw_process_conn_evt(vsw_ldc_t *, uint16_t);
static void vsw_conn_task(void *);
static int vsw_check_flag(vsw_ldc_t *, int, uint64_t);
static void vsw_next_milestone(vsw_ldc_t *);
static int vsw_supported_version(vio_ver_msg_t *);
static void vsw_set_vnet_proto_ops(vsw_ldc_t *ldcp);
static void vsw_reset_vnet_proto_ops(vsw_ldc_t *ldcp);

/* Data processing routines */
static void vsw_process_pkt(void *);
static void vsw_dispatch_ctrl_task(vsw_ldc_t *, void *, vio_msg_tag_t *);
static void vsw_process_ctrl_pkt(void *);
static void vsw_process_ctrl_ver_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_attr_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_mcst_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_rdx_pkt(vsw_ldc_t *, void *);
static void vsw_process_physlink_msg(vsw_ldc_t *, void *);
static void vsw_process_data_pkt(vsw_ldc_t *, void *, vio_msg_tag_t *,
	uint32_t);
static void vsw_process_data_dring_pkt(vsw_ldc_t *, void *);
static void vsw_process_pkt_data_nop(void *, void *, uint32_t);
static void vsw_process_pkt_data(void *, void *, uint32_t);
static void vsw_process_data_ibnd_pkt(vsw_ldc_t *, void *);
static void vsw_process_err_pkt(vsw_ldc_t *, void *, vio_msg_tag_t *);

/* Switching/data transmit routines */
static int vsw_dringsend(vsw_ldc_t *, mblk_t *);
static int vsw_descrsend(vsw_ldc_t *, mblk_t *);
static void vsw_ldcsend_pkt(vsw_ldc_t *ldcp, mblk_t *mp);
static int vsw_ldcsend(vsw_ldc_t *ldcp, mblk_t *mp, uint32_t retries);
static int vsw_ldctx_pri(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count);
static int vsw_ldctx(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count);

/* Packet creation routines */
static void vsw_send_ver(void *);
static void vsw_send_attr(vsw_ldc_t *);
static vio_dring_reg_msg_t *vsw_create_dring_info_pkt(vsw_ldc_t *);
static void vsw_send_dring_info(vsw_ldc_t *);
static void vsw_send_rdx(vsw_ldc_t *);
static void vsw_send_physlink_msg(vsw_ldc_t *ldcp, link_state_t plink_state);

/* Dring routines */
static dring_info_t *vsw_create_dring(vsw_ldc_t *);
static void vsw_create_privring(vsw_ldc_t *);
static int vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp);
static int vsw_dring_find_free_desc(dring_info_t *, vsw_private_desc_t **,
	int *);
static dring_info_t *vsw_ident2dring(lane_t *, uint64_t);
static int vsw_reclaim_dring(dring_info_t *dp, int start);

static void vsw_set_lane_attr(vsw_t *, lane_t *);
static int vsw_check_attr(vnet_attr_msg_t *, vsw_ldc_t *);
static int vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg);
static int vsw_mem_cookie_match(ldc_mem_cookie_t *, ldc_mem_cookie_t *);
static int vsw_check_dring_info(vio_dring_reg_msg_t *);

/* Rcv/Tx thread routines */
static void vsw_stop_tx_thread(vsw_ldc_t *ldcp);
static void vsw_ldc_tx_worker(void *arg);
static void vsw_stop_rx_thread(vsw_ldc_t *ldcp);
static void vsw_ldc_rx_worker(void *arg);

/* Misc support routines */
static void vsw_free_lane_resources(vsw_ldc_t *, uint64_t);
static void vsw_free_ring(dring_info_t *);
static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr);
static int vsw_get_same_dest_list(struct ether_header *ehp,
    mblk_t **rhead, mblk_t **rtail, mblk_t **mpp);
static mblk_t *vsw_dupmsgchain(mblk_t *mp);

/* Debugging routines */
static void dump_flags(uint64_t);
static void display_state(void);
static void display_lane(lane_t *);
static void display_ring(dring_info_t *);

/*
 * Functions imported from other files.
 */
extern int vsw_set_hw(vsw_t *, vsw_port_t *, int);
extern void vsw_unset_hw(vsw_t *, vsw_port_t *, int);
extern int vsw_add_rem_mcst(vnet_mcast_msg_t *mcst_pkt, vsw_port_t *port);
extern void vsw_del_mcst_port(vsw_port_t *port);
extern int vsw_add_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg);
extern int vsw_del_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg);
extern void vsw_fdbe_add(vsw_t *vswp, void *port);
extern void vsw_fdbe_del(vsw_t *vswp, struct ether_addr *eaddr);
extern void vsw_create_vlans(void *arg, int type);
extern void vsw_destroy_vlans(void *arg, int type);
extern void vsw_vlan_add_ids(void *arg, int type);
extern void vsw_vlan_remove_ids(void *arg, int type);
extern boolean_t vsw_frame_lookup_vid(void *arg, int caller,
    struct ether_header *ehp, uint16_t *vidp);
extern mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp);
extern uint32_t vsw_vlan_frame_untag(void *arg, int type, mblk_t **np,
    mblk_t **npt);
extern boolean_t vsw_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid);
extern void vsw_hio_start(vsw_t *vswp, vsw_ldc_t *ldcp);
extern void vsw_hio_stop(vsw_t *vswp, vsw_ldc_t *ldcp);
extern void vsw_process_dds_msg(vsw_t *vswp, vsw_ldc_t *ldcp, void *msg);
extern void vsw_hio_stop_port(vsw_port_t *portp);
extern void vsw_publish_macaddr(vsw_t *vswp, vsw_port_t *portp);
extern int vsw_mac_client_init(vsw_t *vswp, vsw_port_t *port, int type);
extern void vsw_mac_client_cleanup(vsw_t *vswp, vsw_port_t *port, int type);
extern void vsw_destroy_rxpools(void *arg);

#define	VSW_NUM_VMPOOLS		3	/* number of vio mblk pools */

/*
 * Tunables used in this file.
 */
extern int vsw_num_handshakes;
extern int vsw_wretries;
extern int vsw_desc_delay;
extern int vsw_read_attempts;
extern int vsw_ldc_tx_delay;
extern int vsw_ldc_tx_retries;
extern int vsw_ldc_retries;
extern int vsw_ldc_delay;
extern boolean_t vsw_ldc_rxthr_enabled;
extern boolean_t vsw_ldc_txthr_enabled;
extern uint32_t vsw_ntxds;
extern uint32_t vsw_max_tx_qcount;
extern uint32_t vsw_chain_len;
extern uint32_t vsw_mblk_size1;
extern uint32_t vsw_mblk_size2;
extern uint32_t vsw_mblk_size3;
extern uint32_t vsw_mblk_size4;
extern uint32_t vsw_num_mblks1;
extern uint32_t vsw_num_mblks2;
extern uint32_t vsw_num_mblks3;
extern uint32_t vsw_num_mblks4;
extern boolean_t vsw_obp_ver_proto_workaround;
extern uint32_t vsw_publish_macaddr_count;
extern boolean_t vsw_jumbo_rxpools;

#define	LDC_ENTER_LOCK(ldcp)	\
	mutex_enter(&((ldcp)->ldc_cblock));\
	mutex_enter(&((ldcp)->ldc_rxlock));\
	mutex_enter(&((ldcp)->ldc_txlock));
#define	LDC_EXIT_LOCK(ldcp)	\
	mutex_exit(&((ldcp)->ldc_txlock));\
	mutex_exit(&((ldcp)->ldc_rxlock));\
	mutex_exit(&((ldcp)->ldc_cblock));

#define	VSW_VER_EQ(ldcp, major, minor)	\
	((ldcp)->lane_out.ver_major == (major) &&	\
	    (ldcp)->lane_out.ver_minor == (minor))

#define	VSW_VER_LT(ldcp, major, minor)	\
	(((ldcp)->lane_out.ver_major < (major)) ||	\
	    ((ldcp)->lane_out.ver_major == (major) &&	\
	    (ldcp)->lane_out.ver_minor < (minor)))

#define	VSW_VER_GTEQ(ldcp, major, minor)	\
	(((ldcp)->lane_out.ver_major > (major)) ||	\
	    ((ldcp)->lane_out.ver_major == (major) &&	\
	    (ldcp)->lane_out.ver_minor >= (minor)))

/*
 * VIO Protocol Version Info:
 *
 * The version specified below represents the version of protocol currently
 * supported in the driver. It means the driver can negotiate with peers with
 * versions <= this version. Here is a summary of the feature(s) that are
 * supported at each version of the protocol:
 *
 * 1.0		Basic VIO protocol.
 * 1.1		vDisk protocol update (no virtual network update).
 * 1.2		Support for priority frames (priority-ether-types).
 * 1.3		VLAN and HybridIO support.
 * 1.4		Jumbo Frame support.
 * 1.5		Link State Notification support with optional support
 *		for Physical Link information.
 */
static ver_sup_t vsw_versions[] = { {1, 5} };

/*
 * For the moment the state dump routines have their own
 * private flag.
 */
#define	DUMP_STATE	0

#if DUMP_STATE

#define	DUMP_TAG(tag) \
{ \
	D1(NULL, "DUMP_TAG: type 0x%llx", (tag).vio_msgtype); \
	D1(NULL, "DUMP_TAG: stype 0x%llx", (tag).vio_subtype); \
	D1(NULL, "DUMP_TAG: senv 0x%llx", (tag).vio_subtype_env); \
}

#define	DUMP_TAG_PTR(tag) \
{ \
	D1(NULL, "DUMP_TAG: type 0x%llx", (tag)->vio_msgtype); \
	D1(NULL, "DUMP_TAG: stype 0x%llx", (tag)->vio_subtype); \
	D1(NULL, "DUMP_TAG: senv 0x%llx", (tag)->vio_subtype_env); \
}

#define	DUMP_FLAGS(flags) dump_flags(flags);
#define	DISPLAY_STATE()	display_state()

#else

#define	DUMP_TAG(tag)
#define	DUMP_TAG_PTR(tag)
#define	DUMP_FLAGS(state)
#define	DISPLAY_STATE()

#endif	/* DUMP_STATE */
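/*
 * Example (illustrative only, not part of the protocol definition):
 * version-dependent code paths use the macros above, e.g.
 *
 *	if (VSW_VER_GTEQ(ldcp, 1, 3))
 *		... peer negotiated 1.3 or later: VLAN/HybridIO capable ...
 *
 * Note also that LDC_ENTER_LOCK/LDC_EXIT_LOCK acquire and release the
 * callback, rx and tx locks in opposite orders, preserving the lock
 * ordering ldc_cblock -> ldc_rxlock -> ldc_txlock.
 */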
/*
 * Attach the specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
int
vsw_port_attach(vsw_port_t *port)
{
	vsw_t			*vswp = port->p_vswp;
	vsw_port_list_t		*plist = &vswp->plist;
	vsw_port_t		*p, **pp;
	int			i;
	int			nids = port->num_ldcs;
	uint64_t		*ldcids;
	int			rv;

	D1(vswp, "%s: enter : port %d", __func__, port->p_instance);

	/* port already exists? */
	READ_ENTER(&plist->lockrw);
	for (p = plist->head; p != NULL; p = p->p_next) {
		if (p->p_instance == port->p_instance) {
			DWARN(vswp, "%s: port instance %d already attached",
			    __func__, p->p_instance);
			RW_EXIT(&plist->lockrw);
			return (1);
		}
	}
	RW_EXIT(&plist->lockrw);

	rw_init(&port->p_ldclist.lockrw, NULL, RW_DRIVER, NULL);

	mutex_init(&port->tx_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&port->mca_lock, NULL, MUTEX_DRIVER, NULL);
	rw_init(&port->maccl_rwlock, NULL, RW_DRIVER, NULL);

	mutex_init(&port->state_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&port->state_cv, NULL, CV_DRIVER, NULL);
	port->state = VSW_PORT_INIT;

	D2(vswp, "%s: %d nids", __func__, nids);
	ldcids = port->ldc_ids;
	for (i = 0; i < nids; i++) {
		D2(vswp, "%s: ldcid (%llx)", __func__, (uint64_t)ldcids[i]);
		if (vsw_ldc_attach(port, (uint64_t)ldcids[i]) != 0) {
			DERR(vswp, "%s: ldc_attach failed", __func__);
			goto exit_error;
		}
	}

	if (vswp->switching_setup_done == B_TRUE) {
		/*
		 * If the underlying network device has been setup,
		 * then open a mac client and program the mac address
		 * for this port.
		 */
		rv = vsw_mac_client_init(vswp, port, VSW_VNETPORT);
		if (rv != 0) {
			goto exit_error;
		}
	}

	/* create the fdb entry for this port/mac address */
	vsw_fdbe_add(vswp, port);

	vsw_create_vlans(port, VSW_VNETPORT);

	WRITE_ENTER(&plist->lockrw);

	/* link it into the list of ports for this vsw instance */
	pp = (vsw_port_t **)(&plist->head);
	port->p_next = *pp;
	*pp = port;
	plist->num_ports++;

	RW_EXIT(&plist->lockrw);

	/*
	 * Initialise the port and any ldc's under it.
	 */
	(void) vsw_init_ldcs(port);

	/* announce macaddr of vnet to the physical switch */
	if (vsw_publish_macaddr_count != 0) {	/* enabled */
		vsw_publish_macaddr(vswp, port);
	}

	D1(vswp, "%s: exit", __func__);
	return (0);

exit_error:
	rw_destroy(&port->p_ldclist.lockrw);

	cv_destroy(&port->state_cv);
	mutex_destroy(&port->state_lock);

	rw_destroy(&port->maccl_rwlock);
	mutex_destroy(&port->tx_lock);
	mutex_destroy(&port->mca_lock);
	kmem_free(port, sizeof (vsw_port_t));
	return (1);
}
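/*
 * Illustrative usage note: the caller (e.g. the machine-description port
 * add path, see vsw_port_add()) is expected to allocate and fill in a
 * vsw_port_t (p_vswp, p_instance, ldc_ids/num_ldcs, p_macaddr) before
 * invoking vsw_port_attach(). On failure the port structure has already
 * been freed by the exit_error path above, so the caller must not touch
 * it again.
 */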
/*
 * Detach the specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
int
vsw_port_detach(vsw_t *vswp, int p_instance)
{
	vsw_port_t	*port = NULL;
	vsw_port_list_t	*plist = &vswp->plist;

	D1(vswp, "%s: enter: port id %d", __func__, p_instance);

	WRITE_ENTER(&plist->lockrw);

	if ((port = vsw_lookup_port(vswp, p_instance)) == NULL) {
		RW_EXIT(&plist->lockrw);
		return (1);
	}

	if (vsw_plist_del_node(vswp, port)) {
		RW_EXIT(&plist->lockrw);
		return (1);
	}

	/* cleanup any HybridIO for this port */
	vsw_hio_stop_port(port);

	/*
	 * No longer need to hold writer lock on port list now
	 * that we have unlinked the target port from the list.
	 */
	RW_EXIT(&plist->lockrw);

	/* Cleanup and close the mac client */
	vsw_mac_client_cleanup(vswp, port, VSW_VNETPORT);

	/* Remove the fdb entry for this port/mac address */
	vsw_fdbe_del(vswp, &(port->p_macaddr));
	vsw_destroy_vlans(port, VSW_VNETPORT);

	/* Remove any multicast addresses. */
	vsw_del_mcst_port(port);

	vsw_port_delete(port);

	D1(vswp, "%s: exit: p_instance(%d)", __func__, p_instance);
	return (0);
}

/*
 * Detach all active ports.
 */
void
vsw_detach_ports(vsw_t *vswp)
{
	vsw_port_list_t	*plist = &vswp->plist;
	vsw_port_t	*port = NULL;

	D1(vswp, "%s: enter", __func__);

	WRITE_ENTER(&plist->lockrw);

	while ((port = plist->head) != NULL) {
		(void) vsw_plist_del_node(vswp, port);

		/* cleanup any HybridIO for this port */
		vsw_hio_stop_port(port);

		/* Cleanup and close the mac client */
		vsw_mac_client_cleanup(vswp, port, VSW_VNETPORT);

		/* Remove the fdb entry for this port/mac address */
		vsw_fdbe_del(vswp, &(port->p_macaddr));
		vsw_destroy_vlans(port, VSW_VNETPORT);

		/* Remove any multicast addresses. */
		vsw_del_mcst_port(port);

		/*
		 * No longer need to hold the lock on the port list
		 * now that we have unlinked the target port from the
		 * list.
		 */
		RW_EXIT(&plist->lockrw);
		vsw_port_delete(port);
		WRITE_ENTER(&plist->lockrw);
	}
	RW_EXIT(&plist->lockrw);

	D1(vswp, "%s: exit", __func__);
}

/*
 * Delete the specified port.
 */
static void
vsw_port_delete(vsw_port_t *port)
{
	vsw_ldc_list_t	*ldcl;
	vsw_t		*vswp = port->p_vswp;
	int		num_ldcs;

	D1(vswp, "%s: enter : port id %d", __func__, port->p_instance);

	vsw_uninit_ldcs(port);

	/*
	 * Wait for any pending ctrl msg tasks which reference this
	 * port to finish.
	 */
	vsw_drain_port_taskq(port);

	/*
	 * Wait for any active callbacks to finish
	 */
	vsw_drain_ldcs(port);

	ldcl = &port->p_ldclist;
	num_ldcs = port->num_ldcs;
	WRITE_ENTER(&ldcl->lockrw);
	while (num_ldcs > 0) {
		vsw_ldc_detach(port, ldcl->head->ldc_id);
		num_ldcs--;
	}
	RW_EXIT(&ldcl->lockrw);

	rw_destroy(&port->p_ldclist.lockrw);

	rw_destroy(&port->maccl_rwlock);
	mutex_destroy(&port->mca_lock);
	mutex_destroy(&port->tx_lock);

	cv_destroy(&port->state_cv);
	mutex_destroy(&port->state_lock);

	if (port->num_ldcs != 0) {
		kmem_free(port->ldc_ids, port->num_ldcs * sizeof (uint64_t));
		port->num_ldcs = 0;
	}

	if (port->nvids != 0) {
		kmem_free(port->vids, sizeof (vsw_vlanid_t) * port->nvids);
	}

	kmem_free(port, sizeof (vsw_port_t));

	D1(vswp, "%s: exit", __func__);
}
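/*
 * Note on ordering (descriptive): vsw_port_delete() first disables LDC
 * callbacks (vsw_uninit_ldcs), then waits for in-flight taskq work and
 * active callbacks to drain before detaching the channels and freeing
 * the port. Freeing in any other order could allow a late callback or
 * task to reference freed state.
 */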
static int
vsw_init_multipools(vsw_ldc_t *ldcp, vsw_t *vswp)
{
	size_t		data_sz;
	int		rv;
	uint32_t	sz1 = 0;
	uint32_t	sz2 = 0;
	uint32_t	sz3 = 0;
	uint32_t	sz4 = 0;

	/*
	 * We round up the mtu specified to be a multiple of 2K to limit the
	 * number of rx buffer pools created for a given mtu.
	 */
	data_sz = vswp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN;
	data_sz = VNET_ROUNDUP_2K(data_sz);

	/*
	 * If pool sizes are specified, use them. Note that the presence of
	 * the first tunable will be used as a hint.
	 */
	if (vsw_mblk_size1 != 0) {
		sz1 = vsw_mblk_size1;
		sz2 = vsw_mblk_size2;
		sz3 = vsw_mblk_size3;
		sz4 = vsw_mblk_size4;

		if (sz4 == 0) { /* need 3 pools */

			ldcp->max_rxpool_size = sz3;
			rv = vio_init_multipools(&ldcp->vmp,
			    VSW_NUM_VMPOOLS, sz1, sz2, sz3,
			    vsw_num_mblks1, vsw_num_mblks2, vsw_num_mblks3);

		} else {

			ldcp->max_rxpool_size = sz4;
			rv = vio_init_multipools(&ldcp->vmp,
			    VSW_NUM_VMPOOLS + 1, sz1, sz2, sz3, sz4,
			    vsw_num_mblks1, vsw_num_mblks2, vsw_num_mblks3,
			    vsw_num_mblks4);

		}

		return (rv);
	}

	/*
	 * Pool sizes are not specified. We select the pool sizes based on the
	 * mtu if vsw_jumbo_rxpools is enabled.
	 */
	if (vsw_jumbo_rxpools == B_FALSE || data_sz == VNET_2K) {
		/*
		 * Receive buffer pool allocation based on mtu is disabled.
		 * Use the default mechanism of standard size pool allocation.
		 */
		sz1 = VSW_MBLK_SZ_128;
		sz2 = VSW_MBLK_SZ_256;
		sz3 = VSW_MBLK_SZ_2048;
		ldcp->max_rxpool_size = sz3;

		rv = vio_init_multipools(&ldcp->vmp, VSW_NUM_VMPOOLS,
		    sz1, sz2, sz3,
		    vsw_num_mblks1, vsw_num_mblks2, vsw_num_mblks3);

		return (rv);
	}

	switch (data_sz) {

	case VNET_4K:

		sz1 = VSW_MBLK_SZ_128;
		sz2 = VSW_MBLK_SZ_256;
		sz3 = VSW_MBLK_SZ_2048;
		sz4 = sz3 << 1;			/* 4K */
		ldcp->max_rxpool_size = sz4;

		rv = vio_init_multipools(&ldcp->vmp, VSW_NUM_VMPOOLS + 1,
		    sz1, sz2, sz3, sz4,
		    vsw_num_mblks1, vsw_num_mblks2, vsw_num_mblks3,
		    vsw_num_mblks4);
		break;

	default:	/* data_sz: 4K+ to 16K */

		sz1 = VSW_MBLK_SZ_256;
		sz2 = VSW_MBLK_SZ_2048;
		sz3 = data_sz >> 1;	/* Jumbo-size/2 */
		sz4 = data_sz;		/* Jumbo-size */
		ldcp->max_rxpool_size = sz4;

		rv = vio_init_multipools(&ldcp->vmp, VSW_NUM_VMPOOLS + 1,
		    sz1, sz2, sz3, sz4,
		    vsw_num_mblks1, vsw_num_mblks2, vsw_num_mblks3,
		    vsw_num_mblks4);
		break;
	}

	return (rv);
}
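/*
 * Worked example (illustrative): with jumbo rx pools enabled and an mtu
 * for which data_sz rounds up to 10K, the default case above selects
 * pools of 256, 2048, 5K (data_sz/2) and 10K (data_sz), with
 * max_rxpool_size set to 10K. With data_sz equal to VNET_4K, the pools
 * are 128, 256, 2048 and 4096.
 */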
/*
 * Attach a logical domain channel (ldc) under a specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id)
{
	vsw_t		*vswp = port->p_vswp;
	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
	vsw_ldc_t	*ldcp = NULL;
	ldc_attr_t	attr;
	ldc_status_t	istatus;
	int		status = DDI_FAILURE;
	char		kname[MAXNAMELEN];
	enum		{ PROG_init = 0x0,
			    PROG_callback = 0x1, PROG_rx_thread = 0x2,
			    PROG_tx_thread = 0x4}
			progress;

	progress = PROG_init;

	D1(vswp, "%s: enter", __func__);

	ldcp = kmem_zalloc(sizeof (vsw_ldc_t), KM_NOSLEEP);
	if (ldcp == NULL) {
		DERR(vswp, "%s: kmem_zalloc failed", __func__);
		return (1);
	}
	ldcp->ldc_id = ldc_id;

	mutex_init(&ldcp->ldc_txlock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->ldc_rxlock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->ldc_cblock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->drain_cv_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&ldcp->drain_cv, NULL, CV_DRIVER, NULL);
	rw_init(&ldcp->lane_in.dlistrw, NULL, RW_DRIVER, NULL);
	rw_init(&ldcp->lane_out.dlistrw, NULL, RW_DRIVER, NULL);

	/* required for handshake with peer */
	ldcp->local_session = (uint64_t)ddi_get_lbolt();
	ldcp->peer_session = 0;
	ldcp->session_status = 0;
	ldcp->hss_id = 1;	/* Initial handshake session id */

	(void) atomic_swap_32(&port->p_hio_capable, B_FALSE);

	/* only set for outbound lane, inbound set by peer */
	vsw_set_lane_attr(vswp, &ldcp->lane_out);

	attr.devclass = LDC_DEV_NT_SVC;
	attr.instance = ddi_get_instance(vswp->dip);
	attr.mode = LDC_MODE_UNRELIABLE;
	attr.mtu = VSW_LDC_MTU;
	status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
	if (status != 0) {
		DERR(vswp, "%s(%lld): ldc_init failed, rv (%d)",
		    __func__, ldc_id, status);
		goto ldc_attach_fail;
	}

	if (vsw_ldc_rxthr_enabled) {
		ldcp->rx_thr_flags = 0;

		mutex_init(&ldcp->rx_thr_lock, NULL, MUTEX_DRIVER, NULL);
		cv_init(&ldcp->rx_thr_cv, NULL, CV_DRIVER, NULL);
		ldcp->rx_thread = thread_create(NULL, 2 * DEFAULTSTKSZ,
		    vsw_ldc_rx_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri);

		progress |= PROG_rx_thread;
		if (ldcp->rx_thread == NULL) {
			DWARN(vswp, "%s(%lld): Failed to create worker thread",
			    __func__, ldc_id);
			goto ldc_attach_fail;
		}
	}

	if (vsw_ldc_txthr_enabled) {
		ldcp->tx_thr_flags = 0;
		ldcp->tx_mhead = ldcp->tx_mtail = NULL;

		mutex_init(&ldcp->tx_thr_lock, NULL, MUTEX_DRIVER, NULL);
		cv_init(&ldcp->tx_thr_cv, NULL, CV_DRIVER, NULL);
		ldcp->tx_thread = thread_create(NULL, 2 * DEFAULTSTKSZ,
		    vsw_ldc_tx_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri);

		progress |= PROG_tx_thread;
		if (ldcp->tx_thread == NULL) {
			DWARN(vswp, "%s(%lld): Failed to create worker thread",
			    __func__, ldc_id);
			goto ldc_attach_fail;
		}
	}

	status = ldc_reg_callback(ldcp->ldc_handle, vsw_ldc_cb, (caddr_t)ldcp);
	if (status != 0) {
		DERR(vswp, "%s(%lld): ldc_reg_callback failed, rv (%d)",
		    __func__, ldc_id, status);
		(void) ldc_fini(ldcp->ldc_handle);
		goto ldc_attach_fail;
	}
	/*
	 * allocate a message for ldc_read()s, big enough to hold ctrl and
	 * data msgs, including raw data msgs used to recv priority frames.
	 */
	ldcp->msglen = VIO_PKT_DATA_HDRSIZE + vswp->max_frame_size;
	ldcp->ldcmsg = kmem_alloc(ldcp->msglen, KM_SLEEP);

	progress |= PROG_callback;

	mutex_init(&ldcp->status_lock, NULL, MUTEX_DRIVER, NULL);

	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
		DERR(vswp, "%s: ldc_status failed", __func__);
		mutex_destroy(&ldcp->status_lock);
		goto ldc_attach_fail;
	}

	ldcp->ldc_status = istatus;
	ldcp->ldc_port = port;
	ldcp->ldc_vswp = vswp;

	vsw_reset_vnet_proto_ops(ldcp);

	(void) sprintf(kname, "%sldc0x%lx", DRV_NAME, ldcp->ldc_id);
	ldcp->ksp = vgen_setup_kstats(DRV_NAME, vswp->instance,
	    kname, &ldcp->ldc_stats);
	if (ldcp->ksp == NULL) {
		DERR(vswp, "%s: kstats setup failed", __func__);
		goto ldc_attach_fail;
	}

	/* link it into the list of channels for this port */
	WRITE_ENTER(&ldcl->lockrw);
	ldcp->ldc_next = ldcl->head;
	ldcl->head = ldcp;
	RW_EXIT(&ldcl->lockrw);

	D1(vswp, "%s: exit", __func__);
	return (0);

ldc_attach_fail:

	if (progress & PROG_callback) {
		(void) ldc_unreg_callback(ldcp->ldc_handle);
		kmem_free(ldcp->ldcmsg, ldcp->msglen);
	}

	if (progress & PROG_rx_thread) {
		if (ldcp->rx_thread != NULL) {
			vsw_stop_rx_thread(ldcp);
		}
		mutex_destroy(&ldcp->rx_thr_lock);
		cv_destroy(&ldcp->rx_thr_cv);
	}

	if (progress & PROG_tx_thread) {
		if (ldcp->tx_thread != NULL) {
			vsw_stop_tx_thread(ldcp);
		}
		mutex_destroy(&ldcp->tx_thr_lock);
		cv_destroy(&ldcp->tx_thr_cv);
	}
	if (ldcp->ksp != NULL) {
		vgen_destroy_kstats(ldcp->ksp);
	}
	mutex_destroy(&ldcp->ldc_txlock);
	mutex_destroy(&ldcp->ldc_rxlock);
	mutex_destroy(&ldcp->ldc_cblock);
	mutex_destroy(&ldcp->drain_cv_lock);

	cv_destroy(&ldcp->drain_cv);

	rw_destroy(&ldcp->lane_in.dlistrw);
	rw_destroy(&ldcp->lane_out.dlistrw);

	kmem_free(ldcp, sizeof (vsw_ldc_t));

	return (1);
}
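/*
 * Note (descriptive): the 'progress' bitmask above lets ldc_attach_fail
 * undo exactly the steps that completed. A hypothetical new setup step
 * would follow the same pattern, e.g.
 *
 *	progress |= PROG_new_step;
 *	if (the step failed)
 *		goto ldc_attach_fail;
 *
 * with a matching (progress & PROG_new_step) cleanup clause in the
 * failure path.
 */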
/*
 * Detach a logical domain channel (ldc) belonging to a
 * particular port.
 */
static void
vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id)
{
	vsw_t		*vswp = port->p_vswp;
	vsw_ldc_t	*ldcp, *prev_ldcp;
	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
	int		rv;
	int		retries = 0;
	vio_mblk_pool_t	*fvmp = NULL;

	prev_ldcp = ldcl->head;
	for (; (ldcp = prev_ldcp) != NULL; prev_ldcp = ldcp->ldc_next) {
		if (ldcp->ldc_id == ldc_id) {
			break;
		}
	}

	/* specified ldc id not found */
	ASSERT(ldcp != NULL);

	D2(vswp, "%s: detaching channel %lld", __func__, ldcp->ldc_id);

	/* Stop the receive thread */
	if (ldcp->rx_thread != NULL) {
		vsw_stop_rx_thread(ldcp);
		mutex_destroy(&ldcp->rx_thr_lock);
		cv_destroy(&ldcp->rx_thr_cv);
	}
	kmem_free(ldcp->ldcmsg, ldcp->msglen);

	/* Stop the tx thread */
	if (ldcp->tx_thread != NULL) {
		vsw_stop_tx_thread(ldcp);
		mutex_destroy(&ldcp->tx_thr_lock);
		cv_destroy(&ldcp->tx_thr_cv);
		if (ldcp->tx_mhead != NULL) {
			freemsgchain(ldcp->tx_mhead);
			ldcp->tx_mhead = ldcp->tx_mtail = NULL;
			ldcp->tx_cnt = 0;
		}
	}

	/* Destroy kstats */
	vgen_destroy_kstats(ldcp->ksp);

	/*
	 * Before we can close the channel we must release any mapped
	 * resources (e.g. drings).
	 */
	vsw_free_lane_resources(ldcp, INBOUND);
	vsw_free_lane_resources(ldcp, OUTBOUND);

	/*
	 * Close the channel, retry on EAGAIN.
	 */
	while ((rv = ldc_close(ldcp->ldc_handle)) == EAGAIN) {
		if (++retries > vsw_ldc_retries) {
			break;
		}
		drv_usecwait(vsw_ldc_delay);
	}
	if (rv != 0) {
		cmn_err(CE_NOTE,
		    "!vsw%d: Error(%d) closing the channel(0x%lx)\n",
		    vswp->instance, rv, ldcp->ldc_id);
	}

	(void) ldc_fini(ldcp->ldc_handle);

	ldcp->ldc_status = LDC_INIT;
	ldcp->ldc_handle = NULL;
	ldcp->ldc_vswp = NULL;

	/*
	 * If we can't destroy all the rx pools for this channel, dispatch
	 * a task to retry and clean up those rx pools. Note that we don't
	 * need to wait for the task to complete. If the vsw device itself
	 * gets detached (vsw_detach()), it will wait for the task to complete
	 * implicitly in ddi_taskq_destroy().
	 */
	vio_destroy_multipools(&ldcp->vmp, &fvmp);
	if (fvmp != NULL) {
		(void) ddi_taskq_dispatch(vswp->rxp_taskq,
		    vsw_destroy_rxpools, fvmp, DDI_SLEEP);
	}

	/* unlink it from the list */
	prev_ldcp = ldcp->ldc_next;

	mutex_destroy(&ldcp->ldc_txlock);
	mutex_destroy(&ldcp->ldc_rxlock);
	mutex_destroy(&ldcp->ldc_cblock);
	cv_destroy(&ldcp->drain_cv);
	mutex_destroy(&ldcp->drain_cv_lock);
	mutex_destroy(&ldcp->status_lock);
	rw_destroy(&ldcp->lane_in.dlistrw);
	rw_destroy(&ldcp->lane_out.dlistrw);

	kmem_free(ldcp, sizeof (vsw_ldc_t));
}
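/*
 * Note (descriptive): ldc_close() can return EAGAIN while the channel
 * still has outstanding activity; the loop above bounds the retries with
 * the vsw_ldc_retries and vsw_ldc_delay tunables rather than spinning
 * indefinitely, and logs a notice if the close ultimately fails.
 */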
/*
 * Open and attempt to bring up the channel. Note that channel
 * can only be brought up if peer has also opened channel.
 *
 * Returns 0 if can open and bring up channel, otherwise
 * returns 1.
 */
static int
vsw_ldc_init(vsw_ldc_t *ldcp)
{
	vsw_t		*vswp = ldcp->ldc_vswp;
	ldc_status_t	istatus = 0;
	int		rv;

	D1(vswp, "%s: enter", __func__);

	LDC_ENTER_LOCK(ldcp);

	/* don't start at 0 in case clients don't like that */
	ldcp->next_ident = 1;

	rv = ldc_open(ldcp->ldc_handle);
	if (rv != 0) {
		DERR(vswp, "%s: ldc_open failed: id(%lld) rv(%d)",
		    __func__, ldcp->ldc_id, rv);
		LDC_EXIT_LOCK(ldcp);
		return (1);
	}

	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
		DERR(vswp, "%s: unable to get status", __func__);
		LDC_EXIT_LOCK(ldcp);
		return (1);

	} else if (istatus != LDC_OPEN && istatus != LDC_READY) {
		DERR(vswp, "%s: id (%lld) status(%d) is not OPEN/READY",
		    __func__, ldcp->ldc_id, istatus);
		LDC_EXIT_LOCK(ldcp);
		return (1);
	}

	mutex_enter(&ldcp->status_lock);
	ldcp->ldc_status = istatus;
	mutex_exit(&ldcp->status_lock);

	rv = ldc_up(ldcp->ldc_handle);
	if (rv != 0) {
		/*
		 * Not a fatal error for ldc_up() to fail, as peer
		 * end point may simply not be ready yet.
		 */
		D2(vswp, "%s: ldc_up err id(%lld) rv(%d)", __func__,
		    ldcp->ldc_id, rv);
		LDC_EXIT_LOCK(ldcp);
		return (1);
	}

	/*
	 * ldc_up() call is non-blocking so need to explicitly
	 * check channel status to see if in fact the channel
	 * is UP.
	 */
	mutex_enter(&ldcp->status_lock);
	if (ldc_status(ldcp->ldc_handle, &ldcp->ldc_status) != 0) {
		DERR(vswp, "%s: unable to get status", __func__);
		mutex_exit(&ldcp->status_lock);
		LDC_EXIT_LOCK(ldcp);
		return (1);

	}

	if (ldcp->ldc_status == LDC_UP) {
		D2(vswp, "%s: channel %ld now UP (%ld)", __func__,
		    ldcp->ldc_id, istatus);
		mutex_exit(&ldcp->status_lock);
		LDC_EXIT_LOCK(ldcp);

		vsw_process_conn_evt(ldcp, VSW_CONN_UP);
		return (0);
	}

	mutex_exit(&ldcp->status_lock);
	LDC_EXIT_LOCK(ldcp);

	D1(vswp, "%s: exit", __func__);
	return (0);
}

/* disable callbacks on the channel */
static void
vsw_ldc_uninit(vsw_ldc_t *ldcp)
{
	vsw_t	*vswp = ldcp->ldc_vswp;
	int	rv;

	D1(vswp, "vsw_ldc_uninit: enter: id(%lx)\n", ldcp->ldc_id);

	LDC_ENTER_LOCK(ldcp);

	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
	if (rv != 0) {
		cmn_err(CE_NOTE, "!vsw_ldc_uninit(%ld): error disabling "
		    "interrupts (rv = %d)\n", ldcp->ldc_id, rv);
	}

	mutex_enter(&ldcp->status_lock);
	ldcp->ldc_status = LDC_INIT;
	mutex_exit(&ldcp->status_lock);

	LDC_EXIT_LOCK(ldcp);

	D1(vswp, "vsw_ldc_uninit: exit: id(%lx)", ldcp->ldc_id);
}

static int
vsw_init_ldcs(vsw_port_t *port)
{
	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
	vsw_ldc_t	*ldcp;

	READ_ENTER(&ldcl->lockrw);
	ldcp = ldcl->head;
	for (; ldcp != NULL; ldcp = ldcp->ldc_next) {
		(void) vsw_ldc_init(ldcp);
	}
	RW_EXIT(&ldcl->lockrw);

	return (0);
}

static void
vsw_uninit_ldcs(vsw_port_t *port)
{
	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
	vsw_ldc_t	*ldcp;

	D1(NULL, "vsw_uninit_ldcs: enter\n");

	READ_ENTER(&ldcl->lockrw);
	ldcp = ldcl->head;
	for (; ldcp != NULL; ldcp = ldcp->ldc_next) {
		vsw_ldc_uninit(ldcp);
	}
	RW_EXIT(&ldcl->lockrw);

	D1(NULL, "vsw_uninit_ldcs: exit\n");
}

/*
 * Wait until the callback(s) associated with the ldcs under the specified
 * port have completed.
 *
 * Prior to this function being invoked each channel under this port
 * should have been quiesced via ldc_set_cb_mode(DISABLE).
 *
 * A short explanation of what we are doing below..
 *
 * The simplest approach would be to have a reference counter in
 * the ldc structure which is incremented/decremented by the callbacks as
 * they use the channel. The drain function could then simply disable any
 * further callbacks and do a cv_wait for the ref to hit zero. Unfortunately
 * there is a tiny window here - before the callback is able to get the lock
 * on the channel it is interrupted and this function gets to execute. It
 * sees that the ref count is zero and believes it is free to delete the
 * associated data structures.
 *
 * We get around this by taking advantage of the fact that before the ldc
 * framework invokes a callback it sets a flag to indicate that there is a
 * callback active (or about to become active). If we attempt to unregister
 * a callback while this active flag is set then the unregister will fail
 * with EWOULDBLOCK.
 *
 * If the unregister fails we do a cv_timedwait.
 * We will either be signaled
 * by the callback as it is exiting (note we have to wait a short period to
 * allow the callback to return fully to the ldc framework and it to clear
 * the active flag), or by the timer expiring. In either case we again attempt
 * the unregister. We repeat this until we can successfully unregister the
 * callback.
 *
 * The reason we use a cv_timedwait rather than a simple cv_wait is to catch
 * the case where the callback has finished but the ldc framework has not yet
 * cleared the active flag. In this case we would never get a cv_signal.
 */
static void
vsw_drain_ldcs(vsw_port_t *port)
{
	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
	vsw_ldc_t	*ldcp;
	vsw_t		*vswp = port->p_vswp;

	D1(vswp, "%s: enter", __func__);

	READ_ENTER(&ldcl->lockrw);

	ldcp = ldcl->head;

	for (; ldcp != NULL; ldcp = ldcp->ldc_next) {
		/*
		 * If we can unregister the channel callback then we
		 * know that there is no callback either running or
		 * scheduled to run for this channel so move on to next
		 * channel in the list.
		 */
		mutex_enter(&ldcp->drain_cv_lock);

		/* prompt active callbacks to quit */
		ldcp->drain_state = VSW_LDC_DRAINING;

		if ((ldc_unreg_callback(ldcp->ldc_handle)) == 0) {
			D2(vswp, "%s: unreg callback for chan %ld", __func__,
			    ldcp->ldc_id);
			mutex_exit(&ldcp->drain_cv_lock);
			continue;
		} else {
			/*
			 * If we end up here we know that either 1) a callback
			 * is currently executing, 2) is about to start (i.e.
			 * the ldc framework has set the active flag but
			 * has not actually invoked the callback yet), or 3)
			 * has finished and has returned to the ldc framework
			 * but the ldc framework has not yet cleared the
			 * active bit.
			 *
			 * Wait for it to finish.
			 */
			while (ldc_unreg_callback(ldcp->ldc_handle)
			    == EWOULDBLOCK)
				(void) cv_reltimedwait(&ldcp->drain_cv,
				    &ldcp->drain_cv_lock, hz, TR_CLOCK_TICK);

			mutex_exit(&ldcp->drain_cv_lock);
			D2(vswp, "%s: unreg callback for chan %ld after "
			    "timeout", __func__, ldcp->ldc_id);
		}
	}
	RW_EXIT(&ldcl->lockrw);

	D1(vswp, "%s: exit", __func__);
}
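/*
 * Note (descriptive): the cv_signal() side of this handshake lives at the
 * end of vsw_ldc_cb(), which signals drain_cv whenever drain_state is
 * VSW_LDC_DRAINING, so a draining thread is woken as each callback exits.
 */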
/*
 * Wait until all tasks which reference this port have completed.
 *
 * Prior to this function being invoked each channel under this port
 * should have been quiesced via ldc_set_cb_mode(DISABLE).
 */
static void
vsw_drain_port_taskq(vsw_port_t *port)
{
	vsw_t		*vswp = port->p_vswp;

	D1(vswp, "%s: enter", __func__);

	/*
	 * Mark the port as in the process of being detached, and
	 * dispatch a marker task to the queue so we know when all
	 * relevant tasks have completed.
	 */
	mutex_enter(&port->state_lock);
	port->state = VSW_PORT_DETACHING;

	if ((vswp->taskq_p == NULL) ||
	    (ddi_taskq_dispatch(vswp->taskq_p, vsw_marker_task,
	    port, DDI_NOSLEEP) != DDI_SUCCESS)) {
		cmn_err(CE_NOTE, "!vsw%d: unable to dispatch marker task",
		    vswp->instance);
		mutex_exit(&port->state_lock);
		return;
	}

	/*
	 * Wait for the marker task to finish.
	 */
	while (port->state != VSW_PORT_DETACHABLE)
		cv_wait(&port->state_cv, &port->state_lock);

	mutex_exit(&port->state_lock);

	D1(vswp, "%s: exit", __func__);
}

static void
vsw_marker_task(void *arg)
{
	vsw_port_t	*port = arg;
	vsw_t		*vswp = port->p_vswp;

	D1(vswp, "%s: enter", __func__);

	mutex_enter(&port->state_lock);

	/*
	 * No further tasks should be dispatched which reference
	 * this port so ok to mark it as safe to detach.
	 */
	port->state = VSW_PORT_DETACHABLE;

	cv_signal(&port->state_cv);

	mutex_exit(&port->state_lock);

	D1(vswp, "%s: exit", __func__);
}

vsw_port_t *
vsw_lookup_port(vsw_t *vswp, int p_instance)
{
	vsw_port_list_t	*plist = &vswp->plist;
	vsw_port_t	*port;

	for (port = plist->head; port != NULL; port = port->p_next) {
		if (port->p_instance == p_instance) {
			D2(vswp, "vsw_lookup_port: found p_instance\n");
			return (port);
		}
	}

	return (NULL);
}

void
vsw_vlan_unaware_port_reset(vsw_port_t *portp)
{
	vsw_ldc_list_t	*ldclp;
	vsw_ldc_t	*ldcp;

	ldclp = &portp->p_ldclist;

	READ_ENTER(&ldclp->lockrw);

	/*
	 * NOTE: for now, we will assume we have a single channel.
	 */
	if (ldclp->head == NULL) {
		RW_EXIT(&ldclp->lockrw);
		return;
	}
	ldcp = ldclp->head;

	mutex_enter(&ldcp->ldc_cblock);

	/*
	 * If the peer is vlan_unaware(ver < 1.3), reset channel and terminate
	 * the connection. See comments in vsw_set_vnet_proto_ops().
	 */
	if (ldcp->hphase == VSW_MILESTONE4 && VSW_VER_LT(ldcp, 1, 3) &&
	    portp->nvids != 0) {
		vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
	}

	mutex_exit(&ldcp->ldc_cblock);

	RW_EXIT(&ldclp->lockrw);
}

void
vsw_hio_port_reset(vsw_port_t *portp, boolean_t immediate)
{
	vsw_ldc_list_t	*ldclp;
	vsw_ldc_t	*ldcp;

	ldclp = &portp->p_ldclist;

	READ_ENTER(&ldclp->lockrw);

	/*
	 * NOTE: for now, we will assume we have a single channel.
	 */
	if (ldclp->head == NULL) {
		RW_EXIT(&ldclp->lockrw);
		return;
	}
	ldcp = ldclp->head;

	mutex_enter(&ldcp->ldc_cblock);

	/*
	 * If the peer is HybridIO capable (ver >= 1.3), reset channel
	 * to trigger re-negotiation, which in turn triggers HybridIO
	 * setup/cleanup.
	 */
	if ((ldcp->hphase == VSW_MILESTONE4) &&
	    (portp->p_hio_capable == B_TRUE)) {
		if (immediate == B_TRUE) {
			(void) ldc_down(ldcp->ldc_handle);
		} else {
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
		}
	}

	mutex_exit(&ldcp->ldc_cblock);

	RW_EXIT(&ldclp->lockrw);
}
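/*
 * Note (descriptive): in vsw_hio_port_reset() above, 'immediate' selects
 * between forcing the channel down right away with ldc_down() (the peer
 * then observes a reset event) and the gentler VSW_CONN_RESTART path,
 * which restarts the handshake via the connection taskq.
 */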
void
vsw_port_reset(vsw_port_t *portp)
{
	vsw_ldc_list_t	*ldclp;
	vsw_ldc_t	*ldcp;

	ldclp = &portp->p_ldclist;

	READ_ENTER(&ldclp->lockrw);

	/*
	 * NOTE: for now, we will assume we have a single channel.
	 */
	if (ldclp->head == NULL) {
		RW_EXIT(&ldclp->lockrw);
		return;
	}
	ldcp = ldclp->head;

	mutex_enter(&ldcp->ldc_cblock);

	/*
	 * reset channel and terminate the connection.
	 */
	vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);

	mutex_exit(&ldcp->ldc_cblock);

	RW_EXIT(&ldclp->lockrw);
}

void
vsw_reset_ports(vsw_t *vswp)
{
	vsw_port_list_t	*plist = &vswp->plist;
	vsw_port_t	*portp;

	READ_ENTER(&plist->lockrw);
	for (portp = plist->head; portp != NULL; portp = portp->p_next) {
		if ((portp->p_hio_capable) && (portp->p_hio_enabled)) {
			vsw_hio_stop_port(portp);
		}
		vsw_port_reset(portp);
	}
	RW_EXIT(&plist->lockrw);
}

static void
vsw_send_physlink_msg(vsw_ldc_t *ldcp, link_state_t plink_state)
{
	vnet_physlink_msg_t	msg;
	vnet_physlink_msg_t	*msgp = &msg;
	uint32_t		physlink_info = 0;

	if (plink_state == LINK_STATE_UP) {
		physlink_info |= VNET_PHYSLINK_STATE_UP;
	} else {
		physlink_info |= VNET_PHYSLINK_STATE_DOWN;
	}

	msgp->tag.vio_msgtype = VIO_TYPE_CTRL;
	msgp->tag.vio_subtype = VIO_SUBTYPE_INFO;
	msgp->tag.vio_subtype_env = VNET_PHYSLINK_INFO;
	msgp->tag.vio_sid = ldcp->local_session;
	msgp->physlink_info = physlink_info;

	(void) vsw_send_msg(ldcp, msgp, sizeof (msg), B_TRUE);
}

static void
vsw_port_physlink_update(vsw_port_t *portp)
{
	vsw_ldc_list_t	*ldclp;
	vsw_ldc_t	*ldcp;
	vsw_t		*vswp;

	vswp = portp->p_vswp;
	ldclp = &portp->p_ldclist;

	READ_ENTER(&ldclp->lockrw);

	/*
	 * NOTE: for now, we will assume we have a single channel.
	 */
	if (ldclp->head == NULL) {
		RW_EXIT(&ldclp->lockrw);
		return;
	}
	ldcp = ldclp->head;

	mutex_enter(&ldcp->ldc_cblock);

	/*
	 * If handshake has completed successfully and if the vnet device
	 * has negotiated to get physical link state updates, send a message
	 * with the current state.
	 */
	if (ldcp->hphase == VSW_MILESTONE4 && ldcp->pls_negotiated == B_TRUE) {
		vsw_send_physlink_msg(ldcp, vswp->phys_link_state);
	}

	mutex_exit(&ldcp->ldc_cblock);

	RW_EXIT(&ldclp->lockrw);
}

void
vsw_physlink_update_ports(vsw_t *vswp)
{
	vsw_port_list_t	*plist = &vswp->plist;
	vsw_port_t	*portp;

	READ_ENTER(&plist->lockrw);
	for (portp = plist->head; portp != NULL; portp = portp->p_next) {
		vsw_port_physlink_update(portp);
	}
	RW_EXIT(&plist->lockrw);
}

/*
 * Search for and remove the specified port from the port
 * list. Returns 0 if able to locate and remove port, otherwise
 * returns 1.
 */
static int
vsw_plist_del_node(vsw_t *vswp, vsw_port_t *port)
{
	vsw_port_list_t	*plist = &vswp->plist;
	vsw_port_t	*curr_p, *prev_p;

	if (plist->head == NULL)
		return (1);

	curr_p = prev_p = plist->head;

	while (curr_p != NULL) {
		if (curr_p == port) {
			if (prev_p == curr_p) {
				plist->head = curr_p->p_next;
			} else {
				prev_p->p_next = curr_p->p_next;
			}
			plist->num_ports--;
			break;
		} else {
			prev_p = curr_p;
			curr_p = curr_p->p_next;
		}
	}
	return (0);
}
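/*
 * Note (descriptive): vsw_plist_del_node() assumes the caller holds the
 * port list lock as writer; both vsw_port_detach() and vsw_detach_ports()
 * call it under WRITE_ENTER(&plist->lockrw).
 */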
/*
 * Interrupt handler for ldc messages.
 */
static uint_t
vsw_ldc_cb(uint64_t event, caddr_t arg)
{
	vsw_ldc_t	*ldcp = (vsw_ldc_t *)arg;
	vsw_t		*vswp = ldcp->ldc_vswp;

	D1(vswp, "%s: enter: ldcid (%lld)\n", __func__, ldcp->ldc_id);

	mutex_enter(&ldcp->ldc_cblock);
	ldcp->ldc_stats.callbacks++;

	mutex_enter(&ldcp->status_lock);
	if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) {
		mutex_exit(&ldcp->status_lock);
		mutex_exit(&ldcp->ldc_cblock);
		return (LDC_SUCCESS);
	}
	mutex_exit(&ldcp->status_lock);

	if (event & LDC_EVT_UP) {
		/*
		 * Channel has come up.
		 */
		D2(vswp, "%s: id(%ld) event(%llx) UP: status(%ld)",
		    __func__, ldcp->ldc_id, event, ldcp->ldc_status);

		vsw_process_conn_evt(ldcp, VSW_CONN_UP);

		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
	}

	if (event & LDC_EVT_READ) {
		/*
		 * Data available for reading.
		 */
		D2(vswp, "%s: id(%ld) event(%llx) data READ",
		    __func__, ldcp->ldc_id, event);

		if (ldcp->rx_thread != NULL) {
			/*
			 * If the receive thread is enabled, then
			 * wakeup the receive thread to process the
			 * LDC messages.
			 */
			mutex_exit(&ldcp->ldc_cblock);
			mutex_enter(&ldcp->rx_thr_lock);
			if (!(ldcp->rx_thr_flags & VSW_WTHR_DATARCVD)) {
				ldcp->rx_thr_flags |= VSW_WTHR_DATARCVD;
				cv_signal(&ldcp->rx_thr_cv);
			}
			mutex_exit(&ldcp->rx_thr_lock);
			mutex_enter(&ldcp->ldc_cblock);
		} else {
			vsw_process_pkt(ldcp);
		}

		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);

		goto vsw_cb_exit;
	}

	if (event & (LDC_EVT_DOWN | LDC_EVT_RESET)) {
		D2(vswp, "%s: id(%ld) event (%lx) DOWN/RESET: status(%ld)",
		    __func__, ldcp->ldc_id, event, ldcp->ldc_status);

		vsw_process_conn_evt(ldcp, VSW_CONN_RESET);
	}

	/*
	 * Catch either LDC_EVT_WRITE which we don't support or any
	 * unknown event.
	 */
	if (event &
	    ~(LDC_EVT_UP | LDC_EVT_RESET | LDC_EVT_DOWN | LDC_EVT_READ)) {
		DERR(vswp, "%s: id(%ld) Unexpected event=(%llx) status(%ld)",
		    __func__, ldcp->ldc_id, event, ldcp->ldc_status);
	}

vsw_cb_exit:
	mutex_exit(&ldcp->ldc_cblock);

	/*
	 * Let the drain function know we are finishing if it
	 * is waiting.
	 */
	mutex_enter(&ldcp->drain_cv_lock);
	if (ldcp->drain_state == VSW_LDC_DRAINING)
		cv_signal(&ldcp->drain_cv);
	mutex_exit(&ldcp->drain_cv_lock);

	return (LDC_SUCCESS);
}

/*
 * Reinitialise data structures associated with the channel.
 */
static void
vsw_ldc_reinit(vsw_ldc_t *ldcp)
{
	vsw_t		*vswp = ldcp->ldc_vswp;
	vsw_port_t	*port;
	vsw_ldc_list_t	*ldcl;
	vio_mblk_pool_t	*fvmp = NULL;

	D1(vswp, "%s: enter", __func__);

	/*
	 * If we can't destroy all the rx pools for this channel, dispatch
	 * a task to retry and clean up those rx pools. Note that we don't
	 * need to wait for the task to complete. If the vsw device itself
	 * gets detached (vsw_detach()), it will wait for the task to complete
	 * implicitly in ddi_taskq_destroy().
	 */
	vio_destroy_multipools(&ldcp->vmp, &fvmp);
	if (fvmp != NULL) {
		(void) ddi_taskq_dispatch(vswp->rxp_taskq,
		    vsw_destroy_rxpools, fvmp, DDI_SLEEP);
	}

	port = ldcp->ldc_port;
	ldcl = &port->p_ldclist;

	READ_ENTER(&ldcl->lockrw);

	D2(vswp, "%s: in 0x%llx : out 0x%llx", __func__,
	    ldcp->lane_in.lstate, ldcp->lane_out.lstate);

	vsw_free_lane_resources(ldcp, INBOUND);
	vsw_free_lane_resources(ldcp, OUTBOUND);
	RW_EXIT(&ldcl->lockrw);

	ldcp->lane_in.lstate = 0;
	ldcp->lane_out.lstate = 0;

	/* Remove the fdb entry for this port/mac address */
	vsw_fdbe_del(vswp, &(port->p_macaddr));

	/* remove the port from vlans it has been assigned to */
	vsw_vlan_remove_ids(port, VSW_VNETPORT);

	/*
	 * Remove parent port from any multicast groups
	 * it may have registered with. Client must resend
	 * multicast add command after handshake completes.
	 */
	vsw_del_mcst_port(port);

	ldcp->peer_session = 0;
	ldcp->session_status = 0;
	ldcp->hcnt = 0;
	ldcp->hphase = VSW_MILESTONE0;

	vsw_reset_vnet_proto_ops(ldcp);

	D1(vswp, "%s: exit", __func__);
}

/*
 * Process a connection event.
 *
 * Note - care must be taken to ensure that this function is
 * not called with the dlistrw lock held.
 */
static void
vsw_process_conn_evt(vsw_ldc_t *ldcp, uint16_t evt)
{
	vsw_t		*vswp = ldcp->ldc_vswp;
	vsw_conn_evt_t	*conn = NULL;

	D1(vswp, "%s: enter", __func__);

	/*
	 * Check if either a reset or restart event is pending
	 * or in progress. If so just return.
	 *
	 * A VSW_CONN_RESET event originates either with a LDC_RESET_EVT
	 * being received by the callback handler, or a ECONNRESET error
	 * code being returned from a ldc_read() or ldc_write() call.
	 *
	 * A VSW_CONN_RESTART event occurs when some error checking code
	 * decides that there is a problem with data from the channel,
	 * and that the handshake should be restarted.
	 */
	if (((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART)) &&
	    (ldstub((uint8_t *)&ldcp->reset_active)))
		return;

	/*
	 * If it is an LDC_UP event we first check the recorded
	 * state of the channel. If this is UP then we know that
	 * the channel moving to the UP state has already been dealt
	 * with and don't need to dispatch a new task.
	 *
	 * The reason for this check is that when we do a ldc_up(),
	 * depending on the state of the peer, we may or may not get
	 * a LDC_UP event. As we can't depend on getting a LDC_UP evt
	 * every time we do ldc_up() we explicitly check the channel
	 * status to see whether it has come up (ldc_up() is asynch and
	 * will complete at some undefined time), and take the appropriate
	 * action.
	 *
	 * The flip side of this is that we may get a LDC_UP event
	 * when we have already seen that the channel is up and have
	 * dealt with that.
	 */
	mutex_enter(&ldcp->status_lock);
	if (evt == VSW_CONN_UP) {
		if ((ldcp->ldc_status == LDC_UP) ||
		    (ldcp->reset_active != 0)) {
			mutex_exit(&ldcp->status_lock);
			return;
		}
	}
	mutex_exit(&ldcp->status_lock);

	/*
	 * The transaction group id allows us to identify and discard
	 * any tasks which are still pending on the taskq and refer
	 * to the handshake session we are about to restart or reset.
	 * These stale messages no longer have any real meaning.
	 */
	(void) atomic_inc_32(&ldcp->hss_id);

	ASSERT(vswp->taskq_p != NULL);

	if ((conn = kmem_zalloc(sizeof (vsw_conn_evt_t), KM_NOSLEEP))
	    == NULL) {
		cmn_err(CE_WARN, "!vsw%d: unable to allocate memory for"
		    " connection event", vswp->instance);
		goto err_exit;
	}

	conn->evt = evt;
	conn->ldcp = ldcp;

	if (ddi_taskq_dispatch(vswp->taskq_p, vsw_conn_task, conn,
	    DDI_NOSLEEP) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "!vsw%d: Can't dispatch connection task",
		    vswp->instance);

		kmem_free(conn, sizeof (vsw_conn_evt_t));
		goto err_exit;
	}

	D1(vswp, "%s: exit", __func__);
	return;

err_exit:
	/*
	 * Have most likely failed due to memory shortage. Clear the flag so
	 * that future requests will at least be attempted and will hopefully
	 * succeed.
	 */
	if ((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART))
		ldcp->reset_active = 0;
}
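/*
 * Note (descriptive): ldstub() atomically sets reset_active non-zero and
 * returns its previous contents, so only the first caller to report a
 * reset/restart proceeds; later callers see the flag already set and
 * return early. The flag is cleared once the event has been handled (or
 * could not be dispatched).
 */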
/*
 * Deal with events relating to a connection. Invoked from a taskq.
 */
static void
vsw_conn_task(void *arg)
{
	vsw_conn_evt_t	*conn = (vsw_conn_evt_t *)arg;
	vsw_ldc_t	*ldcp = NULL;
	vsw_port_t	*portp;
	vsw_t		*vswp = NULL;
	uint16_t	evt;
	ldc_status_t	curr_status;

	ldcp = conn->ldcp;
	evt = conn->evt;
	vswp = ldcp->ldc_vswp;
	portp = ldcp->ldc_port;

	D1(vswp, "%s: enter", __func__);

	/* can safely free now have copied out data */
	kmem_free(conn, sizeof (vsw_conn_evt_t));

	mutex_enter(&ldcp->status_lock);
	if (ldc_status(ldcp->ldc_handle, &curr_status) != 0) {
		cmn_err(CE_WARN, "!vsw%d: Unable to read status of "
		    "channel %ld", vswp->instance, ldcp->ldc_id);
		mutex_exit(&ldcp->status_lock);
		return;
	}

	/*
	 * If we wish to restart the handshake on this channel, then if
	 * the channel is UP we bring it DOWN to flush the underlying
	 * ldc queue.
	 */
	if ((evt == VSW_CONN_RESTART) && (curr_status == LDC_UP))
		(void) ldc_down(ldcp->ldc_handle);

	if ((portp->p_hio_capable) && (portp->p_hio_enabled)) {
		vsw_hio_stop(vswp, ldcp);
	}

	/*
	 * re-init all the associated data structures.
	 */
	vsw_ldc_reinit(ldcp);

	/*
	 * Bring the channel back up (note it does no harm to
	 * do this even if the channel is already UP, Just
	 * becomes effectively a no-op).
	 */
	(void) ldc_up(ldcp->ldc_handle);

	/*
	 * Check if channel is now UP. This will only happen if
	 * peer has also done a ldc_up().
	 */
	if (ldc_status(ldcp->ldc_handle, &curr_status) != 0) {
		cmn_err(CE_WARN, "!vsw%d: Unable to read status of "
		    "channel %ld", vswp->instance, ldcp->ldc_id);
		mutex_exit(&ldcp->status_lock);
		return;
	}

	ldcp->ldc_status = curr_status;

	/* channel UP so restart handshake by sending version info */
	if (curr_status == LDC_UP) {
		if (ldcp->hcnt++ > vsw_num_handshakes) {
			cmn_err(CE_WARN, "!vsw%d: exceeded number of permitted"
			    " handshake attempts (%d) on channel %ld",
			    vswp->instance, ldcp->hcnt, ldcp->ldc_id);
			mutex_exit(&ldcp->status_lock);
			return;
		}

		if (vsw_obp_ver_proto_workaround == B_FALSE &&
		    (ddi_taskq_dispatch(vswp->taskq_p, vsw_send_ver, ldcp,
		    DDI_NOSLEEP) != DDI_SUCCESS)) {
			cmn_err(CE_WARN, "!vsw%d: Can't dispatch version task",
			    vswp->instance);

			/*
			 * Don't count as valid restart attempt if couldn't
			 * send version msg.
			 */
			if (ldcp->hcnt > 0)
				ldcp->hcnt--;
		}
	}

	/*
	 * Mark that the process is complete by clearing the flag.
	 *
	 * Note it is possible that the taskq dispatch above may have failed,
	 * most likely due to memory shortage. We still clear the flag so
	 * future attempts will at least be attempted and will hopefully
	 * succeed.
	 */
	if ((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART))
		ldcp->reset_active = 0;

	mutex_exit(&ldcp->status_lock);

	D1(vswp, "%s: exit", __func__);
}
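/*
 * Note (descriptive): hcnt counts consecutive handshake restarts on a
 * channel and is reset to 0 once a handshake completes (see the milestone
 * 3 handling in vsw_next_milestone()); exceeding vsw_num_handshakes above
 * abandons the restart rather than looping indefinitely.
 */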

/*
 * Returns 0 if it is legal for the event signified by the flag to have
 * occurred at the time it did. Otherwise returns 1.
 */
int
vsw_check_flag(vsw_ldc_t *ldcp, int dir, uint64_t flag)
{
	vsw_t		*vswp = ldcp->ldc_vswp;
	uint64_t	state;
	uint64_t	phase;

	if (dir == INBOUND)
		state = ldcp->lane_in.lstate;
	else
		state = ldcp->lane_out.lstate;

	phase = ldcp->hphase;

	switch (flag) {
	case VSW_VER_INFO_RECV:
		if (phase > VSW_MILESTONE0) {
			DERR(vswp, "vsw_check_flag (%d): VER_INFO_RECV"
			    " when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		}
		break;

	case VSW_VER_ACK_RECV:
	case VSW_VER_NACK_RECV:
		if (!(state & VSW_VER_INFO_SENT)) {
			DERR(vswp, "vsw_check_flag (%d): spurious VER_ACK or "
			    "VER_NACK when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		} else
			state &= ~VSW_VER_INFO_SENT;
		break;

	case VSW_ATTR_INFO_RECV:
		if ((phase < VSW_MILESTONE1) || (phase >= VSW_MILESTONE2)) {
			DERR(vswp, "vsw_check_flag (%d): ATTR_INFO_RECV"
			    " when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		}
		break;

	case VSW_ATTR_ACK_RECV:
	case VSW_ATTR_NACK_RECV:
		if (!(state & VSW_ATTR_INFO_SENT)) {
			DERR(vswp, "vsw_check_flag (%d): spurious ATTR_ACK"
			    " or ATTR_NACK when in state %d\n",
			    ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		} else
			state &= ~VSW_ATTR_INFO_SENT;
		break;

	case VSW_DRING_INFO_RECV:
		if (phase < VSW_MILESTONE1) {
			DERR(vswp, "vsw_check_flag (%d): DRING_INFO_RECV"
			    " when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		}
		break;

	case VSW_DRING_ACK_RECV:
	case VSW_DRING_NACK_RECV:
		if (!(state & VSW_DRING_INFO_SENT)) {
			DERR(vswp, "vsw_check_flag (%d): spurious DRING_ACK"
			    " or DRING_NACK when in state %d\n",
			    ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		} else
			state &= ~VSW_DRING_INFO_SENT;
		break;

	case VSW_RDX_INFO_RECV:
		if (phase < VSW_MILESTONE3) {
			DERR(vswp, "vsw_check_flag (%d): RDX_INFO_RECV"
			    " when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		}
		break;

	case VSW_RDX_ACK_RECV:
	case VSW_RDX_NACK_RECV:
		if (!(state & VSW_RDX_INFO_SENT)) {
			DERR(vswp, "vsw_check_flag (%d): spurious RDX_ACK or "
			    "RDX_NACK when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		} else
			state &= ~VSW_RDX_INFO_SENT;
		break;

	case VSW_MCST_INFO_RECV:
		if (phase < VSW_MILESTONE3) {
			DERR(vswp, "vsw_check_flag (%d): VSW_MCST_INFO_RECV"
			    " when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		}
		break;

	default:
		DERR(vswp, "vsw_check_flag (%lld): unknown flag (%llx)",
		    ldcp->ldc_id, flag);
		return (1);
	}

	if (dir == INBOUND)
		ldcp->lane_in.lstate = state;
	else
		ldcp->lane_out.lstate = state;

	D1(vswp, "vsw_check_flag (chan %lld): exit", ldcp->ldc_id);

	return (0);
}
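
/*
 * Illustrative sketch (not part of the driver): the lane state protocol
 * that vsw_check_flag() polices. A *_SENT bit is set when we send a
 * message, must be present when the matching ACK/NACK arrives, and is
 * then cleared; an ACK that arrives without the bit set is spurious and
 * forces a handshake restart. The ex_* names are hypothetical.
 */
#if 0
#define	EX_VER_INFO_SENT	0x1

static int
ex_recv_ver_ack(uint64_t *lstate)
{
	if (!(*lstate & EX_VER_INFO_SENT))
		return (1);		/* spurious: restart handshake */
	*lstate &= ~EX_VER_INFO_SENT;	/* consume the outstanding send */
	return (0);
}
#endif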

void
vsw_next_milestone(vsw_ldc_t *ldcp)
{
	vsw_t		*vswp = ldcp->ldc_vswp;
	vsw_port_t	*portp = ldcp->ldc_port;

	D1(vswp, "%s (chan %lld): enter (phase %ld)", __func__,
	    ldcp->ldc_id, ldcp->hphase);

	DUMP_FLAGS(ldcp->lane_in.lstate);
	DUMP_FLAGS(ldcp->lane_out.lstate);

	switch (ldcp->hphase) {

	case VSW_MILESTONE0:
		/*
		 * If we haven't started to handshake with our peer,
		 * start to do so now.
		 */
		if (ldcp->lane_out.lstate == 0) {
			D2(vswp, "%s: (chan %lld) starting handshake "
			    "with peer", __func__, ldcp->ldc_id);
			vsw_process_conn_evt(ldcp, VSW_CONN_UP);
		}

		/*
		 * Only way to pass this milestone is to have successfully
		 * negotiated version info.
		 */
		if ((ldcp->lane_in.lstate & VSW_VER_ACK_SENT) &&
		    (ldcp->lane_out.lstate & VSW_VER_ACK_RECV)) {

			D2(vswp, "%s: (chan %lld) leaving milestone 0",
			    __func__, ldcp->ldc_id);

			vsw_set_vnet_proto_ops(ldcp);

			/*
			 * Next milestone is passed when attribute
			 * information has been successfully exchanged.
			 */
			ldcp->hphase = VSW_MILESTONE1;
			vsw_send_attr(ldcp);

		}
		break;

	case VSW_MILESTONE1:
		/*
		 * Only way to pass this milestone is to have successfully
		 * negotiated attribute information.
		 */
		if (ldcp->lane_in.lstate & VSW_ATTR_ACK_SENT) {

			ldcp->hphase = VSW_MILESTONE2;

			/*
			 * If the peer device has said it wishes to
			 * use descriptor rings then we send it our ring
			 * info, otherwise we just set up a private ring
			 * which uses an internal buffer.
			 */
			if ((VSW_VER_GTEQ(ldcp, 1, 2) &&
			    (ldcp->lane_in.xfer_mode & VIO_DRING_MODE_V1_2)) ||
			    (VSW_VER_LT(ldcp, 1, 2) &&
			    (ldcp->lane_in.xfer_mode ==
			    VIO_DRING_MODE_V1_0))) {
				vsw_send_dring_info(ldcp);
			}
		}
		break;

	case VSW_MILESTONE2:
		/*
		 * If the peer has indicated in its attribute message that
		 * it wishes to use descriptor rings then the only way
		 * to pass this milestone is for us to have received
		 * valid dring info.
		 *
		 * If the peer is not using descriptor rings then just fall
		 * through.
		 */
		if ((VSW_VER_GTEQ(ldcp, 1, 2) &&
		    (ldcp->lane_in.xfer_mode & VIO_DRING_MODE_V1_2)) ||
		    (VSW_VER_LT(ldcp, 1, 2) &&
		    (ldcp->lane_in.xfer_mode ==
		    VIO_DRING_MODE_V1_0))) {
			if (!(ldcp->lane_in.lstate & VSW_DRING_ACK_SENT))
				break;
		}

		D2(vswp, "%s: (chan %lld) leaving milestone 2",
		    __func__, ldcp->ldc_id);

		ldcp->hphase = VSW_MILESTONE3;
		vsw_send_rdx(ldcp);
		break;

	case VSW_MILESTONE3:
		/*
		 * Pass this milestone when all parameters have been
		 * successfully exchanged and RDX sent in both directions.
		 *
		 * Mark the outbound lane as available to transmit data.
		 */
		if ((ldcp->lane_out.lstate & VSW_RDX_ACK_SENT) &&
		    (ldcp->lane_in.lstate & VSW_RDX_ACK_RECV)) {

			D2(vswp, "%s: (chan %lld) leaving milestone 3",
			    __func__, ldcp->ldc_id);
			D2(vswp, "%s: ** handshake complete (0x%llx : "
			    "0x%llx) **", __func__, ldcp->lane_in.lstate,
			    ldcp->lane_out.lstate);
			ldcp->lane_out.lstate |= VSW_LANE_ACTIVE;
			ldcp->hphase = VSW_MILESTONE4;
			ldcp->hcnt = 0;
			DISPLAY_STATE();
			/* Start HIO if enabled and capable */
			if ((portp->p_hio_enabled) && (portp->p_hio_capable)) {
				D2(vswp, "%s: start HybridIO setup", __func__);
				vsw_hio_start(vswp, ldcp);
			}

			if (ldcp->pls_negotiated == B_TRUE) {
				/*
				 * The vnet device has negotiated to get phys
				 * link updates. Now that the handshake with
				 * the vnet device is complete, send an initial
				 * update with the current physical link state.
				 */
				vsw_send_physlink_msg(ldcp,
				    vswp->phys_link_state);
			}

		} else {
			D2(vswp, "%s: still in milestone 3 (0x%llx : 0x%llx)",
			    __func__, ldcp->lane_in.lstate,
			    ldcp->lane_out.lstate);
		}
		break;

	case VSW_MILESTONE4:
		D2(vswp, "%s: (chan %lld) in milestone 4", __func__,
		    ldcp->ldc_id);
		break;

	default:
		DERR(vswp, "%s: (chan %lld) Unknown Phase %x", __func__,
		    ldcp->ldc_id, ldcp->hphase);
	}

	D1(vswp, "%s (chan %lld): exit (phase %ld)", __func__, ldcp->ldc_id,
	    ldcp->hphase);
}

/*
 * Check if the major version is supported.
 *
 * Returns 0 if it finds a supported major number, and if necessary
 * adjusts the minor field.
 *
 * Returns 1 if it can't match the major number exactly. Sets major/minor
 * to the next lowest supported values, or to zero if no other values are
 * possible.
 */
static int
vsw_supported_version(vio_ver_msg_t *vp)
{
	int	i;

	D1(NULL, "vsw_supported_version: enter");

	for (i = 0; i < VSW_NUM_VER; i++) {
		if (vsw_versions[i].ver_major == vp->ver_major) {
			/*
			 * Matching or lower major version found. Update
			 * minor number if necessary.
			 */
			if (vp->ver_minor > vsw_versions[i].ver_minor) {
				D2(NULL, "%s: adjusting minor value from %d "
				    "to %d", __func__, vp->ver_minor,
				    vsw_versions[i].ver_minor);
				vp->ver_minor = vsw_versions[i].ver_minor;
			}

			return (0);
		}

		/*
		 * If the message contains a higher major version number, set
		 * the message's major/minor versions to the current values
		 * and return false, so this message will get resent with
		 * these values.
		 */
		if (vsw_versions[i].ver_major < vp->ver_major) {
			D2(NULL, "%s: adjusting major and minor "
			    "values to %d, %d\n",
			    __func__, vsw_versions[i].ver_major,
			    vsw_versions[i].ver_minor);
			vp->ver_major = vsw_versions[i].ver_major;
			vp->ver_minor = vsw_versions[i].ver_minor;
			return (1);
		}
	}

	/* No match was possible, zero out fields */
	vp->ver_major = 0;
	vp->ver_minor = 0;

	D1(NULL, "vsw_supported_version: exit");

	return (1);
}
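
/*
 * Illustrative sketch (not part of the driver): how the walk in
 * vsw_supported_version() behaves, assuming for the sake of the example
 * that vsw_versions[] holds { {1, 5}, {1, 0} } in descending order (the
 * real table may differ). A request for 1.7 has its minor trimmed to 1.5
 * and is accepted (return 0); a request for 2.0 is rewritten to 1.5 and
 * bounced back for renegotiation (return 1); a request for 0.9 falls off
 * the end of the table and is zeroed out. Only the ver fields of the
 * message are touched.
 */
#if 0
static void
ex_negotiate(void)
{
	vio_ver_msg_t	msg;

	msg.ver_major = 1;
	msg.ver_minor = 7;
	(void) vsw_supported_version(&msg);	/* 0: msg is now 1.5 */

	msg.ver_major = 2;
	msg.ver_minor = 0;
	(void) vsw_supported_version(&msg);	/* 1: msg is now 1.5, resend */

	msg.ver_major = 0;
	msg.ver_minor = 9;
	(void) vsw_supported_version(&msg);	/* 1: msg is now 0.0, stop */
}
#endif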

/*
 * Set vnet-protocol-version dependent functions based on version.
 */
static void
vsw_set_vnet_proto_ops(vsw_ldc_t *ldcp)
{
	vsw_t	*vswp = ldcp->ldc_vswp;
	lane_t	*lp = &ldcp->lane_out;

	if (VSW_VER_GTEQ(ldcp, 1, 4)) {
		/*
		 * If the version negotiated with the peer is >= 1.4 (Jumbo
		 * Frame Support), set the mtu in our attributes to
		 * max_frame_size.
		 */
		lp->mtu = vswp->max_frame_size;
	} else if (VSW_VER_EQ(ldcp, 1, 3)) {
		/*
		 * If the version negotiated with the peer is == 1.3 (Vlan Tag
		 * Support), set the attr.mtu to ETHERMAX + VLAN_TAGSZ.
		 */
		lp->mtu = ETHERMAX + VLAN_TAGSZ;
	} else {
		vsw_port_t	*portp = ldcp->ldc_port;
		/*
		 * Pre-1.3 peers expect a max frame size of ETHERMAX.
		 * We can negotiate that size with those peers provided only
		 * a pvid is defined for our peer and there are no vids. Then
		 * we can send/recv only untagged frames of max size ETHERMAX.
		 * Note that the pvid of the peer can be different, as vsw has
		 * to serve the vnet in that vlan even if it is not itself
		 * assigned to that vlan.
		 */
		if (portp->nvids == 0) {
			lp->mtu = ETHERMAX;
		}
	}

	if (VSW_VER_GTEQ(ldcp, 1, 2)) {
		/* Versions >= 1.2 */

		if (VSW_PRI_ETH_DEFINED(vswp)) {
			/*
			 * Enable priority routines and pkt mode only if
			 * at least one pri-eth-type is specified in MD.
			 */
			ldcp->tx = vsw_ldctx_pri;
			ldcp->rx_pktdata = vsw_process_pkt_data;

			/* set xfer mode for vsw_send_attr() */
			lp->xfer_mode = VIO_PKT_MODE | VIO_DRING_MODE_V1_2;
		} else {
			/* no priority eth types defined in MD */

			ldcp->tx = vsw_ldctx;
			ldcp->rx_pktdata = vsw_process_pkt_data_nop;

			/* set xfer mode for vsw_send_attr() */
			lp->xfer_mode = VIO_DRING_MODE_V1_2;
		}

	} else {
		/* Versions prior to 1.2 */

		vsw_reset_vnet_proto_ops(ldcp);
	}
}

/*
 * Reset vnet-protocol-version dependent functions to v1.0.
 */
static void
vsw_reset_vnet_proto_ops(vsw_ldc_t *ldcp)
{
	lane_t	*lp = &ldcp->lane_out;

	ldcp->tx = vsw_ldctx;
	ldcp->rx_pktdata = vsw_process_pkt_data_nop;

	/* set xfer mode for vsw_send_attr() */
	lp->xfer_mode = VIO_DRING_MODE_V1_0;
}
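
/*
 * Illustrative sketch (not part of the driver): the version-dependent
 * mtu selection performed by vsw_set_vnet_proto_ops() above, written as
 * a pure function. The max_frame and untagged_only parameters stand in
 * for vswp->max_frame_size and the "pvid only, no vids" condition; the
 * ex_ prefix marks the function as hypothetical.
 */
#if 0
static uint32_t
ex_lane_mtu(uint16_t major, uint16_t minor, uint32_t max_frame,
    boolean_t untagged_only, uint32_t cur_mtu)
{
	if (major > 1 || (major == 1 && minor >= 4))
		return (max_frame);		/* 1.4+: jumbo frames */
	if (major == 1 && minor == 3)
		return (ETHERMAX + VLAN_TAGSZ);	/* 1.3: vlan tag support */
	if (untagged_only)
		return (ETHERMAX);		/* pre-1.3: untagged only */
	return (cur_mtu);			/* otherwise leave unchanged */
}
#endif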

/*
 * Main routine for processing messages received over LDC.
 */
static void
vsw_process_pkt(void *arg)
{
	vsw_ldc_t	*ldcp = (vsw_ldc_t *)arg;
	vsw_t		*vswp = ldcp->ldc_vswp;
	size_t		msglen;
	vio_msg_tag_t	*tagp;
	uint64_t	*ldcmsg;
	int		rv = 0;

	D1(vswp, "%s enter: ldcid (%lld)\n", __func__, ldcp->ldc_id);

	ASSERT(MUTEX_HELD(&ldcp->ldc_cblock));

	ldcmsg = ldcp->ldcmsg;
	/*
	 * If the channel is up, read messages until the channel is empty.
	 */
	do {
		msglen = ldcp->msglen;
		rv = ldc_read(ldcp->ldc_handle, (caddr_t)ldcmsg, &msglen);

		if (rv != 0) {
			DERR(vswp, "%s: ldc_read err id(%lld) rv(%d) len(%d)\n",
			    __func__, ldcp->ldc_id, rv, msglen);
		}

		/* channel has been reset */
		if (rv == ECONNRESET) {
			vsw_process_conn_evt(ldcp, VSW_CONN_RESET);
			break;
		}

		if (msglen == 0) {
			D2(vswp, "%s: ldc_read id(%lld) NODATA", __func__,
			    ldcp->ldc_id);
			break;
		}

		D2(vswp, "%s: ldc_read id(%lld): msglen(%d)", __func__,
		    ldcp->ldc_id, msglen);

		/*
		 * Figure out what sort of packet we have gotten by
		 * examining the msg tag, and then switch it appropriately.
		 */
		tagp = (vio_msg_tag_t *)ldcmsg;

		switch (tagp->vio_msgtype) {
		case VIO_TYPE_CTRL:
			vsw_dispatch_ctrl_task(ldcp, ldcmsg, tagp);
			break;
		case VIO_TYPE_DATA:
			vsw_process_data_pkt(ldcp, ldcmsg, tagp, msglen);
			break;
		case VIO_TYPE_ERR:
			vsw_process_err_pkt(ldcp, ldcmsg, tagp);
			break;
		default:
			DERR(vswp, "%s: unknown tag(%lx) id(%lx)\n",
			    __func__, tagp->vio_msgtype, ldcp->ldc_id);
			break;
		}
	} while (msglen);

	D1(vswp, "%s exit: ldcid (%lld)\n", __func__, ldcp->ldc_id);
}
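
/*
 * Illustrative sketch (not part of the driver): the three-level triage
 * applied to every inbound VIO message. vsw_process_pkt() switches on
 * tag.vio_msgtype (CTRL/DATA/ERR), the CTRL and DATA handlers then switch
 * on tag.vio_subtype_env (which message family), and the per-family
 * handlers finally switch on tag.vio_subtype (INFO/ACK/NACK). The ex_
 * name is hypothetical.
 */
#if 0
static void
ex_triage(vio_msg_tag_t *tagp)
{
	if (tagp->vio_msgtype != VIO_TYPE_CTRL)
		return;				/* level 1: DATA/ERR elsewhere */
	if (tagp->vio_subtype_env != VIO_VER_INFO)
		return;				/* level 2: other families */
	switch (tagp->vio_subtype) {		/* level 3: INFO/ACK/NACK */
	case VIO_SUBTYPE_INFO:
	case VIO_SUBTYPE_ACK:
	case VIO_SUBTYPE_NACK:
	default:
		break;
	}
}
#endif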

/*
 * Dispatch a task to process a VIO control message.
 */
static void
vsw_dispatch_ctrl_task(vsw_ldc_t *ldcp, void *cpkt, vio_msg_tag_t *tagp)
{
	vsw_ctrl_task_t	*ctaskp = NULL;
	vsw_port_t	*port = ldcp->ldc_port;
	vsw_t		*vswp = port->p_vswp;

	D1(vswp, "%s: enter", __func__);

	/*
	 * We need to handle RDX ACK messages in-band as once they
	 * are exchanged it is possible that we will get an
	 * immediate (legitimate) data packet.
	 */
	if ((tagp->vio_subtype_env == VIO_RDX) &&
	    (tagp->vio_subtype == VIO_SUBTYPE_ACK)) {

		if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_ACK_RECV))
			return;

		ldcp->lane_in.lstate |= VSW_RDX_ACK_RECV;
		D2(vswp, "%s (%ld) handling RDX_ACK in place "
		    "(ostate 0x%llx : hphase %d)", __func__,
		    ldcp->ldc_id, ldcp->lane_in.lstate, ldcp->hphase);
		vsw_next_milestone(ldcp);
		return;
	}

	ctaskp = kmem_alloc(sizeof (vsw_ctrl_task_t), KM_NOSLEEP);

	if (ctaskp == NULL) {
		DERR(vswp, "%s: unable to alloc space for ctrl msg", __func__);
		vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
		return;
	}

	ctaskp->ldcp = ldcp;
	bcopy((def_msg_t *)cpkt, &ctaskp->pktp, sizeof (def_msg_t));
	ctaskp->hss_id = ldcp->hss_id;

	/*
	 * Dispatch the task to the processing taskq if the port is not in
	 * the process of being detached.
	 */
	mutex_enter(&port->state_lock);
	if (port->state == VSW_PORT_INIT) {
		if ((vswp->taskq_p == NULL) ||
		    (ddi_taskq_dispatch(vswp->taskq_p, vsw_process_ctrl_pkt,
		    ctaskp, DDI_NOSLEEP) != DDI_SUCCESS)) {
			mutex_exit(&port->state_lock);
			DERR(vswp, "%s: unable to dispatch task to taskq",
			    __func__);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			kmem_free(ctaskp, sizeof (vsw_ctrl_task_t));
			return;
		}
	} else {
		kmem_free(ctaskp, sizeof (vsw_ctrl_task_t));
		DWARN(vswp, "%s: port %d detaching, not dispatching "
		    "task", __func__, port->p_instance);
	}

	mutex_exit(&port->state_lock);

	D2(vswp, "%s: dispatched task to taskq for chan %d", __func__,
	    ldcp->ldc_id);
	D1(vswp, "%s: exit", __func__);
}

/*
 * Process a VIO ctrl message. Invoked from taskq.
 */
static void
vsw_process_ctrl_pkt(void *arg)
{
	vsw_ctrl_task_t	*ctaskp = (vsw_ctrl_task_t *)arg;
	vsw_ldc_t	*ldcp = ctaskp->ldcp;
	vsw_t		*vswp = ldcp->ldc_vswp;
	vio_msg_tag_t	tag;
	uint16_t	env;

	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);

	bcopy(&ctaskp->pktp, &tag, sizeof (vio_msg_tag_t));
	env = tag.vio_subtype_env;

	/* stale pkt check */
	if (ctaskp->hss_id < ldcp->hss_id) {
		DWARN(vswp, "%s: discarding stale packet belonging to earlier"
		    " (%ld) handshake session", __func__, ctaskp->hss_id);
		kmem_free(ctaskp, sizeof (vsw_ctrl_task_t));
		return;
	}

	/* session id check */
	if (ldcp->session_status & VSW_PEER_SESSION) {
		if (ldcp->peer_session != tag.vio_sid) {
			DERR(vswp, "%s (chan %d): invalid session id (%llx)",
			    __func__, ldcp->ldc_id, tag.vio_sid);
			kmem_free(ctaskp, sizeof (vsw_ctrl_task_t));
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return;
		}
	}

	/*
	 * Switch on the vio_subtype envelope, then let the lower routines
	 * decide if it's an INFO, ACK or NACK packet.
	 */
	switch (env) {
	case VIO_VER_INFO:
		vsw_process_ctrl_ver_pkt(ldcp, &ctaskp->pktp);
		break;
	case VIO_DRING_REG:
		vsw_process_ctrl_dring_reg_pkt(ldcp, &ctaskp->pktp);
		break;
	case VIO_DRING_UNREG:
		vsw_process_ctrl_dring_unreg_pkt(ldcp, &ctaskp->pktp);
		break;
	case VIO_ATTR_INFO:
		vsw_process_ctrl_attr_pkt(ldcp, &ctaskp->pktp);
		break;
	case VNET_MCAST_INFO:
		vsw_process_ctrl_mcst_pkt(ldcp, &ctaskp->pktp);
		break;
	case VIO_RDX:
		vsw_process_ctrl_rdx_pkt(ldcp, &ctaskp->pktp);
		break;
	case VIO_DDS_INFO:
		vsw_process_dds_msg(vswp, ldcp, &ctaskp->pktp);
		break;

	case VNET_PHYSLINK_INFO:
		vsw_process_physlink_msg(ldcp, &ctaskp->pktp);
		break;
	default:
		DERR(vswp, "%s: unknown vio_subtype_env (%x)\n", __func__, env);
	}

	kmem_free(ctaskp, sizeof (vsw_ctrl_task_t));
	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
}

/*
 * Version negotiation. We can end up here either because our peer
 * has responded to a handshake message we have sent it, or our peer
 * has initiated a handshake with us. If it's the former then it can
 * only be an ACK or NACK; if it's the latter it can only be an INFO.
 *
 * If it's an ACK we move to the next stage of the handshake, namely
 * attribute exchange. If it's a NACK we see if we can specify another
 * version; if we can't we stop.
 *
 * If it is an INFO we reset all params associated with communication
 * in that direction over this channel (remember the connection is
 * essentially 2 independent simplex channels).
 */
void
vsw_process_ctrl_ver_pkt(vsw_ldc_t *ldcp, void *pkt)
{
	vio_ver_msg_t	*ver_pkt;
	vsw_t		*vswp = ldcp->ldc_vswp;

	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);

	/*
	 * We know this is a ctrl/version packet so
	 * cast it into the correct structure.
	 */
	ver_pkt = (vio_ver_msg_t *)pkt;

	switch (ver_pkt->tag.vio_subtype) {
	case VIO_SUBTYPE_INFO:
		D2(vswp, "vsw_process_ctrl_ver_pkt: VIO_SUBTYPE_INFO\n");

		/*
		 * Record the session id, which we will use from now
		 * until we see another VER_INFO msg. Even then the
		 * session id in most cases will be unchanged, except
		 * if the channel was reset.
		 */
		if ((ldcp->session_status & VSW_PEER_SESSION) &&
		    (ldcp->peer_session != ver_pkt->tag.vio_sid)) {
			DERR(vswp, "%s: updating session id for chan %lld "
			    "from %llx to %llx", __func__, ldcp->ldc_id,
			    ldcp->peer_session, ver_pkt->tag.vio_sid);
		}

		ldcp->peer_session = ver_pkt->tag.vio_sid;
		ldcp->session_status |= VSW_PEER_SESSION;

		/* Legal message at this time? */
		if (vsw_check_flag(ldcp, INBOUND, VSW_VER_INFO_RECV))
			return;

		/*
		 * First check the device class. Currently we only expect
		 * to be talking to a network device. In the future we may
		 * also talk to another switch.
		 */
		if (ver_pkt->dev_class != VDEV_NETWORK) {
			DERR(vswp, "%s: illegal device class %d", __func__,
			    ver_pkt->dev_class);

			ver_pkt->tag.vio_sid = ldcp->local_session;
			ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;

			DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt);

			(void) vsw_send_msg(ldcp, (void *)ver_pkt,
			    sizeof (vio_ver_msg_t), B_TRUE);

			ldcp->lane_in.lstate |= VSW_VER_NACK_SENT;
			vsw_next_milestone(ldcp);
			return;
		} else {
			ldcp->dev_class = ver_pkt->dev_class;
		}

		/*
		 * Now check the version.
		 */
		if (vsw_supported_version(ver_pkt) == 0) {
			/*
			 * Support this major version and possibly
			 * adjusted minor version.
			 */

			D2(vswp, "%s: accepted ver %d:%d", __func__,
			    ver_pkt->ver_major, ver_pkt->ver_minor);

			/* Store accepted values */
			ldcp->lane_in.ver_major = ver_pkt->ver_major;
			ldcp->lane_in.ver_minor = ver_pkt->ver_minor;

			ver_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;

			ldcp->lane_in.lstate |= VSW_VER_ACK_SENT;

			if (vsw_obp_ver_proto_workaround == B_TRUE) {
				/*
				 * Send a version info message
				 * using the accepted version that
				 * we are about to ack. Also note that
				 * we send our ver info before we ack.
				 * Otherwise, as soon as it receives the
				 * ack, obp sends an attr info msg, which
				 * breaks vsw_check_flag() invoked
				 * from vsw_process_ctrl_attr_pkt();
				 * as we also need VSW_VER_ACK_RECV to
				 * be set in lane_out.lstate, before
				 * we can receive attr info.
				 */
				vsw_send_ver(ldcp);
			}
		} else {
			/*
			 * NACK back with the next lower major/minor
			 * pairing we support (if we don't support any more
			 * versions then they will be set to zero).
			 */

			D2(vswp, "%s: replying with ver %d:%d", __func__,
			    ver_pkt->ver_major, ver_pkt->ver_minor);

			/* Store updated values */
			ldcp->lane_in.ver_major = ver_pkt->ver_major;
			ldcp->lane_in.ver_minor = ver_pkt->ver_minor;

			ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;

			ldcp->lane_in.lstate |= VSW_VER_NACK_SENT;
		}

		DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt);
		ver_pkt->tag.vio_sid = ldcp->local_session;
		(void) vsw_send_msg(ldcp, (void *)ver_pkt,
		    sizeof (vio_ver_msg_t), B_TRUE);

		vsw_next_milestone(ldcp);
		break;

	case VIO_SUBTYPE_ACK:
		D2(vswp, "%s: VIO_SUBTYPE_ACK\n", __func__);

		if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_ACK_RECV))
			return;

		/* Store updated values */
		ldcp->lane_out.ver_major = ver_pkt->ver_major;
		ldcp->lane_out.ver_minor = ver_pkt->ver_minor;

		ldcp->lane_out.lstate |= VSW_VER_ACK_RECV;
		vsw_next_milestone(ldcp);

		break;

	case VIO_SUBTYPE_NACK:
		D2(vswp, "%s: VIO_SUBTYPE_NACK\n", __func__);

		if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_NACK_RECV))
			return;

		/*
		 * If our peer sent us a NACK with the ver fields set to
		 * zero then there is nothing more we can do. Otherwise see
		 * if we support either the version suggested, or a lesser
		 * one.
		 */
		if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) {
			DERR(vswp, "%s: peer unable to negotiate any "
			    "further.", __func__);
			ldcp->lane_out.lstate |= VSW_VER_NACK_RECV;
			vsw_next_milestone(ldcp);
			return;
		}

		/*
		 * Check to see if we support this major version or
		 * a lower one. If we don't then major/minor will be set
		 * to zero.
		 */
		(void) vsw_supported_version(ver_pkt);
		if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) {
			/* Nothing more we can do */
			DERR(vswp, "%s: version negotiation failed.\n",
			    __func__);
			ldcp->lane_out.lstate |= VSW_VER_NACK_RECV;
			vsw_next_milestone(ldcp);
		} else {
			/* found a supported major version */
			ldcp->lane_out.ver_major = ver_pkt->ver_major;
			ldcp->lane_out.ver_minor = ver_pkt->ver_minor;

			D2(vswp, "%s: resending with updated values (%x, %x)",
			    __func__, ver_pkt->ver_major, ver_pkt->ver_minor);

			ldcp->lane_out.lstate |= VSW_VER_INFO_SENT;
			ver_pkt->tag.vio_sid = ldcp->local_session;
			ver_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;

			DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt);

			(void) vsw_send_msg(ldcp, (void *)ver_pkt,
			    sizeof (vio_ver_msg_t), B_TRUE);

			vsw_next_milestone(ldcp);

		}
		break;

	default:
		DERR(vswp, "%s: unknown vio_subtype %x\n", __func__,
		    ver_pkt->tag.vio_subtype);
	}

	D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id);
}

/*
 * Process an attribute packet. We can end up here either because our peer
 * has ACK/NACK'ed back to an earlier ATTR msg we had sent it, or our
 * peer has sent us an attribute INFO message.
 *
 * If it's an ACK we then move to the next stage of the handshake which
 * is to send our descriptor ring info to our peer. If it's a NACK then
 * there is nothing more we can (currently) do.
 *
 * If we get a valid/acceptable INFO packet (and we have already negotiated
 * a version) we ACK back and set the channel state to ATTR_RECV, otherwise
 * we NACK back and reset the channel state to INACTIVE.
 *
 * FUTURE: in time we will probably negotiate over attributes, but for
 * the moment unacceptable attributes are regarded as a fatal error.
 *
 */
void
vsw_process_ctrl_attr_pkt(vsw_ldc_t *ldcp, void *pkt)
{
	vnet_attr_msg_t	*attr_pkt;
	vsw_t		*vswp = ldcp->ldc_vswp;
	vsw_port_t	*port = ldcp->ldc_port;
	uint64_t	macaddr = 0;
	lane_t		*lane_out = &ldcp->lane_out;
	lane_t		*lane_in = &ldcp->lane_in;
	uint32_t	mtu;
	boolean_t	ack = B_TRUE;
	int		i;

	D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id);

	/*
	 * We know this is a ctrl/attr packet so
	 * cast it into the correct structure.
	 */
	attr_pkt = (vnet_attr_msg_t *)pkt;

	switch (attr_pkt->tag.vio_subtype) {
	case VIO_SUBTYPE_INFO:
		D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__);

		if (vsw_check_flag(ldcp, INBOUND, VSW_ATTR_INFO_RECV))
			return;

		/*
		 * If the attributes are unacceptable then we NACK back.
		 */
		if (vsw_check_attr(attr_pkt, ldcp)) {
			ack = B_FALSE;

			DERR(vswp, "%s (chan %d): invalid attributes",
			    __func__, ldcp->ldc_id);

		} else {

			if (VSW_VER_GTEQ(ldcp, 1, 4)) {
				/*
				 * Versions >= 1.4:
				 * The mtu is negotiated down to the
				 * minimum of our mtu and the peer's mtu.
				 */
				mtu = MIN(attr_pkt->mtu, vswp->max_frame_size);

				/*
				 * If we have received an ack for the attr info
				 * that we sent, then check if the mtu computed
				 * above matches the mtu that the peer had
				 * ack'd (saved in local hparams). If they
				 * don't match, we fail the handshake.
				 */
				if (lane_out->lstate & VSW_ATTR_ACK_RECV) {
					if (mtu != lane_out->mtu) {
						/* send NACK */
						ack = B_FALSE;
					}
				} else {
					/*
					 * Save the mtu computed above in our
					 * attr parameters, so it gets sent in
					 * the attr info from us to the peer.
					 */
					lane_out->mtu = mtu;
				}
			}

		}

		if (ack == B_FALSE) {

			vsw_free_lane_resources(ldcp, INBOUND);

			attr_pkt->tag.vio_sid = ldcp->local_session;
			attr_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;

			DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt);
			ldcp->lane_in.lstate |= VSW_ATTR_NACK_SENT;
			(void) vsw_send_msg(ldcp, (void *)attr_pkt,
			    sizeof (vnet_attr_msg_t), B_TRUE);

			vsw_next_milestone(ldcp);
			return;
		}

		/*
		 * Otherwise store the attributes for this lane and update
		 * the lane state.
		 */
		lane_in->mtu = attr_pkt->mtu;
		lane_in->addr = attr_pkt->addr;
		lane_in->addr_type = attr_pkt->addr_type;
		lane_in->xfer_mode = attr_pkt->xfer_mode;
		lane_in->ack_freq = attr_pkt->ack_freq;
		lane_in->physlink_update = attr_pkt->physlink_update;

		/*
		 * Check if the client has requested physlink state updates.
		 * If there is a physical device bound to this vswitch (L2
		 * mode), set the ack bits to indicate it is supported.
		 * Otherwise, set the nack bits.
		 */
		if (VSW_VER_GTEQ(ldcp, 1, 5)) {	/* Protocol ver >= 1.5 */

			/* Does the vnet need phys link state updates? */
			if ((lane_in->physlink_update &
			    PHYSLINK_UPDATE_STATE_MASK) ==
			    PHYSLINK_UPDATE_STATE) {

				if (vswp->smode & VSW_LAYER2) {
					/* is a net-dev assigned to us? */
					attr_pkt->physlink_update =
					    PHYSLINK_UPDATE_STATE_ACK;
					ldcp->pls_negotiated = B_TRUE;
				} else {
					/* not in L2 mode */
					attr_pkt->physlink_update =
					    PHYSLINK_UPDATE_STATE_NACK;
					ldcp->pls_negotiated = B_FALSE;
				}

			} else {
				attr_pkt->physlink_update =
				    PHYSLINK_UPDATE_NONE;
				ldcp->pls_negotiated = B_FALSE;
			}

		} else {
			/*
			 * physlink_update bits are ignored
			 * if set by clients < v1.5 protocol.
			 */
			attr_pkt->physlink_update = PHYSLINK_UPDATE_NONE;
			ldcp->pls_negotiated = B_FALSE;
		}

		if (VSW_VER_GTEQ(ldcp, 1, 4)) {
			/* save the MIN mtu in the msg to be replied */
			attr_pkt->mtu = mtu;
		}

		macaddr = lane_in->addr;
		for (i = ETHERADDRL - 1; i >= 0; i--) {
			port->p_macaddr.ether_addr_octet[i] = macaddr & 0xFF;
			macaddr >>= 8;
		}

		/* create the fdb entry for this port/mac address */
		vsw_fdbe_add(vswp, port);

		/* add the port to the specified vlans */
		vsw_vlan_add_ids(port, VSW_VNETPORT);

		/* set up device-specific xmit routines */
		mutex_enter(&port->tx_lock);
		if ((VSW_VER_GTEQ(ldcp, 1, 2) &&
		    (lane_in->xfer_mode & VIO_DRING_MODE_V1_2)) ||
		    (VSW_VER_LT(ldcp, 1, 2) &&
		    (lane_in->xfer_mode == VIO_DRING_MODE_V1_0))) {
			D2(vswp, "%s: mode = VIO_DRING_MODE", __func__);
			port->transmit = vsw_dringsend;
		} else if (lane_in->xfer_mode == VIO_DESC_MODE) {
			D2(vswp, "%s: mode = VIO_DESC_MODE", __func__);
			vsw_create_privring(ldcp);
			port->transmit = vsw_descrsend;
			lane_out->xfer_mode = VIO_DESC_MODE;
		}

		/*
		 * HybridIO is supported only by vnet, not by OBP.
		 * So, set hio_capable to true only when in DRING mode.
		 */
		if (VSW_VER_GTEQ(ldcp, 1, 3) &&
		    (lane_in->xfer_mode != VIO_DESC_MODE)) {
			(void) atomic_swap_32(&port->p_hio_capable, B_TRUE);
		} else {
			(void) atomic_swap_32(&port->p_hio_capable, B_FALSE);
		}

		mutex_exit(&port->tx_lock);

		attr_pkt->tag.vio_sid = ldcp->local_session;
		attr_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;

		DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt);

		lane_in->lstate |= VSW_ATTR_ACK_SENT;

		(void) vsw_send_msg(ldcp, (void *)attr_pkt,
		    sizeof (vnet_attr_msg_t), B_TRUE);

		vsw_next_milestone(ldcp);
		break;

	case VIO_SUBTYPE_ACK:
		D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__);

		if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_ACK_RECV))
			return;

		if (VSW_VER_GTEQ(ldcp, 1, 4)) {
			/*
			 * Versions >= 1.4:
			 * The ack msg sent by the peer contains the minimum of
			 * our mtu (that we had sent in our attr info) and the
			 * peer's mtu.
			 *
			 * If we have sent an ack for the attr info msg from
			 * the peer, check if the mtu that was computed then
			 * (saved in lane_out params) matches the mtu that the
			 * peer has ack'd. If they don't match, we fail the
			 * handshake.
			 */
			if (lane_in->lstate & VSW_ATTR_ACK_SENT) {
				if (lane_out->mtu != attr_pkt->mtu) {
					return;
				}
			} else {
				/*
				 * If the mtu ack'd by the peer is > our mtu,
				 * fail the handshake. Otherwise, save the mtu,
				 * so we can validate it when we receive attr
				 * info from our peer.
				 */
				if (attr_pkt->mtu > lane_out->mtu) {
					return;
				}
				if (attr_pkt->mtu <= lane_out->mtu) {
					lane_out->mtu = attr_pkt->mtu;
				}
			}
		}

		lane_out->lstate |= VSW_ATTR_ACK_RECV;
		vsw_next_milestone(ldcp);
		break;

	case VIO_SUBTYPE_NACK:
		D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__);

		if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_NACK_RECV))
			return;

		lane_out->lstate |= VSW_ATTR_NACK_RECV;
		vsw_next_milestone(ldcp);
		break;

	default:
		DERR(vswp, "%s: unknown vio_subtype %x\n", __func__,
		    attr_pkt->tag.vio_subtype);
	}

	D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id);
}
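
/*
 * Illustrative sketch (not part of the driver): the loop above unpacks
 * the 64-bit MAC address from the attr message into the six-byte
 * ether_addr_octet[] array, least significant byte last, so the value
 * 0x0003BA0F2D51 becomes 00:03:ba:0f:2d:51. The ex_ name is hypothetical.
 */
#if 0
static void
ex_unpack_mac(uint64_t macaddr, uint8_t octet[ETHERADDRL])
{
	int	i;

	for (i = ETHERADDRL - 1; i >= 0; i--) {
		octet[i] = macaddr & 0xFF;	/* take the lowest byte */
		macaddr >>= 8;			/* shift the next byte down */
	}
}
#endif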

/*
 * Process a dring info packet. We can end up here either because our peer
 * has ACK/NACK'ed back to an earlier DRING msg we had sent it, or our
 * peer has sent us a dring INFO message.
 *
 * If we get a valid/acceptable INFO packet (and we have already negotiated
 * a version) we ACK back and update the lane state, otherwise we NACK back.
 *
 * FUTURE: nothing to stop the client from sending us info on multiple
 * drings, but for the moment we will just use the first one we are given.
 *
 */
void
vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *ldcp, void *pkt)
{
	vio_dring_reg_msg_t	*dring_pkt;
	vsw_t			*vswp = ldcp->ldc_vswp;
	ldc_mem_info_t		minfo;
	dring_info_t		*dp, *dbp;
	int			dring_found = 0;

	/*
	 * We know this is a ctrl/dring packet so
	 * cast it into the correct structure.
	 */
	dring_pkt = (vio_dring_reg_msg_t *)pkt;

	D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id);

	switch (dring_pkt->tag.vio_subtype) {
	case VIO_SUBTYPE_INFO:
		D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__);

		if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV))
			return;

		/*
		 * If the dring params are unacceptable then we NACK back.
		 */
		if (vsw_check_dring_info(dring_pkt)) {

			DERR(vswp, "%s (%lld): invalid dring info",
			    __func__, ldcp->ldc_id);

			vsw_free_lane_resources(ldcp, INBOUND);

			dring_pkt->tag.vio_sid = ldcp->local_session;
			dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;

			DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt);

			ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT;

			(void) vsw_send_msg(ldcp, (void *)dring_pkt,
			    sizeof (vio_dring_reg_msg_t), B_TRUE);

			vsw_next_milestone(ldcp);
			return;
		}

		/*
		 * Otherwise, attempt to map in the dring using the
		 * cookie. If that succeeds we send back a unique dring
		 * identifier that the sending side will use in future
		 * to refer to this descriptor ring.
		 */
		dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP);

		dp->num_descriptors = dring_pkt->num_descriptors;
		dp->descriptor_size = dring_pkt->descriptor_size;
		dp->options = dring_pkt->options;
		dp->ncookies = dring_pkt->ncookies;

		/*
		 * Note: should only get one cookie. Enforced in
		 * the ldc layer.
		 */
		bcopy(&dring_pkt->cookie[0], &dp->cookie[0],
		    sizeof (ldc_mem_cookie_t));

		D2(vswp, "%s: num_desc %ld : desc_size %ld", __func__,
		    dp->num_descriptors, dp->descriptor_size);
		D2(vswp, "%s: options 0x%lx: ncookies %ld", __func__,
		    dp->options, dp->ncookies);

		if ((ldc_mem_dring_map(ldcp->ldc_handle, &dp->cookie[0],
		    dp->ncookies, dp->num_descriptors, dp->descriptor_size,
		    LDC_DIRECT_MAP, &(dp->handle))) != 0) {

			DERR(vswp, "%s: dring_map failed\n", __func__);

			kmem_free(dp, sizeof (dring_info_t));
			vsw_free_lane_resources(ldcp, INBOUND);

			dring_pkt->tag.vio_sid = ldcp->local_session;
			dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;

			DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt);

			ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT;
			(void) vsw_send_msg(ldcp, (void *)dring_pkt,
			    sizeof (vio_dring_reg_msg_t), B_TRUE);

			vsw_next_milestone(ldcp);
			return;
		}

		if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) {

			DERR(vswp, "%s: dring_addr failed\n", __func__);

			kmem_free(dp, sizeof (dring_info_t));
			vsw_free_lane_resources(ldcp, INBOUND);

			dring_pkt->tag.vio_sid = ldcp->local_session;
			dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;

			DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt);

			ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT;
			(void) vsw_send_msg(ldcp, (void *)dring_pkt,
			    sizeof (vio_dring_reg_msg_t), B_TRUE);

			vsw_next_milestone(ldcp);
			return;
		} else {
			/* store the address of the pub part of the ring */
			dp->pub_addr = minfo.vaddr;

			/* cache the dring mtype */
			dp->dring_mtype = minfo.mtype;
		}

		/* no private section as we are importing */
		dp->priv_addr = NULL;

		/*
		 * Using a simple monotonically increasing int for the
		 * ident at the moment.
		 */
		dp->ident = ldcp->next_ident;
		ldcp->next_ident++;

		dp->end_idx = 0;
		dp->next = NULL;

		/*
		 * Link it onto the end of the list of drings
		 * for this lane.
		 */
		if (ldcp->lane_in.dringp == NULL) {
			D2(vswp, "%s: adding first INBOUND dring", __func__);
			ldcp->lane_in.dringp = dp;
		} else {
			dbp = ldcp->lane_in.dringp;

			while (dbp->next != NULL)
				dbp = dbp->next;

			dbp->next = dp;
		}

		/* acknowledge it */
		dring_pkt->tag.vio_sid = ldcp->local_session;
		dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
		dring_pkt->dring_ident = dp->ident;

		(void) vsw_send_msg(ldcp, (void *)dring_pkt,
		    sizeof (vio_dring_reg_msg_t), B_TRUE);

		ldcp->lane_in.lstate |= VSW_DRING_ACK_SENT;
		vsw_next_milestone(ldcp);
		break;

	case VIO_SUBTYPE_ACK:
		D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__);

		if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_ACK_RECV))
			return;

		/*
		 * Peer is acknowledging our dring info and will have
		 * sent us a dring identifier which we will use to
		 * refer to this ring w.r.t. our peer.
		 */
		dp = ldcp->lane_out.dringp;
		if (dp != NULL) {
			/*
			 * Find the ring this ident should be associated
			 * with.
			 */
			if (vsw_dring_match(dp, dring_pkt)) {
				dring_found = 1;

			} else while (dp != NULL) {
				if (vsw_dring_match(dp, dring_pkt)) {
					dring_found = 1;
					break;
				}
				dp = dp->next;
			}

			if (dring_found == 0) {
				DERR(NULL, "%s: unrecognised ring cookie",
				    __func__);
				vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
				return;
			}

		} else {
			DERR(vswp, "%s: DRING ACK received but no drings "
			    "allocated", __func__);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return;
		}

		/* store ident */
		dp->ident = dring_pkt->dring_ident;
		ldcp->lane_out.lstate |= VSW_DRING_ACK_RECV;
		vsw_next_milestone(ldcp);
		break;

	case VIO_SUBTYPE_NACK:
		D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__);

		if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_NACK_RECV))
			return;

		ldcp->lane_out.lstate |= VSW_DRING_NACK_RECV;
		vsw_next_milestone(ldcp);
		break;

	default:
		DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__,
		    dring_pkt->tag.vio_subtype);
	}

	D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id);
}

/*
 * Process a request from the peer to unregister a dring.
 *
 * For the moment we just restart the handshake if our
 * peer endpoint attempts to unregister a dring.
 */
void
vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *ldcp, void *pkt)
{
	vsw_t			*vswp = ldcp->ldc_vswp;
	vio_dring_unreg_msg_t	*dring_pkt;

	/*
	 * We know this is a ctrl/dring packet so
	 * cast it into the correct structure.
	 */
	dring_pkt = (vio_dring_unreg_msg_t *)pkt;

	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);

	switch (dring_pkt->tag.vio_subtype) {
	case VIO_SUBTYPE_INFO:
		D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__);

		DWARN(vswp, "%s: restarting handshake...", __func__);
		break;

	case VIO_SUBTYPE_ACK:
		D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__);

		DWARN(vswp, "%s: restarting handshake...", __func__);
		break;

	case VIO_SUBTYPE_NACK:
		D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__);

		DWARN(vswp, "%s: restarting handshake...", __func__);
		break;

	default:
		DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__,
		    dring_pkt->tag.vio_subtype);
	}

	vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);

	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
}

#define	SND_MCST_NACK(ldcp, pkt) \
	pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \
	pkt->tag.vio_sid = ldcp->local_session; \
	(void) vsw_send_msg(ldcp, (void *)pkt, \
	    sizeof (vnet_mcast_msg_t), B_TRUE);

/*
 * Process a multicast request from a vnet.
 *
 * Vnets specify a multicast address that they are interested in. This
 * address is used as a key into the hash table which forms the multicast
 * forwarding database (mFDB).
 *
 * The table keys are the multicast addresses, while the table entries
 * are pointers to lists of ports which wish to receive packets for the
 * specified multicast address.
 *
 * When a multicast packet is being switched we use the address as a key
 * into the hash table, and then walk the appropriate port list forwarding
 * the pkt to each port in turn.
 *
 * If a vnet is no longer interested in a particular multicast grouping
 * we simply find the correct location in the hash table and then delete
 * the relevant port from the port list.
 *
 * To deal with the case whereby a port is being deleted without first
 * removing itself from the lists in the hash table, we maintain a list
 * of multicast addresses the port has registered an interest in, within
 * the port structure itself. We then simply walk that list of addresses
 * using them as keys into the hash table and remove the port from the
 * appropriate lists.
 */
static void
vsw_process_ctrl_mcst_pkt(vsw_ldc_t *ldcp, void *pkt)
{
	vnet_mcast_msg_t	*mcst_pkt;
	vsw_port_t		*port = ldcp->ldc_port;
	vsw_t			*vswp = ldcp->ldc_vswp;
	int			i;

	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);

	/*
	 * We know this is a ctrl/mcast packet so
	 * cast it into the correct structure.
	 */
	mcst_pkt = (vnet_mcast_msg_t *)pkt;

	switch (mcst_pkt->tag.vio_subtype) {
	case VIO_SUBTYPE_INFO:
		D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__);

		/*
		 * Check if in the correct state to receive a multicast
		 * message (i.e. handshake complete). If not, reset
		 * the handshake.
		 */
		if (vsw_check_flag(ldcp, INBOUND, VSW_MCST_INFO_RECV))
			return;

		/*
		 * Before attempting to add or remove the addresses, check
		 * that they are valid multicast addresses.
		 * If not, then NACK back.
		 */
		for (i = 0; i < mcst_pkt->count; i++) {
			if ((mcst_pkt->mca[i].ether_addr_octet[0] & 01) != 1) {
				DERR(vswp, "%s: invalid multicast address",
				    __func__);
				SND_MCST_NACK(ldcp, mcst_pkt);
				return;
			}
		}

		/*
		 * Now add/remove the addresses. If this fails we
		 * NACK back.
		 */
		if (vsw_add_rem_mcst(mcst_pkt, port) != 0) {
			SND_MCST_NACK(ldcp, mcst_pkt);
			return;
		}

		mcst_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
		mcst_pkt->tag.vio_sid = ldcp->local_session;

		DUMP_TAG_PTR((vio_msg_tag_t *)mcst_pkt);

		(void) vsw_send_msg(ldcp, (void *)mcst_pkt,
		    sizeof (vnet_mcast_msg_t), B_TRUE);
		break;

	case VIO_SUBTYPE_ACK:
		DWARN(vswp, "%s: VIO_SUBTYPE_ACK", __func__);

		/*
		 * We shouldn't ever get a multicast ACK message as
		 * at the moment we never request multicast addresses
		 * to be set on some other device. This may change in
		 * the future if we have cascading switches.
		 */
		if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_ACK_RECV))
			return;

		/* Do nothing */
		break;

	case VIO_SUBTYPE_NACK:
		DWARN(vswp, "%s: VIO_SUBTYPE_NACK", __func__);

		/*
		 * We shouldn't get a multicast NACK packet for the
		 * same reasons as we shouldn't get an ACK packet.
		 */
		if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_NACK_RECV))
			return;

		/* Do nothing */
		break;

	default:
		DERR(vswp, "%s: unknown vio_subtype %x\n", __func__,
		    mcst_pkt->tag.vio_subtype);
	}

	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
}
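
/*
 * Illustrative sketch (not part of the driver): the validity test used
 * in the loop above. An ethernet address is multicast exactly when the
 * least significant bit of its first octet is set, so 01:00:5e:00:00:01
 * passes and 00:14:4f:fa:b4:1c is rejected. The ex_ name is hypothetical.
 */
#if 0
static boolean_t
ex_is_mcast(struct ether_addr *ea)
{
	return ((ea->ether_addr_octet[0] & 0x01) ? B_TRUE : B_FALSE);
}
#endif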

static void
vsw_process_ctrl_rdx_pkt(vsw_ldc_t *ldcp, void *pkt)
{
	vio_rdx_msg_t	*rdx_pkt;
	vsw_t		*vswp = ldcp->ldc_vswp;

	/*
	 * We know this is a ctrl/rdx packet so
	 * cast it into the correct structure.
	 */
	rdx_pkt = (vio_rdx_msg_t *)pkt;

	D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id);

	switch (rdx_pkt->tag.vio_subtype) {
	case VIO_SUBTYPE_INFO:
		D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__);

		if (vsw_check_flag(ldcp, OUTBOUND, VSW_RDX_INFO_RECV))
			return;

		rdx_pkt->tag.vio_sid = ldcp->local_session;
		rdx_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;

		DUMP_TAG_PTR((vio_msg_tag_t *)rdx_pkt);

		ldcp->lane_out.lstate |= VSW_RDX_ACK_SENT;

		(void) vsw_send_msg(ldcp, (void *)rdx_pkt,
		    sizeof (vio_rdx_msg_t), B_TRUE);

		vsw_next_milestone(ldcp);
		break;

	case VIO_SUBTYPE_ACK:
		/*
		 * Should be handled in-band by callback handler.
		 */
		DERR(vswp, "%s: Unexpected VIO_SUBTYPE_ACK", __func__);
		vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
		break;

	case VIO_SUBTYPE_NACK:
		D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__);

		if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_NACK_RECV))
			return;

		ldcp->lane_in.lstate |= VSW_RDX_NACK_RECV;
		vsw_next_milestone(ldcp);
		break;

	default:
		DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__,
		    rdx_pkt->tag.vio_subtype);
	}

	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
}

static void
vsw_process_physlink_msg(vsw_ldc_t *ldcp, void *pkt)
{
	vnet_physlink_msg_t	*msgp;
	vsw_t			*vswp = ldcp->ldc_vswp;

	msgp = (vnet_physlink_msg_t *)pkt;

	D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id);

	switch (msgp->tag.vio_subtype) {
	case VIO_SUBTYPE_INFO:

		/* vsw shouldn't recv physlink info */
		DWARN(vswp, "%s: Unexpected VIO_SUBTYPE_INFO", __func__);
		break;

	case VIO_SUBTYPE_ACK:

		D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__);
		break;

	case VIO_SUBTYPE_NACK:

		D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__);
		break;

	default:
		DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__,
		    msgp->tag.vio_subtype);
	}

	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
}

static void
vsw_process_data_pkt(vsw_ldc_t *ldcp, void *dpkt, vio_msg_tag_t *tagp,
    uint32_t msglen)
{
	uint16_t	env = tagp->vio_subtype_env;
	vsw_t		*vswp = ldcp->ldc_vswp;

	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);

	/* session id check */
	if (ldcp->session_status & VSW_PEER_SESSION) {
		if (ldcp->peer_session != tagp->vio_sid) {
			DERR(vswp, "%s (chan %d): invalid session id (%llx)",
			    __func__, ldcp->ldc_id, tagp->vio_sid);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return;
		}
	}

	/*
	 * It is an error for us to be getting data packets
	 * before the handshake has completed.
	 */
	if (ldcp->hphase != VSW_MILESTONE4) {
		DERR(vswp, "%s: got data packet before handshake complete "
		    "hphase %d (%x: %x)", __func__, ldcp->hphase,
		    ldcp->lane_in.lstate, ldcp->lane_out.lstate);
		DUMP_FLAGS(ldcp->lane_in.lstate);
		DUMP_FLAGS(ldcp->lane_out.lstate);
		vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
		return;
	}

	/*
	 * To reduce the locking contention, release the
	 * ldc_cblock here and re-acquire it once we are done
	 * receiving packets.
	 */
	mutex_exit(&ldcp->ldc_cblock);
	mutex_enter(&ldcp->ldc_rxlock);

	/*
	 * Switch on the vio_subtype envelope, then let the lower routines
	 * decide if it's an INFO, ACK or NACK packet.
	 */
	if (env == VIO_DRING_DATA) {
		vsw_process_data_dring_pkt(ldcp, dpkt);
	} else if (env == VIO_PKT_DATA) {
		ldcp->rx_pktdata(ldcp, dpkt, msglen);
	} else if (env == VIO_DESC_DATA) {
		vsw_process_data_ibnd_pkt(ldcp, dpkt);
	} else {
		DERR(vswp, "%s: unknown vio_subtype_env (%x)\n", __func__, env);
	}

	mutex_exit(&ldcp->ldc_rxlock);
	mutex_enter(&ldcp->ldc_cblock);

	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
}

#define	SND_DRING_NACK(ldcp, pkt) \
	pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \
	pkt->tag.vio_sid = ldcp->local_session; \
	(void) vsw_send_msg(ldcp, (void *)pkt, \
	    sizeof (vio_dring_msg_t), B_TRUE);

static void
vsw_process_data_dring_pkt(vsw_ldc_t *ldcp, void *dpkt)
{
	vio_dring_msg_t		*dring_pkt;
	vnet_public_desc_t	desc, *pub_addr = NULL;
	vsw_private_desc_t	*priv_addr = NULL;
	dring_info_t		*dp = NULL;
	vsw_t			*vswp = ldcp->ldc_vswp;
	mblk_t			*mp = NULL;
	mblk_t			*bp = NULL;
	mblk_t			*bpt = NULL;
	size_t			nbytes = 0;
	uint64_t		chain = 0;
	uint64_t		len;
	uint32_t		pos, start;
	uint32_t		range_start, range_end;
	int32_t			end, num, cnt = 0;
	int			i, rv, rng_rv = 0, msg_rv = 0;
	boolean_t		prev_desc_ack = B_FALSE;
	int			read_attempts = 0;
	struct ether_header	*ehp;
	lane_t			*lp = &ldcp->lane_out;

	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);

	/*
	 * We know this is a data/dring packet so
	 * cast it into the correct structure.
	 */
	dring_pkt = (vio_dring_msg_t *)dpkt;

	/*
	 * Switch on the vio_subtype. If it's an INFO then we need to
	 * process the data. If it's an ACK we need to make sure
	 * it makes sense (i.e. did we send an earlier data/info),
	 * and if it's a NACK then we may attempt a retry.
	 */
	switch (dring_pkt->tag.vio_subtype) {
	case VIO_SUBTYPE_INFO:
		D2(vswp, "%s(%lld): VIO_SUBTYPE_INFO", __func__, ldcp->ldc_id);

		READ_ENTER(&ldcp->lane_in.dlistrw);
		if ((dp = vsw_ident2dring(&ldcp->lane_in,
		    dring_pkt->dring_ident)) == NULL) {
			RW_EXIT(&ldcp->lane_in.dlistrw);

			DERR(vswp, "%s(%lld): unable to find dring from "
			    "ident 0x%llx", __func__, ldcp->ldc_id,
			    dring_pkt->dring_ident);

			SND_DRING_NACK(ldcp, dring_pkt);
			return;
		}

		start = pos = dring_pkt->start_idx;
		end = dring_pkt->end_idx;
		len = dp->num_descriptors;

		range_start = range_end = pos;

		D2(vswp, "%s(%lld): start index %ld : end %ld\n",
		    __func__, ldcp->ldc_id, start, end);

		if (end == -1) {
			num = -1;
		} else if (end >= 0) {
			num = end >= pos ?
			    end - pos + 1 : (len - pos + 1) + end;

			/* basic sanity check */
			if (end > len) {
				RW_EXIT(&ldcp->lane_in.dlistrw);
				DERR(vswp, "%s(%lld): endpoint %lld outside "
				    "ring length %lld", __func__,
				    ldcp->ldc_id, end, len);

				SND_DRING_NACK(ldcp, dring_pkt);
				return;
			}
		} else {
			RW_EXIT(&ldcp->lane_in.dlistrw);
			DERR(vswp, "%s(%lld): invalid endpoint %lld",
			    __func__, ldcp->ldc_id, end);
			SND_DRING_NACK(ldcp, dring_pkt);
			return;
		}

		while (cnt != num) {
vsw_recheck_desc:
			pub_addr = (vnet_public_desc_t *)dp->pub_addr + pos;

			if ((rng_rv = vnet_dring_entry_copy(pub_addr,
			    &desc, dp->dring_mtype, dp->handle,
			    pos, pos)) != 0) {
				DERR(vswp, "%s(%lld): unable to copy "
				    "descriptor at pos %d: err %d",
				    __func__, pos, ldcp->ldc_id, rng_rv);
				ldcp->ldc_stats.ierrors++;
				break;
			}

			/*
			 * When given a bounded range of descriptors
			 * to process, it's an error to hit a descriptor
			 * which is not ready. In the non-bounded case
			 * (end_idx == -1) this simply indicates we have
			 * reached the end of the current active range.
			 */
			if (desc.hdr.dstate != VIO_DESC_READY) {
				/* unbound - no error */
				if (end == -1) {
					if (read_attempts == vsw_read_attempts)
						break;

					delay(drv_usectohz(vsw_desc_delay));
					read_attempts++;
					goto vsw_recheck_desc;
				}

				/* bounded - error - so NACK back */
				RW_EXIT(&ldcp->lane_in.dlistrw);
				DERR(vswp, "%s(%lld): descriptor not READY "
				    "(%d)", __func__, ldcp->ldc_id,
				    desc.hdr.dstate);
				SND_DRING_NACK(ldcp, dring_pkt);
				return;
			}

			DTRACE_PROBE1(read_attempts, int, read_attempts);

			range_end = pos;

			/*
			 * If we ACK'd the previous descriptor then now
			 * record the new range start position for later
			 * ACK's.
			 */
			if (prev_desc_ack) {
				range_start = pos;

				D2(vswp, "%s(%lld): updating range start to be "
				    "%d", __func__, ldcp->ldc_id, range_start);

				prev_desc_ack = B_FALSE;
			}

			D2(vswp, "%s(%lld): processing desc %lld at pos"
			    " 0x%llx : dstate 0x%lx : datalen 0x%lx",
			    __func__, ldcp->ldc_id, pos, &desc,
			    desc.hdr.dstate, desc.nbytes);

			if ((desc.nbytes < ETHERMIN) ||
			    (desc.nbytes > lp->mtu)) {
				/* invalid size; drop the packet */
				ldcp->ldc_stats.ierrors++;
				goto vsw_process_desc_done;
			}

			/*
			 * Ensure that we ask ldc for an aligned
			 * number of bytes. Data is padded to align on an
			 * 8 byte boundary, desc.nbytes is the actual data
			 * length, i.e. minus that padding.
			 */
			nbytes = (desc.nbytes + VNET_IPALIGN + 7) & ~7;
			if (nbytes > ldcp->max_rxpool_size) {
				mp = allocb(desc.nbytes + VNET_IPALIGN + 8,
				    BPRI_MED);
			} else {
				mp = vio_multipool_allocb(&ldcp->vmp, nbytes);
				if (mp == NULL) {
					ldcp->ldc_stats.rx_vio_allocb_fail++;
					/*
					 * No free receive buffers available,
					 * so fall back to allocb(9F). Make
					 * sure that we get a data buffer which
					 * is a multiple of 8 as this is
					 * required by ldc_mem_copy.
					 */
					DTRACE_PROBE(allocb);
					mp = allocb(desc.nbytes +
					    VNET_IPALIGN + 8, BPRI_MED);
				}
			}
			if (mp == NULL) {
				DERR(vswp, "%s(%ld): allocb failed",
				    __func__, ldcp->ldc_id);
				rng_rv = vnet_dring_entry_set_dstate(pub_addr,
				    dp->dring_mtype, dp->handle, pos, pos,
				    VIO_DESC_DONE);
				ldcp->ldc_stats.ierrors++;
				ldcp->ldc_stats.rx_allocb_fail++;
				break;
			}

			rv = ldc_mem_copy(ldcp->ldc_handle,
			    (caddr_t)mp->b_rptr, 0, &nbytes,
			    desc.memcookie, desc.ncookies, LDC_COPY_IN);
			if (rv != 0) {
				DERR(vswp, "%s(%d): unable to copy in data "
				    "from %d cookies in desc %d (rv %d)",
				    __func__, ldcp->ldc_id, desc.ncookies,
				    pos, rv);
				freemsg(mp);

				rng_rv = vnet_dring_entry_set_dstate(pub_addr,
				    dp->dring_mtype, dp->handle, pos, pos,
				    VIO_DESC_DONE);
				ldcp->ldc_stats.ierrors++;
				break;
			} else {
				D2(vswp, "%s(%d): copied in %ld bytes"
				    " using %d cookies", __func__,
				    ldcp->ldc_id, nbytes, desc.ncookies);
			}

			/* adjust the read pointer to skip over the padding */
			mp->b_rptr += VNET_IPALIGN;

			/* point to the actual end of data */
			mp->b_wptr = mp->b_rptr + desc.nbytes;

			/* update statistics */
			ehp = (struct ether_header *)mp->b_rptr;
			if (IS_BROADCAST(ehp))
				ldcp->ldc_stats.brdcstrcv++;
			else if (IS_MULTICAST(ehp))
				ldcp->ldc_stats.multircv++;

			ldcp->ldc_stats.ipackets++;
			ldcp->ldc_stats.rbytes += desc.nbytes;

			/*
			 * IPALIGN space can be used for VLAN_TAG
			 */
			(void) vsw_vlan_frame_pretag(ldcp->ldc_port,
			    VSW_VNETPORT, mp);

			/* build a chain of received packets */
			if (bp == NULL) {
				/* first pkt */
				bp = mp;
				bp->b_next = bp->b_prev = NULL;
				bpt = bp;
				chain = 1;
			} else {
				mp->b_next = mp->b_prev = NULL;
				bpt->b_next = mp;
				bpt = mp;
				chain++;
			}

vsw_process_desc_done:
			/* mark that we are finished with this descriptor */
			if ((rng_rv = vnet_dring_entry_set_dstate(pub_addr,
			    dp->dring_mtype, dp->handle, pos, pos,
			    VIO_DESC_DONE)) != 0) {
				DERR(vswp, "%s(%lld): unable to update "
				    "dstate at pos %d: err %d",
				    __func__, pos, ldcp->ldc_id, rng_rv);
				ldcp->ldc_stats.ierrors++;
				break;
			}

			/*
			 * Send an ACK back to the peer if requested.
			 */
			if (desc.hdr.ack) {
				dring_pkt->start_idx = range_start;
				dring_pkt->end_idx = range_end;

				DERR(vswp, "%s(%lld): processed %d %d, ACK"
				    " requested", __func__, ldcp->ldc_id,
				    dring_pkt->start_idx, dring_pkt->end_idx);

				dring_pkt->dring_process_state = VIO_DP_ACTIVE;
				dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
				dring_pkt->tag.vio_sid = ldcp->local_session;

				msg_rv = vsw_send_msg(ldcp, (void *)dring_pkt,
				    sizeof (vio_dring_msg_t), B_FALSE);

				/*
				 * Check if the ACK was successfully sent. If
				 * not we break and deal with that below.
				 */
				if (msg_rv != 0)
					break;

				prev_desc_ack = B_TRUE;
				range_start = pos;
			}

			/* next descriptor */
			pos = (pos + 1) % len;
			cnt++;

			/*
			 * Break out of the loop here and stop processing to
			 * allow some other network device (or disk) to
			 * get access to the cpu.
			 */
			if (chain > vsw_chain_len) {
				D3(vswp, "%s(%lld): switching chain of %d "
				    "msgs", __func__, ldcp->ldc_id, chain);
				break;
			}
		}
		RW_EXIT(&ldcp->lane_in.dlistrw);

		/* send the chain of packets to be switched */
		if (bp != NULL) {
			DTRACE_PROBE1(vsw_rcv_msgs, int, chain);
			D3(vswp, "%s(%lld): switching chain of %d msgs",
			    __func__, ldcp->ldc_id, chain);
			vswp->vsw_switch_frame(vswp, bp, VSW_VNETPORT,
			    ldcp->ldc_port, NULL);
		}

		/*
		 * If we encountered an error when attempting to
		 * access an imported dring, initiate a connection reset.
		 */
		if (rng_rv != 0) {
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			break;
		}

		/*
		 * If, when we attempted to send the ACK, we found that the
		 * channel had been reset, then handle it now. We deal with
		 * it here as we cannot reset the channel while holding the
		 * dlistrw lock, and we don't want to acquire/release it
		 * continuously in the above loop, as a channel reset should
		 * be a rare event.
		 */
		if (msg_rv == ECONNRESET) {
			vsw_process_conn_evt(ldcp, VSW_CONN_RESET);
			break;
		}

		DTRACE_PROBE1(msg_cnt, int, cnt);

		/*
		 * We are now finished so ACK back with the state
		 * set to STOPPING so our peer knows we are finished
		 */
		dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
		dring_pkt->tag.vio_sid = ldcp->local_session;

		dring_pkt->dring_process_state = VIO_DP_STOPPED;

		DTRACE_PROBE(stop_process_sent);

		/*
		 * We have not processed any more descriptors beyond
		 * the last one we ACK'd.
		 */
		if (prev_desc_ack)
			range_start = range_end;

		dring_pkt->start_idx = range_start;
		dring_pkt->end_idx = range_end;

		D2(vswp, "%s(%lld) processed : %d : %d, now stopping",
		    __func__, ldcp->ldc_id, dring_pkt->start_idx,
		    dring_pkt->end_idx);

		(void) vsw_send_msg(ldcp, (void *)dring_pkt,
		    sizeof (vio_dring_msg_t), B_TRUE);
		break;

	case VIO_SUBTYPE_ACK:
		D2(vswp, "%s(%lld): VIO_SUBTYPE_ACK", __func__, ldcp->ldc_id);
		/*
		 * Verify that the relevant descriptors are all
		 * marked as DONE
		 */
		READ_ENTER(&ldcp->lane_out.dlistrw);
		if ((dp = vsw_ident2dring(&ldcp->lane_out,
		    dring_pkt->dring_ident)) == NULL) {
			RW_EXIT(&ldcp->lane_out.dlistrw);
			DERR(vswp, "%s: unknown ident in ACK", __func__);
			return;
		}

		start = end = 0;
		start = dring_pkt->start_idx;
		end = dring_pkt->end_idx;
		len = dp->num_descriptors;

		mutex_enter(&dp->dlock);
		dp->last_ack_recv = end;
		ldcp->ldc_stats.dring_data_acks++;
		mutex_exit(&dp->dlock);

		(void) vsw_reclaim_dring(dp, start);

		/*
		 * If our peer is stopping processing descriptors then
		 * we check to make sure it has processed all the descriptors
		 * we have updated. If not then we send it a new message
		 * to prompt it to restart.
		 */
		if (dring_pkt->dring_process_state == VIO_DP_STOPPED) {
			DTRACE_PROBE(stop_process_recv);
			D2(vswp, "%s(%lld): got stopping msg : %d : %d",
			    __func__, ldcp->ldc_id, dring_pkt->start_idx,
			    dring_pkt->end_idx);

			/*
			 * Check the next descriptor in the public section
			 * of the ring. If it's marked as READY then we need
			 * to prompt our peer to start processing the ring
			 * again.
			 */
			i = (end + 1) % len;
			pub_addr = (vnet_public_desc_t *)dp->pub_addr + i;
			priv_addr = (vsw_private_desc_t *)dp->priv_addr + i;

			/*
			 * Hold the restart lock across all of this to
			 * make sure that it's not possible for us to
			 * decide that a msg needs to be sent in the future
			 * while the sending code, having already checked,
			 * is about to exit.
			 */
			mutex_enter(&dp->restart_lock);
			ldcp->ldc_stats.dring_stopped_acks++;
			mutex_enter(&priv_addr->dstate_lock);
			if (pub_addr->hdr.dstate == VIO_DESC_READY) {

				mutex_exit(&priv_addr->dstate_lock);

				dring_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;
				dring_pkt->tag.vio_sid = ldcp->local_session;

				dring_pkt->start_idx = (end + 1) % len;
				dring_pkt->end_idx = -1;

				D2(vswp, "%s(%lld) : sending restart msg:"
				    " %d : %d", __func__, ldcp->ldc_id,
				    dring_pkt->start_idx, dring_pkt->end_idx);

				msg_rv = vsw_send_msg(ldcp, (void *)dring_pkt,
				    sizeof (vio_dring_msg_t), B_FALSE);
				ldcp->ldc_stats.dring_data_msgs++;

			} else {
				mutex_exit(&priv_addr->dstate_lock);
				dp->restart_reqd = B_TRUE;
			}
			mutex_exit(&dp->restart_lock);
		}
		RW_EXIT(&ldcp->lane_out.dlistrw);

		/* only do the channel reset after dropping the dlistrw lock */
		if (msg_rv == ECONNRESET)
			vsw_process_conn_evt(ldcp, VSW_CONN_RESET);

		break;

	case VIO_SUBTYPE_NACK:
		DWARN(vswp, "%s(%lld): VIO_SUBTYPE_NACK",
		    __func__, ldcp->ldc_id);
		/*
		 * Something is badly wrong if we are getting NACK's
		 * for our data pkts. So reset the channel.
		 */
		vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);

		break;

	default:
		DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__,
		    ldcp->ldc_id, dring_pkt->tag.vio_subtype);
	}

	D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id);
}
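
/*
 * Illustrative sketch (not part of the driver): two small calculations
 * from the INFO path above. ex_desc_count() reproduces the wraparound
 * arithmetic that turns a start/end index pair into a descriptor count
 * for a ring of length len, and ex_aligned_len() reproduces the rounding
 * that pads the data length (plus the VNET_IPALIGN offset) up to the
 * 8-byte multiple that ldc_mem_copy() requires. The ex_ names are
 * hypothetical, and the worked figure in the comment assumes a
 * VNET_IPALIGN of 6.
 */
#if 0
static int32_t
ex_desc_count(uint32_t pos, int32_t end, uint64_t len)
{
	if (end == -1)
		return (-1);	/* unbounded: process until not READY */
	return (end >= (int32_t)pos ?
	    end - (int32_t)pos + 1 : (int32_t)(len - pos + 1) + end);
}

static size_t
ex_aligned_len(size_t nbytes)
{
	/* e.g. nbytes = 60: (60 + 6 + 7) & ~7 = 72 */
	return ((nbytes + VNET_IPALIGN + 7) & ~7);
}
#endif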

/*
 * Dummy pkt data handler function for vnet protocol version 1.0.
 */
static void
vsw_process_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen)
{
	_NOTE(ARGUNUSED(arg1, arg2, msglen))
}
3998 */ 3999 i = (end + 1) % len; 4000 pub_addr = (vnet_public_desc_t *)dp->pub_addr + i; 4001 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i; 4002 4003 /* 4004 * Hold the restart lock across all of this to 4005 * make sure that it's not possible for us to 4006 * decide that a msg needs to be sent in the future 4007 * while the sending code, having already checked, 4008 * is about to exit. 4009 */ 4010 mutex_enter(&dp->restart_lock); 4011 ldcp->ldc_stats.dring_stopped_acks++; 4012 mutex_enter(&priv_addr->dstate_lock); 4013 if (pub_addr->hdr.dstate == VIO_DESC_READY) { 4014 4015 mutex_exit(&priv_addr->dstate_lock); 4016 4017 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO; 4018 dring_pkt->tag.vio_sid = ldcp->local_session; 4019 4020 dring_pkt->start_idx = (end + 1) % len; 4021 dring_pkt->end_idx = -1; 4022 4023 D2(vswp, "%s(%lld) : sending restart msg:" 4024 " %d : %d", __func__, ldcp->ldc_id, 4025 dring_pkt->start_idx, dring_pkt->end_idx); 4026 4027 msg_rv = vsw_send_msg(ldcp, (void *)dring_pkt, 4028 sizeof (vio_dring_msg_t), B_FALSE); 4029 ldcp->ldc_stats.dring_data_msgs++; 4030 4031 } else { 4032 mutex_exit(&priv_addr->dstate_lock); 4033 dp->restart_reqd = B_TRUE; 4034 } 4035 mutex_exit(&dp->restart_lock); 4036 } 4037 RW_EXIT(&ldcp->lane_out.dlistrw); 4038 4039 /* only do channel reset after dropping dlistrw lock */ 4040 if (msg_rv == ECONNRESET) 4041 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 4042 4043 break; 4044 4045 case VIO_SUBTYPE_NACK: 4046 DWARN(vswp, "%s(%lld): VIO_SUBTYPE_NACK", 4047 __func__, ldcp->ldc_id); 4048 /* 4049 * Something is badly wrong if we are getting NACKs 4050 * for our data pkts. So reset the channel. 4051 */ 4052 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 4053 4054 break; 4055 4056 default: 4057 DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__, 4058 ldcp->ldc_id, dring_pkt->tag.vio_subtype); 4059 } 4060 4061 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 4062 } 4063 4064 /* 4065 * dummy pkt data handler function for vnet protocol version 1.0 4066 */ 4067 static void 4068 vsw_process_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen) 4069 { 4070 _NOTE(ARGUNUSED(arg1, arg2, msglen)) 4071 } 4072 4073 /* 4074 * This function handles raw pkt data messages received over the channel. 4075 * Currently, only priority-eth-type frames are received through this mechanism. 4076 * In this case, the frame (data) is present within the message itself and 4077 * is copied into an mblk before being switched.
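 */

/*
 * A sketch (illustrative only, kept under #if 0 so it is never compiled)
 * of the headroom trick used by the receive paths in this file: reserve
 * VLAN_TAGSZ bytes at the front of the mblk so that vsw_vlan_frame_pretag()
 * can later insert a tag without reallocating the message.
 */
#if 0
static mblk_t *
example_alloc_with_headroom(size_t size)
{
	mblk_t *mp;

	if ((mp = allocb(size + VLAN_TAGSZ, BPRI_MED)) == NULL)
		return (NULL);
	mp->b_rptr += VLAN_TAGSZ;	/* reserve room for a VLAN tag */
	mp->b_wptr = mp->b_rptr;	/* frame data is copied from here */
	return (mp);
}
#endif

/*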
4078 */ 4079 static void 4080 vsw_process_pkt_data(void *arg1, void *arg2, uint32_t msglen) 4081 { 4082 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg1; 4083 vio_raw_data_msg_t *dpkt = (vio_raw_data_msg_t *)arg2; 4084 uint32_t size; 4085 mblk_t *mp; 4086 vsw_t *vswp = ldcp->ldc_vswp; 4087 vgen_stats_t *statsp = &ldcp->ldc_stats; 4088 lane_t *lp = &ldcp->lane_out; 4089 4090 size = msglen - VIO_PKT_DATA_HDRSIZE; 4091 if (size < ETHERMIN || size > lp->mtu) { 4092 (void) atomic_inc_32(&statsp->rx_pri_fail); 4093 DWARN(vswp, "%s(%lld) invalid size(%d)\n", __func__, 4094 ldcp->ldc_id, size); 4095 return; 4096 } 4097 4098 mp = vio_multipool_allocb(&ldcp->vmp, size + VLAN_TAGSZ); 4099 if (mp == NULL) { 4100 mp = allocb(size + VLAN_TAGSZ, BPRI_MED); 4101 if (mp == NULL) { 4102 (void) atomic_inc_32(&statsp->rx_pri_fail); 4103 DWARN(vswp, "%s(%lld) allocb failure, " 4104 "unable to process priority frame\n", __func__, 4105 ldcp->ldc_id); 4106 return; 4107 } 4108 } 4109 4110 /* skip over the extra space for vlan tag */ 4111 mp->b_rptr += VLAN_TAGSZ; 4112 4113 /* copy the frame from the payload of raw data msg into the mblk */ 4114 bcopy(dpkt->data, mp->b_rptr, size); 4115 mp->b_wptr = mp->b_rptr + size; 4116 4117 /* update stats */ 4118 (void) atomic_inc_64(&statsp->rx_pri_packets); 4119 (void) atomic_add_64(&statsp->rx_pri_bytes, size); 4120 4121 /* 4122 * VLAN_TAGSZ of extra space has been pre-alloc'd if tag is needed. 4123 */ 4124 (void) vsw_vlan_frame_pretag(ldcp->ldc_port, VSW_VNETPORT, mp); 4125 4126 /* switch the frame to destination */ 4127 vswp->vsw_switch_frame(vswp, mp, VSW_VNETPORT, ldcp->ldc_port, NULL); 4128 } 4129 4130 /* 4131 * Process an in-band descriptor message (most likely from 4132 * OBP). 4133 */ 4134 static void 4135 vsw_process_data_ibnd_pkt(vsw_ldc_t *ldcp, void *pkt) 4136 { 4137 vnet_ibnd_desc_t *ibnd_desc; 4138 dring_info_t *dp = NULL; 4139 vsw_private_desc_t *priv_addr = NULL; 4140 vsw_t *vswp = ldcp->ldc_vswp; 4141 mblk_t *mp = NULL; 4142 size_t nbytes = 0; 4143 size_t off = 0; 4144 uint64_t idx = 0; 4145 uint32_t num = 1, len, datalen = 0; 4146 uint64_t ncookies = 0; 4147 int i, rv; 4148 int j = 0; 4149 4150 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 4151 4152 ibnd_desc = (vnet_ibnd_desc_t *)pkt; 4153 4154 switch (ibnd_desc->hdr.tag.vio_subtype) { 4155 case VIO_SUBTYPE_INFO: 4156 D1(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 4157 4158 if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV)) 4159 return; 4160 4161 /* 4162 * Data is padded to align on an 8-byte boundary; 4163 * nbytes is the actual data length, i.e. minus that 4164 * padding. 4165 */ 4166 datalen = ibnd_desc->nbytes; 4167 4168 D2(vswp, "%s(%lld): processing inband desc : " 4169 ": datalen 0x%lx", __func__, ldcp->ldc_id, datalen); 4170 4171 ncookies = ibnd_desc->ncookies; 4172 4173 /* 4174 * allocb(9F) returns an aligned data block. We 4175 * need to ensure that we ask ldc for an aligned 4176 * number of bytes also.
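 */

/*
 * A sketch (illustrative only, kept under #if 0 so it is never compiled)
 * of the 8-byte round-up performed just below, so that the length passed
 * to ldc_mem_copy() matches the padded data in the exported buffer.
 */
#if 0
static size_t
example_roundup8(size_t nbytes)
{
	if (nbytes & 0x7)
		nbytes += 8 - (nbytes & 0x7);
	return (nbytes);
}
#endif

/*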
4177 */ 4178 nbytes = datalen; 4179 if (nbytes & 0x7) { 4180 off = 8 - (nbytes & 0x7); 4181 nbytes += off; 4182 } 4183 4184 /* alloc extra space for VLAN_TAG */ 4185 mp = allocb(datalen + 8, BPRI_MED); 4186 if (mp == NULL) { 4187 DERR(vswp, "%s(%lld): allocb failed", 4188 __func__, ldcp->ldc_id); 4189 ldcp->ldc_stats.rx_allocb_fail++; 4190 return; 4191 } 4192 4193 /* skip over the extra space for VLAN_TAG */ 4194 mp->b_rptr += 8; 4195 4196 rv = ldc_mem_copy(ldcp->ldc_handle, (caddr_t)mp->b_rptr, 4197 0, &nbytes, ibnd_desc->memcookie, (uint64_t)ncookies, 4198 LDC_COPY_IN); 4199 4200 if (rv != 0) { 4201 DERR(vswp, "%s(%d): unable to copy in data from " 4202 "%d cookie(s)", __func__, ldcp->ldc_id, ncookies); 4203 freemsg(mp); 4204 ldcp->ldc_stats.ierrors++; 4205 return; 4206 } 4207 4208 D2(vswp, "%s(%d): copied in %ld bytes using %d cookies", 4209 __func__, ldcp->ldc_id, nbytes, ncookies); 4210 4211 /* point to the actual end of data */ 4212 mp->b_wptr = mp->b_rptr + datalen; 4213 ldcp->ldc_stats.ipackets++; 4214 ldcp->ldc_stats.rbytes += datalen; 4215 4216 /* 4217 * We ACK back every in-band descriptor message we process. 4218 */ 4219 ibnd_desc->hdr.tag.vio_subtype = VIO_SUBTYPE_ACK; 4220 ibnd_desc->hdr.tag.vio_sid = ldcp->local_session; 4221 (void) vsw_send_msg(ldcp, (void *)ibnd_desc, 4222 sizeof (vnet_ibnd_desc_t), B_TRUE); 4223 4224 /* 4225 * there is extra space alloc'd for VLAN_TAG 4226 */ 4227 (void) vsw_vlan_frame_pretag(ldcp->ldc_port, VSW_VNETPORT, mp); 4228 4229 /* send the packet to be switched */ 4230 vswp->vsw_switch_frame(vswp, mp, VSW_VNETPORT, 4231 ldcp->ldc_port, NULL); 4232 4233 break; 4234 4235 case VIO_SUBTYPE_ACK: 4236 D1(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 4237 4238 /* Verify the ACK is valid */ 4239 idx = ibnd_desc->hdr.desc_handle; 4240 4241 if (idx >= vsw_ntxds) { 4242 cmn_err(CE_WARN, "!vsw%d: corrupted ACK received " 4243 "(idx %ld)", vswp->instance, idx); 4244 return; 4245 } 4246 4247 if ((dp = ldcp->lane_out.dringp) == NULL) { 4248 DERR(vswp, "%s: no dring found", __func__); 4249 return; 4250 } 4251 4252 len = dp->num_descriptors; 4253 /* 4254 * If the descriptor we are being ACK'ed for is not the 4255 * one we expected, then pkts were lost somewhere: either 4256 * a msg we tried to send or a previous ACK msg from 4257 * our peer was dropped. In either case we now reclaim the descriptors 4258 * in the range from the last ACK we received up to the 4259 * current ACK. 4260 */ 4261 if (idx != dp->last_ack_recv) { 4262 DWARN(vswp, "%s: dropped pkts detected, (%ld, %ld)", 4263 __func__, dp->last_ack_recv, idx); 4264 num = idx >= dp->last_ack_recv ? 4265 idx - dp->last_ack_recv + 1: 4266 (len - dp->last_ack_recv + 1) + idx; 4267 } 4268 4269 /* 4270 * When we sent the in-band message to our peer we 4271 * marked the copy in our private ring as READY. We now 4272 * check that the descriptor we are being ACK'ed for is in 4273 * fact READY, i.e. it is one we have shared with our peer. 4274 * 4275 * If it's not, we flag an error but still reset the 4276 * descriptor back to FREE.
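 */

/*
 * A sketch (illustrative only, kept under #if 0 so it is never compiled)
 * of the computation used above to size the reclaim range: the number of
 * descriptors in the circular range [last_ack .. idx], allowing for
 * wrap-around at 'len'; the loop below then walks exactly that many slots.
 */
#if 0
static uint32_t
example_range_len(uint32_t last_ack, uint32_t idx, uint32_t len)
{
	return (idx >= last_ack ?
	    idx - last_ack + 1 : (len - last_ack + 1) + idx);
}
#endif

/*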
4277 */ 4278 for (i = dp->last_ack_recv; j < num; i = (i + 1) % len, j++) { 4279 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i; 4280 mutex_enter(&priv_addr->dstate_lock); 4281 if (priv_addr->dstate != VIO_DESC_READY) { 4282 DERR(vswp, "%s: (%ld) desc at index %ld not " 4283 "READY (0x%lx)", __func__, 4284 ldcp->ldc_id, idx, priv_addr->dstate); 4285 DERR(vswp, "%s: bound %d: ncookies %ld : " 4286 "datalen %ld", __func__, 4287 priv_addr->bound, priv_addr->ncookies, 4288 priv_addr->datalen); 4289 } 4290 D2(vswp, "%s: (%lld) freeing descp at %lld", __func__, 4291 ldcp->ldc_id, idx); 4292 /* release resources associated with sent msg */ 4293 priv_addr->datalen = 0; 4294 priv_addr->dstate = VIO_DESC_FREE; 4295 mutex_exit(&priv_addr->dstate_lock); 4296 } 4297 /* update to next expected value */ 4298 dp->last_ack_recv = (idx + 1) % dp->num_descriptors; 4299 4300 break; 4301 4302 case VIO_SUBTYPE_NACK: 4303 DERR(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 4304 4305 /* 4306 * We should only get a NACK if our peer doesn't like 4307 * something about a message we have sent it. If this 4308 * happens we just release the resources associated with 4309 * the message. (We are relying on higher layers to decide 4310 * whether or not to resend.) 4311 */ 4312 4313 /* limit check */ 4314 idx = ibnd_desc->hdr.desc_handle; 4315 4316 if (idx >= vsw_ntxds) { 4317 DERR(vswp, "%s: corrupted NACK received (idx %lld)", 4318 __func__, idx); 4319 return; 4320 } 4321 4322 if ((dp = ldcp->lane_out.dringp) == NULL) { 4323 DERR(vswp, "%s: no dring found", __func__); 4324 return; 4325 } 4326 4327 priv_addr = (vsw_private_desc_t *)dp->priv_addr; 4328 4329 /* move to correct location in ring */ 4330 priv_addr += idx; 4331 4332 /* release resources associated with sent msg */ 4333 mutex_enter(&priv_addr->dstate_lock); 4334 priv_addr->datalen = 0; 4335 priv_addr->dstate = VIO_DESC_FREE; 4336 mutex_exit(&priv_addr->dstate_lock); 4337 4338 break; 4339 4340 default: 4341 DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__, 4342 ldcp->ldc_id, ibnd_desc->hdr.tag.vio_subtype); 4343 } 4344 4345 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 4346 } 4347 4348 static void 4349 vsw_process_err_pkt(vsw_ldc_t *ldcp, void *epkt, vio_msg_tag_t *tagp) 4350 { 4351 _NOTE(ARGUNUSED(epkt)) 4352 4353 vsw_t *vswp = ldcp->ldc_vswp; 4354 uint16_t env = tagp->vio_subtype_env; 4355 4356 D1(vswp, "%s (%lld): enter\n", __func__, ldcp->ldc_id); 4357 4358 /* 4359 * Error vio_subtypes have yet to be defined. So for 4360 * the moment we can't do anything. 4361 */ 4362 D2(vswp, "%s: (%x) vio_subtype env", __func__, env); 4363 4364 D1(vswp, "%s (%lld): exit\n", __func__, ldcp->ldc_id); 4365 } 4366 4367 /* transmit the packet over the given port */ 4368 int 4369 vsw_portsend(vsw_port_t *port, mblk_t *mp) 4370 { 4371 vsw_ldc_list_t *ldcl = &port->p_ldclist; 4372 vsw_ldc_t *ldcp; 4373 mblk_t *mpt; 4374 int count; 4375 int status = 0; 4376 4377 READ_ENTER(&ldcl->lockrw); 4378 /* 4379 * Note that for now we have a single channel. 4380 */ 4381 ldcp = ldcl->head; 4382 if (ldcp == NULL) { 4383 DERR(port->p_vswp, "vsw_portsend: no ldc: dropping packet\n"); 4384 freemsgchain(mp); 4385 RW_EXIT(&ldcl->lockrw); 4386 return (1); 4387 } 4388 4389 count = vsw_vlan_frame_untag(port, VSW_VNETPORT, &mp, &mpt); 4390 4391 if (count != 0) { 4392 status = ldcp->tx(ldcp, mp, mpt, count); 4393 } 4394 4395 RW_EXIT(&ldcl->lockrw); 4396 return (status); 4397 } 4398 4399 /* 4400 * Break up frames into two separate chains: normal and 4401 * priority, based on the frame type.
The number of 4402 * priority frames is also counted and returned. 4403 * 4404 * Params: 4405 * vswp: pointer to the instance of vsw 4406 * np: head of packet chain to be broken 4407 * npt: tail of packet chain to be broken 4408 * 4409 * Returns: 4410 * np: head of normal data packets 4411 * npt: tail of normal data packets 4412 * hp: head of high priority packets 4413 * hpt: tail of high priority packets 4414 */ 4415 static uint32_t 4416 vsw_get_pri_packets(vsw_t *vswp, mblk_t **np, mblk_t **npt, 4417 mblk_t **hp, mblk_t **hpt) 4418 { 4419 mblk_t *tmp = NULL; 4420 mblk_t *smp = NULL; 4421 mblk_t *hmp = NULL; /* high prio pkts head */ 4422 mblk_t *hmpt = NULL; /* high prio pkts tail */ 4423 mblk_t *nmp = NULL; /* normal pkts head */ 4424 mblk_t *nmpt = NULL; /* normal pkts tail */ 4425 uint32_t count = 0; 4426 int i; 4427 struct ether_header *ehp; 4428 uint32_t num_types; 4429 uint16_t *types; 4430 4431 tmp = *np; 4432 while (tmp != NULL) { 4433 4434 smp = tmp; 4435 tmp = tmp->b_next; 4436 smp->b_next = NULL; 4437 smp->b_prev = NULL; 4438 4439 ehp = (struct ether_header *)smp->b_rptr; 4440 num_types = vswp->pri_num_types; 4441 types = vswp->pri_types; 4442 for (i = 0; i < num_types; i++) { 4443 if (ehp->ether_type == types[i]) { 4444 /* high priority frame */ 4445 4446 if (hmp != NULL) { 4447 hmpt->b_next = smp; 4448 hmpt = smp; 4449 } else { 4450 hmp = hmpt = smp; 4451 } 4452 count++; 4453 break; 4454 } 4455 } 4456 if (i == num_types) { 4457 /* normal data frame */ 4458 4459 if (nmp != NULL) { 4460 nmpt->b_next = smp; 4461 nmpt = smp; 4462 } else { 4463 nmp = nmpt = smp; 4464 } 4465 } 4466 } 4467 4468 *hp = hmp; 4469 *hpt = hmpt; 4470 *np = nmp; 4471 *npt = nmpt; 4472 4473 return (count); 4474 } 4475 4476 /* 4477 * Wrapper function to transmit normal and/or priority frames over the channel. 4478 */ 4479 static int 4480 vsw_ldctx_pri(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count) 4481 { 4482 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 4483 mblk_t *tmp; 4484 mblk_t *smp; 4485 mblk_t *hmp; /* high prio pkts head */ 4486 mblk_t *hmpt; /* high prio pkts tail */ 4487 mblk_t *nmp; /* normal pkts head */ 4488 mblk_t *nmpt; /* normal pkts tail */ 4489 uint32_t n = 0; 4490 vsw_t *vswp = ldcp->ldc_vswp; 4491 4492 ASSERT(VSW_PRI_ETH_DEFINED(vswp)); 4493 ASSERT(count != 0); 4494 4495 nmp = mp; 4496 nmpt = mpt; 4497 4498 /* gather any priority frames from the chain of packets */ 4499 n = vsw_get_pri_packets(vswp, &nmp, &nmpt, &hmp, &hmpt); 4500 4501 /* transmit priority frames */ 4502 tmp = hmp; 4503 while (tmp != NULL) { 4504 smp = tmp; 4505 tmp = tmp->b_next; 4506 smp->b_next = NULL; 4507 vsw_ldcsend_pkt(ldcp, smp); 4508 } 4509 4510 count -= n; 4511 4512 if (count == 0) { 4513 /* no normal data frames to process */ 4514 return (0); 4515 } 4516 4517 return (vsw_ldctx(ldcp, nmp, nmpt, count)); 4518 } 4519 4520 /* 4521 * Wrapper function to transmit normal frames over the channel. 4522 */ 4523 static int 4524 vsw_ldctx(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count) 4525 { 4526 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 4527 mblk_t *tmp = NULL; 4528 4529 ASSERT(count != 0); 4530 /* 4531 * If the TX thread is enabled, then queue the 4532 * ordinary frames and signal the tx thread. 4533 */ 4534 if (ldcp->tx_thread != NULL) { 4535 4536 mutex_enter(&ldcp->tx_thr_lock); 4537 4538 if ((ldcp->tx_cnt + count) >= vsw_max_tx_qcount) { 4539 /* 4540 * If we reached queue limit, 4541 * do not queue new packets, 4542 * drop them. 
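 */

/*
 * A sketch (illustrative only, kept under #if 0 so it is never compiled)
 * of the classification step performed by vsw_get_pri_packets() above: a
 * frame is high priority if its ethertype matches one of the configured
 * priority types (both sides are compared in network byte order).
 */
#if 0
static boolean_t
example_is_pri_frame(struct ether_header *ehp, uint16_t *types, uint32_t n)
{
	uint32_t i;

	for (i = 0; i < n; i++) {
		if (ehp->ether_type == types[i])
			return (B_TRUE);
	}
	return (B_FALSE);
}
#endif

/*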
4543 */ 4544 ldcp->ldc_stats.tx_qfull += count; 4545 mutex_exit(&ldcp->tx_thr_lock); 4546 freemsgchain(mp); 4547 goto exit; 4548 } 4549 if (ldcp->tx_mhead == NULL) { 4550 ldcp->tx_mhead = mp; 4551 ldcp->tx_mtail = mpt; 4552 cv_signal(&ldcp->tx_thr_cv); 4553 } else { 4554 ldcp->tx_mtail->b_next = mp; 4555 ldcp->tx_mtail = mpt; 4556 } 4557 ldcp->tx_cnt += count; 4558 mutex_exit(&ldcp->tx_thr_lock); 4559 } else { 4560 while (mp != NULL) { 4561 tmp = mp->b_next; 4562 mp->b_next = mp->b_prev = NULL; 4563 (void) vsw_ldcsend(ldcp, mp, 1); 4564 mp = tmp; 4565 } 4566 } 4567 4568 exit: 4569 return (0); 4570 } 4571 4572 /* 4573 * This function transmits the frame in the payload of a raw data 4574 * (VIO_PKT_DATA) message. Thus, it provides an Out-Of-Band path to 4575 * send special frames with high priorities, without going through 4576 * the normal data path which uses the descriptor ring mechanism. 4577 */ 4578 static void 4579 vsw_ldcsend_pkt(vsw_ldc_t *ldcp, mblk_t *mp) 4580 { 4581 vio_raw_data_msg_t *pkt; 4582 mblk_t *bp; 4583 mblk_t *nmp = NULL; 4584 caddr_t dst; 4585 uint32_t mblksz; 4586 uint32_t size; 4587 uint32_t nbytes; 4588 int rv; 4589 vsw_t *vswp = ldcp->ldc_vswp; 4590 vgen_stats_t *statsp = &ldcp->ldc_stats; 4591 4592 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 4593 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 4594 (void) atomic_inc_32(&statsp->tx_pri_fail); 4595 DWARN(vswp, "%s(%lld) status(%d) lstate(0x%llx), dropping " 4596 "packet\n", __func__, ldcp->ldc_id, ldcp->ldc_status, 4597 ldcp->lane_out.lstate); 4598 goto send_pkt_exit; 4599 } 4600 4601 size = msgsize(mp); 4602 4603 /* frame size bigger than available payload len of raw data msg ? */ 4604 if (size > (size_t)(ldcp->msglen - VIO_PKT_DATA_HDRSIZE)) { 4605 (void) atomic_inc_32(&statsp->tx_pri_fail); 4606 DWARN(vswp, "%s(%lld) invalid size(%d)\n", __func__, 4607 ldcp->ldc_id, size); 4608 goto send_pkt_exit; 4609 } 4610 4611 if (size < ETHERMIN) 4612 size = ETHERMIN; 4613 4614 /* alloc space for a raw data message */ 4615 nmp = vio_allocb(vswp->pri_tx_vmp); 4616 if (nmp == NULL) { 4617 (void) atomic_inc_32(&statsp->tx_pri_fail); 4618 DWARN(vswp, "vio_allocb failed\n"); 4619 goto send_pkt_exit; 4620 } 4621 pkt = (vio_raw_data_msg_t *)nmp->b_rptr; 4622 4623 /* copy frame into the payload of raw data message */ 4624 dst = (caddr_t)pkt->data; 4625 for (bp = mp; bp != NULL; bp = bp->b_cont) { 4626 mblksz = MBLKL(bp); 4627 bcopy(bp->b_rptr, dst, mblksz); 4628 dst += mblksz; 4629 } 4630 4631 /* setup the raw data msg */ 4632 pkt->tag.vio_msgtype = VIO_TYPE_DATA; 4633 pkt->tag.vio_subtype = VIO_SUBTYPE_INFO; 4634 pkt->tag.vio_subtype_env = VIO_PKT_DATA; 4635 pkt->tag.vio_sid = ldcp->local_session; 4636 nbytes = VIO_PKT_DATA_HDRSIZE + size; 4637 4638 /* send the msg over ldc */ 4639 rv = vsw_send_msg(ldcp, (void *)pkt, nbytes, B_TRUE); 4640 if (rv != 0) { 4641 (void) atomic_inc_32(&statsp->tx_pri_fail); 4642 DWARN(vswp, "%s(%lld) Error sending priority frame\n", __func__, 4643 ldcp->ldc_id); 4644 goto send_pkt_exit; 4645 } 4646 4647 /* update stats */ 4648 (void) atomic_inc_64(&statsp->tx_pri_packets); 4649 (void) atomic_add_64(&statsp->tx_pri_bytes, size); 4650 4651 send_pkt_exit: 4652 if (nmp != NULL) 4653 freemsg(nmp); 4654 freemsg(mp); 4655 } 4656 4657 /* 4658 * Transmit the packet over the given LDC channel. 4659 * 4660 * The 'retries' argument indicates how many times a packet 4661 * is retried before it is dropped.
Note that the retry is done 4662 * only for resource-related failures; for all other failures 4663 * the packet is dropped immediately. 4664 */ 4665 static int 4666 vsw_ldcsend(vsw_ldc_t *ldcp, mblk_t *mp, uint32_t retries) 4667 { 4668 int i; 4669 int rc; 4670 int status = 0; 4671 vsw_port_t *port = ldcp->ldc_port; 4672 dring_info_t *dp = NULL; 4673 4674 4675 for (i = 0; i < retries; ) { 4676 /* 4677 * Send the message out using the appropriate 4678 * transmit function, which will free the mblk when it 4679 * is finished with it. 4680 */ 4681 mutex_enter(&port->tx_lock); 4682 if (port->transmit != NULL) { 4683 status = (*port->transmit)(ldcp, mp); 4684 } 4685 if (status == LDC_TX_SUCCESS) { 4686 mutex_exit(&port->tx_lock); 4687 break; 4688 } 4689 i++; /* increment the counter here */ 4690 4691 /* If it's the last retry, then update the oerrors count */ 4692 if ((i == retries) && (status == LDC_TX_NORESOURCES)) { 4693 ldcp->ldc_stats.oerrors++; 4694 } 4695 mutex_exit(&port->tx_lock); 4696 4697 if (status != LDC_TX_NORESOURCES) { 4698 /* 4699 * No retrying required for errors unrelated 4700 * to resources. 4701 */ 4702 break; 4703 } 4704 READ_ENTER(&ldcp->lane_out.dlistrw); 4705 if (((dp = ldcp->lane_out.dringp) != NULL) && 4706 ((VSW_VER_GTEQ(ldcp, 1, 2) && 4707 (ldcp->lane_out.xfer_mode & VIO_DRING_MODE_V1_2)) || 4708 ((VSW_VER_LT(ldcp, 1, 2) && 4709 (ldcp->lane_out.xfer_mode == VIO_DRING_MODE_V1_0))))) { 4710 rc = vsw_reclaim_dring(dp, dp->end_idx); 4711 } else { 4712 /* 4713 * If there is no dring or the xfer_mode is 4714 * set to DESC_MODE (i.e., OBP), then simply break here. 4715 */ 4716 RW_EXIT(&ldcp->lane_out.dlistrw); 4717 break; 4718 } 4719 RW_EXIT(&ldcp->lane_out.dlistrw); 4720 4721 /* 4722 * Delay only if none were reclaimed 4723 * and it's not the last retry. 4724 */ 4725 if ((rc == 0) && (i < retries)) { 4726 delay(drv_usectohz(vsw_ldc_tx_delay)); 4727 } 4728 } 4729 freemsg(mp); 4730 return (status); 4731 } 4732 4733 /* 4734 * Send packet out via descriptor ring to a logical device. 4735 */ 4736 static int 4737 vsw_dringsend(vsw_ldc_t *ldcp, mblk_t *mp) 4738 { 4739 vio_dring_msg_t dring_pkt; 4740 dring_info_t *dp = NULL; 4741 vsw_private_desc_t *priv_desc = NULL; 4742 vnet_public_desc_t *pub = NULL; 4743 vsw_t *vswp = ldcp->ldc_vswp; 4744 mblk_t *bp; 4745 size_t n, size; 4746 caddr_t bufp; 4747 int idx; 4748 int status = LDC_TX_SUCCESS; 4749 struct ether_header *ehp = (struct ether_header *)mp->b_rptr; 4750 lane_t *lp = &ldcp->lane_out; 4751 4752 D1(vswp, "%s(%lld): enter\n", __func__, ldcp->ldc_id); 4753 4754 /* TODO: make test a macro */ 4755 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 4756 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 4757 DWARN(vswp, "%s(%lld) status(%d) lstate(0x%llx), dropping " 4758 "packet\n", __func__, ldcp->ldc_id, ldcp->ldc_status, 4759 ldcp->lane_out.lstate); 4760 ldcp->ldc_stats.oerrors++; 4761 return (LDC_TX_FAILURE); 4762 } 4763 4764 /* 4765 * Note - using first ring only, this may change 4766 * in the future.
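 */

/*
 * A condensed sketch of the retry policy implemented by vsw_ldcsend()
 * above (illustrative only, kept under #if 0; example_transmit() and
 * example_reclaim() are hypothetical stand-ins for the transmit and
 * dring-reclaim steps).
 */
#if 0
extern int example_transmit(void);	/* hypothetical send attempt */
extern int example_reclaim(void);	/* hypothetical reclaim; returns count */

static int
example_send_with_retry(uint32_t retries)
{
	uint32_t i;
	int status = LDC_TX_SUCCESS;

	for (i = 0; i < retries; i++) {
		status = example_transmit();
		if (status != LDC_TX_NORESOURCES)
			break;		/* success, or a non-resource error */
		/* delay only when nothing could be reclaimed */
		if (example_reclaim() == 0 && (i + 1) < retries)
			delay(drv_usectohz(vsw_ldc_tx_delay));
	}
	return (status);
}
#endif

/*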
4767 */ 4768 READ_ENTER(&ldcp->lane_out.dlistrw); 4769 if ((dp = ldcp->lane_out.dringp) == NULL) { 4770 RW_EXIT(&ldcp->lane_out.dlistrw); 4771 DERR(vswp, "%s(%lld): no dring for outbound lane on" 4772 " channel %d", __func__, ldcp->ldc_id, ldcp->ldc_id); 4773 ldcp->ldc_stats.oerrors++; 4774 return (LDC_TX_FAILURE); 4775 } 4776 4777 size = msgsize(mp); 4778 if (size > (size_t)lp->mtu) { 4779 RW_EXIT(&ldcp->lane_out.dlistrw); 4780 DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__, 4781 ldcp->ldc_id, size); 4782 ldcp->ldc_stats.oerrors++; 4783 return (LDC_TX_FAILURE); 4784 } 4785 4786 /* 4787 * Find a free descriptor 4788 * 4789 * Note: for the moment we are assuming that we will only 4790 * have one dring going from the switch to each of its 4791 * peers. This may change in the future. 4792 */ 4793 if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) { 4794 D2(vswp, "%s(%lld): no descriptor available for ring " 4795 "at 0x%llx", __func__, ldcp->ldc_id, dp); 4796 4797 /* nothing more we can do */ 4798 status = LDC_TX_NORESOURCES; 4799 ldcp->ldc_stats.tx_no_desc++; 4800 goto vsw_dringsend_free_exit; 4801 } else { 4802 D2(vswp, "%s(%lld): free private descriptor found at pos %ld " 4803 "addr 0x%llx\n", __func__, ldcp->ldc_id, idx, priv_desc); 4804 } 4805 4806 /* copy data into the descriptor */ 4807 bufp = priv_desc->datap; 4808 bufp += VNET_IPALIGN; 4809 for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) { 4810 n = MBLKL(bp); 4811 bcopy(bp->b_rptr, bufp, n); 4812 bufp += n; 4813 } 4814 4815 priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size; 4816 4817 pub = priv_desc->descp; 4818 pub->nbytes = priv_desc->datalen; 4819 4820 /* update statistics */ 4821 if (IS_BROADCAST(ehp)) 4822 ldcp->ldc_stats.brdcstxmt++; 4823 else if (IS_MULTICAST(ehp)) 4824 ldcp->ldc_stats.multixmt++; 4825 ldcp->ldc_stats.opackets++; 4826 ldcp->ldc_stats.obytes += priv_desc->datalen; 4827 4828 mutex_enter(&priv_desc->dstate_lock); 4829 pub->hdr.dstate = VIO_DESC_READY; 4830 mutex_exit(&priv_desc->dstate_lock); 4831 4832 /* 4833 * Determine whether or not we need to send a message to our 4834 * peer prompting them to read our newly updated descriptor(s). 4835 */ 4836 mutex_enter(&dp->restart_lock); 4837 if (dp->restart_reqd) { 4838 dp->restart_reqd = B_FALSE; 4839 ldcp->ldc_stats.dring_data_msgs++; 4840 mutex_exit(&dp->restart_lock); 4841 4842 /* 4843 * Send a vio_dring_msg to peer to prompt them to read 4844 * the updated descriptor ring. 4845 */ 4846 dring_pkt.tag.vio_msgtype = VIO_TYPE_DATA; 4847 dring_pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 4848 dring_pkt.tag.vio_subtype_env = VIO_DRING_DATA; 4849 dring_pkt.tag.vio_sid = ldcp->local_session; 4850 4851 /* Note - for now using first ring */ 4852 dring_pkt.dring_ident = dp->ident; 4853 4854 /* 4855 * If last_ack_recv is -1 then we know we've not 4856 * received any ACKs yet, so this must be the first 4857 * msg sent; set the start to the beginning of the ring.
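 */

/*
 * A sketch (illustrative only, kept under #if 0 so it is never compiled)
 * of the start index computation performed just below: the first data msg
 * starts at slot 0, later ones start immediately after the last ACKed
 * descriptor, wrapping at the ring size.
 */
#if 0
static uint32_t
example_start_idx(int64_t last_ack_recv, uint32_t ndescs)
{
	return (last_ack_recv == -1 ?
	    0 : (uint32_t)((last_ack_recv + 1) % ndescs));
}
#endif

/*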
4858 */ 4859 mutex_enter(&dp->dlock); 4860 if (dp->last_ack_recv == -1) { 4861 dring_pkt.start_idx = 0; 4862 } else { 4863 dring_pkt.start_idx = 4864 (dp->last_ack_recv + 1) % dp->num_descriptors; 4865 } 4866 dring_pkt.end_idx = -1; 4867 mutex_exit(&dp->dlock); 4868 4869 D3(vswp, "%s(%lld): dring 0x%llx : ident 0x%llx\n", __func__, 4870 ldcp->ldc_id, dp, dring_pkt.dring_ident); 4871 D3(vswp, "%s(%lld): start %lld : end %lld :\n", 4872 __func__, ldcp->ldc_id, dring_pkt.start_idx, 4873 dring_pkt.end_idx); 4874 4875 RW_EXIT(&ldcp->lane_out.dlistrw); 4876 4877 (void) vsw_send_msg(ldcp, (void *)&dring_pkt, 4878 sizeof (vio_dring_msg_t), B_TRUE); 4879 4880 return (status); 4881 4882 } else { 4883 mutex_exit(&dp->restart_lock); 4884 D2(vswp, "%s(%lld): updating descp %d", __func__, 4885 ldcp->ldc_id, idx); 4886 } 4887 4888 vsw_dringsend_free_exit: 4889 4890 RW_EXIT(&ldcp->lane_out.dlistrw); 4891 4892 D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id); 4893 return (status); 4894 } 4895 4896 /* 4897 * Send an in-band descriptor message over ldc. 4898 */ 4899 static int 4900 vsw_descrsend(vsw_ldc_t *ldcp, mblk_t *mp) 4901 { 4902 vsw_t *vswp = ldcp->ldc_vswp; 4903 vnet_ibnd_desc_t ibnd_msg; 4904 vsw_private_desc_t *priv_desc = NULL; 4905 dring_info_t *dp = NULL; 4906 size_t n, size = 0; 4907 caddr_t bufp; 4908 mblk_t *bp; 4909 int idx, i; 4910 int status = LDC_TX_SUCCESS; 4911 static int warn_msg = 1; 4912 lane_t *lp = &ldcp->lane_out; 4913 4914 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 4915 4916 ASSERT(mp != NULL); 4917 4918 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 4919 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 4920 DERR(vswp, "%s(%lld) status(%d) state (0x%llx), dropping pkt", 4921 __func__, ldcp->ldc_id, ldcp->ldc_status, 4922 ldcp->lane_out.lstate); 4923 ldcp->ldc_stats.oerrors++; 4924 return (LDC_TX_FAILURE); 4925 } 4926 4927 /* 4928 * We only expect a single dring to exist, which we use 4929 * as an internal buffer rather than a transfer channel.
4930 */ 4931 READ_ENTER(&ldcp->lane_out.dlistrw); 4932 if ((dp = ldcp->lane_out.dringp) == NULL) { 4933 DERR(vswp, "%s(%lld): no dring for outbound lane", 4934 __func__, ldcp->ldc_id); 4935 DERR(vswp, "%s(%lld) status(%d) state (0x%llx)", __func__, 4936 ldcp->ldc_id, ldcp->ldc_status, ldcp->lane_out.lstate); 4937 RW_EXIT(&ldcp->lane_out.dlistrw); 4938 ldcp->ldc_stats.oerrors++; 4939 return (LDC_TX_FAILURE); 4940 } 4941 4942 size = msgsize(mp); 4943 if (size > (size_t)lp->mtu) { 4944 RW_EXIT(&ldcp->lane_out.dlistrw); 4945 DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__, 4946 ldcp->ldc_id, size); 4947 ldcp->ldc_stats.oerrors++; 4948 return (LDC_TX_FAILURE); 4949 } 4950 4951 /* 4952 * Find a free descriptor in our buffer ring 4953 */ 4954 if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) { 4955 RW_EXIT(&ldcp->lane_out.dlistrw); 4956 if (warn_msg) { 4957 DERR(vswp, "%s(%lld): no descriptor available for ring " 4958 "at 0x%llx", __func__, ldcp->ldc_id, dp); 4959 warn_msg = 0; 4960 } 4961 4962 /* nothing more we can do */ 4963 status = LDC_TX_NORESOURCES; 4964 goto vsw_descrsend_free_exit; 4965 } else { 4966 D2(vswp, "%s(%lld): free private descriptor found at pos " 4967 "%ld addr 0x%x\n", __func__, ldcp->ldc_id, idx, priv_desc); 4968 warn_msg = 1; 4969 } 4970 4971 /* copy data into the descriptor */ 4972 bufp = priv_desc->datap; 4973 for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) { 4974 n = MBLKL(bp); 4975 bcopy(bp->b_rptr, bufp, n); 4976 bufp += n; 4977 } 4978 4979 priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size; 4980 4981 /* create and send the in-band descp msg */ 4982 ibnd_msg.hdr.tag.vio_msgtype = VIO_TYPE_DATA; 4983 ibnd_msg.hdr.tag.vio_subtype = VIO_SUBTYPE_INFO; 4984 ibnd_msg.hdr.tag.vio_subtype_env = VIO_DESC_DATA; 4985 ibnd_msg.hdr.tag.vio_sid = ldcp->local_session; 4986 4987 /* 4988 * Copy the mem cookies describing the data from the 4989 * private region of the descriptor ring into the inband 4990 * descriptor. 
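 */

/*
 * A sketch (illustrative only, kept under #if 0 so it is never compiled)
 * of the cookie copy done just below: each ldc_mem_cookie_t names one
 * exported memory region (address plus size), so a multi-cookie buffer is
 * described by copying the array element by element into the in-band
 * descriptor.
 */
#if 0
static void
example_copy_cookies(ldc_mem_cookie_t *dst, const ldc_mem_cookie_t *src,
    uint64_t ncookies)
{
	uint64_t i;

	for (i = 0; i < ncookies; i++)
		dst[i] = src[i];	/* struct assignment, same as bcopy */
}
#endif

/*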
4991 */ 4992 for (i = 0; i < priv_desc->ncookies; i++) { 4993 bcopy(&priv_desc->memcookie[i], &ibnd_msg.memcookie[i], 4994 sizeof (ldc_mem_cookie_t)); 4995 } 4996 4997 ibnd_msg.hdr.desc_handle = idx; 4998 ibnd_msg.ncookies = priv_desc->ncookies; 4999 ibnd_msg.nbytes = size; 5000 5001 ldcp->ldc_stats.opackets++; 5002 ldcp->ldc_stats.obytes += size; 5003 5004 RW_EXIT(&ldcp->lane_out.dlistrw); 5005 5006 (void) vsw_send_msg(ldcp, (void *)&ibnd_msg, 5007 sizeof (vnet_ibnd_desc_t), B_TRUE); 5008 5009 vsw_descrsend_free_exit: 5010 5011 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 5012 return (status); 5013 } 5014 5015 static void 5016 vsw_send_ver(void *arg) 5017 { 5018 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 5019 vsw_t *vswp = ldcp->ldc_vswp; 5020 lane_t *lp = &ldcp->lane_out; 5021 vio_ver_msg_t ver_msg; 5022 5023 D1(vswp, "%s enter", __func__); 5024 5025 ver_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 5026 ver_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 5027 ver_msg.tag.vio_subtype_env = VIO_VER_INFO; 5028 ver_msg.tag.vio_sid = ldcp->local_session; 5029 5030 if (vsw_obp_ver_proto_workaround == B_FALSE) { 5031 ver_msg.ver_major = vsw_versions[0].ver_major; 5032 ver_msg.ver_minor = vsw_versions[0].ver_minor; 5033 } else { 5034 /* use the major/minor that we've ack'd */ 5035 lane_t *lpi = &ldcp->lane_in; 5036 ver_msg.ver_major = lpi->ver_major; 5037 ver_msg.ver_minor = lpi->ver_minor; 5038 } 5039 ver_msg.dev_class = VDEV_NETWORK_SWITCH; 5040 5041 lp->lstate |= VSW_VER_INFO_SENT; 5042 lp->ver_major = ver_msg.ver_major; 5043 lp->ver_minor = ver_msg.ver_minor; 5044 5045 DUMP_TAG(ver_msg.tag); 5046 5047 (void) vsw_send_msg(ldcp, &ver_msg, sizeof (vio_ver_msg_t), B_TRUE); 5048 5049 D1(vswp, "%s (%d): exit", __func__, ldcp->ldc_id); 5050 } 5051 5052 static void 5053 vsw_send_attr(vsw_ldc_t *ldcp) 5054 { 5055 vsw_t *vswp = ldcp->ldc_vswp; 5056 lane_t *lp = &ldcp->lane_out; 5057 vnet_attr_msg_t attr_msg; 5058 5059 D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id); 5060 5061 /* 5062 * Subtype is set to INFO by default 5063 */ 5064 attr_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 5065 attr_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 5066 attr_msg.tag.vio_subtype_env = VIO_ATTR_INFO; 5067 attr_msg.tag.vio_sid = ldcp->local_session; 5068 5069 /* payload copied from default settings for lane */ 5070 attr_msg.mtu = lp->mtu; 5071 attr_msg.addr_type = lp->addr_type; 5072 attr_msg.xfer_mode = lp->xfer_mode; 5073 attr_msg.ack_freq = lp->ack_freq; 5074 5075 READ_ENTER(&vswp->if_lockrw); 5076 attr_msg.addr = vnet_macaddr_strtoul((vswp->if_addr).ether_addr_octet); 5077 RW_EXIT(&vswp->if_lockrw); 5078 5079 ldcp->lane_out.lstate |= VSW_ATTR_INFO_SENT; 5080 5081 DUMP_TAG(attr_msg.tag); 5082 5083 (void) vsw_send_msg(ldcp, &attr_msg, sizeof (vnet_attr_msg_t), B_TRUE); 5084 5085 D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id); 5086 } 5087 5088 /* 5089 * Create dring info msg (which also results in the creation of 5090 * a dring). 5091 */ 5092 static vio_dring_reg_msg_t * 5093 vsw_create_dring_info_pkt(vsw_ldc_t *ldcp) 5094 { 5095 vio_dring_reg_msg_t *mp; 5096 dring_info_t *dp; 5097 vsw_t *vswp = ldcp->ldc_vswp; 5098 int rv; 5099 5100 D1(vswp, "vsw_create_dring_info_pkt enter\n"); 5101 5102 /* 5103 * If we can't create a dring, there is obviously no point 5104 * in sending a message.
5105 */ 5106 if ((dp = vsw_create_dring(ldcp)) == NULL) 5107 return (NULL); 5108 5109 /* Allocate pools of receive mblks */ 5110 rv = vsw_init_multipools(ldcp, vswp); 5111 if (rv) { 5112 /* 5113 * We do not return failure if receive mblk pools can't be 5114 * allocated; instead allocb(9F) will be used to dynamically 5115 * allocate buffers during receive. 5116 */ 5117 DWARN(vswp, "%s: unable to create free mblk pools for" 5118 " channel %ld (rv %d)", __func__, ldcp->ldc_id, rv); 5119 } 5120 5121 mp = kmem_zalloc(sizeof (vio_dring_reg_msg_t), KM_SLEEP); 5122 5123 mp->tag.vio_msgtype = VIO_TYPE_CTRL; 5124 mp->tag.vio_subtype = VIO_SUBTYPE_INFO; 5125 mp->tag.vio_subtype_env = VIO_DRING_REG; 5126 mp->tag.vio_sid = ldcp->local_session; 5127 5128 /* payload */ 5129 mp->num_descriptors = dp->num_descriptors; 5130 mp->descriptor_size = dp->descriptor_size; 5131 mp->options = dp->options; 5132 mp->ncookies = dp->ncookies; 5133 bcopy(&dp->cookie[0], &mp->cookie[0], sizeof (ldc_mem_cookie_t)); 5134 5135 mp->dring_ident = 0; 5136 5137 D1(vswp, "vsw_create_dring_info_pkt exit\n"); 5138 5139 return (mp); 5140 } 5141 5142 static void 5143 vsw_send_dring_info(vsw_ldc_t *ldcp) 5144 { 5145 vio_dring_reg_msg_t *dring_msg; 5146 vsw_t *vswp = ldcp->ldc_vswp; 5147 5148 D1(vswp, "%s: (%ld) enter", __func__, ldcp->ldc_id); 5149 5150 dring_msg = vsw_create_dring_info_pkt(ldcp); 5151 if (dring_msg == NULL) { 5152 cmn_err(CE_WARN, "!vsw%d: %s: error creating msg", 5153 vswp->instance, __func__); 5154 return; 5155 } 5156 5157 ldcp->lane_out.lstate |= VSW_DRING_INFO_SENT; 5158 5159 DUMP_TAG_PTR((vio_msg_tag_t *)dring_msg); 5160 5161 (void) vsw_send_msg(ldcp, dring_msg, 5162 sizeof (vio_dring_reg_msg_t), B_TRUE); 5163 5164 kmem_free(dring_msg, sizeof (vio_dring_reg_msg_t)); 5165 5166 D1(vswp, "%s: (%ld) exit", __func__, ldcp->ldc_id); 5167 } 5168 5169 static void 5170 vsw_send_rdx(vsw_ldc_t *ldcp) 5171 { 5172 vsw_t *vswp = ldcp->ldc_vswp; 5173 vio_rdx_msg_t rdx_msg; 5174 5175 D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id); 5176 5177 rdx_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 5178 rdx_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 5179 rdx_msg.tag.vio_subtype_env = VIO_RDX; 5180 rdx_msg.tag.vio_sid = ldcp->local_session; 5181 5182 ldcp->lane_in.lstate |= VSW_RDX_INFO_SENT; 5183 5184 DUMP_TAG(rdx_msg.tag); 5185 5186 (void) vsw_send_msg(ldcp, &rdx_msg, sizeof (vio_rdx_msg_t), B_TRUE); 5187 5188 D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id); 5189 } 5190 5191 /* 5192 * Generic routine to send a message out over the ldc channel. 5193 * 5194 * It is possible that when we attempt to write over the ldc channel 5195 * we get notified that it has been reset. Depending on the value 5196 * of the handle_reset flag we either handle that event here or simply 5197 * notify the caller that the channel was reset.
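 */

/*
 * A sketch (illustrative only, kept under #if 0 so it is never compiled)
 * of the two caller patterns for vsw_send_msg() below: callers holding
 * the dlistrw lock must pass handle_reset == B_FALSE and deal with
 * ECONNRESET themselves after dropping the lock, since the channel must
 * never be reset while dlistrw is held.
 */
#if 0
static void
example_send_under_dlistrw(vsw_ldc_t *ldcp, void *msgp, int size)
{
	int rv;

	READ_ENTER(&ldcp->lane_out.dlistrw);
	rv = vsw_send_msg(ldcp, msgp, size, B_FALSE);	/* no reset here */
	RW_EXIT(&ldcp->lane_out.dlistrw);

	if (rv == ECONNRESET)
		vsw_process_conn_evt(ldcp, VSW_CONN_RESET);
}
#endif

/*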
5198 */ 5199 int 5200 vsw_send_msg(vsw_ldc_t *ldcp, void *msgp, int size, boolean_t handle_reset) 5201 { 5202 int rv; 5203 size_t msglen = size; 5204 vio_msg_tag_t *tag = (vio_msg_tag_t *)msgp; 5205 vsw_t *vswp = ldcp->ldc_vswp; 5206 vio_dring_msg_t *dmsg; 5207 vio_raw_data_msg_t *rmsg; 5208 vnet_ibnd_desc_t *imsg; 5209 boolean_t data_msg = B_FALSE; 5210 int wretries = vsw_wretries; /* local copy, so the global tunable is not consumed */ 5211 D1(vswp, "vsw_send_msg (%lld) enter : sending %d bytes", 5212 ldcp->ldc_id, size); 5213 5214 D2(vswp, "send_msg: type 0x%llx", tag->vio_msgtype); 5215 D2(vswp, "send_msg: stype 0x%llx", tag->vio_subtype); 5216 D2(vswp, "send_msg: senv 0x%llx", tag->vio_subtype_env); 5217 5218 mutex_enter(&ldcp->ldc_txlock); 5219 5220 if (tag->vio_subtype == VIO_SUBTYPE_INFO) { 5221 if (tag->vio_subtype_env == VIO_DRING_DATA) { 5222 dmsg = (vio_dring_msg_t *)tag; 5223 dmsg->seq_num = ldcp->lane_out.seq_num; 5224 data_msg = B_TRUE; 5225 } else if (tag->vio_subtype_env == VIO_PKT_DATA) { 5226 rmsg = (vio_raw_data_msg_t *)tag; 5227 rmsg->seq_num = ldcp->lane_out.seq_num; 5228 data_msg = B_TRUE; 5229 } else if (tag->vio_subtype_env == VIO_DESC_DATA) { 5230 imsg = (vnet_ibnd_desc_t *)tag; 5231 imsg->hdr.seq_num = ldcp->lane_out.seq_num; 5232 data_msg = B_TRUE; 5233 } 5234 } 5235 5236 do { 5237 msglen = size; 5238 rv = ldc_write(ldcp->ldc_handle, (caddr_t)msgp, &msglen); 5239 } while (rv == EWOULDBLOCK && --wretries > 0); 5240 5241 if (rv == 0 && data_msg == B_TRUE) { 5242 ldcp->lane_out.seq_num++; 5243 } 5244 5245 if ((rv != 0) || (msglen != size)) { 5246 DERR(vswp, "vsw_send_msg:ldc_write failed: chan(%lld) rv(%d) " 5247 "size (%d) msglen(%d)\n", ldcp->ldc_id, rv, size, msglen); 5248 ldcp->ldc_stats.oerrors++; 5249 } 5250 5251 mutex_exit(&ldcp->ldc_txlock); 5252 5253 /* 5254 * If the channel has been reset we either handle it here or 5255 * simply report back that it has been reset and let the caller 5256 * decide what to do. 5257 */ 5258 if (rv == ECONNRESET) { 5259 DWARN(vswp, "%s (%lld) channel reset", __func__, ldcp->ldc_id); 5260 5261 /* 5262 * N.B. - we must never be holding the dlistrw lock when 5263 * we do a reset of the channel. 5264 */ 5265 if (handle_reset) { 5266 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 5267 } 5268 } 5269 5270 return (rv); 5271 } 5272 5273 /* 5274 * Remove the specified address from the list of addresses maintained 5275 * in this port node.
5276 */ 5277 mcst_addr_t * 5278 vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr) 5279 { 5280 vsw_t *vswp = NULL; 5281 vsw_port_t *port = NULL; 5282 mcst_addr_t *prev_p = NULL; 5283 mcst_addr_t *curr_p = NULL; 5284 5285 D1(NULL, "%s: enter : devtype %d : addr 0x%llx", 5286 __func__, devtype, addr); 5287 5288 if (devtype == VSW_VNETPORT) { 5289 port = (vsw_port_t *)arg; 5290 mutex_enter(&port->mca_lock); 5291 prev_p = curr_p = port->mcap; 5292 } else { 5293 vswp = (vsw_t *)arg; 5294 mutex_enter(&vswp->mca_lock); 5295 prev_p = curr_p = vswp->mcap; 5296 } 5297 5298 while (curr_p != NULL) { 5299 if (curr_p->addr == addr) { 5300 D2(NULL, "%s: address found", __func__); 5301 /* match found */ 5302 if (prev_p == curr_p) { 5303 /* list head */ 5304 if (devtype == VSW_VNETPORT) 5305 port->mcap = curr_p->nextp; 5306 else 5307 vswp->mcap = curr_p->nextp; 5308 } else { 5309 prev_p->nextp = curr_p->nextp; 5310 } 5311 break; 5312 } else { 5313 prev_p = curr_p; 5314 curr_p = curr_p->nextp; 5315 } 5316 } 5317 5318 if (devtype == VSW_VNETPORT) 5319 mutex_exit(&port->mca_lock); 5320 else 5321 mutex_exit(&vswp->mca_lock); 5322 5323 D1(NULL, "%s: exit", __func__); 5324 5325 return (curr_p); 5326 } 5327 5328 /* 5329 * Creates a descriptor ring (dring) and links it into the 5330 * list of outbound drings for this channel. 5331 * 5332 * Returns NULL if creation failed. 5333 */ 5334 static dring_info_t * 5335 vsw_create_dring(vsw_ldc_t *ldcp) 5336 { 5337 vsw_private_desc_t *priv_addr = NULL; 5338 vsw_t *vswp = ldcp->ldc_vswp; 5339 ldc_mem_info_t minfo; 5340 dring_info_t *dp, *tp; 5341 int i; 5342 5343 dp = (dring_info_t *)kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 5344 5345 mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL); 5346 5347 /* create public section of ring */ 5348 if ((ldc_mem_dring_create(vsw_ntxds, 5349 VSW_PUB_SIZE, &dp->handle)) != 0) { 5350 5351 DERR(vswp, "vsw_create_dring(%lld): ldc dring create " 5352 "failed", ldcp->ldc_id); 5353 goto create_fail_exit; 5354 } 5355 5356 ASSERT(dp->handle != NULL); 5357 5358 /* 5359 * Get the base address of the public section of the ring. 5360 */ 5361 if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) { 5362 DERR(vswp, "vsw_create_dring(%lld): dring info failed\n", 5363 ldcp->ldc_id); 5364 goto dring_fail_exit; 5365 } else { 5366 ASSERT(minfo.vaddr != 0); 5367 dp->pub_addr = minfo.vaddr; 5368 } 5369 5370 dp->num_descriptors = vsw_ntxds; 5371 dp->descriptor_size = VSW_PUB_SIZE; 5372 dp->options = VIO_TX_DRING; 5373 dp->ncookies = 1; /* guaranteed by ldc */ 5374 5375 /* 5376 * create private portion of ring 5377 */ 5378 dp->priv_addr = (vsw_private_desc_t *)kmem_zalloc( 5379 (sizeof (vsw_private_desc_t) * vsw_ntxds), KM_SLEEP); 5380 5381 if (vsw_setup_ring(ldcp, dp)) { 5382 DERR(vswp, "%s: unable to setup ring", __func__); 5383 goto dring_fail_exit; 5384 } 5385 5386 /* haven't used any descriptors yet */ 5387 dp->end_idx = 0; 5388 dp->last_ack_recv = -1; 5389 5390 /* bind dring to the channel */ 5391 if ((ldc_mem_dring_bind(ldcp->ldc_handle, dp->handle, 5392 LDC_DIRECT_MAP | LDC_SHADOW_MAP, LDC_MEM_RW, 5393 &dp->cookie[0], &dp->ncookies)) != 0) { 5394 DERR(vswp, "vsw_create_dring: unable to bind to channel " 5395 "%lld", ldcp->ldc_id); 5396 goto dring_fail_exit; 5397 } 5398 5399 mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL); 5400 dp->restart_reqd = B_TRUE; 5401 5402 /* 5403 * Only ever create rings for the outgoing lane. Link it onto 5404 * the end of the list.
5405 */ 5406 WRITE_ENTER(&ldcp->lane_out.dlistrw); 5407 if (ldcp->lane_out.dringp == NULL) { 5408 D2(vswp, "vsw_create_dring: adding first outbound ring"); 5409 ldcp->lane_out.dringp = dp; 5410 } else { 5411 tp = ldcp->lane_out.dringp; 5412 while (tp->next != NULL) 5413 tp = tp->next; 5414 5415 tp->next = dp; 5416 } 5417 RW_EXIT(&ldcp->lane_out.dlistrw); 5418 5419 return (dp); 5420 5421 dring_fail_exit: 5422 (void) ldc_mem_dring_destroy(dp->handle); 5423 5424 create_fail_exit: 5425 if (dp->priv_addr != NULL) { 5426 priv_addr = dp->priv_addr; 5427 for (i = 0; i < vsw_ntxds; i++) { 5428 if (priv_addr->memhandle != NULL) 5429 (void) ldc_mem_free_handle( 5430 priv_addr->memhandle); 5431 priv_addr++; 5432 } 5433 kmem_free(dp->priv_addr, 5434 (sizeof (vsw_private_desc_t) * vsw_ntxds)); 5435 } 5436 mutex_destroy(&dp->dlock); 5437 5438 kmem_free(dp, sizeof (dring_info_t)); 5439 return (NULL); 5440 } 5441 5442 /* 5443 * Create a ring consisting of just a private portion and link 5444 * it into the list of rings for the outbound lane. 5445 * 5446 * These types of rings are used primarily for temporary data 5447 * storage (i.e. as data buffers). 5448 */ 5449 void 5450 vsw_create_privring(vsw_ldc_t *ldcp) 5451 { 5452 dring_info_t *dp, *tp; 5453 vsw_t *vswp = ldcp->ldc_vswp; 5454 5455 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 5456 5457 dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 5458 5459 mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL); 5460 5461 /* no public section */ 5462 dp->pub_addr = NULL; 5463 5464 dp->priv_addr = kmem_zalloc( 5465 (sizeof (vsw_private_desc_t) * vsw_ntxds), KM_SLEEP); 5466 5467 dp->num_descriptors = vsw_ntxds; 5468 5469 if (vsw_setup_ring(ldcp, dp)) { 5470 DERR(vswp, "%s: setup of ring failed", __func__); 5471 kmem_free(dp->priv_addr, 5472 (sizeof (vsw_private_desc_t) * vsw_ntxds)); 5473 mutex_destroy(&dp->dlock); 5474 kmem_free(dp, sizeof (dring_info_t)); 5475 return; 5476 } 5477 5478 /* haven't used any descriptors yet */ 5479 dp->end_idx = 0; 5480 5481 mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL); 5482 dp->restart_reqd = B_TRUE; 5483 5484 /* 5485 * Only ever create rings for the outgoing lane. Link it onto 5486 * the end of the list. 5487 */ 5488 WRITE_ENTER(&ldcp->lane_out.dlistrw); 5489 if (ldcp->lane_out.dringp == NULL) { 5490 D2(vswp, "%s: adding first outbound privring", __func__); 5491 ldcp->lane_out.dringp = dp; 5492 } else { 5493 tp = ldcp->lane_out.dringp; 5494 while (tp->next != NULL) 5495 tp = tp->next; 5496 5497 tp->next = dp; 5498 } 5499 RW_EXIT(&ldcp->lane_out.dlistrw); 5500 5501 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 5502 } 5503 5504 /* 5505 * Setup the descriptors in the dring. Returns 0 on success, 1 on 5506 * failure. 5507 */ 5508 int 5509 vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp) 5510 { 5511 vnet_public_desc_t *pub_addr = NULL; 5512 vsw_private_desc_t *priv_addr = NULL; 5513 vsw_t *vswp = ldcp->ldc_vswp; 5514 uint64_t *tmpp; 5515 uint64_t offset = 0; 5516 uint32_t ncookies = 0; 5517 static char *name = "vsw_setup_ring"; 5518 int i, j, nc, rv; 5519 size_t data_sz; 5520 void *data_addr; 5521 5522 priv_addr = dp->priv_addr; 5523 pub_addr = dp->pub_addr; 5524 5525 /* public section may be null but private should never be */ 5526 ASSERT(priv_addr != NULL); 5527 5528 /* 5529 * Allocate the region of memory which will be used to hold 5530 * the data the descriptors will refer to.
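 */

/*
 * A sketch (illustrative only, kept under #if 0 so it is never compiled)
 * of the buffer sizing policy applied just below, using the same VNET_*
 * rounding macros: sizes up to 12K round up to the next 2K, larger sizes
 * to the next 4K, so that each per-descriptor buffer fits within at most
 * two 8K LDC cookies.
 */
#if 0
static size_t
example_round_desc_buf(size_t data_sz)
{
	return (data_sz <= VNET_12K ?
	    VNET_ROUNDUP_2K(data_sz) : VNET_ROUNDUP_4K(data_sz));
}
#endif

/*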
5531 */ 5532 data_sz = vswp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN; 5533 5534 /* 5535 * In order to ensure that the number of ldc cookies per descriptor is 5536 * limited to be within the default MAX_COOKIES (2), we take the steps 5537 * outlined below: 5538 * 5539 * Align the entire data buffer area to 8K and carve out per descriptor 5540 * data buffers starting from this 8K aligned base address. 5541 * 5542 * We round up the mtu specified to be a multiple of 2K or 4K. 5543 * For sizes up to 12K we round up the size to the next 2K. 5544 * For sizes > 12K we round up to the next 4K (otherwise sizes such as 5545 * 14K could end up needing 3 cookies, with the buffer spread across 5546 * 3 8K pages: 8K+6K, 2K+8K+2K, 6K+8K, ...). 5547 */ 5548 if (data_sz <= VNET_12K) { 5549 data_sz = VNET_ROUNDUP_2K(data_sz); 5550 } else { 5551 data_sz = VNET_ROUNDUP_4K(data_sz); 5552 } 5553 5554 dp->desc_data_sz = data_sz; 5555 5556 /* allocate extra 8K bytes for alignment */ 5557 dp->data_sz = (vsw_ntxds * data_sz) + VNET_8K; 5558 data_addr = kmem_alloc(dp->data_sz, KM_SLEEP); 5559 dp->data_addr = data_addr; 5560 5561 D2(vswp, "%s: allocated %lld bytes at 0x%llx\n", name, 5562 dp->data_sz, dp->data_addr); 5563 5564 /* align the starting address of the data area to 8K */ 5565 data_addr = (void *)VNET_ROUNDUP_8K((uintptr_t)data_addr); 5566 5567 tmpp = (uint64_t *)data_addr; 5568 offset = dp->desc_data_sz/sizeof (tmpp); 5569 5570 /* 5571 * Initialise some of the private and public (if they exist) 5572 * descriptor fields. 5573 */ 5574 for (i = 0; i < vsw_ntxds; i++) { 5575 mutex_init(&priv_addr->dstate_lock, NULL, MUTEX_DRIVER, NULL); 5576 5577 if ((ldc_mem_alloc_handle(ldcp->ldc_handle, 5578 &priv_addr->memhandle)) != 0) { 5579 DERR(vswp, "%s: alloc mem handle failed", name); 5580 goto setup_ring_cleanup; 5581 } 5582 5583 priv_addr->datap = (void *)tmpp; 5584 5585 rv = ldc_mem_bind_handle(priv_addr->memhandle, 5586 (caddr_t)priv_addr->datap, dp->desc_data_sz, 5587 LDC_SHADOW_MAP, LDC_MEM_R|LDC_MEM_W, 5588 &(priv_addr->memcookie[0]), &ncookies); 5589 if (rv != 0) { 5590 DERR(vswp, "%s(%lld): ldc_mem_bind_handle failed " 5591 "(rv %d)", name, ldcp->ldc_id, rv); 5592 goto setup_ring_cleanup; 5593 } 5594 priv_addr->bound = 1; 5595 5596 D2(vswp, "%s: %d: memcookie 0 : addr 0x%llx : size 0x%llx", 5597 name, i, priv_addr->memcookie[0].addr, 5598 priv_addr->memcookie[0].size); 5599 5600 if (ncookies >= (uint32_t)(VSW_MAX_COOKIES + 1)) { 5601 DERR(vswp, "%s(%lld) ldc_mem_bind_handle returned " 5602 "invalid num of cookies (%d) for size 0x%llx", 5603 name, ldcp->ldc_id, ncookies, VSW_RING_EL_DATA_SZ); 5604 5605 goto setup_ring_cleanup; 5606 } else { 5607 for (j = 1; j < ncookies; j++) { 5608 rv = ldc_mem_nextcookie(priv_addr->memhandle, 5609 &(priv_addr->memcookie[j])); 5610 if (rv != 0) { 5611 DERR(vswp, "%s: ldc_mem_nextcookie " 5612 "failed rv (%d)", name, rv); 5613 goto setup_ring_cleanup; 5614 } 5615 D3(vswp, "%s: memcookie %d : addr 0x%llx : " 5616 "size 0x%llx", name, j, 5617 priv_addr->memcookie[j].addr, 5618 priv_addr->memcookie[j].size); 5619 } 5620 5621 } 5622 priv_addr->ncookies = ncookies; 5623 priv_addr->dstate = VIO_DESC_FREE; 5624 5625 if (pub_addr != NULL) { 5626 5627 /* link pub and private sides */ 5628 priv_addr->descp = pub_addr; 5629 5630 pub_addr->ncookies = priv_addr->ncookies; 5631 5632 for (nc = 0; nc < pub_addr->ncookies; nc++) { 5633 bcopy(&priv_addr->memcookie[nc], 5634 &pub_addr->memcookie[nc], 5635 sizeof (ldc_mem_cookie_t)); 5636 } 5637 5638 pub_addr->hdr.dstate = VIO_DESC_FREE; 
5639 pub_addr++; 5640 } 5641 5642 /* 5643 * move to next element in the dring and the next 5644 * position in the data buffer. 5645 */ 5646 priv_addr++; 5647 tmpp += offset; 5648 } 5649 5650 return (0); 5651 5652 setup_ring_cleanup: 5653 priv_addr = dp->priv_addr; 5654 5655 for (j = 0; j < i; j++) { 5656 (void) ldc_mem_unbind_handle(priv_addr->memhandle); 5657 (void) ldc_mem_free_handle(priv_addr->memhandle); 5658 5659 mutex_destroy(&priv_addr->dstate_lock); 5660 5661 priv_addr++; 5662 } 5663 kmem_free(dp->data_addr, dp->data_sz); 5664 5665 return (1); 5666 } 5667 5668 /* 5669 * Searches the private section of a ring for a free descriptor, 5670 * starting at the location of the last free descriptor found 5671 * previously. 5672 * 5673 * Returns 0 if a free descriptor is available, and updates the state 5674 * of the private descriptor to VIO_DESC_READY; otherwise returns 1. 5675 * 5676 * FUTURE: might need to return contiguous range of descriptors 5677 * as dring info msg assumes all will be contiguous. 5678 */ 5679 static int 5680 vsw_dring_find_free_desc(dring_info_t *dringp, 5681 vsw_private_desc_t **priv_p, int *idx) 5682 { 5683 vsw_private_desc_t *addr = NULL; 5684 int num = vsw_ntxds; 5685 int ret = 1; 5686 5687 D1(NULL, "%s enter\n", __func__); 5688 5689 ASSERT(dringp->priv_addr != NULL); 5690 5691 D2(NULL, "%s: searching ring, dringp 0x%llx : start pos %lld", 5692 __func__, dringp, dringp->end_idx); 5693 5694 addr = (vsw_private_desc_t *)dringp->priv_addr + dringp->end_idx; 5695 5696 mutex_enter(&addr->dstate_lock); 5697 if (addr->dstate == VIO_DESC_FREE) { 5698 addr->dstate = VIO_DESC_READY; 5699 *priv_p = addr; 5700 *idx = dringp->end_idx; 5701 dringp->end_idx = (dringp->end_idx + 1) % num; 5702 ret = 0; 5703 5704 } 5705 mutex_exit(&addr->dstate_lock); 5706 5707 /* ring full */ 5708 if (ret == 1) { 5709 D2(NULL, "%s: no desp free: started at %d", __func__, 5710 dringp->end_idx); 5711 } 5712 5713 D1(NULL, "%s: exit\n", __func__); 5714 5715 return (ret); 5716 } 5717 5718 /* 5719 * Map from a dring identifier to the ring itself. Returns 5720 * a pointer to the ring or NULL if no match is found. 5721 * 5722 * Should be called with the dlistrw rwlock held as reader. 5723 */ 5724 static dring_info_t * 5725 vsw_ident2dring(lane_t *lane, uint64_t ident) 5726 { 5727 dring_info_t *dp = NULL; 5728 5729 if ((dp = lane->dringp) == NULL) { 5730 return (NULL); 5731 } else { 5732 if (dp->ident == ident) 5733 return (dp); 5734 5735 while (dp != NULL) { 5736 if (dp->ident == ident) 5737 break; 5738 dp = dp->next; 5739 } 5740 } 5741 5742 return (dp); 5743 } 5744 5745 /* 5746 * Set the default lane attributes. These are copied into 5747 * the attr msg we send to our peer. If they are not acceptable 5748 * then (currently) the handshake ends. 5749 */ 5750 static void 5751 vsw_set_lane_attr(vsw_t *vswp, lane_t *lp) 5752 { 5753 bzero(lp, sizeof (lane_t)); 5754 5755 READ_ENTER(&vswp->if_lockrw); 5756 ether_copy(&(vswp->if_addr), &(lp->addr)); 5757 RW_EXIT(&vswp->if_lockrw); 5758 5759 lp->mtu = vswp->max_frame_size; 5760 lp->addr_type = ADDR_TYPE_MAC; 5761 lp->xfer_mode = VIO_DRING_MODE_V1_0; 5762 lp->ack_freq = 0; /* for shared mode */ 5763 lp->seq_num = VNET_ISS; 5764 } 5765 5766 /* 5767 * Verify that the attributes are acceptable. 5768 * 5769 * FUTURE: If some attributes are not acceptable, change them 5770 * to our desired values.
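 */

/*
 * A sketch (illustrative only, kept under #if 0 so it is never compiled)
 * of the lexicographic (major, minor) comparison assumed to back the
 * VSW_VER_GTEQ()/VSW_VER_LT() checks used in vsw_check_attr() below.
 */
#if 0
static boolean_t
example_ver_gteq(uint16_t maj, uint16_t min, uint16_t wmaj, uint16_t wmin)
{
	return ((maj > wmaj || (maj == wmaj && min >= wmin)) ?
	    B_TRUE : B_FALSE);
}
#endif

/*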
5771 */ 5772 static int 5773 vsw_check_attr(vnet_attr_msg_t *pkt, vsw_ldc_t *ldcp) 5774 { 5775 int ret = 0; 5776 struct ether_addr ea; 5777 vsw_port_t *port = ldcp->ldc_port; 5778 lane_t *lp = &ldcp->lane_out; 5779 5780 D1(NULL, "vsw_check_attr enter\n"); 5781 5782 if ((pkt->xfer_mode != VIO_DESC_MODE) && 5783 (pkt->xfer_mode != lp->xfer_mode)) { 5784 D2(NULL, "vsw_check_attr: unknown mode %x\n", pkt->xfer_mode); 5785 ret = 1; 5786 } 5787 5788 /* Only support MAC addresses at the moment. */ 5789 if ((pkt->addr_type != ADDR_TYPE_MAC) || (pkt->addr == 0)) { 5790 D2(NULL, "vsw_check_attr: invalid addr_type %x, " 5791 "or address 0x%llx\n", pkt->addr_type, pkt->addr); 5792 ret = 1; 5793 } 5794 5795 /* 5796 * The MAC address supplied by the device should match that stored 5797 * in the vsw-port OBP node. Need to decide what to do if they 5798 * don't match; for the moment just warn but don't fail. 5799 */ 5800 vnet_macaddr_ultostr(pkt->addr, ea.ether_addr_octet); 5801 if (ether_cmp(&ea, &port->p_macaddr) != 0) { 5802 DERR(NULL, "vsw_check_attr: device supplied address " 5803 "0x%llx doesn't match node address 0x%llx\n", 5804 pkt->addr, port->p_macaddr); 5805 } 5806 5807 /* 5808 * Ack freq only makes sense in pkt mode; in shared 5809 * mode the ring descriptors say whether or not to 5810 * send back an ACK. 5811 */ 5812 if ((VSW_VER_GTEQ(ldcp, 1, 2) && 5813 (pkt->xfer_mode & VIO_DRING_MODE_V1_2)) || 5814 (VSW_VER_LT(ldcp, 1, 2) && 5815 (pkt->xfer_mode == VIO_DRING_MODE_V1_0))) { 5816 if (pkt->ack_freq > 0) { 5817 D2(NULL, "vsw_check_attr: non-zero ack freq " 5818 " in SHM mode\n"); 5819 ret = 1; 5820 } 5821 } 5822 5823 if (VSW_VER_LT(ldcp, 1, 4)) { 5824 /* versions < 1.4, mtu must match */ 5825 if (pkt->mtu != lp->mtu) { 5826 D2(NULL, "vsw_check_attr: invalid MTU (0x%llx)\n", 5827 pkt->mtu); 5828 ret = 1; 5829 } 5830 } else { 5831 /* Ver >= 1.4, validate that the peer's mtu is at least ETHERMAX */ 5832 if (pkt->mtu < ETHERMAX) { 5833 ret = 1; 5834 } 5835 } 5836 5837 D1(NULL, "vsw_check_attr exit\n"); 5838 5839 return (ret); 5840 } 5841 5842 /* 5843 * Returns 1 if there is a problem, 0 otherwise. 5844 */ 5845 static int 5846 vsw_check_dring_info(vio_dring_reg_msg_t *pkt) 5847 { 5848 5849 5850 int ret = 0; 5851 5852 D1(NULL, "vsw_check_dring_info enter\n"); 5853 5854 if ((pkt->num_descriptors == 0) || 5855 (pkt->descriptor_size == 0) || 5856 (pkt->ncookies != 1)) { 5857 DERR(NULL, "vsw_check_dring_info: invalid dring msg"); 5858 ret = 1; 5859 } 5860 5861 D1(NULL, "vsw_check_dring_info exit\n"); 5862 5863 return (ret); 5864 } 5865 5866 /* 5867 * Returns 1 if two memory cookies match. Otherwise returns 0. 5868 */ 5869 static int 5870 vsw_mem_cookie_match(ldc_mem_cookie_t *m1, ldc_mem_cookie_t *m2) 5871 { 5872 if ((m1->addr != m2->addr) || 5873 (m1->size != m2->size)) { 5874 return (0); 5875 } else { 5876 return (1); 5877 } 5878 } 5879 5880 /* 5881 * Returns 1 if the ring described in the reg message matches that 5882 * described by the dring_info structure. Otherwise returns 0. 5883 */ 5884 static int 5885 vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg) 5886 { 5887 if ((msg->descriptor_size != dp->descriptor_size) || 5888 (msg->num_descriptors != dp->num_descriptors) || 5889 (msg->ncookies != dp->ncookies) || 5890 !(vsw_mem_cookie_match(&msg->cookie[0], &dp->cookie[0]))) { 5891 return (0); 5892 } else { 5893 return (1); 5894 } 5895 5896 } 5897 5898 /* 5899 * Reset and free all the resources associated with the 5900 * specified lane of the channel.
5901 */ 5902 static void 5903 vsw_free_lane_resources(vsw_ldc_t *ldcp, uint64_t dir) 5904 { 5905 dring_info_t *dp, *dpp; 5906 lane_t *lp = NULL; 5907 5908 ASSERT(ldcp != NULL); 5909 5910 D1(ldcp->ldc_vswp, "%s (%lld): enter", __func__, ldcp->ldc_id); 5911 5912 if (dir == INBOUND) { 5913 D2(ldcp->ldc_vswp, "%s: freeing INBOUND lane" 5914 " of channel %lld", __func__, ldcp->ldc_id); 5915 lp = &ldcp->lane_in; 5916 } else { 5917 D2(ldcp->ldc_vswp, "%s: freeing OUTBOUND lane" 5918 " of channel %lld", __func__, ldcp->ldc_id); 5919 lp = &ldcp->lane_out; 5920 } 5921 5922 lp->lstate = VSW_LANE_INACTIV; 5923 lp->seq_num = VNET_ISS; 5924 5925 if (lp->dringp) { 5926 if (dir == INBOUND) { 5927 WRITE_ENTER(&lp->dlistrw); 5928 dp = lp->dringp; 5929 while (dp != NULL) { 5930 dpp = dp->next; 5931 if (dp->handle != NULL) 5932 (void) ldc_mem_dring_unmap(dp->handle); 5933 kmem_free(dp, sizeof (dring_info_t)); 5934 dp = dpp; 5935 } 5936 RW_EXIT(&lp->dlistrw); 5937 } else { 5938 /* 5939 * unbind, destroy exported dring, free dring struct 5940 */ 5941 WRITE_ENTER(&lp->dlistrw); 5942 dp = lp->dringp; 5943 vsw_free_ring(dp); 5944 RW_EXIT(&lp->dlistrw); 5945 } 5946 lp->dringp = NULL; 5947 } 5948 5949 D1(ldcp->ldc_vswp, "%s (%lld): exit", __func__, ldcp->ldc_id); 5950 } 5951 5952 /* 5953 * Free ring and all associated resources. 5954 * 5955 * Should be called with dlistrw rwlock held as writer. 5956 */ 5957 static void 5958 vsw_free_ring(dring_info_t *dp) 5959 { 5960 vsw_private_desc_t *paddr = NULL; 5961 dring_info_t *dpp; 5962 int i; 5963 5964 while (dp != NULL) { 5965 mutex_enter(&dp->dlock); 5966 dpp = dp->next; 5967 if (dp->priv_addr != NULL) { 5968 /* 5969 * First unbind and free the memory handles 5970 * stored in each descriptor within the ring. 5971 */ 5972 for (i = 0; i < vsw_ntxds; i++) { 5973 paddr = (vsw_private_desc_t *) 5974 dp->priv_addr + i; 5975 if (paddr->memhandle != NULL) { 5976 if (paddr->bound == 1) { 5977 if (ldc_mem_unbind_handle( 5978 paddr->memhandle) != 0) { 5979 DERR(NULL, "error " 5980 "unbinding handle for " 5981 "ring 0x%llx at pos %d", 5982 dp, i); 5983 continue; 5984 } 5985 paddr->bound = 0; 5986 } 5987 5988 if (ldc_mem_free_handle( 5989 paddr->memhandle) != 0) { 5990 DERR(NULL, "error freeing " 5991 "handle for ring 0x%llx " 5992 "at pos %d", dp, i); 5993 continue; 5994 } 5995 paddr->memhandle = NULL; 5996 } 5997 mutex_destroy(&paddr->dstate_lock); 5998 } 5999 kmem_free(dp->priv_addr, 6000 (sizeof (vsw_private_desc_t) * vsw_ntxds)); 6001 } 6002 6003 /* 6004 * Now unbind and destroy the ring itself. 6005 */ 6006 if (dp->handle != NULL) { 6007 (void) ldc_mem_dring_unbind(dp->handle); 6008 (void) ldc_mem_dring_destroy(dp->handle); 6009 } 6010 6011 if (dp->data_addr != NULL) { 6012 kmem_free(dp->data_addr, dp->data_sz); 6013 } 6014 6015 mutex_exit(&dp->dlock); 6016 mutex_destroy(&dp->dlock); 6017 mutex_destroy(&dp->restart_lock); 6018 kmem_free(dp, sizeof (dring_info_t)); 6019 6020 dp = dpp; 6021 } 6022 } 6023 6024 /* 6025 * vsw_ldc_rx_worker -- A per LDC worker thread to receive data. 6026 * This thread is woken up by the LDC interrupt handler to process 6027 * LDC packets and receive data. 
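 */

/*
 * A sketch (illustrative only, kept under #if 0 so it is never compiled)
 * of the wait pattern shared by the rx and tx worker threads below: sleep
 * on the condition variable with the lock held until a data or stop flag
 * is raised. The real threads additionally bracket the wait with
 * CALLB_CPR_SAFE_BEGIN/CALLB_CPR_SAFE_END, which is omitted here.
 */
#if 0
static void
example_worker_wait(kmutex_t *lock, kcondvar_t *cv, uint32_t *flags)
{
	/* caller holds 'lock' on entry; it is still held on return */
	while ((*flags & (VSW_WTHR_DATARCVD | VSW_WTHR_STOP)) == 0)
		cv_wait(cv, lock);
}
#endif

/*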
/*
 * vsw_ldc_rx_worker -- A per-LDC worker thread to receive data.
 * This thread is woken up by the LDC interrupt handler to process
 * LDC packets and receive data.
 */
static void
vsw_ldc_rx_worker(void *arg)
{
	callb_cpr_t	cprinfo;
	vsw_ldc_t	*ldcp = (vsw_ldc_t *)arg;
	vsw_t		*vswp = ldcp->ldc_vswp;

	D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id);
	CALLB_CPR_INIT(&cprinfo, &ldcp->rx_thr_lock, callb_generic_cpr,
	    "vsw_rx_thread");
	mutex_enter(&ldcp->rx_thr_lock);
	while (!(ldcp->rx_thr_flags & VSW_WTHR_STOP)) {

		CALLB_CPR_SAFE_BEGIN(&cprinfo);
		/*
		 * Wait until data is received or a stop
		 * request is received.
		 */
		while (!(ldcp->rx_thr_flags &
		    (VSW_WTHR_DATARCVD | VSW_WTHR_STOP))) {
			cv_wait(&ldcp->rx_thr_cv, &ldcp->rx_thr_lock);
		}
		CALLB_CPR_SAFE_END(&cprinfo, &ldcp->rx_thr_lock)

		/*
		 * First process the stop request.
		 */
		if (ldcp->rx_thr_flags & VSW_WTHR_STOP) {
			D2(vswp, "%s(%lld):Rx thread stopped\n",
			    __func__, ldcp->ldc_id);
			break;
		}
		ldcp->rx_thr_flags &= ~VSW_WTHR_DATARCVD;
		mutex_exit(&ldcp->rx_thr_lock);
		D1(vswp, "%s(%lld):calling vsw_process_pkt\n",
		    __func__, ldcp->ldc_id);
		mutex_enter(&ldcp->ldc_cblock);
		vsw_process_pkt(ldcp);
		mutex_exit(&ldcp->ldc_cblock);
		mutex_enter(&ldcp->rx_thr_lock);
	}

	/*
	 * Update the run status and wake up the thread that
	 * sent the stop request.
	 */
	ldcp->rx_thr_flags &= ~VSW_WTHR_STOP;
	ldcp->rx_thread = NULL;
	CALLB_CPR_EXIT(&cprinfo);
	D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id);
	thread_exit();
}

/* vsw_stop_rx_thread -- Coordinate with the receive thread to stop it */
static void
vsw_stop_rx_thread(vsw_ldc_t *ldcp)
{
	kt_did_t	tid = 0;
	vsw_t		*vswp = ldcp->ldc_vswp;

	D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id);
	/*
	 * Send a stop request by setting the stop flag and
	 * wait until the receive thread stops.
	 */
	mutex_enter(&ldcp->rx_thr_lock);
	if (ldcp->rx_thread != NULL) {
		tid = ldcp->rx_thread->t_did;
		ldcp->rx_thr_flags |= VSW_WTHR_STOP;
		cv_signal(&ldcp->rx_thr_cv);
	}
	mutex_exit(&ldcp->rx_thr_lock);

	if (tid != 0) {
		thread_join(tid);
	}
	D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id);
}
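
/*
 * Illustrative sketch (added, not from the original source): the wakeup
 * side of the rx worker handshake. The comment above says this thread
 * is woken by the LDC interrupt handler (vsw_ldc_cb); a minimal wakeup
 * under the same locking discipline would look like:
 *
 *	mutex_enter(&ldcp->rx_thr_lock);
 *	ldcp->rx_thr_flags |= VSW_WTHR_DATARCVD;
 *	cv_signal(&ldcp->rx_thr_cv);
 *	mutex_exit(&ldcp->rx_thr_lock);
 *
 * The exact logic in vsw_ldc_cb() may differ; this only shows the
 * flag/condvar protocol that the worker loop above relies on.
 */
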
/*
 * vsw_ldc_tx_worker -- A per-LDC worker thread to transmit data.
 * This thread is woken up by vsw_portsend to transmit
 * packets.
 */
static void
vsw_ldc_tx_worker(void *arg)
{
	callb_cpr_t	cprinfo;
	vsw_ldc_t	*ldcp = (vsw_ldc_t *)arg;
	vsw_t		*vswp = ldcp->ldc_vswp;
	mblk_t		*mp;
	mblk_t		*tmp;

	D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id);
	CALLB_CPR_INIT(&cprinfo, &ldcp->tx_thr_lock, callb_generic_cpr,
	    "vnet_tx_thread");
	mutex_enter(&ldcp->tx_thr_lock);
	while (!(ldcp->tx_thr_flags & VSW_WTHR_STOP)) {

		CALLB_CPR_SAFE_BEGIN(&cprinfo);
		/*
		 * Wait until there is data to transmit or a stop
		 * request is received.
		 */
		while (!(ldcp->tx_thr_flags & VSW_WTHR_STOP) &&
		    (ldcp->tx_mhead == NULL)) {
			cv_wait(&ldcp->tx_thr_cv, &ldcp->tx_thr_lock);
		}
		CALLB_CPR_SAFE_END(&cprinfo, &ldcp->tx_thr_lock)

		/*
		 * First process the stop request.
		 */
		if (ldcp->tx_thr_flags & VSW_WTHR_STOP) {
			D2(vswp, "%s(%lld):tx thread stopped\n",
			    __func__, ldcp->ldc_id);
			break;
		}
		mp = ldcp->tx_mhead;
		ldcp->tx_mhead = ldcp->tx_mtail = NULL;
		ldcp->tx_cnt = 0;
		mutex_exit(&ldcp->tx_thr_lock);
		D2(vswp, "%s(%lld):calling vsw_ldcsend\n",
		    __func__, ldcp->ldc_id);
		while (mp != NULL) {
			tmp = mp->b_next;
			mp->b_next = mp->b_prev = NULL;
			(void) vsw_ldcsend(ldcp, mp, vsw_ldc_tx_retries);
			mp = tmp;
		}
		mutex_enter(&ldcp->tx_thr_lock);
	}

	/*
	 * Update the run status and wake up the thread that
	 * sent the stop request.
	 */
	ldcp->tx_thr_flags &= ~VSW_WTHR_STOP;
	ldcp->tx_thread = NULL;
	CALLB_CPR_EXIT(&cprinfo);
	D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id);
	thread_exit();
}

/* vsw_stop_tx_thread -- Coordinate with the transmit thread to stop it */
static void
vsw_stop_tx_thread(vsw_ldc_t *ldcp)
{
	kt_did_t	tid = 0;
	vsw_t		*vswp = ldcp->ldc_vswp;

	D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id);
	/*
	 * Send a stop request by setting the stop flag and
	 * wait until the transmit thread stops.
	 */
	mutex_enter(&ldcp->tx_thr_lock);
	if (ldcp->tx_thread != NULL) {
		tid = ldcp->tx_thread->t_did;
		ldcp->tx_thr_flags |= VSW_WTHR_STOP;
		cv_signal(&ldcp->tx_thr_cv);
	}
	mutex_exit(&ldcp->tx_thr_lock);

	if (tid != 0) {
		thread_join(tid);
	}

	D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id);
}

/* vsw_reclaim_dring -- reclaim descriptors */
static int
vsw_reclaim_dring(dring_info_t *dp, int start)
{
	int			i, j, len;
	vsw_private_desc_t	*priv_addr;
	vnet_public_desc_t	*pub_addr;

	pub_addr = (vnet_public_desc_t *)dp->pub_addr;
	priv_addr = (vsw_private_desc_t *)dp->priv_addr;
	len = dp->num_descriptors;

	D2(NULL, "%s: start index %d\n", __func__, start);

	j = 0;
	for (i = start; j < len; i = (i + 1) % len, j++) {
		pub_addr = (vnet_public_desc_t *)dp->pub_addr + i;
		priv_addr = (vsw_private_desc_t *)dp->priv_addr + i;

		mutex_enter(&priv_addr->dstate_lock);
		if (pub_addr->hdr.dstate != VIO_DESC_DONE) {
			mutex_exit(&priv_addr->dstate_lock);
			break;
		}
		pub_addr->hdr.dstate = VIO_DESC_FREE;
		priv_addr->dstate = VIO_DESC_FREE;
		/* clear all the fields */
		priv_addr->datalen = 0;
		pub_addr->hdr.ack = 0;
		mutex_exit(&priv_addr->dstate_lock);

		D3(NULL, "claiming descp:%d pub state:0x%llx priv state 0x%llx",
		    i, pub_addr->hdr.dstate, priv_addr->dstate);
	}
	return (j);
}
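
/*
 * Worked example (added commentary, not from the original source):
 * with len = 8 and start = 6, vsw_reclaim_dring() visits descriptors
 * 6, 7, 0, 1, ... in order, wrapping via i = (i + 1) % len. If
 * descriptors 6 and 7 are in state VIO_DESC_DONE but descriptor 0 is
 * not, the loop frees 6 and 7, stops at 0, and returns j == 2, the
 * number of descriptors reclaimed.
 */
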
/*
 * Debugging routines
 */
static void
display_state(void)
{
	vsw_t		*vswp;
	vsw_port_list_t	*plist;
	vsw_port_t	*port;
	vsw_ldc_list_t	*ldcl;
	vsw_ldc_t	*ldcp;
	extern vsw_t	*vsw_head;

	cmn_err(CE_NOTE, "***** system state *****");

	for (vswp = vsw_head; vswp; vswp = vswp->next) {
		plist = &vswp->plist;
		READ_ENTER(&plist->lockrw);
		cmn_err(CE_CONT, "vsw instance %d has %d ports attached\n",
		    vswp->instance, plist->num_ports);

		for (port = plist->head; port != NULL; port = port->p_next) {
			ldcl = &port->p_ldclist;
			cmn_err(CE_CONT, "port %d : %d ldcs attached\n",
			    port->p_instance, port->num_ldcs);
			READ_ENTER(&ldcl->lockrw);
			for (ldcp = ldcl->head; ldcp != NULL;
			    ldcp = ldcp->ldc_next) {
				cmn_err(CE_CONT, "chan %lu : dev %d : "
				    "status %d : phase %u\n",
				    ldcp->ldc_id, ldcp->dev_class,
				    ldcp->ldc_status, ldcp->hphase);
				cmn_err(CE_CONT, "chan %lu : lsession %lu : "
				    "psession %lu\n", ldcp->ldc_id,
				    ldcp->local_session, ldcp->peer_session);

				cmn_err(CE_CONT, "Inbound lane:\n");
				display_lane(&ldcp->lane_in);
				cmn_err(CE_CONT, "Outbound lane:\n");
				display_lane(&ldcp->lane_out);
			}
			RW_EXIT(&ldcl->lockrw);
		}
		RW_EXIT(&plist->lockrw);
	}
	cmn_err(CE_NOTE, "***** system state *****");
}

static void
display_lane(lane_t *lp)
{
	dring_info_t	*drp;

	cmn_err(CE_CONT, "ver 0x%x:0x%x : state %lx : mtu 0x%lx\n",
	    lp->ver_major, lp->ver_minor, lp->lstate, lp->mtu);
	cmn_err(CE_CONT, "addr_type %d : addr 0x%lx : xmode %d\n",
	    lp->addr_type, lp->addr, lp->xfer_mode);
	cmn_err(CE_CONT, "dringp 0x%lx\n", (uint64_t)lp->dringp);

	cmn_err(CE_CONT, "Dring info:\n");
	for (drp = lp->dringp; drp != NULL; drp = drp->next) {
		cmn_err(CE_CONT, "\tnum_desc %u : dsize %u\n",
		    drp->num_descriptors, drp->descriptor_size);
		cmn_err(CE_CONT, "\thandle 0x%lx\n", drp->handle);
		cmn_err(CE_CONT, "\tpub_addr 0x%lx : priv_addr 0x%lx\n",
		    (uint64_t)drp->pub_addr, (uint64_t)drp->priv_addr);
		cmn_err(CE_CONT, "\tident 0x%lx : end_idx %lu\n",
		    drp->ident, drp->end_idx);
		display_ring(drp);
	}
}

static void
display_ring(dring_info_t *dringp)
{
	uint64_t		i;
	uint64_t		priv_count = 0;
	uint64_t		pub_count = 0;
	vnet_public_desc_t	*pub_addr = NULL;
	vsw_private_desc_t	*priv_addr = NULL;

	for (i = 0; i < vsw_ntxds; i++) {
		if (dringp->pub_addr != NULL) {
			pub_addr = (vnet_public_desc_t *)dringp->pub_addr + i;

			if (pub_addr->hdr.dstate == VIO_DESC_FREE)
				pub_count++;
		}

		if (dringp->priv_addr != NULL) {
			priv_addr =
			    (vsw_private_desc_t *)dringp->priv_addr + i;

			if (priv_addr->dstate == VIO_DESC_FREE)
				priv_count++;
		}
	}
	cmn_err(CE_CONT, "\t%lu elements: %lu priv free: %lu pub free\n",
	    i, priv_count, pub_count);
}
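
/*
 * Example (added commentary, not from the original source): for a ring
 * of vsw_ntxds == 8 descriptors with every public and private
 * descriptor in state VIO_DESC_FREE, display_ring() above would emit:
 *
 *	8 elements: 8 priv free: 8 pub free
 */
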
static void
dump_flags(uint64_t state)
{
	int	i;

	typedef struct flag_name {
		int	flag_val;
		char	*flag_name;
	} flag_name_t;

	flag_name_t	flags[] = {
		VSW_VER_INFO_SENT, "VSW_VER_INFO_SENT",
		VSW_VER_INFO_RECV, "VSW_VER_INFO_RECV",
		VSW_VER_ACK_RECV, "VSW_VER_ACK_RECV",
		VSW_VER_ACK_SENT, "VSW_VER_ACK_SENT",
		VSW_VER_NACK_RECV, "VSW_VER_NACK_RECV",
		VSW_VER_NACK_SENT, "VSW_VER_NACK_SENT",
		VSW_ATTR_INFO_SENT, "VSW_ATTR_INFO_SENT",
		VSW_ATTR_INFO_RECV, "VSW_ATTR_INFO_RECV",
		VSW_ATTR_ACK_SENT, "VSW_ATTR_ACK_SENT",
		VSW_ATTR_ACK_RECV, "VSW_ATTR_ACK_RECV",
		VSW_ATTR_NACK_SENT, "VSW_ATTR_NACK_SENT",
		VSW_ATTR_NACK_RECV, "VSW_ATTR_NACK_RECV",
		VSW_DRING_INFO_SENT, "VSW_DRING_INFO_SENT",
		VSW_DRING_INFO_RECV, "VSW_DRING_INFO_RECV",
		VSW_DRING_ACK_SENT, "VSW_DRING_ACK_SENT",
		VSW_DRING_ACK_RECV, "VSW_DRING_ACK_RECV",
		VSW_DRING_NACK_SENT, "VSW_DRING_NACK_SENT",
		VSW_DRING_NACK_RECV, "VSW_DRING_NACK_RECV",
		VSW_RDX_INFO_SENT, "VSW_RDX_INFO_SENT",
		VSW_RDX_INFO_RECV, "VSW_RDX_INFO_RECV",
		VSW_RDX_ACK_SENT, "VSW_RDX_ACK_SENT",
		VSW_RDX_ACK_RECV, "VSW_RDX_ACK_RECV",
		VSW_RDX_NACK_SENT, "VSW_RDX_NACK_SENT",
		VSW_RDX_NACK_RECV, "VSW_RDX_NACK_RECV",
		VSW_MCST_INFO_SENT, "VSW_MCST_INFO_SENT",
		VSW_MCST_INFO_RECV, "VSW_MCST_INFO_RECV",
		VSW_MCST_ACK_SENT, "VSW_MCST_ACK_SENT",
		VSW_MCST_ACK_RECV, "VSW_MCST_ACK_RECV",
		VSW_MCST_NACK_SENT, "VSW_MCST_NACK_SENT",
		VSW_MCST_NACK_RECV, "VSW_MCST_NACK_RECV",
		VSW_LANE_ACTIVE, "VSW_LANE_ACTIVE"};

	DERR(NULL, "DUMP_FLAGS: %llx\n", state);
	for (i = 0; i < sizeof (flags)/sizeof (flag_name_t); i++) {
		if (state & flags[i].flag_val)
			DERR(NULL, "DUMP_FLAGS %s", flags[i].flag_name);
	}
}
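
/*
 * Example (added commentary, not from the original source): given
 * state = (VSW_VER_INFO_SENT | VSW_VER_ACK_RECV), dump_flags() would
 * log the combined value followed by one line per set bit:
 *
 *	DUMP_FLAGS: <state in hex>
 *	DUMP_FLAGS VSW_VER_INFO_SENT
 *	DUMP_FLAGS VSW_VER_ACK_RECV
 */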