1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/errno.h> 28 #include <sys/debug.h> 29 #include <sys/time.h> 30 #include <sys/sysmacros.h> 31 #include <sys/systm.h> 32 #include <sys/user.h> 33 #include <sys/stropts.h> 34 #include <sys/stream.h> 35 #include <sys/strlog.h> 36 #include <sys/strsubr.h> 37 #include <sys/cmn_err.h> 38 #include <sys/cpu.h> 39 #include <sys/kmem.h> 40 #include <sys/conf.h> 41 #include <sys/ddi.h> 42 #include <sys/sunddi.h> 43 #include <sys/ksynch.h> 44 #include <sys/stat.h> 45 #include <sys/kstat.h> 46 #include <sys/vtrace.h> 47 #include <sys/strsun.h> 48 #include <sys/dlpi.h> 49 #include <sys/ethernet.h> 50 #include <net/if.h> 51 #include <sys/varargs.h> 52 #include <sys/machsystm.h> 53 #include <sys/modctl.h> 54 #include <sys/modhash.h> 55 #include <sys/mac.h> 56 #include <sys/mac_ether.h> 57 #include <sys/taskq.h> 58 #include <sys/note.h> 59 #include <sys/mach_descrip.h> 60 #include <sys/mdeg.h> 61 #include <sys/ldc.h> 62 #include <sys/vsw_fdb.h> 63 #include <sys/vsw.h> 64 #include <sys/vio_mailbox.h> 65 #include <sys/vnet_mailbox.h> 66 #include <sys/vnet_common.h> 67 #include <sys/vio_util.h> 68 #include <sys/sdt.h> 69 #include <sys/atomic.h> 70 #include <sys/callb.h> 71 #include <sys/vlan.h> 72 73 /* Port add/deletion/etc routines */ 74 static void vsw_port_delete(vsw_port_t *port); 75 static int vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id); 76 static void vsw_ldc_detach(vsw_ldc_t *ldcp); 77 static int vsw_ldc_init(vsw_ldc_t *ldcp); 78 static void vsw_ldc_uninit(vsw_ldc_t *ldcp); 79 static void vsw_ldc_drain(vsw_ldc_t *ldcp); 80 static void vsw_drain_port_taskq(vsw_port_t *port); 81 static void vsw_marker_task(void *); 82 static int vsw_plist_del_node(vsw_t *, vsw_port_t *port); 83 void vsw_detach_ports(vsw_t *vswp); 84 int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node); 85 mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr); 86 int vsw_port_detach(vsw_t *vswp, int p_instance); 87 int vsw_portsend(vsw_port_t *port, mblk_t *mp); 88 int vsw_port_attach(vsw_port_t *portp); 89 vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance); 90 void vsw_vlan_unaware_port_reset(vsw_port_t *portp); 91 void vsw_hio_port_reset(vsw_port_t *portp, boolean_t immediate); 92 void vsw_reset_ports(vsw_t *vswp); 93 void vsw_port_reset(vsw_port_t *portp); 94 void vsw_physlink_update_ports(vsw_t *vswp); 95 static void vsw_port_physlink_update(vsw_port_t *portp); 96 97 /* Interrupt routines */ 98 static uint_t vsw_ldc_cb(uint64_t cb, caddr_t arg); 99 100 /* Handshake routines */ 101 static void vsw_ldc_reinit(vsw_ldc_t *); 102 static void vsw_conn_task(void *); 103 static int vsw_check_flag(vsw_ldc_t *, int, uint64_t); 104 static void vsw_next_milestone(vsw_ldc_t *); 105 static int vsw_supported_version(vio_ver_msg_t *); 106 static void vsw_set_vnet_proto_ops(vsw_ldc_t *ldcp); 107 static void vsw_reset_vnet_proto_ops(vsw_ldc_t *ldcp); 108 void vsw_process_conn_evt(vsw_ldc_t *, uint16_t); 109 110 /* Data processing routines */ 111 void vsw_process_pkt(void *); 112 static void vsw_dispatch_ctrl_task(vsw_ldc_t *, void *, vio_msg_tag_t *, int); 113 static void vsw_process_ctrl_pkt(void *); 114 static void vsw_process_ctrl_ver_pkt(vsw_ldc_t *, void *); 115 static void vsw_process_ctrl_attr_pkt(vsw_ldc_t *, void *); 116 static void vsw_process_ctrl_mcst_pkt(vsw_ldc_t *, void *); 117 static void vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *, void *); 118 static void vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *, void *); 119 static void vsw_process_ctrl_rdx_pkt(vsw_ldc_t *, void *); 120 static void vsw_process_physlink_msg(vsw_ldc_t *, void *); 121 static void vsw_process_data_pkt(vsw_ldc_t *, void *, vio_msg_tag_t *, 122 uint32_t); 123 static void vsw_process_pkt_data_nop(void *, void *, uint32_t); 124 static void vsw_process_pkt_data(void *, void *, uint32_t); 125 static void vsw_process_data_ibnd_pkt(vsw_ldc_t *, void *); 126 static void vsw_process_err_pkt(vsw_ldc_t *, void *, vio_msg_tag_t *); 127 static void vsw_process_evt_read(vsw_ldc_t *ldcp); 128 static void vsw_ldc_rcv(vsw_ldc_t *ldcp); 129 130 /* Switching/data transmit routines */ 131 static int vsw_descrsend(vsw_ldc_t *, mblk_t *); 132 static void vsw_ldcsend_pkt(vsw_ldc_t *ldcp, mblk_t *mp); 133 static int vsw_ldcsend(vsw_ldc_t *ldcp, mblk_t *mp, uint32_t retries); 134 static int vsw_ldctx_pri(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count); 135 static int vsw_ldctx(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count); 136 137 /* Packet creation routines */ 138 static void vsw_send_ver(void *); 139 static void vsw_send_attr(vsw_ldc_t *); 140 static void vsw_send_dring_info(vsw_ldc_t *); 141 static void vsw_send_rdx(vsw_ldc_t *); 142 static void vsw_send_physlink_msg(vsw_ldc_t *ldcp, link_state_t plink_state); 143 144 /* Dring routines */ 145 static void vsw_create_privring(vsw_ldc_t *); 146 static dring_info_t *vsw_map_dring(vsw_ldc_t *ldcp, void *pkt); 147 static void vsw_unmap_dring(vsw_ldc_t *ldcp); 148 static void vsw_destroy_dring(vsw_ldc_t *ldcp); 149 static void vsw_free_lane_resources(vsw_ldc_t *, uint64_t); 150 static int vsw_map_data(vsw_ldc_t *ldcp, dring_info_t *dp, void *pkt); 151 static void vsw_set_lane_attr(vsw_t *, lane_t *); 152 dring_info_t *vsw_map_dring_cmn(vsw_ldc_t *ldcp, 153 vio_dring_reg_msg_t *dring_pkt); 154 155 /* tx/msg/rcv thread routines */ 156 static void vsw_stop_tx_thread(vsw_ldc_t *ldcp); 157 static void vsw_ldc_tx_worker(void *arg); 158 159 /* Misc support routines */ 160 static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr); 161 static int vsw_get_same_dest_list(struct ether_header *ehp, 162 mblk_t **rhead, mblk_t **rtail, mblk_t **mpp); 163 static mblk_t *vsw_dupmsgchain(mblk_t *mp); 164 165 /* Debugging routines */ 166 static void dump_flags(uint64_t); 167 static void display_state(void); 168 static void display_lane(lane_t *); 169 static void display_ring(dring_info_t *); 170 171 /* 172 * Functions imported from other files. 173 */ 174 extern int vsw_set_hw(vsw_t *, vsw_port_t *, int); 175 extern void vsw_unset_hw(vsw_t *, vsw_port_t *, int); 176 extern int vsw_add_rem_mcst(vnet_mcast_msg_t *mcst_pkt, vsw_port_t *port); 177 extern void vsw_del_mcst_port(vsw_port_t *port); 178 extern int vsw_add_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg); 179 extern int vsw_del_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg); 180 extern void vsw_fdbe_add(vsw_t *vswp, void *port); 181 extern void vsw_fdbe_del(vsw_t *vswp, struct ether_addr *eaddr); 182 extern void vsw_create_vlans(void *arg, int type); 183 extern void vsw_destroy_vlans(void *arg, int type); 184 extern void vsw_vlan_add_ids(void *arg, int type); 185 extern void vsw_vlan_remove_ids(void *arg, int type); 186 extern boolean_t vsw_frame_lookup_vid(void *arg, int caller, 187 struct ether_header *ehp, uint16_t *vidp); 188 extern mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp); 189 extern uint32_t vsw_vlan_frame_untag(void *arg, int type, mblk_t **np, 190 mblk_t **npt); 191 extern boolean_t vsw_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid); 192 extern void vsw_hio_start(vsw_t *vswp, vsw_ldc_t *ldcp); 193 extern void vsw_hio_stop(vsw_t *vswp, vsw_ldc_t *ldcp); 194 extern void vsw_process_dds_msg(vsw_t *vswp, vsw_ldc_t *ldcp, void *msg); 195 extern void vsw_hio_stop_port(vsw_port_t *portp); 196 extern void vsw_publish_macaddr(vsw_t *vswp, vsw_port_t *portp); 197 extern int vsw_mac_client_init(vsw_t *vswp, vsw_port_t *port, int type); 198 extern void vsw_mac_client_cleanup(vsw_t *vswp, vsw_port_t *port, int type); 199 extern void vsw_destroy_rxpools(void *arg); 200 extern void vsw_stop_msg_thread(vsw_ldc_t *ldcp); 201 extern int vsw_send_msg(vsw_ldc_t *, void *, int, boolean_t); 202 extern int vsw_dringsend(vsw_ldc_t *, mblk_t *); 203 extern int vsw_reclaim_dring(dring_info_t *dp, int start); 204 extern int vsw_dring_find_free_desc(dring_info_t *, vsw_private_desc_t **, 205 int *); 206 extern vio_dring_reg_msg_t *vsw_create_tx_dring_info(vsw_ldc_t *); 207 extern int vsw_setup_tx_dring(vsw_ldc_t *ldcp, dring_info_t *dp); 208 extern void vsw_destroy_tx_dring(vsw_ldc_t *ldcp); 209 extern dring_info_t *vsw_map_rx_dring(vsw_ldc_t *ldcp, void *pkt); 210 extern void vsw_unmap_rx_dring(vsw_ldc_t *ldcp); 211 extern void vsw_ldc_msg_worker(void *arg); 212 extern void vsw_process_dringdata(void *, void *); 213 extern vio_dring_reg_msg_t *vsw_create_rx_dring_info(vsw_ldc_t *); 214 extern void vsw_destroy_rx_dring(vsw_ldc_t *ldcp); 215 extern dring_info_t *vsw_map_tx_dring(vsw_ldc_t *ldcp, void *pkt); 216 extern void vsw_unmap_tx_dring(vsw_ldc_t *ldcp); 217 extern void vsw_ldc_rcv_worker(void *arg); 218 extern void vsw_stop_rcv_thread(vsw_ldc_t *ldcp); 219 extern int vsw_dringsend_shm(vsw_ldc_t *, mblk_t *); 220 extern void vsw_process_dringdata_shm(void *, void *); 221 222 /* 223 * Tunables used in this file. 224 */ 225 extern int vsw_num_handshakes; 226 extern int vsw_ldc_tx_delay; 227 extern int vsw_ldc_tx_retries; 228 extern int vsw_ldc_retries; 229 extern int vsw_ldc_delay; 230 extern boolean_t vsw_ldc_rxthr_enabled; 231 extern boolean_t vsw_ldc_txthr_enabled; 232 extern uint32_t vsw_num_descriptors; 233 extern uint8_t vsw_dring_mode; 234 extern uint32_t vsw_max_tx_qcount; 235 extern boolean_t vsw_obp_ver_proto_workaround; 236 extern uint32_t vsw_publish_macaddr_count; 237 238 #define LDC_ENTER_LOCK(ldcp) \ 239 mutex_enter(&((ldcp)->ldc_cblock));\ 240 mutex_enter(&((ldcp)->ldc_rxlock));\ 241 mutex_enter(&((ldcp)->ldc_txlock)); 242 #define LDC_EXIT_LOCK(ldcp) \ 243 mutex_exit(&((ldcp)->ldc_txlock));\ 244 mutex_exit(&((ldcp)->ldc_rxlock));\ 245 mutex_exit(&((ldcp)->ldc_cblock)); 246 247 #define VSW_VER_EQ(ldcp, major, minor) \ 248 ((ldcp)->lane_out.ver_major == (major) && \ 249 (ldcp)->lane_out.ver_minor == (minor)) 250 251 #define VSW_VER_LT(ldcp, major, minor) \ 252 (((ldcp)->lane_out.ver_major < (major)) || \ 253 ((ldcp)->lane_out.ver_major == (major) && \ 254 (ldcp)->lane_out.ver_minor < (minor))) 255 256 #define VSW_VER_GTEQ(ldcp, major, minor) \ 257 (((ldcp)->lane_out.ver_major > (major)) || \ 258 ((ldcp)->lane_out.ver_major == (major) && \ 259 (ldcp)->lane_out.ver_minor >= (minor))) 260 261 #define VSW_VER_LTEQ(ldcp, major, minor) \ 262 (((ldcp)->lane_out.ver_major < (major)) || \ 263 ((ldcp)->lane_out.ver_major == (major) && \ 264 (ldcp)->lane_out.ver_minor <= (minor))) 265 266 /* 267 * VIO Protocol Version Info: 268 * 269 * The version specified below represents the version of protocol currently 270 * supported in the driver. It means the driver can negotiate with peers with 271 * versions <= this version. Here is a summary of the feature(s) that are 272 * supported at each version of the protocol: 273 * 274 * 1.0 Basic VIO protocol. 275 * 1.1 vDisk protocol update (no virtual network update). 276 * 1.2 Support for priority frames (priority-ether-types). 277 * 1.3 VLAN and HybridIO support. 278 * 1.4 Jumbo Frame support. 279 * 1.5 Link State Notification support with optional support 280 * for Physical Link information. 281 * 1.6 Support for RxDringData mode. 282 */ 283 static ver_sup_t vsw_versions[] = { {1, 6} }; 284 285 /* 286 * For the moment the state dump routines have their own 287 * private flag. 288 */ 289 #define DUMP_STATE 0 290 291 #if DUMP_STATE 292 293 #define DUMP_TAG(tag) \ 294 { \ 295 D1(NULL, "DUMP_TAG: type 0x%llx", (tag).vio_msgtype); \ 296 D1(NULL, "DUMP_TAG: stype 0x%llx", (tag).vio_subtype); \ 297 D1(NULL, "DUMP_TAG: senv 0x%llx", (tag).vio_subtype_env); \ 298 } 299 300 #define DUMP_TAG_PTR(tag) \ 301 { \ 302 D1(NULL, "DUMP_TAG: type 0x%llx", (tag)->vio_msgtype); \ 303 D1(NULL, "DUMP_TAG: stype 0x%llx", (tag)->vio_subtype); \ 304 D1(NULL, "DUMP_TAG: senv 0x%llx", (tag)->vio_subtype_env); \ 305 } 306 307 #define DUMP_FLAGS(flags) dump_flags(flags); 308 #define DISPLAY_STATE() display_state() 309 310 #else 311 312 #define DUMP_TAG(tag) 313 #define DUMP_TAG_PTR(tag) 314 #define DUMP_FLAGS(state) 315 #define DISPLAY_STATE() 316 317 #endif /* DUMP_STATE */ 318 319 /* 320 * Attach the specified port. 321 * 322 * Returns 0 on success, 1 on failure. 323 */ 324 int 325 vsw_port_attach(vsw_port_t *port) 326 { 327 vsw_t *vswp = port->p_vswp; 328 vsw_port_list_t *plist = &vswp->plist; 329 vsw_port_t *p, **pp; 330 int nids = port->num_ldcs; 331 uint64_t *ldcids; 332 int rv; 333 334 D1(vswp, "%s: enter : port %d", __func__, port->p_instance); 335 336 /* port already exists? */ 337 READ_ENTER(&plist->lockrw); 338 for (p = plist->head; p != NULL; p = p->p_next) { 339 if (p->p_instance == port->p_instance) { 340 DWARN(vswp, "%s: port instance %d already attached", 341 __func__, p->p_instance); 342 RW_EXIT(&plist->lockrw); 343 return (1); 344 } 345 } 346 RW_EXIT(&plist->lockrw); 347 348 mutex_init(&port->tx_lock, NULL, MUTEX_DRIVER, NULL); 349 mutex_init(&port->mca_lock, NULL, MUTEX_DRIVER, NULL); 350 rw_init(&port->maccl_rwlock, NULL, RW_DRIVER, NULL); 351 352 mutex_init(&port->state_lock, NULL, MUTEX_DRIVER, NULL); 353 cv_init(&port->state_cv, NULL, CV_DRIVER, NULL); 354 port->state = VSW_PORT_INIT; 355 356 D2(vswp, "%s: %d nids", __func__, nids); 357 ldcids = port->ldc_ids; 358 D2(vswp, "%s: ldcid (%llx)", __func__, (uint64_t)ldcids[0]); 359 if (vsw_ldc_attach(port, (uint64_t)ldcids[0]) != 0) { 360 DERR(vswp, "%s: ldc_attach failed", __func__); 361 goto exit_error; 362 } 363 364 if (vswp->switching_setup_done == B_TRUE) { 365 /* 366 * If the underlying network device has been setup, 367 * then open a mac client and porgram the mac address 368 * for this port. 369 */ 370 rv = vsw_mac_client_init(vswp, port, VSW_VNETPORT); 371 if (rv != 0) { 372 goto exit_error; 373 } 374 } 375 376 /* create the fdb entry for this port/mac address */ 377 vsw_fdbe_add(vswp, port); 378 379 vsw_create_vlans(port, VSW_VNETPORT); 380 381 WRITE_ENTER(&plist->lockrw); 382 383 /* link it into the list of ports for this vsw instance */ 384 pp = (vsw_port_t **)(&plist->head); 385 port->p_next = *pp; 386 *pp = port; 387 plist->num_ports++; 388 389 RW_EXIT(&plist->lockrw); 390 391 /* 392 * Initialise the port and any ldc's under it. 393 */ 394 (void) vsw_ldc_init(port->ldcp); 395 396 /* announce macaddr of vnet to the physical switch */ 397 if (vsw_publish_macaddr_count != 0) { /* enabled */ 398 vsw_publish_macaddr(vswp, port); 399 } 400 401 D1(vswp, "%s: exit", __func__); 402 return (0); 403 404 exit_error: 405 406 cv_destroy(&port->state_cv); 407 mutex_destroy(&port->state_lock); 408 409 rw_destroy(&port->maccl_rwlock); 410 mutex_destroy(&port->tx_lock); 411 mutex_destroy(&port->mca_lock); 412 kmem_free(port, sizeof (vsw_port_t)); 413 return (1); 414 } 415 416 /* 417 * Detach the specified port. 418 * 419 * Returns 0 on success, 1 on failure. 420 */ 421 int 422 vsw_port_detach(vsw_t *vswp, int p_instance) 423 { 424 vsw_port_t *port = NULL; 425 vsw_port_list_t *plist = &vswp->plist; 426 427 D1(vswp, "%s: enter: port id %d", __func__, p_instance); 428 429 WRITE_ENTER(&plist->lockrw); 430 431 if ((port = vsw_lookup_port(vswp, p_instance)) == NULL) { 432 RW_EXIT(&plist->lockrw); 433 return (1); 434 } 435 436 if (vsw_plist_del_node(vswp, port)) { 437 RW_EXIT(&plist->lockrw); 438 return (1); 439 } 440 441 /* cleanup any HybridIO for this port */ 442 vsw_hio_stop_port(port); 443 444 /* 445 * No longer need to hold writer lock on port list now 446 * that we have unlinked the target port from the list. 447 */ 448 RW_EXIT(&plist->lockrw); 449 450 /* Cleanup and close the mac client */ 451 vsw_mac_client_cleanup(vswp, port, VSW_VNETPORT); 452 453 /* Remove the fdb entry for this port/mac address */ 454 vsw_fdbe_del(vswp, &(port->p_macaddr)); 455 vsw_destroy_vlans(port, VSW_VNETPORT); 456 457 /* Remove any multicast addresses.. */ 458 vsw_del_mcst_port(port); 459 460 vsw_port_delete(port); 461 462 D1(vswp, "%s: exit: p_instance(%d)", __func__, p_instance); 463 return (0); 464 } 465 466 /* 467 * Detach all active ports. 468 */ 469 void 470 vsw_detach_ports(vsw_t *vswp) 471 { 472 vsw_port_list_t *plist = &vswp->plist; 473 vsw_port_t *port = NULL; 474 475 D1(vswp, "%s: enter", __func__); 476 477 WRITE_ENTER(&plist->lockrw); 478 479 while ((port = plist->head) != NULL) { 480 (void) vsw_plist_del_node(vswp, port); 481 482 /* cleanup any HybridIO for this port */ 483 vsw_hio_stop_port(port); 484 485 /* Cleanup and close the mac client */ 486 vsw_mac_client_cleanup(vswp, port, VSW_VNETPORT); 487 488 /* Remove the fdb entry for this port/mac address */ 489 vsw_fdbe_del(vswp, &(port->p_macaddr)); 490 vsw_destroy_vlans(port, VSW_VNETPORT); 491 492 /* Remove any multicast addresses.. */ 493 vsw_del_mcst_port(port); 494 495 /* 496 * No longer need to hold the lock on the port list 497 * now that we have unlinked the target port from the 498 * list. 499 */ 500 RW_EXIT(&plist->lockrw); 501 vsw_port_delete(port); 502 WRITE_ENTER(&plist->lockrw); 503 } 504 RW_EXIT(&plist->lockrw); 505 506 D1(vswp, "%s: exit", __func__); 507 } 508 509 /* 510 * Delete the specified port. 511 */ 512 static void 513 vsw_port_delete(vsw_port_t *port) 514 { 515 vsw_t *vswp = port->p_vswp; 516 517 D1(vswp, "%s: enter : port id %d", __func__, port->p_instance); 518 519 vsw_ldc_uninit(port->ldcp); 520 521 /* 522 * Wait for any pending ctrl msg tasks which reference this 523 * port to finish. 524 */ 525 vsw_drain_port_taskq(port); 526 527 /* 528 * Wait for any active callbacks to finish 529 */ 530 vsw_ldc_drain(port->ldcp); 531 532 vsw_ldc_detach(port->ldcp); 533 534 rw_destroy(&port->maccl_rwlock); 535 mutex_destroy(&port->mca_lock); 536 mutex_destroy(&port->tx_lock); 537 538 cv_destroy(&port->state_cv); 539 mutex_destroy(&port->state_lock); 540 541 if (port->num_ldcs != 0) { 542 kmem_free(port->ldc_ids, port->num_ldcs * sizeof (uint64_t)); 543 port->num_ldcs = 0; 544 } 545 546 if (port->nvids != 0) { 547 kmem_free(port->vids, sizeof (vsw_vlanid_t) * port->nvids); 548 } 549 550 kmem_free(port, sizeof (vsw_port_t)); 551 552 D1(vswp, "%s: exit", __func__); 553 } 554 555 /* 556 * Attach a logical domain channel (ldc) under a specified port. 557 * 558 * Returns 0 on success, 1 on failure. 559 */ 560 static int 561 vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id) 562 { 563 vsw_t *vswp = port->p_vswp; 564 vsw_ldc_t *ldcp = NULL; 565 ldc_attr_t attr; 566 ldc_status_t istatus; 567 int status = DDI_FAILURE; 568 char kname[MAXNAMELEN]; 569 enum { PROG_init = 0x0, 570 PROG_callback = 0x1, 571 PROG_tx_thread = 0x2} 572 progress; 573 574 progress = PROG_init; 575 576 D1(vswp, "%s: enter", __func__); 577 578 ldcp = kmem_zalloc(sizeof (vsw_ldc_t), KM_NOSLEEP); 579 if (ldcp == NULL) { 580 DERR(vswp, "%s: kmem_zalloc failed", __func__); 581 return (1); 582 } 583 ldcp->ldc_id = ldc_id; 584 585 mutex_init(&ldcp->ldc_txlock, NULL, MUTEX_DRIVER, NULL); 586 mutex_init(&ldcp->ldc_rxlock, NULL, MUTEX_DRIVER, NULL); 587 mutex_init(&ldcp->ldc_cblock, NULL, MUTEX_DRIVER, NULL); 588 ldcp->msg_thr_flags = 0; 589 mutex_init(&ldcp->msg_thr_lock, NULL, MUTEX_DRIVER, NULL); 590 cv_init(&ldcp->msg_thr_cv, NULL, CV_DRIVER, NULL); 591 ldcp->rcv_thr_flags = 0; 592 mutex_init(&ldcp->rcv_thr_lock, NULL, MUTEX_DRIVER, NULL); 593 cv_init(&ldcp->rcv_thr_cv, NULL, CV_DRIVER, NULL); 594 mutex_init(&ldcp->drain_cv_lock, NULL, MUTEX_DRIVER, NULL); 595 cv_init(&ldcp->drain_cv, NULL, CV_DRIVER, NULL); 596 597 /* required for handshake with peer */ 598 ldcp->local_session = (uint64_t)ddi_get_lbolt(); 599 ldcp->peer_session = 0; 600 ldcp->session_status = 0; 601 ldcp->hss_id = 1; /* Initial handshake session id */ 602 ldcp->hphase = VSW_MILESTONE0; 603 604 (void) atomic_swap_32(&port->p_hio_capable, B_FALSE); 605 606 /* only set for outbound lane, inbound set by peer */ 607 vsw_set_lane_attr(vswp, &ldcp->lane_out); 608 609 attr.devclass = LDC_DEV_NT_SVC; 610 attr.instance = ddi_get_instance(vswp->dip); 611 attr.mode = LDC_MODE_UNRELIABLE; 612 attr.mtu = VSW_LDC_MTU; 613 status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle); 614 if (status != 0) { 615 DERR(vswp, "%s(%lld): ldc_init failed, rv (%d)", 616 __func__, ldc_id, status); 617 goto ldc_attach_fail; 618 } 619 620 if (vsw_ldc_txthr_enabled) { 621 ldcp->tx_thr_flags = 0; 622 ldcp->tx_mhead = ldcp->tx_mtail = NULL; 623 624 mutex_init(&ldcp->tx_thr_lock, NULL, MUTEX_DRIVER, NULL); 625 cv_init(&ldcp->tx_thr_cv, NULL, CV_DRIVER, NULL); 626 ldcp->tx_thread = thread_create(NULL, 2 * DEFAULTSTKSZ, 627 vsw_ldc_tx_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri); 628 629 progress |= PROG_tx_thread; 630 if (ldcp->tx_thread == NULL) { 631 DWARN(vswp, "%s(%lld): Failed to create worker thread", 632 __func__, ldc_id); 633 goto ldc_attach_fail; 634 } 635 } 636 637 status = ldc_reg_callback(ldcp->ldc_handle, vsw_ldc_cb, (caddr_t)ldcp); 638 if (status != 0) { 639 DERR(vswp, "%s(%lld): ldc_reg_callback failed, rv (%d)", 640 __func__, ldc_id, status); 641 (void) ldc_fini(ldcp->ldc_handle); 642 goto ldc_attach_fail; 643 } 644 /* 645 * allocate a message for ldc_read()s, big enough to hold ctrl and 646 * data msgs, including raw data msgs used to recv priority frames. 647 */ 648 ldcp->msglen = VIO_PKT_DATA_HDRSIZE + vswp->max_frame_size; 649 ldcp->ldcmsg = kmem_alloc(ldcp->msglen, KM_SLEEP); 650 651 progress |= PROG_callback; 652 653 mutex_init(&ldcp->status_lock, NULL, MUTEX_DRIVER, NULL); 654 655 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) { 656 DERR(vswp, "%s: ldc_status failed", __func__); 657 mutex_destroy(&ldcp->status_lock); 658 goto ldc_attach_fail; 659 } 660 661 ldcp->ldc_status = istatus; 662 ldcp->ldc_port = port; 663 ldcp->ldc_vswp = vswp; 664 665 vsw_reset_vnet_proto_ops(ldcp); 666 667 (void) sprintf(kname, "%sldc0x%lx", DRV_NAME, ldcp->ldc_id); 668 ldcp->ksp = vgen_setup_kstats(DRV_NAME, vswp->instance, 669 kname, &ldcp->ldc_stats); 670 if (ldcp->ksp == NULL) { 671 DERR(vswp, "%s: kstats setup failed", __func__); 672 goto ldc_attach_fail; 673 } 674 675 /* link it into this port */ 676 port->ldcp = ldcp; 677 678 D1(vswp, "%s: exit", __func__); 679 return (0); 680 681 ldc_attach_fail: 682 683 if (progress & PROG_callback) { 684 (void) ldc_unreg_callback(ldcp->ldc_handle); 685 kmem_free(ldcp->ldcmsg, ldcp->msglen); 686 } 687 688 if (progress & PROG_tx_thread) { 689 if (ldcp->tx_thread != NULL) { 690 vsw_stop_tx_thread(ldcp); 691 } 692 mutex_destroy(&ldcp->tx_thr_lock); 693 cv_destroy(&ldcp->tx_thr_cv); 694 } 695 if (ldcp->ksp != NULL) { 696 vgen_destroy_kstats(ldcp->ksp); 697 } 698 mutex_destroy(&ldcp->msg_thr_lock); 699 mutex_destroy(&ldcp->rcv_thr_lock); 700 mutex_destroy(&ldcp->ldc_txlock); 701 mutex_destroy(&ldcp->ldc_rxlock); 702 mutex_destroy(&ldcp->ldc_cblock); 703 mutex_destroy(&ldcp->drain_cv_lock); 704 cv_destroy(&ldcp->msg_thr_cv); 705 cv_destroy(&ldcp->rcv_thr_cv); 706 cv_destroy(&ldcp->drain_cv); 707 708 kmem_free(ldcp, sizeof (vsw_ldc_t)); 709 710 return (1); 711 } 712 713 /* 714 * Detach a logical domain channel (ldc) belonging to a 715 * particular port. 716 */ 717 static void 718 vsw_ldc_detach(vsw_ldc_t *ldcp) 719 { 720 int rv; 721 vsw_t *vswp = ldcp->ldc_port->p_vswp; 722 int retries = 0; 723 724 D2(vswp, "%s: detaching channel %lld", __func__, ldcp->ldc_id); 725 726 /* Stop msg/rcv thread */ 727 if (ldcp->rcv_thread != NULL) { 728 vsw_stop_rcv_thread(ldcp); 729 } else if (ldcp->msg_thread != NULL) { 730 vsw_stop_msg_thread(ldcp); 731 } 732 kmem_free(ldcp->ldcmsg, ldcp->msglen); 733 734 /* Stop the tx thread */ 735 if (ldcp->tx_thread != NULL) { 736 vsw_stop_tx_thread(ldcp); 737 mutex_destroy(&ldcp->tx_thr_lock); 738 cv_destroy(&ldcp->tx_thr_cv); 739 if (ldcp->tx_mhead != NULL) { 740 freemsgchain(ldcp->tx_mhead); 741 ldcp->tx_mhead = ldcp->tx_mtail = NULL; 742 ldcp->tx_cnt = 0; 743 } 744 } 745 746 /* Destory kstats */ 747 vgen_destroy_kstats(ldcp->ksp); 748 749 /* 750 * Before we can close the channel we must release any mapped 751 * resources (e.g. drings). 752 */ 753 vsw_free_lane_resources(ldcp, INBOUND); 754 vsw_free_lane_resources(ldcp, OUTBOUND); 755 756 /* 757 * Close the channel, retry on EAAGIN. 758 */ 759 while ((rv = ldc_close(ldcp->ldc_handle)) == EAGAIN) { 760 if (++retries > vsw_ldc_retries) { 761 break; 762 } 763 drv_usecwait(vsw_ldc_delay); 764 } 765 if (rv != 0) { 766 cmn_err(CE_NOTE, 767 "!vsw%d: Error(%d) closing the channel(0x%lx)\n", 768 vswp->instance, rv, ldcp->ldc_id); 769 } 770 771 (void) ldc_fini(ldcp->ldc_handle); 772 773 ldcp->ldc_status = LDC_INIT; 774 ldcp->ldc_handle = NULL; 775 ldcp->ldc_vswp = NULL; 776 777 mutex_destroy(&ldcp->msg_thr_lock); 778 mutex_destroy(&ldcp->rcv_thr_lock); 779 mutex_destroy(&ldcp->ldc_txlock); 780 mutex_destroy(&ldcp->ldc_rxlock); 781 mutex_destroy(&ldcp->ldc_cblock); 782 mutex_destroy(&ldcp->drain_cv_lock); 783 mutex_destroy(&ldcp->status_lock); 784 cv_destroy(&ldcp->msg_thr_cv); 785 cv_destroy(&ldcp->rcv_thr_cv); 786 cv_destroy(&ldcp->drain_cv); 787 788 kmem_free(ldcp, sizeof (vsw_ldc_t)); 789 } 790 791 /* 792 * Open and attempt to bring up the channel. Note that channel 793 * can only be brought up if peer has also opened channel. 794 * 795 * Returns 0 if can open and bring up channel, otherwise 796 * returns 1. 797 */ 798 static int 799 vsw_ldc_init(vsw_ldc_t *ldcp) 800 { 801 vsw_t *vswp = ldcp->ldc_vswp; 802 ldc_status_t istatus = 0; 803 int rv; 804 805 D1(vswp, "%s: enter", __func__); 806 807 LDC_ENTER_LOCK(ldcp); 808 809 /* don't start at 0 in case clients don't like that */ 810 ldcp->next_ident = 1; 811 812 rv = ldc_open(ldcp->ldc_handle); 813 if (rv != 0) { 814 DERR(vswp, "%s: ldc_open failed: id(%lld) rv(%d)", 815 __func__, ldcp->ldc_id, rv); 816 LDC_EXIT_LOCK(ldcp); 817 return (1); 818 } 819 820 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) { 821 DERR(vswp, "%s: unable to get status", __func__); 822 LDC_EXIT_LOCK(ldcp); 823 return (1); 824 825 } else if (istatus != LDC_OPEN && istatus != LDC_READY) { 826 DERR(vswp, "%s: id (%lld) status(%d) is not OPEN/READY", 827 __func__, ldcp->ldc_id, istatus); 828 LDC_EXIT_LOCK(ldcp); 829 return (1); 830 } 831 832 mutex_enter(&ldcp->status_lock); 833 ldcp->ldc_status = istatus; 834 mutex_exit(&ldcp->status_lock); 835 836 rv = ldc_up(ldcp->ldc_handle); 837 if (rv != 0) { 838 /* 839 * Not a fatal error for ldc_up() to fail, as peer 840 * end point may simply not be ready yet. 841 */ 842 D2(vswp, "%s: ldc_up err id(%lld) rv(%d)", __func__, 843 ldcp->ldc_id, rv); 844 LDC_EXIT_LOCK(ldcp); 845 return (1); 846 } 847 848 /* 849 * ldc_up() call is non-blocking so need to explicitly 850 * check channel status to see if in fact the channel 851 * is UP. 852 */ 853 mutex_enter(&ldcp->status_lock); 854 if (ldc_status(ldcp->ldc_handle, &ldcp->ldc_status) != 0) { 855 DERR(vswp, "%s: unable to get status", __func__); 856 mutex_exit(&ldcp->status_lock); 857 LDC_EXIT_LOCK(ldcp); 858 return (1); 859 860 } 861 862 if (ldcp->ldc_status == LDC_UP) { 863 D2(vswp, "%s: channel %ld now UP (%ld)", __func__, 864 ldcp->ldc_id, istatus); 865 mutex_exit(&ldcp->status_lock); 866 LDC_EXIT_LOCK(ldcp); 867 868 vsw_process_conn_evt(ldcp, VSW_CONN_UP); 869 return (0); 870 } 871 872 mutex_exit(&ldcp->status_lock); 873 LDC_EXIT_LOCK(ldcp); 874 875 D1(vswp, "%s: exit", __func__); 876 return (0); 877 } 878 879 /* disable callbacks on the channel */ 880 static void 881 vsw_ldc_uninit(vsw_ldc_t *ldcp) 882 { 883 vsw_t *vswp = ldcp->ldc_vswp; 884 int rv; 885 886 D1(vswp, "vsw_ldc_uninit: enter: id(%lx)\n", ldcp->ldc_id); 887 888 LDC_ENTER_LOCK(ldcp); 889 890 rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE); 891 if (rv != 0) { 892 cmn_err(CE_NOTE, "!vsw_ldc_uninit(%ld): error disabling " 893 "interrupts (rv = %d)\n", ldcp->ldc_id, rv); 894 } 895 896 mutex_enter(&ldcp->status_lock); 897 ldcp->ldc_status = LDC_INIT; 898 mutex_exit(&ldcp->status_lock); 899 900 LDC_EXIT_LOCK(ldcp); 901 902 D1(vswp, "vsw_ldc_uninit: exit: id(%lx)", ldcp->ldc_id); 903 } 904 905 /* 906 * Wait until the callback(s) associated with the ldcs under the specified 907 * port have completed. 908 * 909 * Prior to this function being invoked each channel under this port 910 * should have been quiesced via ldc_set_cb_mode(DISABLE). 911 * 912 * A short explaination of what we are doing below.. 913 * 914 * The simplest approach would be to have a reference counter in 915 * the ldc structure which is increment/decremented by the callbacks as 916 * they use the channel. The drain function could then simply disable any 917 * further callbacks and do a cv_wait for the ref to hit zero. Unfortunately 918 * there is a tiny window here - before the callback is able to get the lock 919 * on the channel it is interrupted and this function gets to execute. It 920 * sees that the ref count is zero and believes its free to delete the 921 * associated data structures. 922 * 923 * We get around this by taking advantage of the fact that before the ldc 924 * framework invokes a callback it sets a flag to indicate that there is a 925 * callback active (or about to become active). If when we attempt to 926 * unregister a callback when this active flag is set then the unregister 927 * will fail with EWOULDBLOCK. 928 * 929 * If the unregister fails we do a cv_timedwait. We will either be signaled 930 * by the callback as it is exiting (note we have to wait a short period to 931 * allow the callback to return fully to the ldc framework and it to clear 932 * the active flag), or by the timer expiring. In either case we again attempt 933 * the unregister. We repeat this until we can succesfully unregister the 934 * callback. 935 * 936 * The reason we use a cv_timedwait rather than a simple cv_wait is to catch 937 * the case where the callback has finished but the ldc framework has not yet 938 * cleared the active flag. In this case we would never get a cv_signal. 939 */ 940 static void 941 vsw_ldc_drain(vsw_ldc_t *ldcp) 942 { 943 vsw_t *vswp = ldcp->ldc_port->p_vswp; 944 945 D1(vswp, "%s: enter", __func__); 946 947 /* 948 * If we can unregister the channel callback then we 949 * know that there is no callback either running or 950 * scheduled to run for this channel so move on to next 951 * channel in the list. 952 */ 953 mutex_enter(&ldcp->drain_cv_lock); 954 955 /* prompt active callbacks to quit */ 956 ldcp->drain_state = VSW_LDC_DRAINING; 957 958 if ((ldc_unreg_callback(ldcp->ldc_handle)) == 0) { 959 D2(vswp, "%s: unreg callback for chan %ld", __func__, 960 ldcp->ldc_id); 961 mutex_exit(&ldcp->drain_cv_lock); 962 } else { 963 /* 964 * If we end up here we know that either 1) a callback 965 * is currently executing, 2) is about to start (i.e. 966 * the ldc framework has set the active flag but 967 * has not actually invoked the callback yet, or 3) 968 * has finished and has returned to the ldc framework 969 * but the ldc framework has not yet cleared the 970 * active bit. 971 * 972 * Wait for it to finish. 973 */ 974 while (ldc_unreg_callback(ldcp->ldc_handle) == EWOULDBLOCK) { 975 (void) cv_timedwait(&ldcp->drain_cv, 976 &ldcp->drain_cv_lock, ddi_get_lbolt() + hz); 977 } 978 979 mutex_exit(&ldcp->drain_cv_lock); 980 D2(vswp, "%s: unreg callback for chan %ld after " 981 "timeout", __func__, ldcp->ldc_id); 982 } 983 984 D1(vswp, "%s: exit", __func__); 985 } 986 987 /* 988 * Wait until all tasks which reference this port have completed. 989 * 990 * Prior to this function being invoked each channel under this port 991 * should have been quiesced via ldc_set_cb_mode(DISABLE). 992 */ 993 static void 994 vsw_drain_port_taskq(vsw_port_t *port) 995 { 996 vsw_t *vswp = port->p_vswp; 997 998 D1(vswp, "%s: enter", __func__); 999 1000 /* 1001 * Mark the port as in the process of being detached, and 1002 * dispatch a marker task to the queue so we know when all 1003 * relevant tasks have completed. 1004 */ 1005 mutex_enter(&port->state_lock); 1006 port->state = VSW_PORT_DETACHING; 1007 1008 if ((vswp->taskq_p == NULL) || 1009 (ddi_taskq_dispatch(vswp->taskq_p, vsw_marker_task, 1010 port, DDI_NOSLEEP) != DDI_SUCCESS)) { 1011 cmn_err(CE_NOTE, "!vsw%d: unable to dispatch marker task", 1012 vswp->instance); 1013 mutex_exit(&port->state_lock); 1014 return; 1015 } 1016 1017 /* 1018 * Wait for the marker task to finish. 1019 */ 1020 while (port->state != VSW_PORT_DETACHABLE) 1021 cv_wait(&port->state_cv, &port->state_lock); 1022 1023 mutex_exit(&port->state_lock); 1024 1025 D1(vswp, "%s: exit", __func__); 1026 } 1027 1028 static void 1029 vsw_marker_task(void *arg) 1030 { 1031 vsw_port_t *port = arg; 1032 vsw_t *vswp = port->p_vswp; 1033 1034 D1(vswp, "%s: enter", __func__); 1035 1036 mutex_enter(&port->state_lock); 1037 1038 /* 1039 * No further tasks should be dispatched which reference 1040 * this port so ok to mark it as safe to detach. 1041 */ 1042 port->state = VSW_PORT_DETACHABLE; 1043 1044 cv_signal(&port->state_cv); 1045 1046 mutex_exit(&port->state_lock); 1047 1048 D1(vswp, "%s: exit", __func__); 1049 } 1050 1051 vsw_port_t * 1052 vsw_lookup_port(vsw_t *vswp, int p_instance) 1053 { 1054 vsw_port_list_t *plist = &vswp->plist; 1055 vsw_port_t *port; 1056 1057 for (port = plist->head; port != NULL; port = port->p_next) { 1058 if (port->p_instance == p_instance) { 1059 D2(vswp, "vsw_lookup_port: found p_instance\n"); 1060 return (port); 1061 } 1062 } 1063 1064 return (NULL); 1065 } 1066 1067 void 1068 vsw_vlan_unaware_port_reset(vsw_port_t *portp) 1069 { 1070 vsw_ldc_t *ldcp = portp->ldcp; 1071 1072 mutex_enter(&ldcp->ldc_cblock); 1073 1074 /* 1075 * If the peer is vlan_unaware(ver < 1.3), reset channel and terminate 1076 * the connection. See comments in vsw_set_vnet_proto_ops(). 1077 */ 1078 if (ldcp->hphase == VSW_MILESTONE4 && VSW_VER_LT(ldcp, 1, 3) && 1079 portp->nvids != 0) { 1080 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1081 } 1082 1083 mutex_exit(&ldcp->ldc_cblock); 1084 } 1085 1086 void 1087 vsw_hio_port_reset(vsw_port_t *portp, boolean_t immediate) 1088 { 1089 vsw_ldc_t *ldcp = portp->ldcp; 1090 1091 mutex_enter(&ldcp->ldc_cblock); 1092 1093 /* 1094 * If the peer is HybridIO capable (ver >= 1.3), reset channel 1095 * to trigger re-negotiation, which inturn trigger HybridIO 1096 * setup/cleanup. 1097 */ 1098 if ((ldcp->hphase == VSW_MILESTONE4) && 1099 (portp->p_hio_capable == B_TRUE)) { 1100 if (immediate == B_TRUE) { 1101 (void) ldc_down(ldcp->ldc_handle); 1102 } else { 1103 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1104 } 1105 } 1106 1107 mutex_exit(&ldcp->ldc_cblock); 1108 } 1109 1110 void 1111 vsw_port_reset(vsw_port_t *portp) 1112 { 1113 vsw_ldc_t *ldcp = portp->ldcp; 1114 1115 mutex_enter(&ldcp->ldc_cblock); 1116 1117 /* 1118 * reset channel and terminate the connection. 1119 */ 1120 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1121 1122 mutex_exit(&ldcp->ldc_cblock); 1123 } 1124 1125 void 1126 vsw_reset_ports(vsw_t *vswp) 1127 { 1128 vsw_port_list_t *plist = &vswp->plist; 1129 vsw_port_t *portp; 1130 1131 READ_ENTER(&plist->lockrw); 1132 for (portp = plist->head; portp != NULL; portp = portp->p_next) { 1133 if ((portp->p_hio_capable) && (portp->p_hio_enabled)) { 1134 vsw_hio_stop_port(portp); 1135 } 1136 vsw_port_reset(portp); 1137 } 1138 RW_EXIT(&plist->lockrw); 1139 } 1140 1141 static void 1142 vsw_send_physlink_msg(vsw_ldc_t *ldcp, link_state_t plink_state) 1143 { 1144 vnet_physlink_msg_t msg; 1145 vnet_physlink_msg_t *msgp = &msg; 1146 uint32_t physlink_info = 0; 1147 1148 if (plink_state == LINK_STATE_UP) { 1149 physlink_info |= VNET_PHYSLINK_STATE_UP; 1150 } else { 1151 physlink_info |= VNET_PHYSLINK_STATE_DOWN; 1152 } 1153 1154 msgp->tag.vio_msgtype = VIO_TYPE_CTRL; 1155 msgp->tag.vio_subtype = VIO_SUBTYPE_INFO; 1156 msgp->tag.vio_subtype_env = VNET_PHYSLINK_INFO; 1157 msgp->tag.vio_sid = ldcp->local_session; 1158 msgp->physlink_info = physlink_info; 1159 1160 (void) vsw_send_msg(ldcp, msgp, sizeof (msg), B_TRUE); 1161 } 1162 1163 static void 1164 vsw_port_physlink_update(vsw_port_t *portp) 1165 { 1166 vsw_ldc_t *ldcp; 1167 vsw_t *vswp; 1168 1169 vswp = portp->p_vswp; 1170 ldcp = portp->ldcp; 1171 1172 mutex_enter(&ldcp->ldc_cblock); 1173 1174 /* 1175 * If handshake has completed successfully and if the vnet device 1176 * has negotiated to get physical link state updates, send a message 1177 * with the current state. 1178 */ 1179 if (ldcp->hphase == VSW_MILESTONE4 && ldcp->pls_negotiated == B_TRUE) { 1180 vsw_send_physlink_msg(ldcp, vswp->phys_link_state); 1181 } 1182 1183 mutex_exit(&ldcp->ldc_cblock); 1184 } 1185 1186 void 1187 vsw_physlink_update_ports(vsw_t *vswp) 1188 { 1189 vsw_port_list_t *plist = &vswp->plist; 1190 vsw_port_t *portp; 1191 1192 READ_ENTER(&plist->lockrw); 1193 for (portp = plist->head; portp != NULL; portp = portp->p_next) { 1194 vsw_port_physlink_update(portp); 1195 } 1196 RW_EXIT(&plist->lockrw); 1197 } 1198 1199 /* 1200 * Search for and remove the specified port from the port 1201 * list. Returns 0 if able to locate and remove port, otherwise 1202 * returns 1. 1203 */ 1204 static int 1205 vsw_plist_del_node(vsw_t *vswp, vsw_port_t *port) 1206 { 1207 vsw_port_list_t *plist = &vswp->plist; 1208 vsw_port_t *curr_p, *prev_p; 1209 1210 if (plist->head == NULL) 1211 return (1); 1212 1213 curr_p = prev_p = plist->head; 1214 1215 while (curr_p != NULL) { 1216 if (curr_p == port) { 1217 if (prev_p == curr_p) { 1218 plist->head = curr_p->p_next; 1219 } else { 1220 prev_p->p_next = curr_p->p_next; 1221 } 1222 plist->num_ports--; 1223 break; 1224 } else { 1225 prev_p = curr_p; 1226 curr_p = curr_p->p_next; 1227 } 1228 } 1229 return (0); 1230 } 1231 1232 /* 1233 * Interrupt handler for ldc messages. 1234 */ 1235 static uint_t 1236 vsw_ldc_cb(uint64_t event, caddr_t arg) 1237 { 1238 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 1239 vsw_t *vswp = ldcp->ldc_vswp; 1240 1241 D1(vswp, "%s: enter: ldcid (%lld)\n", __func__, ldcp->ldc_id); 1242 1243 mutex_enter(&ldcp->ldc_cblock); 1244 ldcp->ldc_stats.callbacks++; 1245 1246 mutex_enter(&ldcp->status_lock); 1247 if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) { 1248 mutex_exit(&ldcp->status_lock); 1249 mutex_exit(&ldcp->ldc_cblock); 1250 return (LDC_SUCCESS); 1251 } 1252 mutex_exit(&ldcp->status_lock); 1253 1254 if (event & LDC_EVT_UP) { 1255 /* 1256 * Channel has come up. 1257 */ 1258 D2(vswp, "%s: id(%ld) event(%llx) UP: status(%ld)", 1259 __func__, ldcp->ldc_id, event, ldcp->ldc_status); 1260 1261 vsw_process_conn_evt(ldcp, VSW_CONN_UP); 1262 1263 ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0); 1264 } 1265 1266 if (event & LDC_EVT_READ) { 1267 /* 1268 * Data available for reading. 1269 */ 1270 D2(vswp, "%s: id(ld) event(%llx) data READ", 1271 __func__, ldcp->ldc_id, event); 1272 1273 vsw_process_evt_read(ldcp); 1274 1275 ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0); 1276 1277 goto vsw_cb_exit; 1278 } 1279 1280 if (event & (LDC_EVT_DOWN | LDC_EVT_RESET)) { 1281 D2(vswp, "%s: id(%ld) event (%lx) DOWN/RESET: status(%ld)", 1282 __func__, ldcp->ldc_id, event, ldcp->ldc_status); 1283 1284 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 1285 } 1286 1287 /* 1288 * Catch either LDC_EVT_WRITE which we don't support or any 1289 * unknown event. 1290 */ 1291 if (event & 1292 ~(LDC_EVT_UP | LDC_EVT_RESET | LDC_EVT_DOWN | LDC_EVT_READ)) { 1293 DERR(vswp, "%s: id(%ld) Unexpected event=(%llx) status(%ld)", 1294 __func__, ldcp->ldc_id, event, ldcp->ldc_status); 1295 } 1296 1297 vsw_cb_exit: 1298 mutex_exit(&ldcp->ldc_cblock); 1299 1300 /* 1301 * Let the drain function know we are finishing if it 1302 * is waiting. 1303 */ 1304 mutex_enter(&ldcp->drain_cv_lock); 1305 if (ldcp->drain_state == VSW_LDC_DRAINING) 1306 cv_signal(&ldcp->drain_cv); 1307 mutex_exit(&ldcp->drain_cv_lock); 1308 1309 return (LDC_SUCCESS); 1310 } 1311 1312 /* 1313 * Reinitialise data structures associated with the channel. 1314 */ 1315 static void 1316 vsw_ldc_reinit(vsw_ldc_t *ldcp) 1317 { 1318 vsw_t *vswp = ldcp->ldc_vswp; 1319 vsw_port_t *port; 1320 1321 D1(vswp, "%s: enter", __func__); 1322 1323 port = ldcp->ldc_port; 1324 1325 D2(vswp, "%s: in 0x%llx : out 0x%llx", __func__, 1326 ldcp->lane_in.lstate, ldcp->lane_out.lstate); 1327 1328 vsw_free_lane_resources(ldcp, INBOUND); 1329 vsw_free_lane_resources(ldcp, OUTBOUND); 1330 1331 ldcp->lane_in.lstate = 0; 1332 ldcp->lane_out.lstate = 0; 1333 1334 /* 1335 * Remove parent port from any multicast groups 1336 * it may have registered with. Client must resend 1337 * multicast add command after handshake completes. 1338 */ 1339 vsw_del_mcst_port(port); 1340 1341 ldcp->peer_session = 0; 1342 ldcp->session_status = 0; 1343 ldcp->hcnt = 0; 1344 ldcp->hphase = VSW_MILESTONE0; 1345 1346 vsw_reset_vnet_proto_ops(ldcp); 1347 1348 D1(vswp, "%s: exit", __func__); 1349 } 1350 1351 /* 1352 * Process a connection event. 1353 */ 1354 void 1355 vsw_process_conn_evt(vsw_ldc_t *ldcp, uint16_t evt) 1356 { 1357 vsw_t *vswp = ldcp->ldc_vswp; 1358 vsw_conn_evt_t *conn = NULL; 1359 1360 D1(vswp, "%s: enter", __func__); 1361 1362 /* 1363 * Check if either a reset or restart event is pending 1364 * or in progress. If so just return. 1365 * 1366 * A VSW_CONN_RESET event originates either with a LDC_RESET_EVT 1367 * being received by the callback handler, or a ECONNRESET error 1368 * code being returned from a ldc_read() or ldc_write() call. 1369 * 1370 * A VSW_CONN_RESTART event occurs when some error checking code 1371 * decides that there is a problem with data from the channel, 1372 * and that the handshake should be restarted. 1373 */ 1374 if (((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART)) && 1375 (ldstub((uint8_t *)&ldcp->reset_active))) 1376 return; 1377 1378 /* 1379 * If it is an LDC_UP event we first check the recorded 1380 * state of the channel. If this is UP then we know that 1381 * the channel moving to the UP state has already been dealt 1382 * with and don't need to dispatch a new task. 1383 * 1384 * The reason for this check is that when we do a ldc_up(), 1385 * depending on the state of the peer, we may or may not get 1386 * a LDC_UP event. As we can't depend on getting a LDC_UP evt 1387 * every time we do ldc_up() we explicitly check the channel 1388 * status to see has it come up (ldc_up() is asynch and will 1389 * complete at some undefined time), and take the appropriate 1390 * action. 1391 * 1392 * The flip side of this is that we may get a LDC_UP event 1393 * when we have already seen that the channel is up and have 1394 * dealt with that. 1395 */ 1396 mutex_enter(&ldcp->status_lock); 1397 if (evt == VSW_CONN_UP) { 1398 if ((ldcp->ldc_status == LDC_UP) || (ldcp->reset_active != 0)) { 1399 mutex_exit(&ldcp->status_lock); 1400 return; 1401 } 1402 } 1403 mutex_exit(&ldcp->status_lock); 1404 1405 /* 1406 * The transaction group id allows us to identify and discard 1407 * any tasks which are still pending on the taskq and refer 1408 * to the handshake session we are about to restart or reset. 1409 * These stale messages no longer have any real meaning. 1410 */ 1411 (void) atomic_inc_32(&ldcp->hss_id); 1412 1413 ASSERT(vswp->taskq_p != NULL); 1414 1415 if ((conn = kmem_zalloc(sizeof (vsw_conn_evt_t), KM_NOSLEEP)) == NULL) { 1416 cmn_err(CE_WARN, "!vsw%d: unable to allocate memory for" 1417 " connection event", vswp->instance); 1418 goto err_exit; 1419 } 1420 1421 conn->evt = evt; 1422 conn->ldcp = ldcp; 1423 1424 if (ddi_taskq_dispatch(vswp->taskq_p, vsw_conn_task, conn, 1425 DDI_NOSLEEP) != DDI_SUCCESS) { 1426 cmn_err(CE_WARN, "!vsw%d: Can't dispatch connection task", 1427 vswp->instance); 1428 1429 kmem_free(conn, sizeof (vsw_conn_evt_t)); 1430 goto err_exit; 1431 } 1432 1433 D1(vswp, "%s: exit", __func__); 1434 return; 1435 1436 err_exit: 1437 /* 1438 * Have mostly likely failed due to memory shortage. Clear the flag so 1439 * that future requests will at least be attempted and will hopefully 1440 * succeed. 1441 */ 1442 if ((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART)) 1443 ldcp->reset_active = 0; 1444 } 1445 1446 /* 1447 * Deal with events relating to a connection. Invoked from a taskq. 1448 */ 1449 static void 1450 vsw_conn_task(void *arg) 1451 { 1452 vsw_conn_evt_t *conn = (vsw_conn_evt_t *)arg; 1453 vsw_ldc_t *ldcp = NULL; 1454 vsw_port_t *portp; 1455 vsw_t *vswp = NULL; 1456 uint16_t evt; 1457 ldc_status_t curr_status; 1458 1459 ldcp = conn->ldcp; 1460 evt = conn->evt; 1461 vswp = ldcp->ldc_vswp; 1462 portp = ldcp->ldc_port; 1463 1464 D1(vswp, "%s: enter", __func__); 1465 1466 /* can safely free now have copied out data */ 1467 kmem_free(conn, sizeof (vsw_conn_evt_t)); 1468 1469 if (ldcp->rcv_thread != NULL) { 1470 vsw_stop_rcv_thread(ldcp); 1471 } else if (ldcp->msg_thread != NULL) { 1472 vsw_stop_msg_thread(ldcp); 1473 } 1474 1475 mutex_enter(&ldcp->status_lock); 1476 if (ldc_status(ldcp->ldc_handle, &curr_status) != 0) { 1477 cmn_err(CE_WARN, "!vsw%d: Unable to read status of " 1478 "channel %ld", vswp->instance, ldcp->ldc_id); 1479 mutex_exit(&ldcp->status_lock); 1480 return; 1481 } 1482 1483 /* 1484 * If we wish to restart the handshake on this channel, then if 1485 * the channel is UP we bring it DOWN to flush the underlying 1486 * ldc queue. 1487 */ 1488 if ((evt == VSW_CONN_RESTART) && (curr_status == LDC_UP)) 1489 (void) ldc_down(ldcp->ldc_handle); 1490 1491 if ((portp->p_hio_capable) && (portp->p_hio_enabled)) { 1492 vsw_hio_stop(vswp, ldcp); 1493 } 1494 1495 /* 1496 * re-init all the associated data structures. 1497 */ 1498 vsw_ldc_reinit(ldcp); 1499 1500 /* 1501 * Bring the channel back up (note it does no harm to 1502 * do this even if the channel is already UP, Just 1503 * becomes effectively a no-op). 1504 */ 1505 (void) ldc_up(ldcp->ldc_handle); 1506 1507 /* 1508 * Check if channel is now UP. This will only happen if 1509 * peer has also done a ldc_up(). 1510 */ 1511 if (ldc_status(ldcp->ldc_handle, &curr_status) != 0) { 1512 cmn_err(CE_WARN, "!vsw%d: Unable to read status of " 1513 "channel %ld", vswp->instance, ldcp->ldc_id); 1514 mutex_exit(&ldcp->status_lock); 1515 return; 1516 } 1517 1518 ldcp->ldc_status = curr_status; 1519 1520 /* channel UP so restart handshake by sending version info */ 1521 if (curr_status == LDC_UP) { 1522 if (ldcp->hcnt++ > vsw_num_handshakes) { 1523 cmn_err(CE_WARN, "!vsw%d: exceeded number of permitted" 1524 " handshake attempts (%d) on channel %ld", 1525 vswp->instance, ldcp->hcnt, ldcp->ldc_id); 1526 mutex_exit(&ldcp->status_lock); 1527 return; 1528 } 1529 1530 if (vsw_obp_ver_proto_workaround == B_FALSE && 1531 (ddi_taskq_dispatch(vswp->taskq_p, vsw_send_ver, ldcp, 1532 DDI_NOSLEEP) != DDI_SUCCESS)) { 1533 cmn_err(CE_WARN, "!vsw%d: Can't dispatch version task", 1534 vswp->instance); 1535 1536 /* 1537 * Don't count as valid restart attempt if couldn't 1538 * send version msg. 1539 */ 1540 if (ldcp->hcnt > 0) 1541 ldcp->hcnt--; 1542 } 1543 } 1544 1545 /* 1546 * Mark that the process is complete by clearing the flag. 1547 * 1548 * Note is it possible that the taskq dispatch above may have failed, 1549 * most likely due to memory shortage. We still clear the flag so 1550 * future attempts will at least be attempted and will hopefully 1551 * succeed. 1552 */ 1553 if ((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART)) 1554 ldcp->reset_active = 0; 1555 1556 mutex_exit(&ldcp->status_lock); 1557 1558 D1(vswp, "%s: exit", __func__); 1559 } 1560 1561 /* 1562 * returns 0 if legal for event signified by flag to have 1563 * occured at the time it did. Otherwise returns 1. 1564 */ 1565 int 1566 vsw_check_flag(vsw_ldc_t *ldcp, int dir, uint64_t flag) 1567 { 1568 vsw_t *vswp = ldcp->ldc_vswp; 1569 uint64_t state; 1570 uint64_t phase; 1571 1572 if (dir == INBOUND) 1573 state = ldcp->lane_in.lstate; 1574 else 1575 state = ldcp->lane_out.lstate; 1576 1577 phase = ldcp->hphase; 1578 1579 switch (flag) { 1580 case VSW_VER_INFO_RECV: 1581 if (phase > VSW_MILESTONE0) { 1582 DERR(vswp, "vsw_check_flag (%d): VER_INFO_RECV" 1583 " when in state %d\n", ldcp->ldc_id, phase); 1584 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1585 return (1); 1586 } 1587 break; 1588 1589 case VSW_VER_ACK_RECV: 1590 case VSW_VER_NACK_RECV: 1591 if (!(state & VSW_VER_INFO_SENT)) { 1592 DERR(vswp, "vsw_check_flag (%d): spurious VER_ACK or " 1593 "VER_NACK when in state %d\n", ldcp->ldc_id, phase); 1594 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1595 return (1); 1596 } else 1597 state &= ~VSW_VER_INFO_SENT; 1598 break; 1599 1600 case VSW_ATTR_INFO_RECV: 1601 if ((phase < VSW_MILESTONE1) || (phase >= VSW_MILESTONE2)) { 1602 DERR(vswp, "vsw_check_flag (%d): ATTR_INFO_RECV" 1603 " when in state %d\n", ldcp->ldc_id, phase); 1604 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1605 return (1); 1606 } 1607 break; 1608 1609 case VSW_ATTR_ACK_RECV: 1610 case VSW_ATTR_NACK_RECV: 1611 if (!(state & VSW_ATTR_INFO_SENT)) { 1612 DERR(vswp, "vsw_check_flag (%d): spurious ATTR_ACK" 1613 " or ATTR_NACK when in state %d\n", 1614 ldcp->ldc_id, phase); 1615 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1616 return (1); 1617 } else 1618 state &= ~VSW_ATTR_INFO_SENT; 1619 break; 1620 1621 case VSW_DRING_INFO_RECV: 1622 if (phase < VSW_MILESTONE1) { 1623 DERR(vswp, "vsw_check_flag (%d): DRING_INFO_RECV" 1624 " when in state %d\n", ldcp->ldc_id, phase); 1625 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1626 return (1); 1627 } 1628 break; 1629 1630 case VSW_DRING_ACK_RECV: 1631 case VSW_DRING_NACK_RECV: 1632 if (!(state & VSW_DRING_INFO_SENT)) { 1633 DERR(vswp, "vsw_check_flag (%d): spurious DRING_ACK " 1634 " or DRING_NACK when in state %d\n", 1635 ldcp->ldc_id, phase); 1636 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1637 return (1); 1638 } else 1639 state &= ~VSW_DRING_INFO_SENT; 1640 break; 1641 1642 case VSW_RDX_INFO_RECV: 1643 if (phase < VSW_MILESTONE3) { 1644 DERR(vswp, "vsw_check_flag (%d): RDX_INFO_RECV" 1645 " when in state %d\n", ldcp->ldc_id, phase); 1646 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1647 return (1); 1648 } 1649 break; 1650 1651 case VSW_RDX_ACK_RECV: 1652 case VSW_RDX_NACK_RECV: 1653 if (!(state & VSW_RDX_INFO_SENT)) { 1654 DERR(vswp, "vsw_check_flag (%d): spurious RDX_ACK or " 1655 "RDX_NACK when in state %d\n", ldcp->ldc_id, phase); 1656 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1657 return (1); 1658 } else 1659 state &= ~VSW_RDX_INFO_SENT; 1660 break; 1661 1662 case VSW_MCST_INFO_RECV: 1663 if (phase < VSW_MILESTONE3) { 1664 DERR(vswp, "vsw_check_flag (%d): VSW_MCST_INFO_RECV" 1665 " when in state %d\n", ldcp->ldc_id, phase); 1666 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1667 return (1); 1668 } 1669 break; 1670 1671 default: 1672 DERR(vswp, "vsw_check_flag (%lld): unknown flag (%llx)", 1673 ldcp->ldc_id, flag); 1674 return (1); 1675 } 1676 1677 if (dir == INBOUND) 1678 ldcp->lane_in.lstate = state; 1679 else 1680 ldcp->lane_out.lstate = state; 1681 1682 D1(vswp, "vsw_check_flag (chan %lld): exit", ldcp->ldc_id); 1683 1684 return (0); 1685 } 1686 1687 void 1688 vsw_next_milestone(vsw_ldc_t *ldcp) 1689 { 1690 vsw_t *vswp = ldcp->ldc_vswp; 1691 vsw_port_t *portp = ldcp->ldc_port; 1692 lane_t *lane_out = &ldcp->lane_out; 1693 lane_t *lane_in = &ldcp->lane_in; 1694 1695 D1(vswp, "%s (chan %lld): enter (phase %ld)", __func__, 1696 ldcp->ldc_id, ldcp->hphase); 1697 1698 DUMP_FLAGS(lane_in->lstate); 1699 DUMP_FLAGS(lane_out->lstate); 1700 1701 switch (ldcp->hphase) { 1702 1703 case VSW_MILESTONE0: 1704 /* 1705 * If we haven't started to handshake with our peer, 1706 * start to do so now. 1707 */ 1708 if (lane_out->lstate == 0) { 1709 D2(vswp, "%s: (chan %lld) starting handshake " 1710 "with peer", __func__, ldcp->ldc_id); 1711 vsw_process_conn_evt(ldcp, VSW_CONN_UP); 1712 } 1713 1714 /* 1715 * Only way to pass this milestone is to have successfully 1716 * negotiated version info. 1717 */ 1718 if ((lane_in->lstate & VSW_VER_ACK_SENT) && 1719 (lane_out->lstate & VSW_VER_ACK_RECV)) { 1720 1721 D2(vswp, "%s: (chan %lld) leaving milestone 0", 1722 __func__, ldcp->ldc_id); 1723 1724 vsw_set_vnet_proto_ops(ldcp); 1725 1726 /* 1727 * Next milestone is passed when attribute 1728 * information has been successfully exchanged. 1729 */ 1730 ldcp->hphase = VSW_MILESTONE1; 1731 vsw_send_attr(ldcp); 1732 1733 } 1734 break; 1735 1736 case VSW_MILESTONE1: 1737 /* 1738 * Only way to pass this milestone is to have successfully 1739 * negotiated attribute information, in both directions. 1740 */ 1741 if (!((lane_in->lstate & VSW_ATTR_ACK_SENT) && 1742 (lane_out->lstate & VSW_ATTR_ACK_RECV))) { 1743 break; 1744 } 1745 1746 ldcp->hphase = VSW_MILESTONE2; 1747 1748 /* 1749 * If the peer device has said it wishes to 1750 * use descriptor rings then we send it our ring 1751 * info, otherwise we just set up a private ring 1752 * which we use an internal buffer 1753 */ 1754 if ((VSW_VER_GTEQ(ldcp, 1, 2) && 1755 (lane_in->xfer_mode & VIO_DRING_MODE_V1_2)) || 1756 (VSW_VER_LT(ldcp, 1, 2) && 1757 (lane_in->xfer_mode == VIO_DRING_MODE_V1_0))) { 1758 vsw_send_dring_info(ldcp); 1759 break; 1760 } 1761 1762 /* 1763 * The peer doesn't operate in dring mode; we 1764 * can simply fallthru to the RDX phase from 1765 * here. 1766 */ 1767 /*FALLTHRU*/ 1768 1769 case VSW_MILESTONE2: 1770 /* 1771 * If peer has indicated in its attribute message that 1772 * it wishes to use descriptor rings then the only way 1773 * to pass this milestone is for us to have received 1774 * valid dring info. 1775 * 1776 * If peer is not using descriptor rings then just fall 1777 * through. 1778 */ 1779 if ((VSW_VER_GTEQ(ldcp, 1, 2) && 1780 (lane_in->xfer_mode & VIO_DRING_MODE_V1_2)) || 1781 (VSW_VER_LT(ldcp, 1, 2) && 1782 (lane_in->xfer_mode == 1783 VIO_DRING_MODE_V1_0))) { 1784 if (!(lane_in->lstate & VSW_DRING_ACK_SENT)) 1785 break; 1786 } 1787 1788 D2(vswp, "%s: (chan %lld) leaving milestone 2", 1789 __func__, ldcp->ldc_id); 1790 1791 ldcp->hphase = VSW_MILESTONE3; 1792 vsw_send_rdx(ldcp); 1793 break; 1794 1795 case VSW_MILESTONE3: 1796 /* 1797 * Pass this milestone when all paramaters have been 1798 * successfully exchanged and RDX sent in both directions. 1799 * 1800 * Mark the relevant lane as available to transmit data. In 1801 * RxDringData mode, lane_in is associated with transmit and 1802 * lane_out is associated with receive. It is the reverse in 1803 * TxDring mode. 1804 */ 1805 if ((lane_out->lstate & VSW_RDX_ACK_SENT) && 1806 (lane_in->lstate & VSW_RDX_ACK_RECV)) { 1807 1808 D2(vswp, "%s: (chan %lld) leaving milestone 3", 1809 __func__, ldcp->ldc_id); 1810 D2(vswp, "%s: ** handshake complete (0x%llx : " 1811 "0x%llx) **", __func__, lane_in->lstate, 1812 lane_out->lstate); 1813 if (lane_out->dring_mode == VIO_RX_DRING_DATA) { 1814 lane_in->lstate |= VSW_LANE_ACTIVE; 1815 } else { 1816 lane_out->lstate |= VSW_LANE_ACTIVE; 1817 } 1818 ldcp->hphase = VSW_MILESTONE4; 1819 ldcp->hcnt = 0; 1820 DISPLAY_STATE(); 1821 /* Start HIO if enabled and capable */ 1822 if ((portp->p_hio_enabled) && (portp->p_hio_capable)) { 1823 D2(vswp, "%s: start HybridIO setup", __func__); 1824 vsw_hio_start(vswp, ldcp); 1825 } 1826 1827 if (ldcp->pls_negotiated == B_TRUE) { 1828 /* 1829 * The vnet device has negotiated to get phys 1830 * link updates. Now that the handshake with 1831 * the vnet device is complete, send an initial 1832 * update with the current physical link state. 1833 */ 1834 vsw_send_physlink_msg(ldcp, 1835 vswp->phys_link_state); 1836 } 1837 1838 } else { 1839 D2(vswp, "%s: still in milestone 3 (0x%llx : 0x%llx)", 1840 __func__, lane_in->lstate, 1841 lane_out->lstate); 1842 } 1843 break; 1844 1845 case VSW_MILESTONE4: 1846 D2(vswp, "%s: (chan %lld) in milestone 4", __func__, 1847 ldcp->ldc_id); 1848 break; 1849 1850 default: 1851 DERR(vswp, "%s: (chan %lld) Unknown Phase %x", __func__, 1852 ldcp->ldc_id, ldcp->hphase); 1853 } 1854 1855 D1(vswp, "%s (chan %lld): exit (phase %ld)", __func__, ldcp->ldc_id, 1856 ldcp->hphase); 1857 } 1858 1859 /* 1860 * Check if major version is supported. 1861 * 1862 * Returns 0 if finds supported major number, and if necessary 1863 * adjusts the minor field. 1864 * 1865 * Returns 1 if can't match major number exactly. Sets mjor/minor 1866 * to next lowest support values, or to zero if no other values possible. 1867 */ 1868 static int 1869 vsw_supported_version(vio_ver_msg_t *vp) 1870 { 1871 int i; 1872 1873 D1(NULL, "vsw_supported_version: enter"); 1874 1875 for (i = 0; i < VSW_NUM_VER; i++) { 1876 if (vsw_versions[i].ver_major == vp->ver_major) { 1877 /* 1878 * Matching or lower major version found. Update 1879 * minor number if necessary. 1880 */ 1881 if (vp->ver_minor > vsw_versions[i].ver_minor) { 1882 D2(NULL, "%s: adjusting minor value from %d " 1883 "to %d", __func__, vp->ver_minor, 1884 vsw_versions[i].ver_minor); 1885 vp->ver_minor = vsw_versions[i].ver_minor; 1886 } 1887 1888 return (0); 1889 } 1890 1891 /* 1892 * If the message contains a higher major version number, set 1893 * the message's major/minor versions to the current values 1894 * and return false, so this message will get resent with 1895 * these values. 1896 */ 1897 if (vsw_versions[i].ver_major < vp->ver_major) { 1898 D2(NULL, "%s: adjusting major and minor " 1899 "values to %d, %d\n", 1900 __func__, vsw_versions[i].ver_major, 1901 vsw_versions[i].ver_minor); 1902 vp->ver_major = vsw_versions[i].ver_major; 1903 vp->ver_minor = vsw_versions[i].ver_minor; 1904 return (1); 1905 } 1906 } 1907 1908 /* No match was possible, zero out fields */ 1909 vp->ver_major = 0; 1910 vp->ver_minor = 0; 1911 1912 D1(NULL, "vsw_supported_version: exit"); 1913 1914 return (1); 1915 } 1916 1917 /* 1918 * Set vnet-protocol-version dependent functions based on version. 1919 */ 1920 static void 1921 vsw_set_vnet_proto_ops(vsw_ldc_t *ldcp) 1922 { 1923 vsw_t *vswp = ldcp->ldc_vswp; 1924 lane_t *lp = &ldcp->lane_out; 1925 1926 /* 1927 * Setup the appropriate dring data processing routine and any 1928 * associated thread based on the version. 1929 * 1930 * In versions < 1.6, we support only TxDring mode. In this mode, the 1931 * msg worker thread processes all types of VIO msgs (ctrl and data). 1932 * 1933 * In versions >= 1.6, we also support RxDringData mode. In this mode, 1934 * the rcv worker thread processes dring data messages (msgtype: 1935 * VIO_TYPE_DATA, subtype: VIO_SUBTYPE_INFO, env: VIO_DRING_DATA). The 1936 * rest of the data messages (including acks) and ctrl messages are 1937 * handled directly by the callback (intr) thread. 1938 * 1939 * However, for versions >= 1.6, we could still fallback to TxDring 1940 * mode. This could happen if RxDringData mode has been disabled (see 1941 * vsw_dring_mode) on this guest or on the peer guest. This info is 1942 * determined as part of attr exchange phase of handshake. Hence, we 1943 * setup these pointers for v1.6 after attr msg phase completes during 1944 * handshake. 1945 */ 1946 if (VSW_VER_GTEQ(ldcp, 1, 6)) { 1947 /* 1948 * Set data dring mode for vsw_send_attr(). We setup msg worker 1949 * thread in TxDring mode or rcv worker thread in RxDringData 1950 * mode when attr phase of handshake completes. 1951 */ 1952 if (vsw_dring_mode == VIO_RX_DRING_DATA) { 1953 lp->dring_mode = (VIO_RX_DRING_DATA | VIO_TX_DRING); 1954 } else { 1955 lp->dring_mode = VIO_TX_DRING; 1956 } 1957 } else { 1958 lp->dring_mode = VIO_TX_DRING; 1959 } 1960 1961 /* 1962 * Setup the MTU for attribute negotiation based on the version. 1963 */ 1964 if (VSW_VER_GTEQ(ldcp, 1, 4)) { 1965 /* 1966 * If the version negotiated with peer is >= 1.4(Jumbo Frame 1967 * Support), set the mtu in our attributes to max_frame_size. 1968 */ 1969 lp->mtu = vswp->max_frame_size; 1970 } else if (VSW_VER_EQ(ldcp, 1, 3)) { 1971 /* 1972 * If the version negotiated with peer is == 1.3 (Vlan Tag 1973 * Support) set the attr.mtu to ETHERMAX + VLAN_TAGSZ. 1974 */ 1975 lp->mtu = ETHERMAX + VLAN_TAGSZ; 1976 } else { 1977 vsw_port_t *portp = ldcp->ldc_port; 1978 /* 1979 * Pre-1.3 peers expect max frame size of ETHERMAX. 1980 * We can negotiate that size with those peers provided only 1981 * pvid is defined for our peer and there are no vids. Then we 1982 * can send/recv only untagged frames of max size ETHERMAX. 1983 * Note that pvid of the peer can be different, as vsw has to 1984 * serve the vnet in that vlan even if itself is not assigned 1985 * to that vlan. 1986 */ 1987 if (portp->nvids == 0) { 1988 lp->mtu = ETHERMAX; 1989 } 1990 } 1991 1992 /* 1993 * Setup version dependent data processing functions. 1994 */ 1995 if (VSW_VER_GTEQ(ldcp, 1, 2)) { 1996 /* Versions >= 1.2 */ 1997 1998 if (VSW_PRI_ETH_DEFINED(vswp)) { 1999 /* 2000 * enable priority routines and pkt mode only if 2001 * at least one pri-eth-type is specified in MD. 2002 */ 2003 ldcp->tx = vsw_ldctx_pri; 2004 ldcp->rx_pktdata = vsw_process_pkt_data; 2005 2006 /* set xfer mode for vsw_send_attr() */ 2007 lp->xfer_mode = VIO_PKT_MODE | VIO_DRING_MODE_V1_2; 2008 } else { 2009 /* no priority eth types defined in MD */ 2010 2011 ldcp->tx = vsw_ldctx; 2012 ldcp->rx_pktdata = vsw_process_pkt_data_nop; 2013 2014 /* set xfer mode for vsw_send_attr() */ 2015 lp->xfer_mode = VIO_DRING_MODE_V1_2; 2016 } 2017 2018 } else { 2019 /* Versions prior to 1.2 */ 2020 2021 vsw_reset_vnet_proto_ops(ldcp); 2022 } 2023 } 2024 2025 /* 2026 * Reset vnet-protocol-version dependent functions to v1.0. 2027 */ 2028 static void 2029 vsw_reset_vnet_proto_ops(vsw_ldc_t *ldcp) 2030 { 2031 lane_t *lp = &ldcp->lane_out; 2032 2033 ldcp->tx = vsw_ldctx; 2034 ldcp->rx_pktdata = vsw_process_pkt_data_nop; 2035 2036 /* set xfer mode for vsw_send_attr() */ 2037 lp->xfer_mode = VIO_DRING_MODE_V1_0; 2038 } 2039 2040 static void 2041 vsw_process_evt_read(vsw_ldc_t *ldcp) 2042 { 2043 if (ldcp->msg_thread != NULL) { 2044 /* 2045 * TxDring mode; wakeup message worker 2046 * thread to process the VIO messages. 2047 */ 2048 mutex_exit(&ldcp->ldc_cblock); 2049 mutex_enter(&ldcp->msg_thr_lock); 2050 if (!(ldcp->msg_thr_flags & VSW_WTHR_DATARCVD)) { 2051 ldcp->msg_thr_flags |= VSW_WTHR_DATARCVD; 2052 cv_signal(&ldcp->msg_thr_cv); 2053 } 2054 mutex_exit(&ldcp->msg_thr_lock); 2055 mutex_enter(&ldcp->ldc_cblock); 2056 } else { 2057 /* 2058 * We invoke vsw_process_pkt() in the context of the LDC 2059 * callback (vsw_ldc_cb()) during handshake, until the dring 2060 * mode is negotiated. After the dring mode is negotiated, the 2061 * msgs are processed by the msg worker thread (above case) if 2062 * the dring mode is TxDring. Otherwise (in RxDringData mode) 2063 * we continue to process the msgs directly in the callback 2064 * context. 2065 */ 2066 vsw_process_pkt(ldcp); 2067 } 2068 } 2069 2070 /* 2071 * Main routine for processing messages received over LDC. 2072 */ 2073 void 2074 vsw_process_pkt(void *arg) 2075 { 2076 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 2077 vsw_t *vswp = ldcp->ldc_vswp; 2078 size_t msglen; 2079 vio_msg_tag_t *tagp; 2080 uint64_t *ldcmsg; 2081 int rv = 0; 2082 2083 2084 D1(vswp, "%s enter: ldcid (%lld)\n", __func__, ldcp->ldc_id); 2085 2086 ASSERT(MUTEX_HELD(&ldcp->ldc_cblock)); 2087 2088 ldcmsg = ldcp->ldcmsg; 2089 /* 2090 * If channel is up read messages until channel is empty. 2091 */ 2092 do { 2093 msglen = ldcp->msglen; 2094 rv = ldc_read(ldcp->ldc_handle, (caddr_t)ldcmsg, &msglen); 2095 2096 if (rv != 0) { 2097 DERR(vswp, "%s :ldc_read err id(%lld) rv(%d) len(%d)\n", 2098 __func__, ldcp->ldc_id, rv, msglen); 2099 } 2100 2101 /* channel has been reset */ 2102 if (rv == ECONNRESET) { 2103 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 2104 break; 2105 } 2106 2107 if (msglen == 0) { 2108 D2(vswp, "%s: ldc_read id(%lld) NODATA", __func__, 2109 ldcp->ldc_id); 2110 break; 2111 } 2112 2113 D2(vswp, "%s: ldc_read id(%lld): msglen(%d)", __func__, 2114 ldcp->ldc_id, msglen); 2115 2116 /* 2117 * Figure out what sort of packet we have gotten by 2118 * examining the msg tag, and then switch it appropriately. 2119 */ 2120 tagp = (vio_msg_tag_t *)ldcmsg; 2121 2122 switch (tagp->vio_msgtype) { 2123 case VIO_TYPE_CTRL: 2124 vsw_dispatch_ctrl_task(ldcp, ldcmsg, tagp, msglen); 2125 break; 2126 case VIO_TYPE_DATA: 2127 vsw_process_data_pkt(ldcp, ldcmsg, tagp, msglen); 2128 break; 2129 case VIO_TYPE_ERR: 2130 vsw_process_err_pkt(ldcp, ldcmsg, tagp); 2131 break; 2132 default: 2133 DERR(vswp, "%s: Unknown tag(%lx) ", __func__, 2134 "id(%lx)\n", tagp->vio_msgtype, ldcp->ldc_id); 2135 break; 2136 } 2137 } while (msglen); 2138 2139 D1(vswp, "%s exit: ldcid (%lld)\n", __func__, ldcp->ldc_id); 2140 } 2141 2142 /* 2143 * Dispatch a task to process a VIO control message. 2144 */ 2145 static void 2146 vsw_dispatch_ctrl_task(vsw_ldc_t *ldcp, void *cpkt, vio_msg_tag_t *tagp, 2147 int msglen) 2148 { 2149 vsw_ctrl_task_t *ctaskp = NULL; 2150 vsw_port_t *port = ldcp->ldc_port; 2151 vsw_t *vswp = port->p_vswp; 2152 2153 D1(vswp, "%s: enter", __func__); 2154 2155 /* 2156 * We need to handle RDX ACK messages in-band as once they 2157 * are exchanged it is possible that we will get an 2158 * immediate (legitimate) data packet. 2159 */ 2160 if ((tagp->vio_subtype_env == VIO_RDX) && 2161 (tagp->vio_subtype == VIO_SUBTYPE_ACK)) { 2162 2163 if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_ACK_RECV)) 2164 return; 2165 2166 ldcp->lane_in.lstate |= VSW_RDX_ACK_RECV; 2167 D2(vswp, "%s (%ld) handling RDX_ACK in place " 2168 "(ostate 0x%llx : hphase %d)", __func__, 2169 ldcp->ldc_id, ldcp->lane_in.lstate, ldcp->hphase); 2170 vsw_next_milestone(ldcp); 2171 return; 2172 } 2173 2174 ctaskp = kmem_alloc(sizeof (vsw_ctrl_task_t), KM_NOSLEEP); 2175 2176 if (ctaskp == NULL) { 2177 DERR(vswp, "%s: unable to alloc space for ctrl msg", __func__); 2178 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2179 return; 2180 } 2181 2182 ctaskp->ldcp = ldcp; 2183 bcopy((def_msg_t *)cpkt, &ctaskp->pktp, msglen); 2184 ctaskp->hss_id = ldcp->hss_id; 2185 2186 /* 2187 * Dispatch task to processing taskq if port is not in 2188 * the process of being detached. 2189 */ 2190 mutex_enter(&port->state_lock); 2191 if (port->state == VSW_PORT_INIT) { 2192 if ((vswp->taskq_p == NULL) || 2193 (ddi_taskq_dispatch(vswp->taskq_p, vsw_process_ctrl_pkt, 2194 ctaskp, DDI_NOSLEEP) != DDI_SUCCESS)) { 2195 mutex_exit(&port->state_lock); 2196 DERR(vswp, "%s: unable to dispatch task to taskq", 2197 __func__); 2198 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2199 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2200 return; 2201 } 2202 } else { 2203 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2204 DWARN(vswp, "%s: port %d detaching, not dispatching " 2205 "task", __func__, port->p_instance); 2206 } 2207 2208 mutex_exit(&port->state_lock); 2209 2210 D2(vswp, "%s: dispatched task to taskq for chan %d", __func__, 2211 ldcp->ldc_id); 2212 D1(vswp, "%s: exit", __func__); 2213 } 2214 2215 /* 2216 * Process a VIO ctrl message. Invoked from taskq. 2217 */ 2218 static void 2219 vsw_process_ctrl_pkt(void *arg) 2220 { 2221 vsw_ctrl_task_t *ctaskp = (vsw_ctrl_task_t *)arg; 2222 vsw_ldc_t *ldcp = ctaskp->ldcp; 2223 vsw_t *vswp = ldcp->ldc_vswp; 2224 vio_msg_tag_t tag; 2225 uint16_t env; 2226 2227 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 2228 2229 bcopy(&ctaskp->pktp, &tag, sizeof (vio_msg_tag_t)); 2230 env = tag.vio_subtype_env; 2231 2232 /* stale pkt check */ 2233 if (ctaskp->hss_id < ldcp->hss_id) { 2234 DWARN(vswp, "%s: discarding stale packet belonging to earlier" 2235 " (%ld) handshake session", __func__, ctaskp->hss_id); 2236 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2237 return; 2238 } 2239 2240 /* session id check */ 2241 if (ldcp->session_status & VSW_PEER_SESSION) { 2242 if (ldcp->peer_session != tag.vio_sid) { 2243 DERR(vswp, "%s (chan %d): invalid session id (%llx)", 2244 __func__, ldcp->ldc_id, tag.vio_sid); 2245 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2246 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2247 return; 2248 } 2249 } 2250 2251 /* 2252 * Switch on vio_subtype envelope, then let lower routines 2253 * decide if its an INFO, ACK or NACK packet. 2254 */ 2255 switch (env) { 2256 case VIO_VER_INFO: 2257 vsw_process_ctrl_ver_pkt(ldcp, &ctaskp->pktp); 2258 break; 2259 case VIO_DRING_REG: 2260 vsw_process_ctrl_dring_reg_pkt(ldcp, &ctaskp->pktp); 2261 break; 2262 case VIO_DRING_UNREG: 2263 vsw_process_ctrl_dring_unreg_pkt(ldcp, &ctaskp->pktp); 2264 break; 2265 case VIO_ATTR_INFO: 2266 vsw_process_ctrl_attr_pkt(ldcp, &ctaskp->pktp); 2267 break; 2268 case VNET_MCAST_INFO: 2269 vsw_process_ctrl_mcst_pkt(ldcp, &ctaskp->pktp); 2270 break; 2271 case VIO_RDX: 2272 vsw_process_ctrl_rdx_pkt(ldcp, &ctaskp->pktp); 2273 break; 2274 case VIO_DDS_INFO: 2275 vsw_process_dds_msg(vswp, ldcp, &ctaskp->pktp); 2276 break; 2277 2278 case VNET_PHYSLINK_INFO: 2279 vsw_process_physlink_msg(ldcp, &ctaskp->pktp); 2280 break; 2281 default: 2282 DERR(vswp, "%s: unknown vio_subtype_env (%x)\n", __func__, env); 2283 } 2284 2285 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2286 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 2287 } 2288 2289 /* 2290 * Version negotiation. We can end up here either because our peer 2291 * has responded to a handshake message we have sent it, or our peer 2292 * has initiated a handshake with us. If its the former then can only 2293 * be ACK or NACK, if its the later can only be INFO. 2294 * 2295 * If its an ACK we move to the next stage of the handshake, namely 2296 * attribute exchange. If its a NACK we see if we can specify another 2297 * version, if we can't we stop. 2298 * 2299 * If it is an INFO we reset all params associated with communication 2300 * in that direction over this channel (remember connection is 2301 * essentially 2 independent simplex channels). 2302 */ 2303 void 2304 vsw_process_ctrl_ver_pkt(vsw_ldc_t *ldcp, void *pkt) 2305 { 2306 vio_ver_msg_t *ver_pkt; 2307 vsw_t *vswp = ldcp->ldc_vswp; 2308 2309 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 2310 2311 /* 2312 * We know this is a ctrl/version packet so 2313 * cast it into the correct structure. 2314 */ 2315 ver_pkt = (vio_ver_msg_t *)pkt; 2316 2317 switch (ver_pkt->tag.vio_subtype) { 2318 case VIO_SUBTYPE_INFO: 2319 D2(vswp, "vsw_process_ctrl_ver_pkt: VIO_SUBTYPE_INFO\n"); 2320 2321 /* 2322 * Record the session id, which we will use from now 2323 * until we see another VER_INFO msg. Even then the 2324 * session id in most cases will be unchanged, execpt 2325 * if channel was reset. 2326 */ 2327 if ((ldcp->session_status & VSW_PEER_SESSION) && 2328 (ldcp->peer_session != ver_pkt->tag.vio_sid)) { 2329 DERR(vswp, "%s: updating session id for chan %lld " 2330 "from %llx to %llx", __func__, ldcp->ldc_id, 2331 ldcp->peer_session, ver_pkt->tag.vio_sid); 2332 } 2333 2334 ldcp->peer_session = ver_pkt->tag.vio_sid; 2335 ldcp->session_status |= VSW_PEER_SESSION; 2336 2337 /* Legal message at this time ? */ 2338 if (vsw_check_flag(ldcp, INBOUND, VSW_VER_INFO_RECV)) 2339 return; 2340 2341 /* 2342 * First check the device class. Currently only expect 2343 * to be talking to a network device. In the future may 2344 * also talk to another switch. 2345 */ 2346 if (ver_pkt->dev_class != VDEV_NETWORK) { 2347 DERR(vswp, "%s: illegal device class %d", __func__, 2348 ver_pkt->dev_class); 2349 2350 ver_pkt->tag.vio_sid = ldcp->local_session; 2351 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2352 2353 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 2354 2355 (void) vsw_send_msg(ldcp, (void *)ver_pkt, 2356 sizeof (vio_ver_msg_t), B_TRUE); 2357 2358 ldcp->lane_in.lstate |= VSW_VER_NACK_SENT; 2359 vsw_next_milestone(ldcp); 2360 return; 2361 } else { 2362 ldcp->dev_class = ver_pkt->dev_class; 2363 } 2364 2365 /* 2366 * Now check the version. 2367 */ 2368 if (vsw_supported_version(ver_pkt) == 0) { 2369 /* 2370 * Support this major version and possibly 2371 * adjusted minor version. 2372 */ 2373 2374 D2(vswp, "%s: accepted ver %d:%d", __func__, 2375 ver_pkt->ver_major, ver_pkt->ver_minor); 2376 2377 /* Store accepted values */ 2378 ldcp->lane_in.ver_major = ver_pkt->ver_major; 2379 ldcp->lane_in.ver_minor = ver_pkt->ver_minor; 2380 2381 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 2382 2383 ldcp->lane_in.lstate |= VSW_VER_ACK_SENT; 2384 2385 if (vsw_obp_ver_proto_workaround == B_TRUE) { 2386 /* 2387 * Send a version info message 2388 * using the accepted version that 2389 * we are about to ack. Also note that 2390 * we send our ver info before we ack. 2391 * Otherwise, as soon as receiving the 2392 * ack, obp sends attr info msg, which 2393 * breaks vsw_check_flag() invoked 2394 * from vsw_process_ctrl_attr_pkt(); 2395 * as we also need VSW_VER_ACK_RECV to 2396 * be set in lane_out.lstate, before 2397 * we can receive attr info. 2398 */ 2399 vsw_send_ver(ldcp); 2400 } 2401 } else { 2402 /* 2403 * NACK back with the next lower major/minor 2404 * pairing we support (if don't suuport any more 2405 * versions then they will be set to zero. 2406 */ 2407 2408 D2(vswp, "%s: replying with ver %d:%d", __func__, 2409 ver_pkt->ver_major, ver_pkt->ver_minor); 2410 2411 /* Store updated values */ 2412 ldcp->lane_in.ver_major = ver_pkt->ver_major; 2413 ldcp->lane_in.ver_minor = ver_pkt->ver_minor; 2414 2415 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2416 2417 ldcp->lane_in.lstate |= VSW_VER_NACK_SENT; 2418 } 2419 2420 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 2421 ver_pkt->tag.vio_sid = ldcp->local_session; 2422 (void) vsw_send_msg(ldcp, (void *)ver_pkt, 2423 sizeof (vio_ver_msg_t), B_TRUE); 2424 2425 vsw_next_milestone(ldcp); 2426 break; 2427 2428 case VIO_SUBTYPE_ACK: 2429 D2(vswp, "%s: VIO_SUBTYPE_ACK\n", __func__); 2430 2431 if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_ACK_RECV)) 2432 return; 2433 2434 /* Store updated values */ 2435 ldcp->lane_out.ver_major = ver_pkt->ver_major; 2436 ldcp->lane_out.ver_minor = ver_pkt->ver_minor; 2437 2438 ldcp->lane_out.lstate |= VSW_VER_ACK_RECV; 2439 vsw_next_milestone(ldcp); 2440 2441 break; 2442 2443 case VIO_SUBTYPE_NACK: 2444 D2(vswp, "%s: VIO_SUBTYPE_NACK\n", __func__); 2445 2446 if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_NACK_RECV)) 2447 return; 2448 2449 /* 2450 * If our peer sent us a NACK with the ver fields set to 2451 * zero then there is nothing more we can do. Otherwise see 2452 * if we support either the version suggested, or a lesser 2453 * one. 2454 */ 2455 if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) { 2456 DERR(vswp, "%s: peer unable to negotiate any " 2457 "further.", __func__); 2458 ldcp->lane_out.lstate |= VSW_VER_NACK_RECV; 2459 vsw_next_milestone(ldcp); 2460 return; 2461 } 2462 2463 /* 2464 * Check to see if we support this major version or 2465 * a lower one. If we don't then maj/min will be set 2466 * to zero. 2467 */ 2468 (void) vsw_supported_version(ver_pkt); 2469 if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) { 2470 /* Nothing more we can do */ 2471 DERR(vswp, "%s: version negotiation failed.\n", 2472 __func__); 2473 ldcp->lane_out.lstate |= VSW_VER_NACK_RECV; 2474 vsw_next_milestone(ldcp); 2475 } else { 2476 /* found a supported major version */ 2477 ldcp->lane_out.ver_major = ver_pkt->ver_major; 2478 ldcp->lane_out.ver_minor = ver_pkt->ver_minor; 2479 2480 D2(vswp, "%s: resending with updated values (%x, %x)", 2481 __func__, ver_pkt->ver_major, ver_pkt->ver_minor); 2482 2483 ldcp->lane_out.lstate |= VSW_VER_INFO_SENT; 2484 ver_pkt->tag.vio_sid = ldcp->local_session; 2485 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO; 2486 2487 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 2488 2489 (void) vsw_send_msg(ldcp, (void *)ver_pkt, 2490 sizeof (vio_ver_msg_t), B_TRUE); 2491 2492 vsw_next_milestone(ldcp); 2493 2494 } 2495 break; 2496 2497 default: 2498 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 2499 ver_pkt->tag.vio_subtype); 2500 } 2501 2502 D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id); 2503 } 2504 2505 static int 2506 vsw_process_attr_info(vsw_ldc_t *ldcp, vnet_attr_msg_t *msg) 2507 { 2508 vsw_t *vswp = ldcp->ldc_vswp; 2509 vsw_port_t *port = ldcp->ldc_port; 2510 struct ether_addr ea; 2511 uint64_t macaddr = 0; 2512 lane_t *lane_out = &ldcp->lane_out; 2513 lane_t *lane_in = &ldcp->lane_in; 2514 uint32_t mtu; 2515 int i; 2516 uint8_t dring_mode; 2517 2518 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 2519 2520 if (vsw_check_flag(ldcp, INBOUND, VSW_ATTR_INFO_RECV)) { 2521 return (1); 2522 } 2523 2524 if ((msg->xfer_mode != VIO_DESC_MODE) && 2525 (msg->xfer_mode != lane_out->xfer_mode)) { 2526 D2(NULL, "%s: unknown mode %x\n", __func__, msg->xfer_mode); 2527 return (1); 2528 } 2529 2530 /* Only support MAC addresses at moment. */ 2531 if ((msg->addr_type != ADDR_TYPE_MAC) || (msg->addr == 0)) { 2532 D2(NULL, "%s: invalid addr_type %x, or address 0x%llx\n", 2533 __func__, msg->addr_type, msg->addr); 2534 return (1); 2535 } 2536 2537 /* 2538 * MAC address supplied by device should match that stored 2539 * in the vsw-port OBP node. Need to decide what to do if they 2540 * don't match, for the moment just warn but don't fail. 2541 */ 2542 vnet_macaddr_ultostr(msg->addr, ea.ether_addr_octet); 2543 if (ether_cmp(&ea, &port->p_macaddr) != 0) { 2544 DERR(NULL, "%s: device supplied address " 2545 "0x%llx doesn't match node address 0x%llx\n", 2546 __func__, msg->addr, port->p_macaddr); 2547 } 2548 2549 /* 2550 * Ack freq only makes sense in pkt mode, in shared 2551 * mode the ring descriptors say whether or not to 2552 * send back an ACK. 2553 */ 2554 if ((VSW_VER_GTEQ(ldcp, 1, 2) && 2555 (msg->xfer_mode & VIO_DRING_MODE_V1_2)) || 2556 (VSW_VER_LT(ldcp, 1, 2) && 2557 (msg->xfer_mode == VIO_DRING_MODE_V1_0))) { 2558 if (msg->ack_freq > 0) { 2559 D2(NULL, "%s: non zero ack freq in SHM mode\n", 2560 __func__); 2561 return (1); 2562 } 2563 } 2564 2565 /* 2566 * Process dring mode attribute. 2567 */ 2568 if (VSW_VER_GTEQ(ldcp, 1, 6)) { 2569 /* 2570 * Versions >= 1.6: 2571 * Though we are operating in v1.6 mode, it is possible that 2572 * RxDringData mode has been disabled either on this guest or 2573 * on the peer guest. If so, we revert to pre v1.6 behavior of 2574 * TxDring mode. But this must be agreed upon in both 2575 * directions of attr exchange. We first determine the mode 2576 * that can be negotiated. 2577 */ 2578 if ((msg->options & VIO_RX_DRING_DATA) != 0 && 2579 vsw_dring_mode == VIO_RX_DRING_DATA) { 2580 /* 2581 * The peer is capable of handling RxDringData AND we 2582 * are also capable of it; we enable RxDringData mode 2583 * on this channel. 2584 */ 2585 dring_mode = VIO_RX_DRING_DATA; 2586 } else if ((msg->options & VIO_TX_DRING) != 0) { 2587 /* 2588 * If the peer is capable of TxDring mode, we 2589 * negotiate TxDring mode on this channel. 2590 */ 2591 dring_mode = VIO_TX_DRING; 2592 } else { 2593 /* 2594 * We support only VIO_TX_DRING and VIO_RX_DRING_DATA 2595 * modes. We don't support VIO_RX_DRING mode. 2596 */ 2597 return (1); 2598 } 2599 2600 /* 2601 * If we have received an ack for the attr info that we sent, 2602 * then check if the dring mode matches what the peer had ack'd 2603 * (saved in lane_out). If they don't match, we fail the 2604 * handshake. 2605 */ 2606 if (lane_out->lstate & VSW_ATTR_ACK_RECV) { 2607 if (msg->options != lane_out->dring_mode) { 2608 /* send NACK */ 2609 return (1); 2610 } 2611 } else { 2612 /* 2613 * Save the negotiated dring mode in our attr 2614 * parameters, so it gets sent in the attr info from us 2615 * to the peer. 2616 */ 2617 lane_out->dring_mode = dring_mode; 2618 } 2619 2620 /* save the negotiated dring mode in the msg to be replied */ 2621 msg->options = dring_mode; 2622 } 2623 2624 /* 2625 * Process MTU attribute. 2626 */ 2627 if (VSW_VER_GTEQ(ldcp, 1, 4)) { 2628 /* 2629 * Versions >= 1.4: 2630 * Validate mtu of the peer is at least ETHERMAX. Then, the mtu 2631 * is negotiated down to the minimum of our mtu and peer's mtu. 2632 */ 2633 if (msg->mtu < ETHERMAX) { 2634 return (1); 2635 } 2636 2637 mtu = MIN(msg->mtu, vswp->max_frame_size); 2638 2639 /* 2640 * If we have received an ack for the attr info 2641 * that we sent, then check if the mtu computed 2642 * above matches the mtu that the peer had ack'd 2643 * (saved in local hparams). If they don't 2644 * match, we fail the handshake. 2645 */ 2646 if (lane_out->lstate & VSW_ATTR_ACK_RECV) { 2647 if (mtu != lane_out->mtu) { 2648 /* send NACK */ 2649 return (1); 2650 } 2651 } else { 2652 /* 2653 * Save the mtu computed above in our 2654 * attr parameters, so it gets sent in 2655 * the attr info from us to the peer. 2656 */ 2657 lane_out->mtu = mtu; 2658 } 2659 2660 /* save the MIN mtu in the msg to be replied */ 2661 msg->mtu = mtu; 2662 } else { 2663 /* Versions < 1.4, mtu must match */ 2664 if (msg->mtu != lane_out->mtu) { 2665 D2(NULL, "%s: invalid MTU (0x%llx)\n", 2666 __func__, msg->mtu); 2667 return (1); 2668 } 2669 } 2670 2671 /* 2672 * Otherwise store attributes for this lane and update 2673 * lane state. 2674 */ 2675 lane_in->mtu = msg->mtu; 2676 lane_in->addr = msg->addr; 2677 lane_in->addr_type = msg->addr_type; 2678 lane_in->xfer_mode = msg->xfer_mode; 2679 lane_in->ack_freq = msg->ack_freq; 2680 lane_in->physlink_update = msg->physlink_update; 2681 lane_in->dring_mode = msg->options; 2682 2683 /* 2684 * Check if the client has requested physlink state updates. 2685 * If there is a physical device bound to this vswitch (L2 2686 * mode), set the ack bits to indicate it is supported. 2687 * Otherwise, set the nack bits. 2688 */ 2689 if (VSW_VER_GTEQ(ldcp, 1, 5)) { /* Protocol ver >= 1.5 */ 2690 2691 /* Does the vnet need phys link state updates ? */ 2692 if ((lane_in->physlink_update & 2693 PHYSLINK_UPDATE_STATE_MASK) == 2694 PHYSLINK_UPDATE_STATE) { 2695 2696 if (vswp->smode & VSW_LAYER2) { 2697 /* is a net-dev assigned to us ? */ 2698 msg->physlink_update = 2699 PHYSLINK_UPDATE_STATE_ACK; 2700 ldcp->pls_negotiated = B_TRUE; 2701 } else { 2702 /* not in L2 mode */ 2703 msg->physlink_update = 2704 PHYSLINK_UPDATE_STATE_NACK; 2705 ldcp->pls_negotiated = B_FALSE; 2706 } 2707 2708 } else { 2709 msg->physlink_update = 2710 PHYSLINK_UPDATE_NONE; 2711 ldcp->pls_negotiated = B_FALSE; 2712 } 2713 2714 } else { 2715 /* 2716 * physlink_update bits are ignored 2717 * if set by clients < v1.5 protocol. 2718 */ 2719 msg->physlink_update = PHYSLINK_UPDATE_NONE; 2720 ldcp->pls_negotiated = B_FALSE; 2721 } 2722 2723 macaddr = lane_in->addr; 2724 for (i = ETHERADDRL - 1; i >= 0; i--) { 2725 port->p_macaddr.ether_addr_octet[i] = macaddr & 0xFF; 2726 macaddr >>= 8; 2727 } 2728 2729 /* 2730 * Setup device specific xmit routines. Note this could be changed 2731 * further in vsw_send_dring_info() for versions >= 1.6 if operating in 2732 * RxDringData mode. 2733 */ 2734 mutex_enter(&port->tx_lock); 2735 2736 if ((VSW_VER_GTEQ(ldcp, 1, 2) && 2737 (lane_in->xfer_mode & VIO_DRING_MODE_V1_2)) || 2738 (VSW_VER_LT(ldcp, 1, 2) && 2739 (lane_in->xfer_mode == VIO_DRING_MODE_V1_0))) { 2740 D2(vswp, "%s: mode = VIO_DRING_MODE", __func__); 2741 port->transmit = vsw_dringsend; 2742 } else if (lane_in->xfer_mode == VIO_DESC_MODE) { 2743 D2(vswp, "%s: mode = VIO_DESC_MODE", __func__); 2744 vsw_create_privring(ldcp); 2745 port->transmit = vsw_descrsend; 2746 lane_out->xfer_mode = VIO_DESC_MODE; 2747 } 2748 2749 /* 2750 * HybridIO is supported only vnet, not by OBP. 2751 * So, set hio_capable to true only when in DRING mode. 2752 */ 2753 if (VSW_VER_GTEQ(ldcp, 1, 3) && 2754 (lane_in->xfer_mode != VIO_DESC_MODE)) { 2755 (void) atomic_swap_32(&port->p_hio_capable, B_TRUE); 2756 } else { 2757 (void) atomic_swap_32(&port->p_hio_capable, B_FALSE); 2758 } 2759 2760 mutex_exit(&port->tx_lock); 2761 2762 return (0); 2763 } 2764 2765 static int 2766 vsw_process_attr_ack(vsw_ldc_t *ldcp, vnet_attr_msg_t *msg) 2767 { 2768 vsw_t *vswp = ldcp->ldc_vswp; 2769 lane_t *lane_out = &ldcp->lane_out; 2770 lane_t *lane_in = &ldcp->lane_in; 2771 2772 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 2773 2774 if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_ACK_RECV)) { 2775 return (1); 2776 } 2777 2778 /* 2779 * Process dring mode attribute. 2780 */ 2781 if (VSW_VER_GTEQ(ldcp, 1, 6)) { 2782 /* 2783 * Versions >= 1.6: 2784 * The ack msg sent by the peer contains the negotiated dring 2785 * mode between our capability (that we had sent in our attr 2786 * info) and the peer's capability. 2787 */ 2788 if (lane_in->lstate & VSW_ATTR_ACK_SENT) { 2789 /* 2790 * If we have sent an ack for the attr info msg from 2791 * the peer, check if the dring mode that was 2792 * negotiated then (saved in lane_out) matches the 2793 * mode that the peer has ack'd. If they don't match, 2794 * we fail the handshake. 2795 */ 2796 if (lane_out->dring_mode != msg->options) { 2797 return (1); 2798 } 2799 } else { 2800 if ((msg->options & lane_out->dring_mode) == 0) { 2801 /* 2802 * Peer ack'd with a mode that we don't 2803 * support; we fail the handshake. 2804 */ 2805 return (1); 2806 } 2807 if ((msg->options & (VIO_TX_DRING|VIO_RX_DRING_DATA)) 2808 == (VIO_TX_DRING|VIO_RX_DRING_DATA)) { 2809 /* 2810 * Peer must ack with only one negotiated mode. 2811 * Otherwise fail handshake. 2812 */ 2813 return (1); 2814 } 2815 2816 /* 2817 * Save the negotiated mode, so we can validate it when 2818 * we receive attr info from the peer. 2819 */ 2820 lane_out->dring_mode = msg->options; 2821 } 2822 } 2823 2824 /* 2825 * Process MTU attribute. 2826 */ 2827 if (VSW_VER_GTEQ(ldcp, 1, 4)) { 2828 /* 2829 * Versions >= 1.4: 2830 * The ack msg sent by the peer contains the minimum of 2831 * our mtu (that we had sent in our attr info) and the 2832 * peer's mtu. 2833 * 2834 * If we have sent an ack for the attr info msg from 2835 * the peer, check if the mtu that was computed then 2836 * (saved in lane_out params) matches the mtu that the 2837 * peer has ack'd. If they don't match, we fail the 2838 * handshake. 2839 */ 2840 if (lane_in->lstate & VSW_ATTR_ACK_SENT) { 2841 if (lane_out->mtu != msg->mtu) { 2842 return (1); 2843 } 2844 } else { 2845 /* 2846 * If the mtu ack'd by the peer is > our mtu 2847 * fail handshake. Otherwise, save the mtu, so 2848 * we can validate it when we receive attr info 2849 * from our peer. 2850 */ 2851 if (msg->mtu <= lane_out->mtu) { 2852 lane_out->mtu = msg->mtu; 2853 } else { 2854 return (1); 2855 } 2856 } 2857 } 2858 2859 return (0); 2860 } 2861 2862 /* 2863 * Process an attribute packet. We can end up here either because our peer 2864 * has ACK/NACK'ed back to an earlier ATTR msg we had sent it, or our 2865 * peer has sent us an attribute INFO message 2866 * 2867 * If its an ACK we then move to the next stage of the handshake which 2868 * is to send our descriptor ring info to our peer. If its a NACK then 2869 * there is nothing more we can (currently) do. 2870 * 2871 * If we get a valid/acceptable INFO packet (and we have already negotiated 2872 * a version) we ACK back and set channel state to ATTR_RECV, otherwise we 2873 * NACK back and reset channel state to INACTIV. 2874 * 2875 * FUTURE: in time we will probably negotiate over attributes, but for 2876 * the moment unacceptable attributes are regarded as a fatal error. 2877 * 2878 */ 2879 void 2880 vsw_process_ctrl_attr_pkt(vsw_ldc_t *ldcp, void *pkt) 2881 { 2882 vnet_attr_msg_t *attr_pkt; 2883 vsw_t *vswp = ldcp->ldc_vswp; 2884 lane_t *lane_out = &ldcp->lane_out; 2885 lane_t *lane_in = &ldcp->lane_in; 2886 int rv; 2887 2888 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 2889 2890 /* 2891 * We know this is a ctrl/attr packet so 2892 * cast it into the correct structure. 2893 */ 2894 attr_pkt = (vnet_attr_msg_t *)pkt; 2895 2896 switch (attr_pkt->tag.vio_subtype) { 2897 case VIO_SUBTYPE_INFO: 2898 2899 rv = vsw_process_attr_info(ldcp, attr_pkt); 2900 if (rv != 0) { 2901 vsw_free_lane_resources(ldcp, INBOUND); 2902 attr_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2903 ldcp->lane_in.lstate |= VSW_ATTR_NACK_SENT; 2904 } else { 2905 attr_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 2906 lane_in->lstate |= VSW_ATTR_ACK_SENT; 2907 } 2908 attr_pkt->tag.vio_sid = ldcp->local_session; 2909 DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt); 2910 (void) vsw_send_msg(ldcp, (void *)attr_pkt, 2911 sizeof (vnet_attr_msg_t), B_TRUE); 2912 vsw_next_milestone(ldcp); 2913 break; 2914 2915 case VIO_SUBTYPE_ACK: 2916 2917 rv = vsw_process_attr_ack(ldcp, attr_pkt); 2918 if (rv != 0) { 2919 return; 2920 } 2921 lane_out->lstate |= VSW_ATTR_ACK_RECV; 2922 vsw_next_milestone(ldcp); 2923 break; 2924 2925 case VIO_SUBTYPE_NACK: 2926 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 2927 2928 if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_NACK_RECV)) 2929 return; 2930 2931 lane_out->lstate |= VSW_ATTR_NACK_RECV; 2932 vsw_next_milestone(ldcp); 2933 break; 2934 2935 default: 2936 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 2937 attr_pkt->tag.vio_subtype); 2938 } 2939 2940 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 2941 } 2942 2943 static int 2944 vsw_process_dring_reg_info(vsw_ldc_t *ldcp, vio_msg_tag_t *tagp) 2945 { 2946 int rv; 2947 vsw_t *vswp = ldcp->ldc_vswp; 2948 lane_t *lp = &ldcp->lane_out; 2949 dring_info_t *dp = NULL; 2950 2951 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 2952 2953 rv = vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV); 2954 if (rv != 0) { 2955 return (1); 2956 } 2957 2958 if (VSW_VER_GTEQ(ldcp, 1, 6) && 2959 (lp->dring_mode != ((vio_dring_reg_msg_t *)tagp)->options)) { 2960 /* 2961 * The earlier version of Solaris vnet driver doesn't set the 2962 * option (VIO_TX_DRING in its case) correctly in its dring reg 2963 * message. We workaround that here by doing the check only 2964 * for versions >= v1.6. 2965 */ 2966 DWARN(vswp, "%s(%lld): Rcvd dring reg option (%d), " 2967 "negotiated mode (%d)\n", __func__, ldcp->ldc_id, 2968 ((vio_dring_reg_msg_t *)tagp)->options, lp->dring_mode); 2969 return (1); 2970 } 2971 2972 /* 2973 * Map dring exported by the peer. 2974 */ 2975 dp = vsw_map_dring(ldcp, (void *)tagp); 2976 if (dp == NULL) { 2977 return (1); 2978 } 2979 2980 /* 2981 * Map data buffers exported by the peer if we are in RxDringData mode. 2982 */ 2983 if (lp->dring_mode == VIO_RX_DRING_DATA) { 2984 rv = vsw_map_data(ldcp, dp, (void *)tagp); 2985 if (rv != 0) { 2986 vsw_unmap_dring(ldcp); 2987 return (1); 2988 } 2989 } 2990 2991 return (0); 2992 } 2993 2994 static int 2995 vsw_process_dring_reg_ack(vsw_ldc_t *ldcp, vio_msg_tag_t *tagp) 2996 { 2997 vsw_t *vswp = ldcp->ldc_vswp; 2998 dring_info_t *dp; 2999 3000 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3001 3002 if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_ACK_RECV)) { 3003 return (1); 3004 } 3005 3006 dp = ldcp->lane_out.dringp; 3007 3008 /* save dring_ident acked by peer */ 3009 dp->ident = ((vio_dring_reg_msg_t *)tagp)->dring_ident; 3010 3011 return (0); 3012 } 3013 3014 /* 3015 * Process a dring info packet. We can end up here either because our peer 3016 * has ACK/NACK'ed back to an earlier DRING msg we had sent it, or our 3017 * peer has sent us a dring INFO message. 3018 * 3019 * If we get a valid/acceptable INFO packet (and we have already negotiated 3020 * a version) we ACK back and update the lane state, otherwise we NACK back. 3021 * 3022 * FUTURE: nothing to stop client from sending us info on multiple dring's 3023 * but for the moment we will just use the first one we are given. 3024 * 3025 */ 3026 void 3027 vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *ldcp, void *pkt) 3028 { 3029 int rv; 3030 int msgsize; 3031 dring_info_t *dp; 3032 vio_msg_tag_t *tagp = (vio_msg_tag_t *)pkt; 3033 vsw_t *vswp = ldcp->ldc_vswp; 3034 lane_t *lane_out = &ldcp->lane_out; 3035 lane_t *lane_in = &ldcp->lane_in; 3036 3037 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 3038 3039 switch (tagp->vio_subtype) { 3040 case VIO_SUBTYPE_INFO: 3041 rv = vsw_process_dring_reg_info(ldcp, tagp); 3042 if (rv != 0) { 3043 vsw_free_lane_resources(ldcp, INBOUND); 3044 tagp->vio_subtype = VIO_SUBTYPE_NACK; 3045 lane_in->lstate |= VSW_DRING_NACK_SENT; 3046 } else { 3047 tagp->vio_subtype = VIO_SUBTYPE_ACK; 3048 lane_in->lstate |= VSW_DRING_ACK_SENT; 3049 } 3050 tagp->vio_sid = ldcp->local_session; 3051 DUMP_TAG_PTR(tagp); 3052 if (lane_out->dring_mode == VIO_RX_DRING_DATA) { 3053 dp = lane_in->dringp; 3054 msgsize = 3055 VNET_DRING_REG_EXT_MSG_SIZE(dp->data_ncookies); 3056 } else { 3057 msgsize = sizeof (vio_dring_reg_msg_t); 3058 } 3059 (void) vsw_send_msg(ldcp, (void *)tagp, msgsize, B_TRUE); 3060 vsw_next_milestone(ldcp); 3061 break; 3062 3063 case VIO_SUBTYPE_ACK: 3064 rv = vsw_process_dring_reg_ack(ldcp, tagp); 3065 if (rv != 0) { 3066 return; 3067 } 3068 lane_out->lstate |= VSW_DRING_ACK_RECV; 3069 vsw_next_milestone(ldcp); 3070 break; 3071 3072 case VIO_SUBTYPE_NACK: 3073 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3074 3075 if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_NACK_RECV)) 3076 return; 3077 3078 lane_out->lstate |= VSW_DRING_NACK_RECV; 3079 vsw_next_milestone(ldcp); 3080 break; 3081 3082 default: 3083 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 3084 tagp->vio_subtype); 3085 } 3086 3087 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 3088 } 3089 3090 /* 3091 * Process a request from peer to unregister a dring. 3092 * 3093 * For the moment we just restart the handshake if our 3094 * peer endpoint attempts to unregister a dring. 3095 */ 3096 void 3097 vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *ldcp, void *pkt) 3098 { 3099 vsw_t *vswp = ldcp->ldc_vswp; 3100 vio_dring_unreg_msg_t *dring_pkt; 3101 3102 /* 3103 * We know this is a ctrl/dring packet so 3104 * cast it into the correct structure. 3105 */ 3106 dring_pkt = (vio_dring_unreg_msg_t *)pkt; 3107 3108 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3109 3110 switch (dring_pkt->tag.vio_subtype) { 3111 case VIO_SUBTYPE_INFO: 3112 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3113 3114 DWARN(vswp, "%s: restarting handshake..", __func__); 3115 break; 3116 3117 case VIO_SUBTYPE_ACK: 3118 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3119 3120 DWARN(vswp, "%s: restarting handshake..", __func__); 3121 break; 3122 3123 case VIO_SUBTYPE_NACK: 3124 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3125 3126 DWARN(vswp, "%s: restarting handshake..", __func__); 3127 break; 3128 3129 default: 3130 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 3131 dring_pkt->tag.vio_subtype); 3132 } 3133 3134 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3135 3136 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3137 } 3138 3139 #define SND_MCST_NACK(ldcp, pkt) \ 3140 pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \ 3141 pkt->tag.vio_sid = ldcp->local_session; \ 3142 (void) vsw_send_msg(ldcp, (void *)pkt, \ 3143 sizeof (vnet_mcast_msg_t), B_TRUE); 3144 3145 /* 3146 * Process a multicast request from a vnet. 3147 * 3148 * Vnet's specify a multicast address that they are interested in. This 3149 * address is used as a key into the hash table which forms the multicast 3150 * forwarding database (mFDB). 3151 * 3152 * The table keys are the multicast addresses, while the table entries 3153 * are pointers to lists of ports which wish to receive packets for the 3154 * specified multicast address. 3155 * 3156 * When a multicast packet is being switched we use the address as a key 3157 * into the hash table, and then walk the appropriate port list forwarding 3158 * the pkt to each port in turn. 3159 * 3160 * If a vnet is no longer interested in a particular multicast grouping 3161 * we simply find the correct location in the hash table and then delete 3162 * the relevant port from the port list. 3163 * 3164 * To deal with the case whereby a port is being deleted without first 3165 * removing itself from the lists in the hash table, we maintain a list 3166 * of multicast addresses the port has registered an interest in, within 3167 * the port structure itself. We then simply walk that list of addresses 3168 * using them as keys into the hash table and remove the port from the 3169 * appropriate lists. 3170 */ 3171 static void 3172 vsw_process_ctrl_mcst_pkt(vsw_ldc_t *ldcp, void *pkt) 3173 { 3174 vnet_mcast_msg_t *mcst_pkt; 3175 vsw_port_t *port = ldcp->ldc_port; 3176 vsw_t *vswp = ldcp->ldc_vswp; 3177 int i; 3178 3179 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3180 3181 /* 3182 * We know this is a ctrl/mcast packet so 3183 * cast it into the correct structure. 3184 */ 3185 mcst_pkt = (vnet_mcast_msg_t *)pkt; 3186 3187 switch (mcst_pkt->tag.vio_subtype) { 3188 case VIO_SUBTYPE_INFO: 3189 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3190 3191 /* 3192 * Check if in correct state to receive a multicast 3193 * message (i.e. handshake complete). If not reset 3194 * the handshake. 3195 */ 3196 if (vsw_check_flag(ldcp, INBOUND, VSW_MCST_INFO_RECV)) 3197 return; 3198 3199 /* 3200 * Before attempting to add or remove address check 3201 * that they are valid multicast addresses. 3202 * If not, then NACK back. 3203 */ 3204 for (i = 0; i < mcst_pkt->count; i++) { 3205 if ((mcst_pkt->mca[i].ether_addr_octet[0] & 01) != 1) { 3206 DERR(vswp, "%s: invalid multicast address", 3207 __func__); 3208 SND_MCST_NACK(ldcp, mcst_pkt); 3209 return; 3210 } 3211 } 3212 3213 /* 3214 * Now add/remove the addresses. If this fails we 3215 * NACK back. 3216 */ 3217 if (vsw_add_rem_mcst(mcst_pkt, port) != 0) { 3218 SND_MCST_NACK(ldcp, mcst_pkt); 3219 return; 3220 } 3221 3222 mcst_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3223 mcst_pkt->tag.vio_sid = ldcp->local_session; 3224 3225 DUMP_TAG_PTR((vio_msg_tag_t *)mcst_pkt); 3226 3227 (void) vsw_send_msg(ldcp, (void *)mcst_pkt, 3228 sizeof (vnet_mcast_msg_t), B_TRUE); 3229 break; 3230 3231 case VIO_SUBTYPE_ACK: 3232 DWARN(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3233 3234 /* 3235 * We shouldn't ever get a multicast ACK message as 3236 * at the moment we never request multicast addresses 3237 * to be set on some other device. This may change in 3238 * the future if we have cascading switches. 3239 */ 3240 if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_ACK_RECV)) 3241 return; 3242 3243 /* Do nothing */ 3244 break; 3245 3246 case VIO_SUBTYPE_NACK: 3247 DWARN(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3248 3249 /* 3250 * We shouldn't get a multicast NACK packet for the 3251 * same reasons as we shouldn't get a ACK packet. 3252 */ 3253 if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_NACK_RECV)) 3254 return; 3255 3256 /* Do nothing */ 3257 break; 3258 3259 default: 3260 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 3261 mcst_pkt->tag.vio_subtype); 3262 } 3263 3264 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3265 } 3266 3267 static void 3268 vsw_process_ctrl_rdx_pkt(vsw_ldc_t *ldcp, void *pkt) 3269 { 3270 vio_rdx_msg_t *rdx_pkt; 3271 vsw_t *vswp = ldcp->ldc_vswp; 3272 3273 /* 3274 * We know this is a ctrl/rdx packet so 3275 * cast it into the correct structure. 3276 */ 3277 rdx_pkt = (vio_rdx_msg_t *)pkt; 3278 3279 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 3280 3281 switch (rdx_pkt->tag.vio_subtype) { 3282 case VIO_SUBTYPE_INFO: 3283 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3284 3285 if (vsw_check_flag(ldcp, OUTBOUND, VSW_RDX_INFO_RECV)) 3286 return; 3287 3288 rdx_pkt->tag.vio_sid = ldcp->local_session; 3289 rdx_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3290 3291 DUMP_TAG_PTR((vio_msg_tag_t *)rdx_pkt); 3292 3293 ldcp->lane_out.lstate |= VSW_RDX_ACK_SENT; 3294 3295 (void) vsw_send_msg(ldcp, (void *)rdx_pkt, 3296 sizeof (vio_rdx_msg_t), B_TRUE); 3297 3298 vsw_next_milestone(ldcp); 3299 break; 3300 3301 case VIO_SUBTYPE_ACK: 3302 /* 3303 * Should be handled in-band by callback handler. 3304 */ 3305 DERR(vswp, "%s: Unexpected VIO_SUBTYPE_ACK", __func__); 3306 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3307 break; 3308 3309 case VIO_SUBTYPE_NACK: 3310 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3311 3312 if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_NACK_RECV)) 3313 return; 3314 3315 ldcp->lane_in.lstate |= VSW_RDX_NACK_RECV; 3316 vsw_next_milestone(ldcp); 3317 break; 3318 3319 default: 3320 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 3321 rdx_pkt->tag.vio_subtype); 3322 } 3323 3324 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3325 } 3326 3327 static void 3328 vsw_process_physlink_msg(vsw_ldc_t *ldcp, void *pkt) 3329 { 3330 vnet_physlink_msg_t *msgp; 3331 vsw_t *vswp = ldcp->ldc_vswp; 3332 3333 msgp = (vnet_physlink_msg_t *)pkt; 3334 3335 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 3336 3337 switch (msgp->tag.vio_subtype) { 3338 case VIO_SUBTYPE_INFO: 3339 3340 /* vsw shouldn't recv physlink info */ 3341 DWARN(vswp, "%s: Unexpected VIO_SUBTYPE_INFO", __func__); 3342 break; 3343 3344 case VIO_SUBTYPE_ACK: 3345 3346 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3347 break; 3348 3349 case VIO_SUBTYPE_NACK: 3350 3351 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3352 break; 3353 3354 default: 3355 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 3356 msgp->tag.vio_subtype); 3357 } 3358 3359 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3360 } 3361 3362 static void 3363 vsw_process_data_pkt(vsw_ldc_t *ldcp, void *dpkt, vio_msg_tag_t *tagp, 3364 uint32_t msglen) 3365 { 3366 uint16_t env = tagp->vio_subtype_env; 3367 vsw_t *vswp = ldcp->ldc_vswp; 3368 lane_t *lp = &ldcp->lane_out; 3369 uint8_t dring_mode = lp->dring_mode; 3370 3371 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3372 3373 /* session id check */ 3374 if (ldcp->session_status & VSW_PEER_SESSION) { 3375 if (ldcp->peer_session != tagp->vio_sid) { 3376 DERR(vswp, "%s (chan %d): invalid session id (%llx)", 3377 __func__, ldcp->ldc_id, tagp->vio_sid); 3378 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3379 return; 3380 } 3381 } 3382 3383 /* 3384 * It is an error for us to be getting data packets 3385 * before the handshake has completed. 3386 */ 3387 if (ldcp->hphase != VSW_MILESTONE4) { 3388 DERR(vswp, "%s: got data packet before handshake complete " 3389 "hphase %d (%x: %x)", __func__, ldcp->hphase, 3390 ldcp->lane_in.lstate, ldcp->lane_out.lstate); 3391 DUMP_FLAGS(ldcp->lane_in.lstate); 3392 DUMP_FLAGS(ldcp->lane_out.lstate); 3393 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3394 return; 3395 } 3396 if (dring_mode == VIO_TX_DRING) { 3397 /* 3398 * To reduce the locking contention, release the ldc_cblock 3399 * here and re-acquire it once we are done receiving packets. 3400 * We do this only in TxDring mode to allow further callbaks to 3401 * continue while the msg worker thread processes the messages. 3402 * In RxDringData mode, we process the messages in the callback 3403 * itself and wake up rcv worker thread to process only data 3404 * info messages. 3405 */ 3406 mutex_exit(&ldcp->ldc_cblock); 3407 mutex_enter(&ldcp->ldc_rxlock); 3408 } 3409 3410 /* 3411 * Switch on vio_subtype envelope, then let lower routines 3412 * decide if its an INFO, ACK or NACK packet. 3413 */ 3414 if (env == VIO_DRING_DATA) { 3415 ldcp->rx_dringdata(ldcp, dpkt); 3416 } else if (env == VIO_PKT_DATA) { 3417 ldcp->rx_pktdata(ldcp, dpkt, msglen); 3418 } else if (env == VIO_DESC_DATA) { 3419 vsw_process_data_ibnd_pkt(ldcp, dpkt); 3420 } else { 3421 DERR(vswp, "%s: unknown vio_subtype_env (%x)\n", 3422 __func__, env); 3423 } 3424 3425 if (dring_mode == VIO_TX_DRING) { 3426 mutex_exit(&ldcp->ldc_rxlock); 3427 mutex_enter(&ldcp->ldc_cblock); 3428 } 3429 3430 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3431 } 3432 3433 /* 3434 * dummy pkt data handler function for vnet protocol version 1.0 3435 */ 3436 static void 3437 vsw_process_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen) 3438 { 3439 _NOTE(ARGUNUSED(arg1, arg2, msglen)) 3440 } 3441 3442 /* 3443 * This function handles raw pkt data messages received over the channel. 3444 * Currently, only priority-eth-type frames are received through this mechanism. 3445 * In this case, the frame(data) is present within the message itself which 3446 * is copied into an mblk before switching it. 3447 */ 3448 static void 3449 vsw_process_pkt_data(void *arg1, void *arg2, uint32_t msglen) 3450 { 3451 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg1; 3452 vio_raw_data_msg_t *dpkt = (vio_raw_data_msg_t *)arg2; 3453 uint32_t size; 3454 mblk_t *mp; 3455 vio_mblk_t *vmp; 3456 vsw_t *vswp = ldcp->ldc_vswp; 3457 vgen_stats_t *statsp = &ldcp->ldc_stats; 3458 lane_t *lp = &ldcp->lane_out; 3459 3460 size = msglen - VIO_PKT_DATA_HDRSIZE; 3461 if (size < ETHERMIN || size > lp->mtu) { 3462 (void) atomic_inc_32(&statsp->rx_pri_fail); 3463 DWARN(vswp, "%s(%lld) invalid size(%d)\n", __func__, 3464 ldcp->ldc_id, size); 3465 return; 3466 } 3467 3468 vmp = vio_multipool_allocb(&ldcp->vmp, size + VLAN_TAGSZ); 3469 if (vmp == NULL) { 3470 mp = allocb(size + VLAN_TAGSZ, BPRI_MED); 3471 if (mp == NULL) { 3472 (void) atomic_inc_32(&statsp->rx_pri_fail); 3473 DWARN(vswp, "%s(%lld) allocb failure, " 3474 "unable to process priority frame\n", __func__, 3475 ldcp->ldc_id); 3476 return; 3477 } 3478 } else { 3479 mp = vmp->mp; 3480 } 3481 3482 /* skip over the extra space for vlan tag */ 3483 mp->b_rptr += VLAN_TAGSZ; 3484 3485 /* copy the frame from the payload of raw data msg into the mblk */ 3486 bcopy(dpkt->data, mp->b_rptr, size); 3487 mp->b_wptr = mp->b_rptr + size; 3488 3489 if (vmp != NULL) { 3490 vmp->state = VIO_MBLK_HAS_DATA; 3491 } 3492 3493 /* update stats */ 3494 (void) atomic_inc_64(&statsp->rx_pri_packets); 3495 (void) atomic_add_64(&statsp->rx_pri_bytes, size); 3496 3497 /* 3498 * VLAN_TAGSZ of extra space has been pre-alloc'd if tag is needed. 3499 */ 3500 (void) vsw_vlan_frame_pretag(ldcp->ldc_port, VSW_VNETPORT, mp); 3501 3502 /* switch the frame to destination */ 3503 vswp->vsw_switch_frame(vswp, mp, VSW_VNETPORT, ldcp->ldc_port, NULL); 3504 } 3505 3506 /* 3507 * Process an in-band descriptor message (most likely from 3508 * OBP). 3509 */ 3510 static void 3511 vsw_process_data_ibnd_pkt(vsw_ldc_t *ldcp, void *pkt) 3512 { 3513 vnet_ibnd_desc_t *ibnd_desc; 3514 dring_info_t *dp = NULL; 3515 vsw_private_desc_t *priv_addr = NULL; 3516 vsw_t *vswp = ldcp->ldc_vswp; 3517 mblk_t *mp = NULL; 3518 size_t nbytes = 0; 3519 size_t off = 0; 3520 uint64_t idx = 0; 3521 uint32_t num = 1, len, datalen = 0; 3522 uint64_t ncookies = 0; 3523 int i, rv; 3524 int j = 0; 3525 3526 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3527 3528 ibnd_desc = (vnet_ibnd_desc_t *)pkt; 3529 3530 switch (ibnd_desc->hdr.tag.vio_subtype) { 3531 case VIO_SUBTYPE_INFO: 3532 D1(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3533 3534 if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV)) 3535 return; 3536 3537 /* 3538 * Data is padded to align on a 8 byte boundary, 3539 * nbytes is actual data length, i.e. minus that 3540 * padding. 3541 */ 3542 datalen = ibnd_desc->nbytes; 3543 3544 D2(vswp, "%s(%lld): processing inband desc : " 3545 ": datalen 0x%lx", __func__, ldcp->ldc_id, datalen); 3546 3547 ncookies = ibnd_desc->ncookies; 3548 3549 /* 3550 * allocb(9F) returns an aligned data block. We 3551 * need to ensure that we ask ldc for an aligned 3552 * number of bytes also. 3553 */ 3554 nbytes = datalen; 3555 if (nbytes & 0x7) { 3556 off = 8 - (nbytes & 0x7); 3557 nbytes += off; 3558 } 3559 3560 /* alloc extra space for VLAN_TAG */ 3561 mp = allocb(datalen + 8, BPRI_MED); 3562 if (mp == NULL) { 3563 DERR(vswp, "%s(%lld): allocb failed", 3564 __func__, ldcp->ldc_id); 3565 ldcp->ldc_stats.rx_allocb_fail++; 3566 return; 3567 } 3568 3569 /* skip over the extra space for VLAN_TAG */ 3570 mp->b_rptr += 8; 3571 3572 rv = ldc_mem_copy(ldcp->ldc_handle, (caddr_t)mp->b_rptr, 3573 0, &nbytes, ibnd_desc->memcookie, (uint64_t)ncookies, 3574 LDC_COPY_IN); 3575 3576 if (rv != 0) { 3577 DERR(vswp, "%s(%d): unable to copy in data from " 3578 "%d cookie(s)", __func__, ldcp->ldc_id, ncookies); 3579 freemsg(mp); 3580 ldcp->ldc_stats.ierrors++; 3581 return; 3582 } 3583 3584 D2(vswp, "%s(%d): copied in %ld bytes using %d cookies", 3585 __func__, ldcp->ldc_id, nbytes, ncookies); 3586 3587 /* point to the actual end of data */ 3588 mp->b_wptr = mp->b_rptr + datalen; 3589 ldcp->ldc_stats.ipackets++; 3590 ldcp->ldc_stats.rbytes += datalen; 3591 3592 /* 3593 * We ACK back every in-band descriptor message we process 3594 */ 3595 ibnd_desc->hdr.tag.vio_subtype = VIO_SUBTYPE_ACK; 3596 ibnd_desc->hdr.tag.vio_sid = ldcp->local_session; 3597 (void) vsw_send_msg(ldcp, (void *)ibnd_desc, 3598 sizeof (vnet_ibnd_desc_t), B_TRUE); 3599 3600 /* 3601 * there is extra space alloc'd for VLAN_TAG 3602 */ 3603 (void) vsw_vlan_frame_pretag(ldcp->ldc_port, VSW_VNETPORT, mp); 3604 3605 /* send the packet to be switched */ 3606 vswp->vsw_switch_frame(vswp, mp, VSW_VNETPORT, 3607 ldcp->ldc_port, NULL); 3608 3609 break; 3610 3611 case VIO_SUBTYPE_ACK: 3612 D1(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3613 3614 /* Verify the ACK is valid */ 3615 idx = ibnd_desc->hdr.desc_handle; 3616 3617 if (idx >= vsw_num_descriptors) { 3618 cmn_err(CE_WARN, "!vsw%d: corrupted ACK received " 3619 "(idx %ld)", vswp->instance, idx); 3620 return; 3621 } 3622 3623 if ((dp = ldcp->lane_out.dringp) == NULL) { 3624 DERR(vswp, "%s: no dring found", __func__); 3625 return; 3626 } 3627 3628 len = dp->num_descriptors; 3629 /* 3630 * If the descriptor we are being ACK'ed for is not the 3631 * one we expected, then pkts were lost somwhere, either 3632 * when we tried to send a msg, or a previous ACK msg from 3633 * our peer. In either case we now reclaim the descriptors 3634 * in the range from the last ACK we received up to the 3635 * current ACK. 3636 */ 3637 if (idx != dp->last_ack_recv) { 3638 DWARN(vswp, "%s: dropped pkts detected, (%ld, %ld)", 3639 __func__, dp->last_ack_recv, idx); 3640 num = idx >= dp->last_ack_recv ? 3641 idx - dp->last_ack_recv + 1: 3642 (len - dp->last_ack_recv + 1) + idx; 3643 } 3644 3645 /* 3646 * When we sent the in-band message to our peer we 3647 * marked the copy in our private ring as READY. We now 3648 * check that the descriptor we are being ACK'ed for is in 3649 * fact READY, i.e. it is one we have shared with our peer. 3650 * 3651 * If its not we flag an error, but still reset the descr 3652 * back to FREE. 3653 */ 3654 for (i = dp->last_ack_recv; j < num; i = (i + 1) % len, j++) { 3655 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i; 3656 mutex_enter(&priv_addr->dstate_lock); 3657 if (priv_addr->dstate != VIO_DESC_READY) { 3658 DERR(vswp, "%s: (%ld) desc at index %ld not " 3659 "READY (0x%lx)", __func__, 3660 ldcp->ldc_id, idx, priv_addr->dstate); 3661 DERR(vswp, "%s: bound %d: ncookies %ld : " 3662 "datalen %ld", __func__, 3663 priv_addr->bound, priv_addr->ncookies, 3664 priv_addr->datalen); 3665 } 3666 D2(vswp, "%s: (%lld) freeing descp at %lld", __func__, 3667 ldcp->ldc_id, idx); 3668 /* release resources associated with sent msg */ 3669 priv_addr->datalen = 0; 3670 priv_addr->dstate = VIO_DESC_FREE; 3671 mutex_exit(&priv_addr->dstate_lock); 3672 } 3673 /* update to next expected value */ 3674 dp->last_ack_recv = (idx + 1) % dp->num_descriptors; 3675 3676 break; 3677 3678 case VIO_SUBTYPE_NACK: 3679 DERR(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3680 3681 /* 3682 * We should only get a NACK if our peer doesn't like 3683 * something about a message we have sent it. If this 3684 * happens we just release the resources associated with 3685 * the message. (We are relying on higher layers to decide 3686 * whether or not to resend. 3687 */ 3688 3689 /* limit check */ 3690 idx = ibnd_desc->hdr.desc_handle; 3691 3692 if (idx >= vsw_num_descriptors) { 3693 DERR(vswp, "%s: corrupted NACK received (idx %lld)", 3694 __func__, idx); 3695 return; 3696 } 3697 3698 if ((dp = ldcp->lane_out.dringp) == NULL) { 3699 DERR(vswp, "%s: no dring found", __func__); 3700 return; 3701 } 3702 3703 priv_addr = (vsw_private_desc_t *)dp->priv_addr; 3704 3705 /* move to correct location in ring */ 3706 priv_addr += idx; 3707 3708 /* release resources associated with sent msg */ 3709 mutex_enter(&priv_addr->dstate_lock); 3710 priv_addr->datalen = 0; 3711 priv_addr->dstate = VIO_DESC_FREE; 3712 mutex_exit(&priv_addr->dstate_lock); 3713 3714 break; 3715 3716 default: 3717 DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__, 3718 ldcp->ldc_id, ibnd_desc->hdr.tag.vio_subtype); 3719 } 3720 3721 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 3722 } 3723 3724 static void 3725 vsw_process_err_pkt(vsw_ldc_t *ldcp, void *epkt, vio_msg_tag_t *tagp) 3726 { 3727 _NOTE(ARGUNUSED(epkt)) 3728 3729 vsw_t *vswp = ldcp->ldc_vswp; 3730 uint16_t env = tagp->vio_subtype_env; 3731 3732 D1(vswp, "%s (%lld): enter\n", __func__, ldcp->ldc_id); 3733 3734 /* 3735 * Error vio_subtypes have yet to be defined. So for 3736 * the moment we can't do anything. 3737 */ 3738 D2(vswp, "%s: (%x) vio_subtype env", __func__, env); 3739 3740 D1(vswp, "%s (%lld): exit\n", __func__, ldcp->ldc_id); 3741 } 3742 3743 /* transmit the packet over the given port */ 3744 int 3745 vsw_portsend(vsw_port_t *port, mblk_t *mp) 3746 { 3747 mblk_t *mpt; 3748 int count; 3749 vsw_ldc_t *ldcp = port->ldcp; 3750 int status = 0; 3751 3752 count = vsw_vlan_frame_untag(port, VSW_VNETPORT, &mp, &mpt); 3753 if (count != 0) { 3754 status = ldcp->tx(ldcp, mp, mpt, count); 3755 } 3756 return (status); 3757 } 3758 3759 /* 3760 * Break up frames into 2 seperate chains: normal and 3761 * priority, based on the frame type. The number of 3762 * priority frames is also counted and returned. 3763 * 3764 * Params: 3765 * vswp: pointer to the instance of vsw 3766 * np: head of packet chain to be broken 3767 * npt: tail of packet chain to be broken 3768 * 3769 * Returns: 3770 * np: head of normal data packets 3771 * npt: tail of normal data packets 3772 * hp: head of high priority packets 3773 * hpt: tail of high priority packets 3774 */ 3775 static uint32_t 3776 vsw_get_pri_packets(vsw_t *vswp, mblk_t **np, mblk_t **npt, 3777 mblk_t **hp, mblk_t **hpt) 3778 { 3779 mblk_t *tmp = NULL; 3780 mblk_t *smp = NULL; 3781 mblk_t *hmp = NULL; /* high prio pkts head */ 3782 mblk_t *hmpt = NULL; /* high prio pkts tail */ 3783 mblk_t *nmp = NULL; /* normal pkts head */ 3784 mblk_t *nmpt = NULL; /* normal pkts tail */ 3785 uint32_t count = 0; 3786 int i; 3787 struct ether_header *ehp; 3788 uint32_t num_types; 3789 uint16_t *types; 3790 3791 tmp = *np; 3792 while (tmp != NULL) { 3793 3794 smp = tmp; 3795 tmp = tmp->b_next; 3796 smp->b_next = NULL; 3797 smp->b_prev = NULL; 3798 3799 ehp = (struct ether_header *)smp->b_rptr; 3800 num_types = vswp->pri_num_types; 3801 types = vswp->pri_types; 3802 for (i = 0; i < num_types; i++) { 3803 if (ehp->ether_type == types[i]) { 3804 /* high priority frame */ 3805 3806 if (hmp != NULL) { 3807 hmpt->b_next = smp; 3808 hmpt = smp; 3809 } else { 3810 hmp = hmpt = smp; 3811 } 3812 count++; 3813 break; 3814 } 3815 } 3816 if (i == num_types) { 3817 /* normal data frame */ 3818 3819 if (nmp != NULL) { 3820 nmpt->b_next = smp; 3821 nmpt = smp; 3822 } else { 3823 nmp = nmpt = smp; 3824 } 3825 } 3826 } 3827 3828 *hp = hmp; 3829 *hpt = hmpt; 3830 *np = nmp; 3831 *npt = nmpt; 3832 3833 return (count); 3834 } 3835 3836 /* 3837 * Wrapper function to transmit normal and/or priority frames over the channel. 3838 */ 3839 static int 3840 vsw_ldctx_pri(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count) 3841 { 3842 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 3843 mblk_t *tmp; 3844 mblk_t *smp; 3845 mblk_t *hmp; /* high prio pkts head */ 3846 mblk_t *hmpt; /* high prio pkts tail */ 3847 mblk_t *nmp; /* normal pkts head */ 3848 mblk_t *nmpt; /* normal pkts tail */ 3849 uint32_t n = 0; 3850 vsw_t *vswp = ldcp->ldc_vswp; 3851 3852 ASSERT(VSW_PRI_ETH_DEFINED(vswp)); 3853 ASSERT(count != 0); 3854 3855 nmp = mp; 3856 nmpt = mpt; 3857 3858 /* gather any priority frames from the chain of packets */ 3859 n = vsw_get_pri_packets(vswp, &nmp, &nmpt, &hmp, &hmpt); 3860 3861 /* transmit priority frames */ 3862 tmp = hmp; 3863 while (tmp != NULL) { 3864 smp = tmp; 3865 tmp = tmp->b_next; 3866 smp->b_next = NULL; 3867 vsw_ldcsend_pkt(ldcp, smp); 3868 } 3869 3870 count -= n; 3871 3872 if (count == 0) { 3873 /* no normal data frames to process */ 3874 return (0); 3875 } 3876 3877 return (vsw_ldctx(ldcp, nmp, nmpt, count)); 3878 } 3879 3880 /* 3881 * Wrapper function to transmit normal frames over the channel. 3882 */ 3883 static int 3884 vsw_ldctx(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count) 3885 { 3886 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 3887 mblk_t *tmp = NULL; 3888 3889 ASSERT(count != 0); 3890 /* 3891 * If the TX thread is enabled, then queue the 3892 * ordinary frames and signal the tx thread. 3893 */ 3894 if (ldcp->tx_thread != NULL) { 3895 3896 mutex_enter(&ldcp->tx_thr_lock); 3897 3898 if ((ldcp->tx_cnt + count) >= vsw_max_tx_qcount) { 3899 /* 3900 * If we reached queue limit, 3901 * do not queue new packets, 3902 * drop them. 3903 */ 3904 ldcp->ldc_stats.tx_qfull += count; 3905 mutex_exit(&ldcp->tx_thr_lock); 3906 freemsgchain(mp); 3907 goto exit; 3908 } 3909 if (ldcp->tx_mhead == NULL) { 3910 ldcp->tx_mhead = mp; 3911 ldcp->tx_mtail = mpt; 3912 cv_signal(&ldcp->tx_thr_cv); 3913 } else { 3914 ldcp->tx_mtail->b_next = mp; 3915 ldcp->tx_mtail = mpt; 3916 } 3917 ldcp->tx_cnt += count; 3918 mutex_exit(&ldcp->tx_thr_lock); 3919 } else { 3920 while (mp != NULL) { 3921 tmp = mp->b_next; 3922 mp->b_next = mp->b_prev = NULL; 3923 (void) vsw_ldcsend(ldcp, mp, 1); 3924 mp = tmp; 3925 } 3926 } 3927 3928 exit: 3929 return (0); 3930 } 3931 3932 /* 3933 * This function transmits the frame in the payload of a raw data 3934 * (VIO_PKT_DATA) message. Thus, it provides an Out-Of-Band path to 3935 * send special frames with high priorities, without going through 3936 * the normal data path which uses descriptor ring mechanism. 3937 */ 3938 static void 3939 vsw_ldcsend_pkt(vsw_ldc_t *ldcp, mblk_t *mp) 3940 { 3941 vio_raw_data_msg_t *pkt; 3942 mblk_t *bp; 3943 mblk_t *nmp = NULL; 3944 vio_mblk_t *vmp; 3945 caddr_t dst; 3946 uint32_t mblksz; 3947 uint32_t size; 3948 uint32_t nbytes; 3949 int rv; 3950 vsw_t *vswp = ldcp->ldc_vswp; 3951 vgen_stats_t *statsp = &ldcp->ldc_stats; 3952 3953 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 3954 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 3955 (void) atomic_inc_32(&statsp->tx_pri_fail); 3956 DWARN(vswp, "%s(%lld) status(%d) lstate(0x%llx), dropping " 3957 "packet\n", __func__, ldcp->ldc_id, ldcp->ldc_status, 3958 ldcp->lane_out.lstate); 3959 goto send_pkt_exit; 3960 } 3961 3962 size = msgsize(mp); 3963 3964 /* frame size bigger than available payload len of raw data msg ? */ 3965 if (size > (size_t)(ldcp->msglen - VIO_PKT_DATA_HDRSIZE)) { 3966 (void) atomic_inc_32(&statsp->tx_pri_fail); 3967 DWARN(vswp, "%s(%lld) invalid size(%d)\n", __func__, 3968 ldcp->ldc_id, size); 3969 goto send_pkt_exit; 3970 } 3971 3972 if (size < ETHERMIN) 3973 size = ETHERMIN; 3974 3975 /* alloc space for a raw data message */ 3976 vmp = vio_allocb(vswp->pri_tx_vmp); 3977 if (vmp == NULL) { 3978 (void) atomic_inc_32(&statsp->tx_pri_fail); 3979 DWARN(vswp, "vio_allocb failed\n"); 3980 goto send_pkt_exit; 3981 } else { 3982 nmp = vmp->mp; 3983 } 3984 pkt = (vio_raw_data_msg_t *)nmp->b_rptr; 3985 3986 /* copy frame into the payload of raw data message */ 3987 dst = (caddr_t)pkt->data; 3988 for (bp = mp; bp != NULL; bp = bp->b_cont) { 3989 mblksz = MBLKL(bp); 3990 bcopy(bp->b_rptr, dst, mblksz); 3991 dst += mblksz; 3992 } 3993 3994 vmp->state = VIO_MBLK_HAS_DATA; 3995 3996 /* setup the raw data msg */ 3997 pkt->tag.vio_msgtype = VIO_TYPE_DATA; 3998 pkt->tag.vio_subtype = VIO_SUBTYPE_INFO; 3999 pkt->tag.vio_subtype_env = VIO_PKT_DATA; 4000 pkt->tag.vio_sid = ldcp->local_session; 4001 nbytes = VIO_PKT_DATA_HDRSIZE + size; 4002 4003 /* send the msg over ldc */ 4004 rv = vsw_send_msg(ldcp, (void *)pkt, nbytes, B_TRUE); 4005 if (rv != 0) { 4006 (void) atomic_inc_32(&statsp->tx_pri_fail); 4007 DWARN(vswp, "%s(%lld) Error sending priority frame\n", __func__, 4008 ldcp->ldc_id); 4009 goto send_pkt_exit; 4010 } 4011 4012 /* update stats */ 4013 (void) atomic_inc_64(&statsp->tx_pri_packets); 4014 (void) atomic_add_64(&statsp->tx_pri_packets, size); 4015 4016 send_pkt_exit: 4017 if (nmp != NULL) 4018 freemsg(nmp); 4019 freemsg(mp); 4020 } 4021 4022 /* 4023 * Transmit the packet over the given LDC channel. 4024 * 4025 * The 'retries' argument indicates how many times a packet 4026 * is retried before it is dropped. Note, the retry is done 4027 * only for a resource related failure, for all other failures 4028 * the packet is dropped immediately. 4029 */ 4030 static int 4031 vsw_ldcsend(vsw_ldc_t *ldcp, mblk_t *mp, uint32_t retries) 4032 { 4033 int i; 4034 int rc; 4035 int status = 0; 4036 vsw_port_t *port = ldcp->ldc_port; 4037 dring_info_t *dp = NULL; 4038 lane_t *lp = &ldcp->lane_out; 4039 4040 for (i = 0; i < retries; ) { 4041 /* 4042 * Send the message out using the appropriate 4043 * transmit function which will free mblock when it 4044 * is finished with it. 4045 */ 4046 mutex_enter(&port->tx_lock); 4047 if (port->transmit != NULL) { 4048 status = (*port->transmit)(ldcp, mp); 4049 } 4050 if (status == LDC_TX_SUCCESS) { 4051 mutex_exit(&port->tx_lock); 4052 break; 4053 } 4054 i++; /* increment the counter here */ 4055 4056 /* If its the last retry, then update the oerror */ 4057 if ((i == retries) && (status == LDC_TX_NORESOURCES)) { 4058 ldcp->ldc_stats.oerrors++; 4059 } 4060 mutex_exit(&port->tx_lock); 4061 4062 if (status != LDC_TX_NORESOURCES) { 4063 /* 4064 * No retrying required for errors un-related 4065 * to resources. 4066 */ 4067 break; 4068 } 4069 if (((dp = ldcp->lane_out.dringp) != NULL) && 4070 ((VSW_VER_GTEQ(ldcp, 1, 2) && 4071 (ldcp->lane_out.xfer_mode & VIO_DRING_MODE_V1_2)) || 4072 ((VSW_VER_LT(ldcp, 1, 2) && 4073 (ldcp->lane_out.xfer_mode == VIO_DRING_MODE_V1_0))))) { 4074 4075 /* Need to reclaim in TxDring mode. */ 4076 if (lp->dring_mode == VIO_TX_DRING) { 4077 rc = vsw_reclaim_dring(dp, dp->end_idx); 4078 } 4079 4080 } else { 4081 /* 4082 * If there is no dring or the xfer_mode is 4083 * set to DESC_MODE(ie., OBP), then simply break here. 4084 */ 4085 break; 4086 } 4087 4088 /* 4089 * Delay only if none were reclaimed 4090 * and its not the last retry. 4091 */ 4092 if ((rc == 0) && (i < retries)) { 4093 delay(drv_usectohz(vsw_ldc_tx_delay)); 4094 } 4095 } 4096 freemsg(mp); 4097 return (status); 4098 } 4099 4100 /* 4101 * Send an in-band descriptor message over ldc. 4102 */ 4103 static int 4104 vsw_descrsend(vsw_ldc_t *ldcp, mblk_t *mp) 4105 { 4106 vsw_t *vswp = ldcp->ldc_vswp; 4107 vnet_ibnd_desc_t ibnd_msg; 4108 vsw_private_desc_t *priv_desc = NULL; 4109 dring_info_t *dp = NULL; 4110 size_t n, size = 0; 4111 caddr_t bufp; 4112 mblk_t *bp; 4113 int idx, i; 4114 int status = LDC_TX_SUCCESS; 4115 static int warn_msg = 1; 4116 lane_t *lp = &ldcp->lane_out; 4117 4118 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 4119 4120 ASSERT(mp != NULL); 4121 4122 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 4123 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 4124 DERR(vswp, "%s(%lld) status(%d) state (0x%llx), dropping pkt", 4125 __func__, ldcp->ldc_id, ldcp->ldc_status, 4126 ldcp->lane_out.lstate); 4127 ldcp->ldc_stats.oerrors++; 4128 return (LDC_TX_FAILURE); 4129 } 4130 4131 /* 4132 * The dring here is as an internal buffer, 4133 * rather than a transfer channel. 4134 */ 4135 if ((dp = ldcp->lane_out.dringp) == NULL) { 4136 DERR(vswp, "%s(%lld): no dring for outbound lane", 4137 __func__, ldcp->ldc_id); 4138 DERR(vswp, "%s(%lld) status(%d) state (0x%llx)", __func__, 4139 ldcp->ldc_id, ldcp->ldc_status, ldcp->lane_out.lstate); 4140 ldcp->ldc_stats.oerrors++; 4141 return (LDC_TX_FAILURE); 4142 } 4143 4144 size = msgsize(mp); 4145 if (size > (size_t)lp->mtu) { 4146 DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__, 4147 ldcp->ldc_id, size); 4148 ldcp->ldc_stats.oerrors++; 4149 return (LDC_TX_FAILURE); 4150 } 4151 4152 /* 4153 * Find a free descriptor in our buffer ring 4154 */ 4155 if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) { 4156 if (warn_msg) { 4157 DERR(vswp, "%s(%lld): no descriptor available for ring " 4158 "at 0x%llx", __func__, ldcp->ldc_id, dp); 4159 warn_msg = 0; 4160 } 4161 4162 /* nothing more we can do */ 4163 status = LDC_TX_NORESOURCES; 4164 goto vsw_descrsend_free_exit; 4165 } else { 4166 D2(vswp, "%s(%lld): free private descriptor found at pos " 4167 "%ld addr 0x%x\n", __func__, ldcp->ldc_id, idx, priv_desc); 4168 warn_msg = 1; 4169 } 4170 4171 /* copy data into the descriptor */ 4172 bufp = priv_desc->datap; 4173 for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) { 4174 n = MBLKL(bp); 4175 bcopy(bp->b_rptr, bufp, n); 4176 bufp += n; 4177 } 4178 4179 priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size; 4180 4181 /* create and send the in-band descp msg */ 4182 ibnd_msg.hdr.tag.vio_msgtype = VIO_TYPE_DATA; 4183 ibnd_msg.hdr.tag.vio_subtype = VIO_SUBTYPE_INFO; 4184 ibnd_msg.hdr.tag.vio_subtype_env = VIO_DESC_DATA; 4185 ibnd_msg.hdr.tag.vio_sid = ldcp->local_session; 4186 4187 /* 4188 * Copy the mem cookies describing the data from the 4189 * private region of the descriptor ring into the inband 4190 * descriptor. 4191 */ 4192 for (i = 0; i < priv_desc->ncookies; i++) { 4193 bcopy(&priv_desc->memcookie[i], &ibnd_msg.memcookie[i], 4194 sizeof (ldc_mem_cookie_t)); 4195 } 4196 4197 ibnd_msg.hdr.desc_handle = idx; 4198 ibnd_msg.ncookies = priv_desc->ncookies; 4199 ibnd_msg.nbytes = size; 4200 4201 ldcp->ldc_stats.opackets++; 4202 ldcp->ldc_stats.obytes += size; 4203 4204 (void) vsw_send_msg(ldcp, (void *)&ibnd_msg, 4205 sizeof (vnet_ibnd_desc_t), B_TRUE); 4206 4207 vsw_descrsend_free_exit: 4208 4209 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 4210 return (status); 4211 } 4212 4213 static void 4214 vsw_send_ver(void *arg) 4215 { 4216 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 4217 vsw_t *vswp = ldcp->ldc_vswp; 4218 lane_t *lp = &ldcp->lane_out; 4219 vio_ver_msg_t ver_msg; 4220 4221 D1(vswp, "%s enter", __func__); 4222 4223 ver_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 4224 ver_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 4225 ver_msg.tag.vio_subtype_env = VIO_VER_INFO; 4226 ver_msg.tag.vio_sid = ldcp->local_session; 4227 4228 if (vsw_obp_ver_proto_workaround == B_FALSE) { 4229 ver_msg.ver_major = vsw_versions[0].ver_major; 4230 ver_msg.ver_minor = vsw_versions[0].ver_minor; 4231 } else { 4232 /* use the major,minor that we've ack'd */ 4233 lane_t *lpi = &ldcp->lane_in; 4234 ver_msg.ver_major = lpi->ver_major; 4235 ver_msg.ver_minor = lpi->ver_minor; 4236 } 4237 ver_msg.dev_class = VDEV_NETWORK_SWITCH; 4238 4239 lp->lstate |= VSW_VER_INFO_SENT; 4240 lp->ver_major = ver_msg.ver_major; 4241 lp->ver_minor = ver_msg.ver_minor; 4242 4243 DUMP_TAG(ver_msg.tag); 4244 4245 (void) vsw_send_msg(ldcp, &ver_msg, sizeof (vio_ver_msg_t), B_TRUE); 4246 4247 D1(vswp, "%s (%d): exit", __func__, ldcp->ldc_id); 4248 } 4249 4250 static void 4251 vsw_send_attr(vsw_ldc_t *ldcp) 4252 { 4253 vsw_t *vswp = ldcp->ldc_vswp; 4254 lane_t *lp = &ldcp->lane_out; 4255 vnet_attr_msg_t attr_msg; 4256 4257 D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id); 4258 4259 /* 4260 * Subtype is set to INFO by default 4261 */ 4262 attr_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 4263 attr_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 4264 attr_msg.tag.vio_subtype_env = VIO_ATTR_INFO; 4265 attr_msg.tag.vio_sid = ldcp->local_session; 4266 4267 /* payload copied from default settings for lane */ 4268 attr_msg.mtu = lp->mtu; 4269 attr_msg.addr_type = lp->addr_type; 4270 attr_msg.xfer_mode = lp->xfer_mode; 4271 attr_msg.ack_freq = lp->xfer_mode; 4272 attr_msg.options = lp->dring_mode; 4273 4274 READ_ENTER(&vswp->if_lockrw); 4275 attr_msg.addr = vnet_macaddr_strtoul((vswp->if_addr).ether_addr_octet); 4276 RW_EXIT(&vswp->if_lockrw); 4277 4278 ldcp->lane_out.lstate |= VSW_ATTR_INFO_SENT; 4279 4280 DUMP_TAG(attr_msg.tag); 4281 4282 (void) vsw_send_msg(ldcp, &attr_msg, sizeof (vnet_attr_msg_t), B_TRUE); 4283 4284 D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id); 4285 } 4286 4287 static void 4288 vsw_send_dring_info(vsw_ldc_t *ldcp) 4289 { 4290 int msgsize; 4291 void *msg; 4292 vsw_t *vswp = ldcp->ldc_vswp; 4293 vsw_port_t *port = ldcp->ldc_port; 4294 lane_t *lp = &ldcp->lane_out; 4295 vgen_stats_t *statsp = &ldcp->ldc_stats; 4296 4297 D1(vswp, "%s: (%ld) enter", __func__, ldcp->ldc_id); 4298 4299 /* dring mode has been negotiated in attr phase; save in stats */ 4300 statsp->dring_mode = lp->dring_mode; 4301 4302 if (lp->dring_mode == VIO_RX_DRING_DATA) { 4303 /* 4304 * Change the transmit routine for RxDringData mode. 4305 */ 4306 port->transmit = vsw_dringsend_shm; 4307 msg = (void *) vsw_create_rx_dring_info(ldcp); 4308 if (msg == NULL) { 4309 return; 4310 } 4311 msgsize = 4312 VNET_DRING_REG_EXT_MSG_SIZE(lp->dringp->data_ncookies); 4313 ldcp->rcv_thread = thread_create(NULL, 2 * DEFAULTSTKSZ, 4314 vsw_ldc_rcv_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri); 4315 ldcp->rx_dringdata = vsw_process_dringdata_shm; 4316 } else { 4317 msg = (void *) vsw_create_tx_dring_info(ldcp); 4318 if (msg == NULL) { 4319 return; 4320 } 4321 msgsize = sizeof (vio_dring_reg_msg_t); 4322 ldcp->msg_thread = thread_create(NULL, 2 * DEFAULTSTKSZ, 4323 vsw_ldc_msg_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri); 4324 ldcp->rx_dringdata = vsw_process_dringdata; 4325 } 4326 4327 lp->lstate |= VSW_DRING_INFO_SENT; 4328 DUMP_TAG_PTR((vio_msg_tag_t *)msg); 4329 (void) vsw_send_msg(ldcp, msg, msgsize, B_TRUE); 4330 kmem_free(msg, msgsize); 4331 4332 D1(vswp, "%s: (%ld) exit", __func__, ldcp->ldc_id); 4333 } 4334 4335 static void 4336 vsw_send_rdx(vsw_ldc_t *ldcp) 4337 { 4338 vsw_t *vswp = ldcp->ldc_vswp; 4339 vio_rdx_msg_t rdx_msg; 4340 4341 D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id); 4342 4343 rdx_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 4344 rdx_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 4345 rdx_msg.tag.vio_subtype_env = VIO_RDX; 4346 rdx_msg.tag.vio_sid = ldcp->local_session; 4347 4348 ldcp->lane_in.lstate |= VSW_RDX_INFO_SENT; 4349 4350 DUMP_TAG(rdx_msg.tag); 4351 4352 (void) vsw_send_msg(ldcp, &rdx_msg, sizeof (vio_rdx_msg_t), B_TRUE); 4353 4354 D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id); 4355 } 4356 4357 /* 4358 * Remove the specified address from the list of address maintained 4359 * in this port node. 4360 */ 4361 mcst_addr_t * 4362 vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr) 4363 { 4364 vsw_t *vswp = NULL; 4365 vsw_port_t *port = NULL; 4366 mcst_addr_t *prev_p = NULL; 4367 mcst_addr_t *curr_p = NULL; 4368 4369 D1(NULL, "%s: enter : devtype %d : addr 0x%llx", 4370 __func__, devtype, addr); 4371 4372 if (devtype == VSW_VNETPORT) { 4373 port = (vsw_port_t *)arg; 4374 mutex_enter(&port->mca_lock); 4375 prev_p = curr_p = port->mcap; 4376 } else { 4377 vswp = (vsw_t *)arg; 4378 mutex_enter(&vswp->mca_lock); 4379 prev_p = curr_p = vswp->mcap; 4380 } 4381 4382 while (curr_p != NULL) { 4383 if (curr_p->addr == addr) { 4384 D2(NULL, "%s: address found", __func__); 4385 /* match found */ 4386 if (prev_p == curr_p) { 4387 /* list head */ 4388 if (devtype == VSW_VNETPORT) 4389 port->mcap = curr_p->nextp; 4390 else 4391 vswp->mcap = curr_p->nextp; 4392 } else { 4393 prev_p->nextp = curr_p->nextp; 4394 } 4395 break; 4396 } else { 4397 prev_p = curr_p; 4398 curr_p = curr_p->nextp; 4399 } 4400 } 4401 4402 if (devtype == VSW_VNETPORT) 4403 mutex_exit(&port->mca_lock); 4404 else 4405 mutex_exit(&vswp->mca_lock); 4406 4407 D1(NULL, "%s: exit", __func__); 4408 4409 return (curr_p); 4410 } 4411 4412 /* 4413 * Create a ring consisting of just a private portion and link 4414 * it into the list of rings for the outbound lane. 4415 * 4416 * These type of rings are used primarily for temporary data 4417 * storage (i.e. as data buffers). 4418 */ 4419 void 4420 vsw_create_privring(vsw_ldc_t *ldcp) 4421 { 4422 dring_info_t *dp; 4423 vsw_t *vswp = ldcp->ldc_vswp; 4424 4425 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 4426 4427 dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 4428 mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL); 4429 mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL); 4430 ldcp->lane_out.dringp = dp; 4431 4432 /* no public section */ 4433 dp->pub_addr = NULL; 4434 dp->priv_addr = kmem_zalloc( 4435 (sizeof (vsw_private_desc_t) * vsw_num_descriptors), KM_SLEEP); 4436 dp->num_descriptors = vsw_num_descriptors; 4437 4438 if (vsw_setup_tx_dring(ldcp, dp)) { 4439 DERR(vswp, "%s: setup of ring failed", __func__); 4440 vsw_destroy_tx_dring(ldcp); 4441 return; 4442 } 4443 4444 /* haven't used any descriptors yet */ 4445 dp->end_idx = 0; 4446 dp->restart_reqd = B_TRUE; 4447 4448 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 4449 } 4450 4451 /* 4452 * Set the default lane attributes. These are copied into 4453 * the attr msg we send to our peer. If they are not acceptable 4454 * then (currently) the handshake ends. 4455 */ 4456 static void 4457 vsw_set_lane_attr(vsw_t *vswp, lane_t *lp) 4458 { 4459 bzero(lp, sizeof (lane_t)); 4460 4461 READ_ENTER(&vswp->if_lockrw); 4462 ether_copy(&(vswp->if_addr), &(lp->addr)); 4463 RW_EXIT(&vswp->if_lockrw); 4464 4465 lp->mtu = vswp->max_frame_size; 4466 lp->addr_type = ADDR_TYPE_MAC; 4467 lp->xfer_mode = VIO_DRING_MODE_V1_0; 4468 lp->ack_freq = 0; /* for shared mode */ 4469 lp->seq_num = VNET_ISS; 4470 } 4471 4472 /* 4473 * Map the descriptor ring exported by the peer. 4474 */ 4475 static dring_info_t * 4476 vsw_map_dring(vsw_ldc_t *ldcp, void *pkt) 4477 { 4478 dring_info_t *dp = NULL; 4479 lane_t *lp = &ldcp->lane_out; 4480 4481 if (lp->dring_mode == VIO_RX_DRING_DATA) { 4482 /* 4483 * In RxDringData mode, dring that we map in 4484 * becomes our transmit descriptor ring. 4485 */ 4486 dp = vsw_map_tx_dring(ldcp, pkt); 4487 } else { 4488 /* 4489 * In TxDring mode, dring that we map in 4490 * becomes our receive descriptor ring. 4491 */ 4492 dp = vsw_map_rx_dring(ldcp, pkt); 4493 } 4494 return (dp); 4495 } 4496 4497 /* 4498 * Common dring mapping function used in both TxDring and RxDringData modes. 4499 */ 4500 dring_info_t * 4501 vsw_map_dring_cmn(vsw_ldc_t *ldcp, vio_dring_reg_msg_t *dring_pkt) 4502 { 4503 int rv; 4504 dring_info_t *dp; 4505 ldc_mem_info_t minfo; 4506 vsw_t *vswp = ldcp->ldc_vswp; 4507 4508 /* 4509 * If the dring params are unacceptable then we NACK back. 4510 */ 4511 if ((dring_pkt->num_descriptors == 0) || 4512 (dring_pkt->descriptor_size == 0) || 4513 (dring_pkt->ncookies != 1)) { 4514 DERR(vswp, "%s (%lld): invalid dring info", 4515 __func__, ldcp->ldc_id); 4516 return (NULL); 4517 } 4518 4519 dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 4520 4521 dp->num_descriptors = dring_pkt->num_descriptors; 4522 dp->descriptor_size = dring_pkt->descriptor_size; 4523 dp->options = dring_pkt->options; 4524 dp->dring_ncookies = dring_pkt->ncookies; 4525 4526 /* 4527 * Note: should only get one cookie. Enforced in 4528 * the ldc layer. 4529 */ 4530 bcopy(&dring_pkt->cookie[0], &dp->dring_cookie[0], 4531 sizeof (ldc_mem_cookie_t)); 4532 4533 rv = ldc_mem_dring_map(ldcp->ldc_handle, &dp->dring_cookie[0], 4534 dp->dring_ncookies, dp->num_descriptors, dp->descriptor_size, 4535 LDC_DIRECT_MAP, &(dp->dring_handle)); 4536 if (rv != 0) { 4537 goto fail; 4538 } 4539 4540 rv = ldc_mem_dring_info(dp->dring_handle, &minfo); 4541 if (rv != 0) { 4542 goto fail; 4543 } 4544 /* store the address of the ring */ 4545 dp->pub_addr = minfo.vaddr; 4546 4547 /* cache the dring mtype */ 4548 dp->dring_mtype = minfo.mtype; 4549 4550 /* no private section as we are importing */ 4551 dp->priv_addr = NULL; 4552 4553 /* 4554 * Using simple mono increasing int for ident at the moment. 4555 */ 4556 dp->ident = ldcp->next_ident; 4557 ldcp->next_ident++; 4558 4559 /* 4560 * Acknowledge it; we send back a unique dring identifier that 4561 * the sending side will use in future to refer to this 4562 * descriptor ring. 4563 */ 4564 dring_pkt->dring_ident = dp->ident; 4565 4566 return (dp); 4567 fail: 4568 if (dp->dring_handle != NULL) { 4569 (void) ldc_mem_dring_unmap(dp->dring_handle); 4570 } 4571 kmem_free(dp, sizeof (*dp)); 4572 return (NULL); 4573 } 4574 4575 /* 4576 * Unmap the descriptor ring exported by the peer. 4577 */ 4578 static void 4579 vsw_unmap_dring(vsw_ldc_t *ldcp) 4580 { 4581 lane_t *lane_out = &ldcp->lane_out; 4582 4583 if (lane_out->dring_mode == VIO_RX_DRING_DATA) { 4584 vsw_unmap_tx_dring(ldcp); 4585 } else { 4586 vsw_unmap_rx_dring(ldcp); 4587 } 4588 } 4589 4590 /* 4591 * Map the shared memory data buffer area exported by the peer. 4592 * Used in RxDringData mode only. 4593 */ 4594 static int 4595 vsw_map_data(vsw_ldc_t *ldcp, dring_info_t *dp, void *pkt) 4596 { 4597 int rv; 4598 vio_dring_reg_ext_msg_t *emsg; 4599 vio_dring_reg_msg_t *msg = pkt; 4600 uint8_t *buf = (uint8_t *)msg->cookie; 4601 vsw_t *vswp = ldcp->ldc_vswp; 4602 4603 /* skip over dring cookies */ 4604 ASSERT(msg->ncookies == 1); 4605 buf += (msg->ncookies * sizeof (ldc_mem_cookie_t)); 4606 4607 emsg = (vio_dring_reg_ext_msg_t *)buf; 4608 if (emsg->data_ncookies > VNET_DATA_AREA_COOKIES) { 4609 return (1); 4610 } 4611 4612 /* save # of data area cookies */ 4613 dp->data_ncookies = emsg->data_ncookies; 4614 4615 /* save data area size */ 4616 dp->data_sz = emsg->data_area_size; 4617 4618 /* allocate ldc mem handle for data area */ 4619 rv = ldc_mem_alloc_handle(ldcp->ldc_handle, &dp->data_handle); 4620 if (rv != 0) { 4621 cmn_err(CE_WARN, "ldc_mem_alloc_handle failed\n"); 4622 DWARN(vswp, "%s (%lld) ldc_mem_alloc_handle() failed: %d\n", 4623 __func__, ldcp->ldc_id, rv); 4624 return (1); 4625 } 4626 4627 /* map the data area */ 4628 rv = ldc_mem_map(dp->data_handle, emsg->data_cookie, 4629 emsg->data_ncookies, LDC_DIRECT_MAP, LDC_MEM_R, 4630 (caddr_t *)&dp->data_addr, NULL); 4631 if (rv != 0) { 4632 cmn_err(CE_WARN, "ldc_mem_map failed\n"); 4633 DWARN(vswp, "%s (%lld) ldc_mem_map() failed: %d\n", 4634 __func__, ldcp->ldc_id, rv); 4635 return (1); 4636 } 4637 4638 /* allocate memory for data area cookies */ 4639 dp->data_cookie = kmem_zalloc(emsg->data_ncookies * 4640 sizeof (ldc_mem_cookie_t), KM_SLEEP); 4641 4642 /* save data area cookies */ 4643 bcopy(emsg->data_cookie, dp->data_cookie, 4644 emsg->data_ncookies * sizeof (ldc_mem_cookie_t)); 4645 4646 return (0); 4647 } 4648 4649 /* 4650 * Reset and free all the resources associated with the channel. 4651 */ 4652 static void 4653 vsw_free_lane_resources(vsw_ldc_t *ldcp, uint64_t dir) 4654 { 4655 lane_t *lp; 4656 4657 D1(ldcp->ldc_vswp, "%s (%lld): enter", __func__, ldcp->ldc_id); 4658 4659 if (dir == INBOUND) { 4660 D2(ldcp->ldc_vswp, "%s: freeing INBOUND lane" 4661 " of channel %lld", __func__, ldcp->ldc_id); 4662 lp = &ldcp->lane_in; 4663 } else { 4664 D2(ldcp->ldc_vswp, "%s: freeing OUTBOUND lane" 4665 " of channel %lld", __func__, ldcp->ldc_id); 4666 lp = &ldcp->lane_out; 4667 } 4668 4669 lp->lstate = VSW_LANE_INACTIV; 4670 lp->seq_num = VNET_ISS; 4671 4672 if (dir == INBOUND) { 4673 /* Unmap the remote dring which is imported from the peer */ 4674 vsw_unmap_dring(ldcp); 4675 } else { 4676 /* Destroy the local dring which is exported to the peer */ 4677 vsw_destroy_dring(ldcp); 4678 } 4679 4680 D1(ldcp->ldc_vswp, "%s (%lld): exit", __func__, ldcp->ldc_id); 4681 } 4682 4683 /* 4684 * Destroy the descriptor ring. 4685 */ 4686 static void 4687 vsw_destroy_dring(vsw_ldc_t *ldcp) 4688 { 4689 lane_t *lp = &ldcp->lane_out; 4690 4691 if (lp->dring_mode == VIO_RX_DRING_DATA) { 4692 vsw_destroy_rx_dring(ldcp); 4693 } else { 4694 vsw_destroy_tx_dring(ldcp); 4695 } 4696 } 4697 4698 /* 4699 * vsw_ldc_tx_worker -- A per LDC worker thread to transmit data. 4700 * This thread is woken up by the vsw_portsend to transmit 4701 * packets. 4702 */ 4703 static void 4704 vsw_ldc_tx_worker(void *arg) 4705 { 4706 callb_cpr_t cprinfo; 4707 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 4708 vsw_t *vswp = ldcp->ldc_vswp; 4709 mblk_t *mp; 4710 mblk_t *tmp; 4711 4712 D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id); 4713 CALLB_CPR_INIT(&cprinfo, &ldcp->tx_thr_lock, callb_generic_cpr, 4714 "vnet_tx_thread"); 4715 mutex_enter(&ldcp->tx_thr_lock); 4716 while (!(ldcp->tx_thr_flags & VSW_WTHR_STOP)) { 4717 4718 CALLB_CPR_SAFE_BEGIN(&cprinfo); 4719 /* 4720 * Wait until the data is received or a stop 4721 * request is received. 4722 */ 4723 while (!(ldcp->tx_thr_flags & VSW_WTHR_STOP) && 4724 (ldcp->tx_mhead == NULL)) { 4725 cv_wait(&ldcp->tx_thr_cv, &ldcp->tx_thr_lock); 4726 } 4727 CALLB_CPR_SAFE_END(&cprinfo, &ldcp->tx_thr_lock) 4728 4729 /* 4730 * First process the stop request. 4731 */ 4732 if (ldcp->tx_thr_flags & VSW_WTHR_STOP) { 4733 D2(vswp, "%s(%lld):tx thread stopped\n", 4734 __func__, ldcp->ldc_id); 4735 break; 4736 } 4737 mp = ldcp->tx_mhead; 4738 ldcp->tx_mhead = ldcp->tx_mtail = NULL; 4739 ldcp->tx_cnt = 0; 4740 mutex_exit(&ldcp->tx_thr_lock); 4741 D2(vswp, "%s(%lld):calling vsw_ldcsend\n", 4742 __func__, ldcp->ldc_id); 4743 while (mp != NULL) { 4744 tmp = mp->b_next; 4745 mp->b_next = mp->b_prev = NULL; 4746 (void) vsw_ldcsend(ldcp, mp, vsw_ldc_tx_retries); 4747 mp = tmp; 4748 } 4749 mutex_enter(&ldcp->tx_thr_lock); 4750 } 4751 4752 /* 4753 * Update the run status and wakeup the thread that 4754 * has sent the stop request. 4755 */ 4756 ldcp->tx_thr_flags &= ~VSW_WTHR_STOP; 4757 ldcp->tx_thread = NULL; 4758 CALLB_CPR_EXIT(&cprinfo); 4759 D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id); 4760 thread_exit(); 4761 } 4762 4763 /* vsw_stop_tx_thread -- Co-ordinate with receive thread to stop it */ 4764 static void 4765 vsw_stop_tx_thread(vsw_ldc_t *ldcp) 4766 { 4767 kt_did_t tid = 0; 4768 vsw_t *vswp = ldcp->ldc_vswp; 4769 4770 D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id); 4771 /* 4772 * Send a stop request by setting the stop flag and 4773 * wait until the receive thread stops. 4774 */ 4775 mutex_enter(&ldcp->tx_thr_lock); 4776 if (ldcp->tx_thread != NULL) { 4777 tid = ldcp->tx_thread->t_did; 4778 ldcp->tx_thr_flags |= VSW_WTHR_STOP; 4779 cv_signal(&ldcp->tx_thr_cv); 4780 } 4781 mutex_exit(&ldcp->tx_thr_lock); 4782 4783 if (tid != 0) { 4784 thread_join(tid); 4785 } 4786 4787 D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id); 4788 } 4789 4790 /* 4791 * Debugging routines 4792 */ 4793 static void 4794 display_state(void) 4795 { 4796 vsw_t *vswp; 4797 vsw_port_list_t *plist; 4798 vsw_port_t *port; 4799 vsw_ldc_t *ldcp; 4800 extern vsw_t *vsw_head; 4801 4802 cmn_err(CE_NOTE, "***** system state *****"); 4803 4804 for (vswp = vsw_head; vswp; vswp = vswp->next) { 4805 plist = &vswp->plist; 4806 READ_ENTER(&plist->lockrw); 4807 cmn_err(CE_CONT, "vsw instance %d has %d ports attached\n", 4808 vswp->instance, plist->num_ports); 4809 4810 for (port = plist->head; port != NULL; port = port->p_next) { 4811 cmn_err(CE_CONT, "port %d : %d ldcs attached\n", 4812 port->p_instance, port->num_ldcs); 4813 ldcp = port->ldcp; 4814 cmn_err(CE_CONT, "chan %lu : dev %d : " 4815 "status %d : phase %u\n", 4816 ldcp->ldc_id, ldcp->dev_class, 4817 ldcp->ldc_status, ldcp->hphase); 4818 cmn_err(CE_CONT, "chan %lu : lsession %lu : " 4819 "psession %lu\n", ldcp->ldc_id, 4820 ldcp->local_session, ldcp->peer_session); 4821 4822 cmn_err(CE_CONT, "Inbound lane:\n"); 4823 display_lane(&ldcp->lane_in); 4824 cmn_err(CE_CONT, "Outbound lane:\n"); 4825 display_lane(&ldcp->lane_out); 4826 } 4827 RW_EXIT(&plist->lockrw); 4828 } 4829 cmn_err(CE_NOTE, "***** system state *****"); 4830 } 4831 4832 static void 4833 display_lane(lane_t *lp) 4834 { 4835 dring_info_t *drp = lp->dringp; 4836 4837 cmn_err(CE_CONT, "ver 0x%x:0x%x : state %lx : mtu 0x%lx\n", 4838 lp->ver_major, lp->ver_minor, lp->lstate, lp->mtu); 4839 cmn_err(CE_CONT, "addr_type %d : addr 0x%lx : xmode %d\n", 4840 lp->addr_type, lp->addr, lp->xfer_mode); 4841 cmn_err(CE_CONT, "dringp 0x%lx\n", (uint64_t)lp->dringp); 4842 4843 cmn_err(CE_CONT, "Dring info:\n"); 4844 cmn_err(CE_CONT, "\tnum_desc %u : dsize %u\n", 4845 drp->num_descriptors, drp->descriptor_size); 4846 cmn_err(CE_CONT, "\thandle 0x%lx\n", drp->dring_handle); 4847 cmn_err(CE_CONT, "\tpub_addr 0x%lx : priv_addr 0x%lx\n", 4848 (uint64_t)drp->pub_addr, (uint64_t)drp->priv_addr); 4849 cmn_err(CE_CONT, "\tident 0x%lx : end_idx %lu\n", 4850 drp->ident, drp->end_idx); 4851 display_ring(drp); 4852 } 4853 4854 static void 4855 display_ring(dring_info_t *dringp) 4856 { 4857 uint64_t i; 4858 uint64_t priv_count = 0; 4859 uint64_t pub_count = 0; 4860 vnet_public_desc_t *pub_addr = NULL; 4861 vsw_private_desc_t *priv_addr = NULL; 4862 4863 for (i = 0; i < vsw_num_descriptors; i++) { 4864 if (dringp->pub_addr != NULL) { 4865 pub_addr = (vnet_public_desc_t *)dringp->pub_addr + i; 4866 4867 if (pub_addr->hdr.dstate == VIO_DESC_FREE) 4868 pub_count++; 4869 } 4870 4871 if (dringp->priv_addr != NULL) { 4872 priv_addr = (vsw_private_desc_t *)dringp->priv_addr + i; 4873 4874 if (priv_addr->dstate == VIO_DESC_FREE) 4875 priv_count++; 4876 } 4877 } 4878 cmn_err(CE_CONT, "\t%lu elements: %lu priv free: %lu pub free\n", 4879 i, priv_count, pub_count); 4880 } 4881 4882 static void 4883 dump_flags(uint64_t state) 4884 { 4885 int i; 4886 4887 typedef struct flag_name { 4888 int flag_val; 4889 char *flag_name; 4890 } flag_name_t; 4891 4892 flag_name_t flags[] = { 4893 VSW_VER_INFO_SENT, "VSW_VER_INFO_SENT", 4894 VSW_VER_INFO_RECV, "VSW_VER_INFO_RECV", 4895 VSW_VER_ACK_RECV, "VSW_VER_ACK_RECV", 4896 VSW_VER_ACK_SENT, "VSW_VER_ACK_SENT", 4897 VSW_VER_NACK_RECV, "VSW_VER_NACK_RECV", 4898 VSW_VER_NACK_SENT, "VSW_VER_NACK_SENT", 4899 VSW_ATTR_INFO_SENT, "VSW_ATTR_INFO_SENT", 4900 VSW_ATTR_INFO_RECV, "VSW_ATTR_INFO_RECV", 4901 VSW_ATTR_ACK_SENT, "VSW_ATTR_ACK_SENT", 4902 VSW_ATTR_ACK_RECV, "VSW_ATTR_ACK_RECV", 4903 VSW_ATTR_NACK_SENT, "VSW_ATTR_NACK_SENT", 4904 VSW_ATTR_NACK_RECV, "VSW_ATTR_NACK_RECV", 4905 VSW_DRING_INFO_SENT, "VSW_DRING_INFO_SENT", 4906 VSW_DRING_INFO_RECV, "VSW_DRING_INFO_RECV", 4907 VSW_DRING_ACK_SENT, "VSW_DRING_ACK_SENT", 4908 VSW_DRING_ACK_RECV, "VSW_DRING_ACK_RECV", 4909 VSW_DRING_NACK_SENT, "VSW_DRING_NACK_SENT", 4910 VSW_DRING_NACK_RECV, "VSW_DRING_NACK_RECV", 4911 VSW_RDX_INFO_SENT, "VSW_RDX_INFO_SENT", 4912 VSW_RDX_INFO_RECV, "VSW_RDX_INFO_RECV", 4913 VSW_RDX_ACK_SENT, "VSW_RDX_ACK_SENT", 4914 VSW_RDX_ACK_RECV, "VSW_RDX_ACK_RECV", 4915 VSW_RDX_NACK_SENT, "VSW_RDX_NACK_SENT", 4916 VSW_RDX_NACK_RECV, "VSW_RDX_NACK_RECV", 4917 VSW_MCST_INFO_SENT, "VSW_MCST_INFO_SENT", 4918 VSW_MCST_INFO_RECV, "VSW_MCST_INFO_RECV", 4919 VSW_MCST_ACK_SENT, "VSW_MCST_ACK_SENT", 4920 VSW_MCST_ACK_RECV, "VSW_MCST_ACK_RECV", 4921 VSW_MCST_NACK_SENT, "VSW_MCST_NACK_SENT", 4922 VSW_MCST_NACK_RECV, "VSW_MCST_NACK_RECV", 4923 VSW_LANE_ACTIVE, "VSW_LANE_ACTIVE"}; 4924 4925 DERR(NULL, "DUMP_FLAGS: %llx\n", state); 4926 for (i = 0; i < sizeof (flags)/sizeof (flag_name_t); i++) { 4927 if (state & flags[i].flag_val) 4928 DERR(NULL, "DUMP_FLAGS %s", flags[i].flag_name); 4929 } 4930 } 4931