/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/debug.h>
#include <sys/time.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/user.h>
#include <sys/stropts.h>
#include <sys/stream.h>
#include <sys/strlog.h>
#include <sys/strsubr.h>
#include <sys/cmn_err.h>
#include <sys/cpu.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ksynch.h>
#include <sys/stat.h>
#include <sys/kstat.h>
#include <sys/vtrace.h>
#include <sys/strsun.h>
#include <sys/dlpi.h>
#include <sys/ethernet.h>
#include <net/if.h>
#include <sys/varargs.h>
#include <sys/machsystm.h>
#include <sys/modctl.h>
#include <sys/modhash.h>
#include <sys/mac.h>
#include <sys/mac_ether.h>
#include <sys/taskq.h>
#include <sys/note.h>
#include <sys/mach_descrip.h>
#include <sys/mdeg.h>
#include <sys/ldc.h>
#include <sys/vsw_fdb.h>
#include <sys/vsw.h>
#include <sys/vio_mailbox.h>
#include <sys/vnet_mailbox.h>
#include <sys/vnet_common.h>
#include <sys/vio_util.h>
#include <sys/sdt.h>
#include <sys/atomic.h>
#include <sys/callb.h>
#include <sys/vlan.h>

/* Port add/deletion/etc routines */
static void vsw_port_delete(vsw_port_t *port);
static int vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id);
static void vsw_ldc_detach(vsw_ldc_t *ldcp);
static int vsw_ldc_init(vsw_ldc_t *ldcp);
static void vsw_ldc_uninit(vsw_ldc_t *ldcp);
static void vsw_ldc_drain(vsw_ldc_t *ldcp);
static void vsw_drain_port_taskq(vsw_port_t *port);
static void vsw_marker_task(void *);
static int vsw_plist_del_node(vsw_t *, vsw_port_t *port);
void vsw_detach_ports(vsw_t *vswp);
int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node);
mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr);
int vsw_port_detach(vsw_t *vswp, int p_instance);
int vsw_portsend(vsw_port_t *port, mblk_t *mp);
int vsw_port_attach(vsw_port_t *portp);
vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance);
void vsw_vlan_unaware_port_reset(vsw_port_t *portp);
void vsw_hio_port_reset(vsw_port_t *portp, boolean_t immediate);
void vsw_reset_ports(vsw_t *vswp);
void vsw_port_reset(vsw_port_t *portp);
void vsw_physlink_update_ports(vsw_t *vswp);
static void vsw_port_physlink_update(vsw_port_t *portp);

/* Interrupt routines */
static uint_t vsw_ldc_cb(uint64_t cb, caddr_t arg);
/* Handshake routines */
static void vsw_ldc_reinit(vsw_ldc_t *);
static void vsw_conn_task(void *);
static int vsw_check_flag(vsw_ldc_t *, int, uint64_t);
static void vsw_next_milestone(vsw_ldc_t *);
static int vsw_supported_version(vio_ver_msg_t *);
static void vsw_set_vnet_proto_ops(vsw_ldc_t *ldcp);
static void vsw_reset_vnet_proto_ops(vsw_ldc_t *ldcp);
void vsw_process_conn_evt(vsw_ldc_t *, uint16_t);

/* Data processing routines */
void vsw_process_pkt(void *);
static void vsw_dispatch_ctrl_task(vsw_ldc_t *, void *, vio_msg_tag_t *, int);
static void vsw_process_ctrl_pkt(void *);
static void vsw_process_ctrl_ver_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_attr_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_mcst_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_rdx_pkt(vsw_ldc_t *, void *);
static void vsw_process_physlink_msg(vsw_ldc_t *, void *);
static void vsw_process_data_pkt(vsw_ldc_t *, void *, vio_msg_tag_t *,
    uint32_t);
static void vsw_process_pkt_data_nop(void *, void *, uint32_t);
static void vsw_process_pkt_data(void *, void *, uint32_t);
static void vsw_process_data_ibnd_pkt(vsw_ldc_t *, void *);
static void vsw_process_err_pkt(vsw_ldc_t *, void *, vio_msg_tag_t *);
static void vsw_process_evt_read(vsw_ldc_t *ldcp);
static void vsw_ldc_rcv(vsw_ldc_t *ldcp);

/* Switching/data transmit routines */
static int vsw_descrsend(vsw_ldc_t *, mblk_t *);
static void vsw_ldcsend_pkt(vsw_ldc_t *ldcp, mblk_t *mp);
static int vsw_ldcsend(vsw_ldc_t *ldcp, mblk_t *mp, uint32_t retries);
static int vsw_ldctx_pri(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count);
static int vsw_ldctx(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count);

/* Packet creation routines */
static void vsw_send_ver(void *);
static void vsw_send_attr(vsw_ldc_t *);
static void vsw_send_dring_info(vsw_ldc_t *);
static void vsw_send_rdx(vsw_ldc_t *);
static void vsw_send_physlink_msg(vsw_ldc_t *ldcp, link_state_t plink_state);

/* Dring routines */
static void vsw_create_privring(vsw_ldc_t *);
static dring_info_t *vsw_map_dring(vsw_ldc_t *ldcp, void *pkt);
static void vsw_unmap_dring(vsw_ldc_t *ldcp);
static void vsw_destroy_dring(vsw_ldc_t *ldcp);
static void vsw_free_lane_resources(vsw_ldc_t *, uint64_t);
static int vsw_map_data(vsw_ldc_t *ldcp, dring_info_t *dp, void *pkt);
static void vsw_set_lane_attr(vsw_t *, lane_t *);
dring_info_t *vsw_map_dring_cmn(vsw_ldc_t *ldcp,
    vio_dring_reg_msg_t *dring_pkt);

/* tx/msg/rcv thread routines */
static void vsw_stop_tx_thread(vsw_ldc_t *ldcp);
static void vsw_ldc_tx_worker(void *arg);

/* Misc support routines */
static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr);
static int vsw_get_same_dest_list(struct ether_header *ehp,
    mblk_t **rhead, mblk_t **rtail, mblk_t **mpp);
static mblk_t *vsw_dupmsgchain(mblk_t *mp);

/* Debugging routines */
static void dump_flags(uint64_t);
static void display_state(void);
static void display_lane(lane_t *);
static void display_ring(dring_info_t *);
/*
 * Functions imported from other files.
 */
extern int vsw_set_hw(vsw_t *, vsw_port_t *, int);
extern void vsw_unset_hw(vsw_t *, vsw_port_t *, int);
extern int vsw_add_rem_mcst(vnet_mcast_msg_t *mcst_pkt, vsw_port_t *port);
extern void vsw_del_mcst_port(vsw_port_t *port);
extern int vsw_add_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg);
extern int vsw_del_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg);
extern void vsw_fdbe_add(vsw_t *vswp, void *port);
extern void vsw_fdbe_del(vsw_t *vswp, struct ether_addr *eaddr);
extern void vsw_create_vlans(void *arg, int type);
extern void vsw_destroy_vlans(void *arg, int type);
extern void vsw_vlan_add_ids(void *arg, int type);
extern void vsw_vlan_remove_ids(void *arg, int type);
extern boolean_t vsw_frame_lookup_vid(void *arg, int caller,
    struct ether_header *ehp, uint16_t *vidp);
extern mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp);
extern uint32_t vsw_vlan_frame_untag(void *arg, int type, mblk_t **np,
    mblk_t **npt);
extern boolean_t vsw_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid);
extern void vsw_hio_start(vsw_t *vswp, vsw_ldc_t *ldcp);
extern void vsw_hio_stop(vsw_t *vswp, vsw_ldc_t *ldcp);
extern void vsw_process_dds_msg(vsw_t *vswp, vsw_ldc_t *ldcp, void *msg);
extern void vsw_hio_stop_port(vsw_port_t *portp);
extern void vsw_publish_macaddr(vsw_t *vswp, vsw_port_t *portp);
extern int vsw_mac_client_init(vsw_t *vswp, vsw_port_t *port, int type);
extern void vsw_mac_client_cleanup(vsw_t *vswp, vsw_port_t *port, int type);
extern void vsw_destroy_rxpools(void *arg);
extern void vsw_stop_msg_thread(vsw_ldc_t *ldcp);
extern int vsw_send_msg(vsw_ldc_t *, void *, int, boolean_t);
extern int vsw_dringsend(vsw_ldc_t *, mblk_t *);
extern int vsw_reclaim_dring(dring_info_t *dp, int start);
extern int vsw_dring_find_free_desc(dring_info_t *, vsw_private_desc_t **,
    int *);
extern vio_dring_reg_msg_t *vsw_create_tx_dring_info(vsw_ldc_t *);
extern int vsw_setup_tx_dring(vsw_ldc_t *ldcp, dring_info_t *dp);
extern void vsw_destroy_tx_dring(vsw_ldc_t *ldcp);
extern dring_info_t *vsw_map_rx_dring(vsw_ldc_t *ldcp, void *pkt);
extern void vsw_unmap_rx_dring(vsw_ldc_t *ldcp);
extern void vsw_ldc_msg_worker(void *arg);
extern void vsw_process_dringdata(void *, void *);
extern vio_dring_reg_msg_t *vsw_create_rx_dring_info(vsw_ldc_t *);
extern void vsw_destroy_rx_dring(vsw_ldc_t *ldcp);
extern dring_info_t *vsw_map_tx_dring(vsw_ldc_t *ldcp, void *pkt);
extern void vsw_unmap_tx_dring(vsw_ldc_t *ldcp);
extern void vsw_ldc_rcv_worker(void *arg);
extern void vsw_stop_rcv_thread(vsw_ldc_t *ldcp);
extern int vsw_dringsend_shm(vsw_ldc_t *, mblk_t *);
extern void vsw_process_dringdata_shm(void *, void *);

/*
 * Tunables used in this file.
 */
extern int vsw_num_handshakes;
extern int vsw_ldc_tx_delay;
extern int vsw_ldc_tx_retries;
extern int vsw_ldc_retries;
extern int vsw_ldc_delay;
extern boolean_t vsw_ldc_rxthr_enabled;
extern boolean_t vsw_ldc_txthr_enabled;
extern uint32_t vsw_num_descriptors;
extern uint8_t vsw_dring_mode;
extern uint32_t vsw_max_tx_qcount;
extern boolean_t vsw_obp_ver_proto_workaround;
extern uint32_t vsw_publish_macaddr_count;

#define	LDC_ENTER_LOCK(ldcp)	\
	mutex_enter(&((ldcp)->ldc_cblock));	\
	mutex_enter(&((ldcp)->ldc_rxlock));	\
	mutex_enter(&((ldcp)->ldc_txlock));

#define	LDC_EXIT_LOCK(ldcp)	\
	mutex_exit(&((ldcp)->ldc_txlock));	\
	mutex_exit(&((ldcp)->ldc_rxlock));	\
	mutex_exit(&((ldcp)->ldc_cblock));
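/*
 * The macros above encode the per-channel lock ordering: ldc_cblock is
 * always taken before ldc_rxlock, which is taken before ldc_txlock,
 * and the locks are dropped in the reverse order. Any code that needs
 * more than one of these locks should use LDC_ENTER_LOCK()/
 * LDC_EXIT_LOCK(), or at least follow the same ordering, to avoid
 * deadlocking against callers of these macros.
 */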
#define	VSW_VER_EQ(ldcp, major, minor)	\
	((ldcp)->lane_out.ver_major == (major) &&	\
	    (ldcp)->lane_out.ver_minor == (minor))

#define	VSW_VER_LT(ldcp, major, minor)	\
	(((ldcp)->lane_out.ver_major < (major)) ||	\
	    ((ldcp)->lane_out.ver_major == (major) &&	\
	    (ldcp)->lane_out.ver_minor < (minor)))

#define	VSW_VER_GTEQ(ldcp, major, minor)	\
	(((ldcp)->lane_out.ver_major > (major)) ||	\
	    ((ldcp)->lane_out.ver_major == (major) &&	\
	    (ldcp)->lane_out.ver_minor >= (minor)))

#define	VSW_VER_LTEQ(ldcp, major, minor)	\
	(((ldcp)->lane_out.ver_major < (major)) ||	\
	    ((ldcp)->lane_out.ver_major == (major) &&	\
	    (ldcp)->lane_out.ver_minor <= (minor)))

/*
 * VIO Protocol Version Info:
 *
 * The version specified below represents the version of protocol currently
 * supported in the driver. It means the driver can negotiate with peers with
 * versions <= this version. Here is a summary of the feature(s) that are
 * supported at each version of the protocol:
 *
 * 1.0	Basic VIO protocol.
 * 1.1	vDisk protocol update (no virtual network update).
 * 1.2	Support for priority frames (priority-ether-types).
 * 1.3	VLAN and HybridIO support.
 * 1.4	Jumbo Frame support.
 * 1.5	Link State Notification support with optional support
 *	for Physical Link information.
 * 1.6	Support for RxDringData mode.
 */
static ver_sup_t vsw_versions[] = { {1, 6} };
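/*
 * vsw_versions[] is scanned by vsw_supported_version() (later in this
 * file) when a peer's VER_INFO message arrives. With the single {1, 6}
 * entry above, negotiation works out as follows:
 *
 *	peer proposes 1.7  ->  minor clamped to 1.6, accepted (returns 0)
 *	peer proposes 2.0  ->  counter-proposal of 1.6 (returns 1)
 *	peer proposes 0.x  ->  no match, version zeroed out (returns 1)
 */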
/*
 * For the moment the state dump routines have their own
 * private flag.
 */
#define	DUMP_STATE	0

#if DUMP_STATE

#define	DUMP_TAG(tag) \
{ \
	D1(NULL, "DUMP_TAG: type 0x%llx", (tag).vio_msgtype); \
	D1(NULL, "DUMP_TAG: stype 0x%llx", (tag).vio_subtype); \
	D1(NULL, "DUMP_TAG: senv 0x%llx", (tag).vio_subtype_env); \
}

#define	DUMP_TAG_PTR(tag) \
{ \
	D1(NULL, "DUMP_TAG: type 0x%llx", (tag)->vio_msgtype); \
	D1(NULL, "DUMP_TAG: stype 0x%llx", (tag)->vio_subtype); \
	D1(NULL, "DUMP_TAG: senv 0x%llx", (tag)->vio_subtype_env); \
}

#define	DUMP_FLAGS(flags)	dump_flags(flags);
#define	DISPLAY_STATE()		display_state()

#else

#define	DUMP_TAG(tag)
#define	DUMP_TAG_PTR(tag)
#define	DUMP_FLAGS(state)
#define	DISPLAY_STATE()

#endif	/* DUMP_STATE */

/*
 * Attach the specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
int
vsw_port_attach(vsw_port_t *port)
{
	vsw_t		*vswp = port->p_vswp;
	vsw_port_list_t	*plist = &vswp->plist;
	vsw_port_t	*p, **pp;
	int		nids = port->num_ldcs;
	uint64_t	*ldcids;
	int		rv;

	D1(vswp, "%s: enter : port %d", __func__, port->p_instance);

	/* port already exists? */
	READ_ENTER(&plist->lockrw);
	for (p = plist->head; p != NULL; p = p->p_next) {
		if (p->p_instance == port->p_instance) {
			DWARN(vswp, "%s: port instance %d already attached",
			    __func__, p->p_instance);
			RW_EXIT(&plist->lockrw);
			return (1);
		}
	}
	RW_EXIT(&plist->lockrw);

	mutex_init(&port->tx_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&port->mca_lock, NULL, MUTEX_DRIVER, NULL);
	rw_init(&port->maccl_rwlock, NULL, RW_DRIVER, NULL);

	mutex_init(&port->state_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&port->state_cv, NULL, CV_DRIVER, NULL);
	port->state = VSW_PORT_INIT;

	D2(vswp, "%s: %d nids", __func__, nids);
	ldcids = port->ldc_ids;
	D2(vswp, "%s: ldcid (%llx)", __func__, (uint64_t)ldcids[0]);
	if (vsw_ldc_attach(port, (uint64_t)ldcids[0]) != 0) {
		DERR(vswp, "%s: ldc_attach failed", __func__);
		goto exit_error;
	}

	if (vswp->switching_setup_done == B_TRUE) {
		/*
		 * If the underlying network device has been setup,
		 * then open a mac client and program the mac address
		 * for this port.
		 */
		rv = vsw_mac_client_init(vswp, port, VSW_VNETPORT);
		if (rv != 0) {
			goto exit_error;
		}
	}

	/* create the fdb entry for this port/mac address */
	vsw_fdbe_add(vswp, port);

	vsw_create_vlans(port, VSW_VNETPORT);

	WRITE_ENTER(&plist->lockrw);

	/* link it into the list of ports for this vsw instance */
	pp = (vsw_port_t **)(&plist->head);
	port->p_next = *pp;
	*pp = port;
	plist->num_ports++;

	RW_EXIT(&plist->lockrw);

	/*
	 * Initialise the port and any ldc's under it.
	 */
	(void) vsw_ldc_init(port->ldcp);

	/* announce macaddr of vnet to the physical switch */
	if (vsw_publish_macaddr_count != 0) {	/* enabled */
		vsw_publish_macaddr(vswp, port);
	}

	D1(vswp, "%s: exit", __func__);
	return (0);

exit_error:

	cv_destroy(&port->state_cv);
	mutex_destroy(&port->state_lock);

	rw_destroy(&port->maccl_rwlock);
	mutex_destroy(&port->tx_lock);
	mutex_destroy(&port->mca_lock);
	kmem_free(port, sizeof (vsw_port_t));
	return (1);
}

/*
 * Detach the specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
int
vsw_port_detach(vsw_t *vswp, int p_instance)
{
	vsw_port_t	*port = NULL;
	vsw_port_list_t	*plist = &vswp->plist;

	D1(vswp, "%s: enter: port id %d", __func__, p_instance);

	WRITE_ENTER(&plist->lockrw);

	if ((port = vsw_lookup_port(vswp, p_instance)) == NULL) {
		RW_EXIT(&plist->lockrw);
		return (1);
	}

	if (vsw_plist_del_node(vswp, port)) {
		RW_EXIT(&plist->lockrw);
		return (1);
	}

	/* cleanup any HybridIO for this port */
	vsw_hio_stop_port(port);

	/*
	 * No longer need to hold writer lock on port list now
	 * that we have unlinked the target port from the list.
	 */
	RW_EXIT(&plist->lockrw);

	/* Cleanup and close the mac client */
	vsw_mac_client_cleanup(vswp, port, VSW_VNETPORT);

	/* Remove the fdb entry for this port/mac address */
	vsw_fdbe_del(vswp, &(port->p_macaddr));
	vsw_destroy_vlans(port, VSW_VNETPORT);

	/* Remove any multicast addresses.. */
	vsw_del_mcst_port(port);

	vsw_port_delete(port);

	D1(vswp, "%s: exit: p_instance(%d)", __func__, p_instance);
	return (0);
}
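/*
 * Note on the surrounding lifecycle: vsw_port_add() (see the prototype
 * above; its body lives elsewhere in the driver) is handed the port's
 * machine description node (md_t/mde_cookie_t), builds the vsw_port_t,
 * and ends up calling vsw_port_attach(); vsw_port_detach() is the
 * inverse path taken when the port node goes away. This is a summary of
 * the intended flow rather than a statement about code in this file.
 */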
/*
 * Detach all active ports.
 */
void
vsw_detach_ports(vsw_t *vswp)
{
	vsw_port_list_t	*plist = &vswp->plist;
	vsw_port_t	*port = NULL;

	D1(vswp, "%s: enter", __func__);

	WRITE_ENTER(&plist->lockrw);

	while ((port = plist->head) != NULL) {
		(void) vsw_plist_del_node(vswp, port);

		/* cleanup any HybridIO for this port */
		vsw_hio_stop_port(port);

		/* Cleanup and close the mac client */
		vsw_mac_client_cleanup(vswp, port, VSW_VNETPORT);

		/* Remove the fdb entry for this port/mac address */
		vsw_fdbe_del(vswp, &(port->p_macaddr));
		vsw_destroy_vlans(port, VSW_VNETPORT);

		/* Remove any multicast addresses.. */
		vsw_del_mcst_port(port);

		/*
		 * No longer need to hold the lock on the port list
		 * now that we have unlinked the target port from the
		 * list.
		 */
		RW_EXIT(&plist->lockrw);
		vsw_port_delete(port);
		WRITE_ENTER(&plist->lockrw);
	}
	RW_EXIT(&plist->lockrw);

	D1(vswp, "%s: exit", __func__);
}

/*
 * Delete the specified port.
 */
static void
vsw_port_delete(vsw_port_t *port)
{
	vsw_t	*vswp = port->p_vswp;

	D1(vswp, "%s: enter : port id %d", __func__, port->p_instance);

	vsw_ldc_uninit(port->ldcp);

	/*
	 * Wait for any pending ctrl msg tasks which reference this
	 * port to finish.
	 */
	vsw_drain_port_taskq(port);

	/*
	 * Wait for any active callbacks to finish
	 */
	vsw_ldc_drain(port->ldcp);

	vsw_ldc_detach(port->ldcp);

	rw_destroy(&port->maccl_rwlock);
	mutex_destroy(&port->mca_lock);
	mutex_destroy(&port->tx_lock);

	cv_destroy(&port->state_cv);
	mutex_destroy(&port->state_lock);

	if (port->num_ldcs != 0) {
		kmem_free(port->ldc_ids, port->num_ldcs * sizeof (uint64_t));
		port->num_ldcs = 0;
	}

	if (port->nvids != 0) {
		kmem_free(port->vids, sizeof (vsw_vlanid_t) * port->nvids);
	}

	kmem_free(port, sizeof (vsw_port_t));

	D1(vswp, "%s: exit", __func__);
}
/*
 * Attach a logical domain channel (ldc) under a specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id)
{
	vsw_t		*vswp = port->p_vswp;
	vsw_ldc_t	*ldcp = NULL;
	ldc_attr_t	attr;
	ldc_status_t	istatus;
	int		status = DDI_FAILURE;
	char		kname[MAXNAMELEN];
	enum		{ PROG_init = 0x0,
			    PROG_callback = 0x1,
			    PROG_tx_thread = 0x2 } progress;

	progress = PROG_init;

	D1(vswp, "%s: enter", __func__);

	ldcp = kmem_zalloc(sizeof (vsw_ldc_t), KM_NOSLEEP);
	if (ldcp == NULL) {
		DERR(vswp, "%s: kmem_zalloc failed", __func__);
		return (1);
	}
	ldcp->ldc_id = ldc_id;

	mutex_init(&ldcp->ldc_txlock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->ldc_rxlock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->ldc_cblock, NULL, MUTEX_DRIVER, NULL);
	ldcp->msg_thr_flags = 0;
	mutex_init(&ldcp->msg_thr_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&ldcp->msg_thr_cv, NULL, CV_DRIVER, NULL);
	ldcp->rcv_thr_flags = 0;
	mutex_init(&ldcp->rcv_thr_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&ldcp->rcv_thr_cv, NULL, CV_DRIVER, NULL);
	mutex_init(&ldcp->drain_cv_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&ldcp->drain_cv, NULL, CV_DRIVER, NULL);

	/* required for handshake with peer */
	ldcp->local_session = (uint64_t)ddi_get_lbolt();
	ldcp->peer_session = 0;
	ldcp->session_status = 0;
	ldcp->hss_id = 1;	/* Initial handshake session id */
	ldcp->hphase = VSW_MILESTONE0;

	(void) atomic_swap_32(&port->p_hio_capable, B_FALSE);

	/* only set for outbound lane, inbound set by peer */
	vsw_set_lane_attr(vswp, &ldcp->lane_out);

	attr.devclass = LDC_DEV_NT_SVC;
	attr.instance = ddi_get_instance(vswp->dip);
	attr.mode = LDC_MODE_UNRELIABLE;
	attr.mtu = VSW_LDC_MTU;
	status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
	if (status != 0) {
		DERR(vswp, "%s(%lld): ldc_init failed, rv (%d)",
		    __func__, ldc_id, status);
		goto ldc_attach_fail;
	}

	if (vsw_ldc_txthr_enabled) {
		ldcp->tx_thr_flags = 0;
		ldcp->tx_mhead = ldcp->tx_mtail = NULL;

		mutex_init(&ldcp->tx_thr_lock, NULL, MUTEX_DRIVER, NULL);
		cv_init(&ldcp->tx_thr_cv, NULL, CV_DRIVER, NULL);
		ldcp->tx_thread = thread_create(NULL, 2 * DEFAULTSTKSZ,
		    vsw_ldc_tx_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri);

		progress |= PROG_tx_thread;
		if (ldcp->tx_thread == NULL) {
			DWARN(vswp, "%s(%lld): Failed to create worker thread",
			    __func__, ldc_id);
			goto ldc_attach_fail;
		}
	}

	status = ldc_reg_callback(ldcp->ldc_handle, vsw_ldc_cb, (caddr_t)ldcp);
	if (status != 0) {
		DERR(vswp, "%s(%lld): ldc_reg_callback failed, rv (%d)",
		    __func__, ldc_id, status);
		(void) ldc_fini(ldcp->ldc_handle);
		goto ldc_attach_fail;
	}
	/*
	 * allocate a message for ldc_read()s, big enough to hold ctrl and
	 * data msgs, including raw data msgs used to recv priority frames.
	 */
	ldcp->msglen = VIO_PKT_DATA_HDRSIZE + vswp->max_frame_size;
	ldcp->ldcmsg = kmem_alloc(ldcp->msglen, KM_SLEEP);

	progress |= PROG_callback;

	mutex_init(&ldcp->status_lock, NULL, MUTEX_DRIVER, NULL);

	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
		DERR(vswp, "%s: ldc_status failed", __func__);
		mutex_destroy(&ldcp->status_lock);
		goto ldc_attach_fail;
	}

	ldcp->ldc_status = istatus;
	ldcp->ldc_port = port;
	ldcp->ldc_vswp = vswp;

	vsw_reset_vnet_proto_ops(ldcp);

	(void) sprintf(kname, "%sldc0x%lx", DRV_NAME, ldcp->ldc_id);
	ldcp->ksp = vgen_setup_kstats(DRV_NAME, vswp->instance,
	    kname, &ldcp->ldc_stats);
	if (ldcp->ksp == NULL) {
		DERR(vswp, "%s: kstats setup failed", __func__);
		goto ldc_attach_fail;
	}

	/* link it into this port */
	port->ldcp = ldcp;

	D1(vswp, "%s: exit", __func__);
	return (0);

ldc_attach_fail:

	if (progress & PROG_callback) {
		(void) ldc_unreg_callback(ldcp->ldc_handle);
		kmem_free(ldcp->ldcmsg, ldcp->msglen);
	}

	if (progress & PROG_tx_thread) {
		if (ldcp->tx_thread != NULL) {
			vsw_stop_tx_thread(ldcp);
		}
		mutex_destroy(&ldcp->tx_thr_lock);
		cv_destroy(&ldcp->tx_thr_cv);
	}
	if (ldcp->ksp != NULL) {
		vgen_destroy_kstats(ldcp->ksp);
	}
	mutex_destroy(&ldcp->msg_thr_lock);
	mutex_destroy(&ldcp->rcv_thr_lock);
	mutex_destroy(&ldcp->ldc_txlock);
	mutex_destroy(&ldcp->ldc_rxlock);
	mutex_destroy(&ldcp->ldc_cblock);
	mutex_destroy(&ldcp->drain_cv_lock);
	cv_destroy(&ldcp->msg_thr_cv);
	cv_destroy(&ldcp->rcv_thr_cv);
	cv_destroy(&ldcp->drain_cv);

	kmem_free(ldcp, sizeof (vsw_ldc_t));

	return (1);
}
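/*
 * The 'progress' bitmask above implements a staged-unwind error path:
 * each optional resource records a bit once it is set up, and the
 * ldc_attach_fail label only tears down what the bits say was built.
 * The same shape can be reused for new per-channel resources, e.g.:
 *
 *	progress |= PROG_new_resource;
 *	...
 * ldc_attach_fail:
 *	if (progress & PROG_new_resource)
 *		<undo it>;
 *
 * (PROG_new_resource is purely illustrative, not an existing flag.)
 */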
/*
 * Detach a logical domain channel (ldc) belonging to a
 * particular port.
 */
static void
vsw_ldc_detach(vsw_ldc_t *ldcp)
{
	int	rv;
	vsw_t	*vswp = ldcp->ldc_port->p_vswp;
	int	retries = 0;

	D2(vswp, "%s: detaching channel %lld", __func__, ldcp->ldc_id);

	/* Stop msg/rcv thread */
	if (ldcp->rcv_thread != NULL) {
		vsw_stop_rcv_thread(ldcp);
	} else if (ldcp->msg_thread != NULL) {
		vsw_stop_msg_thread(ldcp);
	}
	kmem_free(ldcp->ldcmsg, ldcp->msglen);

	/* Stop the tx thread */
	if (ldcp->tx_thread != NULL) {
		vsw_stop_tx_thread(ldcp);
		mutex_destroy(&ldcp->tx_thr_lock);
		cv_destroy(&ldcp->tx_thr_cv);
		if (ldcp->tx_mhead != NULL) {
			freemsgchain(ldcp->tx_mhead);
			ldcp->tx_mhead = ldcp->tx_mtail = NULL;
			ldcp->tx_cnt = 0;
		}
	}

	/* Destroy kstats */
	vgen_destroy_kstats(ldcp->ksp);

	/*
	 * Before we can close the channel we must release any mapped
	 * resources (e.g. drings).
	 */
	vsw_free_lane_resources(ldcp, INBOUND);
	vsw_free_lane_resources(ldcp, OUTBOUND);

	/*
	 * Close the channel, retry on EAGAIN.
	 */
	while ((rv = ldc_close(ldcp->ldc_handle)) == EAGAIN) {
		if (++retries > vsw_ldc_retries) {
			break;
		}
		drv_usecwait(vsw_ldc_delay);
	}
	if (rv != 0) {
		cmn_err(CE_NOTE,
		    "!vsw%d: Error(%d) closing the channel(0x%lx)\n",
		    vswp->instance, rv, ldcp->ldc_id);
	}

	(void) ldc_fini(ldcp->ldc_handle);

	ldcp->ldc_status = LDC_INIT;
	ldcp->ldc_handle = NULL;
	ldcp->ldc_vswp = NULL;

	mutex_destroy(&ldcp->msg_thr_lock);
	mutex_destroy(&ldcp->rcv_thr_lock);
	mutex_destroy(&ldcp->ldc_txlock);
	mutex_destroy(&ldcp->ldc_rxlock);
	mutex_destroy(&ldcp->ldc_cblock);
	mutex_destroy(&ldcp->drain_cv_lock);
	mutex_destroy(&ldcp->status_lock);
	cv_destroy(&ldcp->msg_thr_cv);
	cv_destroy(&ldcp->rcv_thr_cv);
	cv_destroy(&ldcp->drain_cv);

	kmem_free(ldcp, sizeof (vsw_ldc_t));
}
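/*
 * Teardown ordering in vsw_ldc_detach() above is deliberate: worker
 * threads are stopped first so nothing new is queued on the channel,
 * mapped lane resources (e.g. drings) are released next since the
 * channel cannot be closed while they are outstanding, and only then
 * are ldc_close()/ldc_fini() called. The EAGAIN retry loop is bounded
 * by the vsw_ldc_retries and vsw_ldc_delay tunables declared earlier.
 */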
/*
 * Open and attempt to bring up the channel. Note that channel
 * can only be brought up if peer has also opened channel.
 *
 * Returns 0 if can open and bring up channel, otherwise
 * returns 1.
 */
static int
vsw_ldc_init(vsw_ldc_t *ldcp)
{
	vsw_t		*vswp = ldcp->ldc_vswp;
	ldc_status_t	istatus = 0;
	int		rv;

	D1(vswp, "%s: enter", __func__);

	LDC_ENTER_LOCK(ldcp);

	/* don't start at 0 in case clients don't like that */
	ldcp->next_ident = 1;

	rv = ldc_open(ldcp->ldc_handle);
	if (rv != 0) {
		DERR(vswp, "%s: ldc_open failed: id(%lld) rv(%d)",
		    __func__, ldcp->ldc_id, rv);
		LDC_EXIT_LOCK(ldcp);
		return (1);
	}

	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
		DERR(vswp, "%s: unable to get status", __func__);
		LDC_EXIT_LOCK(ldcp);
		return (1);

	} else if (istatus != LDC_OPEN && istatus != LDC_READY) {
		DERR(vswp, "%s: id (%lld) status(%d) is not OPEN/READY",
		    __func__, ldcp->ldc_id, istatus);
		LDC_EXIT_LOCK(ldcp);
		return (1);
	}

	mutex_enter(&ldcp->status_lock);
	ldcp->ldc_status = istatus;
	mutex_exit(&ldcp->status_lock);

	rv = ldc_up(ldcp->ldc_handle);
	if (rv != 0) {
		/*
		 * Not a fatal error for ldc_up() to fail, as peer
		 * end point may simply not be ready yet.
		 */
		D2(vswp, "%s: ldc_up err id(%lld) rv(%d)", __func__,
		    ldcp->ldc_id, rv);
		LDC_EXIT_LOCK(ldcp);
		return (1);
	}

	/*
	 * ldc_up() call is non-blocking so need to explicitly
	 * check channel status to see if in fact the channel
	 * is UP.
	 */
	mutex_enter(&ldcp->status_lock);
	if (ldc_status(ldcp->ldc_handle, &ldcp->ldc_status) != 0) {
		DERR(vswp, "%s: unable to get status", __func__);
		mutex_exit(&ldcp->status_lock);
		LDC_EXIT_LOCK(ldcp);
		return (1);

	}

	if (ldcp->ldc_status == LDC_UP) {
		D2(vswp, "%s: channel %ld now UP (%ld)", __func__,
		    ldcp->ldc_id, istatus);
		mutex_exit(&ldcp->status_lock);
		LDC_EXIT_LOCK(ldcp);

		vsw_process_conn_evt(ldcp, VSW_CONN_UP);
		return (0);
	}

	mutex_exit(&ldcp->status_lock);
	LDC_EXIT_LOCK(ldcp);

	D1(vswp, "%s: exit", __func__);
	return (0);
}
/* disable callbacks on the channel */
static void
vsw_ldc_uninit(vsw_ldc_t *ldcp)
{
	vsw_t	*vswp = ldcp->ldc_vswp;
	int	rv;

	D1(vswp, "vsw_ldc_uninit: enter: id(%lx)\n", ldcp->ldc_id);

	LDC_ENTER_LOCK(ldcp);

	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
	if (rv != 0) {
		cmn_err(CE_NOTE, "!vsw_ldc_uninit(%ld): error disabling "
		    "interrupts (rv = %d)\n", ldcp->ldc_id, rv);
	}

	mutex_enter(&ldcp->status_lock);
	ldcp->ldc_status = LDC_INIT;
	mutex_exit(&ldcp->status_lock);

	LDC_EXIT_LOCK(ldcp);

	D1(vswp, "vsw_ldc_uninit: exit: id(%lx)", ldcp->ldc_id);
}

/*
 * Wait until the callback(s) associated with the ldcs under the specified
 * port have completed.
 *
 * Prior to this function being invoked each channel under this port
 * should have been quiesced via ldc_set_cb_mode(DISABLE).
 *
 * A short explanation of what we are doing below..
 *
 * The simplest approach would be to have a reference counter in
 * the ldc structure which is incremented/decremented by the callbacks as
 * they use the channel. The drain function could then simply disable any
 * further callbacks and do a cv_wait for the ref to hit zero. Unfortunately
 * there is a tiny window here - before the callback is able to get the lock
 * on the channel it is interrupted and this function gets to execute. It
 * sees that the ref count is zero and believes it is free to delete the
 * associated data structures.
 *
 * We get around this by taking advantage of the fact that before the ldc
 * framework invokes a callback it sets a flag to indicate that there is a
 * callback active (or about to become active). If we attempt to
 * unregister a callback while this active flag is set then the unregister
 * will fail with EWOULDBLOCK.
 *
 * If the unregister fails we do a cv_timedwait. We will either be signaled
 * by the callback as it is exiting (note we have to wait a short period to
 * allow the callback to return fully to the ldc framework and it to clear
 * the active flag), or by the timer expiring. In either case we again attempt
 * the unregister. We repeat this until we can successfully unregister the
 * callback.
 *
 * The reason we use a cv_timedwait rather than a simple cv_wait is to catch
 * the case where the callback has finished but the ldc framework has not yet
 * cleared the active flag. In this case we would never get a cv_signal.
 */
static void
vsw_ldc_drain(vsw_ldc_t *ldcp)
{
	vsw_t	*vswp = ldcp->ldc_port->p_vswp;

	D1(vswp, "%s: enter", __func__);

	/*
	 * If we can unregister the channel callback then we
	 * know that there is no callback either running or
	 * scheduled to run for this channel so move on to next
	 * channel in the list.
	 */
	mutex_enter(&ldcp->drain_cv_lock);

	/* prompt active callbacks to quit */
	ldcp->drain_state = VSW_LDC_DRAINING;

	if ((ldc_unreg_callback(ldcp->ldc_handle)) == 0) {
		D2(vswp, "%s: unreg callback for chan %ld", __func__,
		    ldcp->ldc_id);
		mutex_exit(&ldcp->drain_cv_lock);
	} else {
		/*
		 * If we end up here we know that either 1) a callback
		 * is currently executing, 2) is about to start (i.e.
		 * the ldc framework has set the active flag but
		 * has not actually invoked the callback yet), or 3)
		 * has finished and has returned to the ldc framework
		 * but the ldc framework has not yet cleared the
		 * active bit.
		 *
		 * Wait for it to finish.
		 */
		while (ldc_unreg_callback(ldcp->ldc_handle) == EWOULDBLOCK) {
			(void) cv_timedwait(&ldcp->drain_cv,
			    &ldcp->drain_cv_lock, ddi_get_lbolt() + hz);
		}

		mutex_exit(&ldcp->drain_cv_lock);
		D2(vswp, "%s: unreg callback for chan %ld after "
		    "timeout", __func__, ldcp->ldc_id);
	}

	D1(vswp, "%s: exit", __func__);
}
/*
 * Wait until all tasks which reference this port have completed.
 *
 * Prior to this function being invoked each channel under this port
 * should have been quiesced via ldc_set_cb_mode(DISABLE).
 */
static void
vsw_drain_port_taskq(vsw_port_t *port)
{
	vsw_t	*vswp = port->p_vswp;

	D1(vswp, "%s: enter", __func__);

	/*
	 * Mark the port as in the process of being detached, and
	 * dispatch a marker task to the queue so we know when all
	 * relevant tasks have completed.
	 */
	mutex_enter(&port->state_lock);
	port->state = VSW_PORT_DETACHING;

	if ((vswp->taskq_p == NULL) ||
	    (ddi_taskq_dispatch(vswp->taskq_p, vsw_marker_task,
	    port, DDI_NOSLEEP) != DDI_SUCCESS)) {
		cmn_err(CE_NOTE, "!vsw%d: unable to dispatch marker task",
		    vswp->instance);
		mutex_exit(&port->state_lock);
		return;
	}

	/*
	 * Wait for the marker task to finish.
	 */
	while (port->state != VSW_PORT_DETACHABLE)
		cv_wait(&port->state_cv, &port->state_lock);

	mutex_exit(&port->state_lock);

	D1(vswp, "%s: exit", __func__);
}

static void
vsw_marker_task(void *arg)
{
	vsw_port_t	*port = arg;
	vsw_t		*vswp = port->p_vswp;

	D1(vswp, "%s: enter", __func__);

	mutex_enter(&port->state_lock);

	/*
	 * No further tasks should be dispatched which reference
	 * this port so ok to mark it as safe to detach.
	 */
	port->state = VSW_PORT_DETACHABLE;

	cv_signal(&port->state_cv);

	mutex_exit(&port->state_lock);

	D1(vswp, "%s: exit", __func__);
}
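/*
 * The marker-task pattern above relies on the taskq executing tasks in
 * dispatch order: once vsw_marker_task() runs, every task queued before
 * it (i.e. every ctrl-msg task referencing this port) must already have
 * completed. This assumes vswp->taskq_p is a serial (single-threaded)
 * taskq, which is how the driver is expected to create it elsewhere;
 * with multiple worker threads the marker alone would not prove
 * quiescence.
 */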
vsw_port_t *
vsw_lookup_port(vsw_t *vswp, int p_instance)
{
	vsw_port_list_t	*plist = &vswp->plist;
	vsw_port_t	*port;

	for (port = plist->head; port != NULL; port = port->p_next) {
		if (port->p_instance == p_instance) {
			D2(vswp, "vsw_lookup_port: found p_instance\n");
			return (port);
		}
	}

	return (NULL);
}

void
vsw_vlan_unaware_port_reset(vsw_port_t *portp)
{
	vsw_ldc_t	*ldcp = portp->ldcp;

	mutex_enter(&ldcp->ldc_cblock);

	/*
	 * If the peer is vlan unaware (ver < 1.3), reset channel and terminate
	 * the connection. See comments in vsw_set_vnet_proto_ops().
	 */
	if (ldcp->hphase == VSW_MILESTONE4 && VSW_VER_LT(ldcp, 1, 3) &&
	    portp->nvids != 0) {
		vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
	}

	mutex_exit(&ldcp->ldc_cblock);
}

void
vsw_hio_port_reset(vsw_port_t *portp, boolean_t immediate)
{
	vsw_ldc_t	*ldcp = portp->ldcp;

	mutex_enter(&ldcp->ldc_cblock);

	/*
	 * If the peer is HybridIO capable (ver >= 1.3), reset channel
	 * to trigger re-negotiation, which in turn triggers HybridIO
	 * setup/cleanup.
	 */
	if ((ldcp->hphase == VSW_MILESTONE4) &&
	    (portp->p_hio_capable == B_TRUE)) {
		if (immediate == B_TRUE) {
			(void) ldc_down(ldcp->ldc_handle);
		} else {
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
		}
	}

	mutex_exit(&ldcp->ldc_cblock);
}

void
vsw_port_reset(vsw_port_t *portp)
{
	vsw_ldc_t	*ldcp = portp->ldcp;

	mutex_enter(&ldcp->ldc_cblock);

	/*
	 * reset channel and terminate the connection.
	 */
	vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);

	mutex_exit(&ldcp->ldc_cblock);
}

void
vsw_reset_ports(vsw_t *vswp)
{
	vsw_port_list_t	*plist = &vswp->plist;
	vsw_port_t	*portp;

	READ_ENTER(&plist->lockrw);
	for (portp = plist->head; portp != NULL; portp = portp->p_next) {
		if ((portp->p_hio_capable) && (portp->p_hio_enabled)) {
			vsw_hio_stop_port(portp);
		}
		vsw_port_reset(portp);
	}
	RW_EXIT(&plist->lockrw);
}

static void
vsw_send_physlink_msg(vsw_ldc_t *ldcp, link_state_t plink_state)
{
	vnet_physlink_msg_t	msg;
	vnet_physlink_msg_t	*msgp = &msg;
	uint32_t		physlink_info = 0;

	if (plink_state == LINK_STATE_UP) {
		physlink_info |= VNET_PHYSLINK_STATE_UP;
	} else {
		physlink_info |= VNET_PHYSLINK_STATE_DOWN;
	}

	msgp->tag.vio_msgtype = VIO_TYPE_CTRL;
	msgp->tag.vio_subtype = VIO_SUBTYPE_INFO;
	msgp->tag.vio_subtype_env = VNET_PHYSLINK_INFO;
	msgp->tag.vio_sid = ldcp->local_session;
	msgp->physlink_info = physlink_info;

	(void) vsw_send_msg(ldcp, msgp, sizeof (msg), B_TRUE);
}

static void
vsw_port_physlink_update(vsw_port_t *portp)
{
	vsw_ldc_t	*ldcp;
	vsw_t		*vswp;

	vswp = portp->p_vswp;
	ldcp = portp->ldcp;

	mutex_enter(&ldcp->ldc_cblock);

	/*
	 * If handshake has completed successfully and if the vnet device
	 * has negotiated to get physical link state updates, send a message
	 * with the current state.
	 */
	if (ldcp->hphase == VSW_MILESTONE4 && ldcp->pls_negotiated == B_TRUE) {
		vsw_send_physlink_msg(ldcp, vswp->phys_link_state);
	}

	mutex_exit(&ldcp->ldc_cblock);
}

void
vsw_physlink_update_ports(vsw_t *vswp)
{
	vsw_port_list_t	*plist = &vswp->plist;
	vsw_port_t	*portp;

	READ_ENTER(&plist->lockrw);
	for (portp = plist->head; portp != NULL; portp = portp->p_next) {
		vsw_port_physlink_update(portp);
	}
	RW_EXIT(&plist->lockrw);
}
/*
 * Search for and remove the specified port from the port
 * list. Returns 0 if able to locate and remove port, otherwise
 * returns 1.
 */
static int
vsw_plist_del_node(vsw_t *vswp, vsw_port_t *port)
{
	vsw_port_list_t	*plist = &vswp->plist;
	vsw_port_t	*curr_p, *prev_p;

	if (plist->head == NULL)
		return (1);

	curr_p = prev_p = plist->head;

	while (curr_p != NULL) {
		if (curr_p == port) {
			if (prev_p == curr_p) {
				plist->head = curr_p->p_next;
			} else {
				prev_p->p_next = curr_p->p_next;
			}
			plist->num_ports--;
			break;
		} else {
			prev_p = curr_p;
			curr_p = curr_p->p_next;
		}
	}
	return (0);
}

/*
 * Interrupt handler for ldc messages.
 */
static uint_t
vsw_ldc_cb(uint64_t event, caddr_t arg)
{
	vsw_ldc_t	*ldcp = (vsw_ldc_t *)arg;
	vsw_t		*vswp = ldcp->ldc_vswp;

	D1(vswp, "%s: enter: ldcid (%lld)\n", __func__, ldcp->ldc_id);

	mutex_enter(&ldcp->ldc_cblock);
	ldcp->ldc_stats.callbacks++;

	mutex_enter(&ldcp->status_lock);
	if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) {
		mutex_exit(&ldcp->status_lock);
		mutex_exit(&ldcp->ldc_cblock);
		return (LDC_SUCCESS);
	}
	mutex_exit(&ldcp->status_lock);

	if (event & LDC_EVT_UP) {
		/*
		 * Channel has come up.
		 */
		D2(vswp, "%s: id(%ld) event(%llx) UP: status(%ld)",
		    __func__, ldcp->ldc_id, event, ldcp->ldc_status);

		vsw_process_conn_evt(ldcp, VSW_CONN_UP);

		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
	}

	if (event & LDC_EVT_READ) {
		/*
		 * Data available for reading.
		 */
		D2(vswp, "%s: id(%ld) event(%llx) data READ",
		    __func__, ldcp->ldc_id, event);

		vsw_process_evt_read(ldcp);

		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);

		goto vsw_cb_exit;
	}

	if (event & (LDC_EVT_DOWN | LDC_EVT_RESET)) {
		D2(vswp, "%s: id(%ld) event (%lx) DOWN/RESET: status(%ld)",
		    __func__, ldcp->ldc_id, event, ldcp->ldc_status);

		vsw_process_conn_evt(ldcp, VSW_CONN_RESET);
	}

	/*
	 * Catch either LDC_EVT_WRITE which we don't support or any
	 * unknown event.
	 */
	if (event &
	    ~(LDC_EVT_UP | LDC_EVT_RESET | LDC_EVT_DOWN | LDC_EVT_READ)) {
		DERR(vswp, "%s: id(%ld) Unexpected event=(%llx) status(%ld)",
		    __func__, ldcp->ldc_id, event, ldcp->ldc_status);
	}

vsw_cb_exit:
	mutex_exit(&ldcp->ldc_cblock);

	/*
	 * Let the drain function know we are finishing if it
	 * is waiting.
	 */
	mutex_enter(&ldcp->drain_cv_lock);
	if (ldcp->drain_state == VSW_LDC_DRAINING)
		cv_signal(&ldcp->drain_cv);
	mutex_exit(&ldcp->drain_cv_lock);

	return (LDC_SUCCESS);
}
/*
 * Reinitialise data structures associated with the channel.
 */
static void
vsw_ldc_reinit(vsw_ldc_t *ldcp)
{
	vsw_t		*vswp = ldcp->ldc_vswp;
	vsw_port_t	*port;

	D1(vswp, "%s: enter", __func__);

	port = ldcp->ldc_port;

	D2(vswp, "%s: in 0x%llx : out 0x%llx", __func__,
	    ldcp->lane_in.lstate, ldcp->lane_out.lstate);

	vsw_free_lane_resources(ldcp, INBOUND);
	vsw_free_lane_resources(ldcp, OUTBOUND);

	ldcp->lane_in.lstate = 0;
	ldcp->lane_out.lstate = 0;

	/* Remove the fdb entry for this port/mac address */
	vsw_fdbe_del(vswp, &(port->p_macaddr));

	/* remove the port from vlans it has been assigned to */
	vsw_vlan_remove_ids(port, VSW_VNETPORT);

	/*
	 * Remove parent port from any multicast groups
	 * it may have registered with. Client must resend
	 * multicast add command after handshake completes.
	 */
	vsw_del_mcst_port(port);

	ldcp->peer_session = 0;
	ldcp->session_status = 0;
	ldcp->hcnt = 0;
	ldcp->hphase = VSW_MILESTONE0;

	vsw_reset_vnet_proto_ops(ldcp);

	D1(vswp, "%s: exit", __func__);
}

/*
 * Process a connection event.
 */
void
vsw_process_conn_evt(vsw_ldc_t *ldcp, uint16_t evt)
{
	vsw_t		*vswp = ldcp->ldc_vswp;
	vsw_conn_evt_t	*conn = NULL;

	D1(vswp, "%s: enter", __func__);

	/*
	 * Check if either a reset or restart event is pending
	 * or in progress. If so just return.
	 *
	 * A VSW_CONN_RESET event originates either with a LDC_RESET_EVT
	 * being received by the callback handler, or a ECONNRESET error
	 * code being returned from a ldc_read() or ldc_write() call.
	 *
	 * A VSW_CONN_RESTART event occurs when some error checking code
	 * decides that there is a problem with data from the channel,
	 * and that the handshake should be restarted.
	 */
	if (((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART)) &&
	    (ldstub((uint8_t *)&ldcp->reset_active)))
		return;

	/*
	 * If it is an LDC_UP event we first check the recorded
	 * state of the channel. If this is UP then we know that
	 * the channel moving to the UP state has already been dealt
	 * with and don't need to dispatch a new task.
	 *
	 * The reason for this check is that when we do a ldc_up(),
	 * depending on the state of the peer, we may or may not get
	 * a LDC_UP event. As we can't depend on getting a LDC_UP evt
	 * every time we do ldc_up() we explicitly check the channel
	 * status to see has it come up (ldc_up() is asynch and will
	 * complete at some undefined time), and take the appropriate
	 * action.
	 *
	 * The flip side of this is that we may get a LDC_UP event
	 * when we have already seen that the channel is up and have
	 * dealt with that.
	 */
	mutex_enter(&ldcp->status_lock);
	if (evt == VSW_CONN_UP) {
		if ((ldcp->ldc_status == LDC_UP) || (ldcp->reset_active != 0)) {
			mutex_exit(&ldcp->status_lock);
			return;
		}
	}
	mutex_exit(&ldcp->status_lock);

	/*
	 * The transaction group id allows us to identify and discard
	 * any tasks which are still pending on the taskq and refer
	 * to the handshake session we are about to restart or reset.
	 * These stale messages no longer have any real meaning.
	 */
	(void) atomic_inc_32(&ldcp->hss_id);

	ASSERT(vswp->taskq_p != NULL);

	if ((conn = kmem_zalloc(sizeof (vsw_conn_evt_t), KM_NOSLEEP)) == NULL) {
		cmn_err(CE_WARN, "!vsw%d: unable to allocate memory for"
		    " connection event", vswp->instance);
		goto err_exit;
	}

	conn->evt = evt;
	conn->ldcp = ldcp;

	if (ddi_taskq_dispatch(vswp->taskq_p, vsw_conn_task, conn,
	    DDI_NOSLEEP) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "!vsw%d: Can't dispatch connection task",
		    vswp->instance);

		kmem_free(conn, sizeof (vsw_conn_evt_t));
		goto err_exit;
	}

	D1(vswp, "%s: exit", __func__);
	return;

err_exit:
	/*
	 * Have most likely failed due to memory shortage. Clear the flag so
	 * that future requests will at least be attempted and will hopefully
	 * succeed.
	 */
	if ((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART))
		ldcp->reset_active = 0;
}
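/*
 * The ldstub() guard in vsw_process_conn_evt() is an atomic
 * test-and-set: it stores 0xFF into reset_active and returns the
 * previous value, so only the first caller to see 0 proceeds with the
 * reset/restart and every concurrent caller bails out. The flag is
 * cleared again in vsw_conn_task() (or on the error path above) once
 * the reset has been handled, re-arming the guard.
 */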
/*
 * Deal with events relating to a connection. Invoked from a taskq.
 */
static void
vsw_conn_task(void *arg)
{
	vsw_conn_evt_t	*conn = (vsw_conn_evt_t *)arg;
	vsw_ldc_t	*ldcp = NULL;
	vsw_port_t	*portp;
	vsw_t		*vswp = NULL;
	uint16_t	evt;
	ldc_status_t	curr_status;

	ldcp = conn->ldcp;
	evt = conn->evt;
	vswp = ldcp->ldc_vswp;
	portp = ldcp->ldc_port;

	D1(vswp, "%s: enter", __func__);

	/* can safely free now have copied out data */
	kmem_free(conn, sizeof (vsw_conn_evt_t));

	if (ldcp->rcv_thread != NULL) {
		vsw_stop_rcv_thread(ldcp);
	} else if (ldcp->msg_thread != NULL) {
		vsw_stop_msg_thread(ldcp);
	}

	mutex_enter(&ldcp->status_lock);
	if (ldc_status(ldcp->ldc_handle, &curr_status) != 0) {
		cmn_err(CE_WARN, "!vsw%d: Unable to read status of "
		    "channel %ld", vswp->instance, ldcp->ldc_id);
		mutex_exit(&ldcp->status_lock);
		return;
	}

	/*
	 * If we wish to restart the handshake on this channel, then if
	 * the channel is UP we bring it DOWN to flush the underlying
	 * ldc queue.
	 */
	if ((evt == VSW_CONN_RESTART) && (curr_status == LDC_UP))
		(void) ldc_down(ldcp->ldc_handle);

	if ((portp->p_hio_capable) && (portp->p_hio_enabled)) {
		vsw_hio_stop(vswp, ldcp);
	}

	/*
	 * re-init all the associated data structures.
	 */
	vsw_ldc_reinit(ldcp);

	/*
	 * Bring the channel back up (note it does no harm to
	 * do this even if the channel is already UP, Just
	 * becomes effectively a no-op).
	 */
	(void) ldc_up(ldcp->ldc_handle);

	/*
	 * Check if channel is now UP. This will only happen if
	 * peer has also done a ldc_up().
	 */
	if (ldc_status(ldcp->ldc_handle, &curr_status) != 0) {
		cmn_err(CE_WARN, "!vsw%d: Unable to read status of "
		    "channel %ld", vswp->instance, ldcp->ldc_id);
		mutex_exit(&ldcp->status_lock);
		return;
	}

	ldcp->ldc_status = curr_status;

	/* channel UP so restart handshake by sending version info */
	if (curr_status == LDC_UP) {
		if (ldcp->hcnt++ > vsw_num_handshakes) {
			cmn_err(CE_WARN, "!vsw%d: exceeded number of permitted"
			    " handshake attempts (%d) on channel %ld",
			    vswp->instance, ldcp->hcnt, ldcp->ldc_id);
			mutex_exit(&ldcp->status_lock);
			return;
		}

		if (vsw_obp_ver_proto_workaround == B_FALSE &&
		    (ddi_taskq_dispatch(vswp->taskq_p, vsw_send_ver, ldcp,
		    DDI_NOSLEEP) != DDI_SUCCESS)) {
			cmn_err(CE_WARN, "!vsw%d: Can't dispatch version task",
			    vswp->instance);

			/*
			 * Don't count as valid restart attempt if couldn't
			 * send version msg.
			 */
			if (ldcp->hcnt > 0)
				ldcp->hcnt--;
		}
	}

	/*
	 * Mark that the process is complete by clearing the flag.
	 *
	 * Note it is possible that the taskq dispatch above may have failed,
	 * most likely due to memory shortage. We still clear the flag so
	 * future attempts will at least be attempted and will hopefully
	 * succeed.
	 */
	if ((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART))
		ldcp->reset_active = 0;

	mutex_exit(&ldcp->status_lock);

	D1(vswp, "%s: exit", __func__);
}
/*
 * Returns 0 if it is legal for the event signified by flag to have
 * occurred at the time it did. Otherwise returns 1.
 */
int
vsw_check_flag(vsw_ldc_t *ldcp, int dir, uint64_t flag)
{
	vsw_t		*vswp = ldcp->ldc_vswp;
	uint64_t	state;
	uint64_t	phase;

	if (dir == INBOUND)
		state = ldcp->lane_in.lstate;
	else
		state = ldcp->lane_out.lstate;

	phase = ldcp->hphase;

	switch (flag) {
	case VSW_VER_INFO_RECV:
		if (phase > VSW_MILESTONE0) {
			DERR(vswp, "vsw_check_flag (%d): VER_INFO_RECV"
			    " when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		}
		break;

	case VSW_VER_ACK_RECV:
	case VSW_VER_NACK_RECV:
		if (!(state & VSW_VER_INFO_SENT)) {
			DERR(vswp, "vsw_check_flag (%d): spurious VER_ACK or "
			    "VER_NACK when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		} else
			state &= ~VSW_VER_INFO_SENT;
		break;

	case VSW_ATTR_INFO_RECV:
		if ((phase < VSW_MILESTONE1) || (phase >= VSW_MILESTONE2)) {
			DERR(vswp, "vsw_check_flag (%d): ATTR_INFO_RECV"
			    " when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		}
		break;

	case VSW_ATTR_ACK_RECV:
	case VSW_ATTR_NACK_RECV:
		if (!(state & VSW_ATTR_INFO_SENT)) {
			DERR(vswp, "vsw_check_flag (%d): spurious ATTR_ACK"
			    " or ATTR_NACK when in state %d\n",
			    ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		} else
			state &= ~VSW_ATTR_INFO_SENT;
		break;

	case VSW_DRING_INFO_RECV:
		if (phase < VSW_MILESTONE1) {
			DERR(vswp, "vsw_check_flag (%d): DRING_INFO_RECV"
			    " when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		}
		break;

	case VSW_DRING_ACK_RECV:
	case VSW_DRING_NACK_RECV:
		if (!(state & VSW_DRING_INFO_SENT)) {
			DERR(vswp, "vsw_check_flag (%d): spurious DRING_ACK "
			    " or DRING_NACK when in state %d\n",
			    ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		} else
			state &= ~VSW_DRING_INFO_SENT;
		break;

	case VSW_RDX_INFO_RECV:
		if (phase < VSW_MILESTONE3) {
			DERR(vswp, "vsw_check_flag (%d): RDX_INFO_RECV"
			    " when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		}
		break;

	case VSW_RDX_ACK_RECV:
	case VSW_RDX_NACK_RECV:
		if (!(state & VSW_RDX_INFO_SENT)) {
			DERR(vswp, "vsw_check_flag (%d): spurious RDX_ACK or "
			    "RDX_NACK when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		} else
			state &= ~VSW_RDX_INFO_SENT;
		break;

	case VSW_MCST_INFO_RECV:
		if (phase < VSW_MILESTONE3) {
			DERR(vswp, "vsw_check_flag (%d): VSW_MCST_INFO_RECV"
			    " when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		}
		break;

	default:
		DERR(vswp, "vsw_check_flag (%lld): unknown flag (%llx)",
		    ldcp->ldc_id, flag);
		return (1);
	}

	if (dir == INBOUND)
		ldcp->lane_in.lstate = state;
	else
		ldcp->lane_out.lstate = state;

	D1(vswp, "vsw_check_flag (chan %lld): exit", ldcp->ldc_id);

	return (0);
}
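/*
 * Handshake milestone summary (derived from vsw_next_milestone() below):
 *
 *	VSW_MILESTONE0	version negotiation (VER_INFO/ACK)
 *	VSW_MILESTONE1	attribute exchange (ATTR_INFO/ACK)
 *	VSW_MILESTONE2	dring registration, if dring mode was negotiated
 *	VSW_MILESTONE3	RDX exchange in both directions
 *	VSW_MILESTONE4	handshake complete, data transfer allowed
 */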
void
vsw_next_milestone(vsw_ldc_t *ldcp)
{
	vsw_t		*vswp = ldcp->ldc_vswp;
	vsw_port_t	*portp = ldcp->ldc_port;
	lane_t		*lane_out = &ldcp->lane_out;
	lane_t		*lane_in = &ldcp->lane_in;

	D1(vswp, "%s (chan %lld): enter (phase %ld)", __func__,
	    ldcp->ldc_id, ldcp->hphase);

	DUMP_FLAGS(lane_in->lstate);
	DUMP_FLAGS(lane_out->lstate);

	switch (ldcp->hphase) {

	case VSW_MILESTONE0:
		/*
		 * If we haven't started to handshake with our peer,
		 * start to do so now.
		 */
		if (lane_out->lstate == 0) {
			D2(vswp, "%s: (chan %lld) starting handshake "
			    "with peer", __func__, ldcp->ldc_id);
			vsw_process_conn_evt(ldcp, VSW_CONN_UP);
		}

		/*
		 * Only way to pass this milestone is to have successfully
		 * negotiated version info.
		 */
		if ((lane_in->lstate & VSW_VER_ACK_SENT) &&
		    (lane_out->lstate & VSW_VER_ACK_RECV)) {

			D2(vswp, "%s: (chan %lld) leaving milestone 0",
			    __func__, ldcp->ldc_id);

			vsw_set_vnet_proto_ops(ldcp);

			/*
			 * Next milestone is passed when attribute
			 * information has been successfully exchanged.
			 */
			ldcp->hphase = VSW_MILESTONE1;
			vsw_send_attr(ldcp);

		}
		break;

	case VSW_MILESTONE1:
		/*
		 * Only way to pass this milestone is to have successfully
		 * negotiated attribute information, in both directions.
		 */
		if (!((lane_in->lstate & VSW_ATTR_ACK_SENT) &&
		    (lane_out->lstate & VSW_ATTR_ACK_RECV))) {
			break;
		}

		ldcp->hphase = VSW_MILESTONE2;

		/*
		 * If the peer device has said it wishes to
		 * use descriptor rings then we send it our ring
		 * info, otherwise we just set up a private ring
		 * which uses an internal buffer.
		 */
		if ((VSW_VER_GTEQ(ldcp, 1, 2) &&
		    (lane_in->xfer_mode & VIO_DRING_MODE_V1_2)) ||
		    (VSW_VER_LT(ldcp, 1, 2) &&
		    (lane_in->xfer_mode == VIO_DRING_MODE_V1_0))) {
			vsw_send_dring_info(ldcp);
			break;
		}

		/*
		 * The peer doesn't operate in dring mode; we
		 * can simply fall through to the RDX phase from
		 * here.
		 */
		/*FALLTHRU*/

	case VSW_MILESTONE2:
		/*
		 * If peer has indicated in its attribute message that
		 * it wishes to use descriptor rings then the only way
		 * to pass this milestone is for us to have received
		 * valid dring info.
		 *
		 * If peer is not using descriptor rings then just fall
		 * through.
		 */
		if ((VSW_VER_GTEQ(ldcp, 1, 2) &&
		    (lane_in->xfer_mode & VIO_DRING_MODE_V1_2)) ||
		    (VSW_VER_LT(ldcp, 1, 2) &&
		    (lane_in->xfer_mode ==
		    VIO_DRING_MODE_V1_0))) {
			if (!(lane_in->lstate & VSW_DRING_ACK_SENT))
				break;
		}

		D2(vswp, "%s: (chan %lld) leaving milestone 2",
		    __func__, ldcp->ldc_id);

		ldcp->hphase = VSW_MILESTONE3;
		vsw_send_rdx(ldcp);
		break;

	case VSW_MILESTONE3:
		/*
		 * Pass this milestone when all parameters have been
		 * successfully exchanged and RDX sent in both directions.
		 *
		 * Mark the relevant lane as available to transmit data. In
		 * RxDringData mode, lane_in is associated with transmit and
		 * lane_out is associated with receive. It is the reverse in
		 * TxDring mode.
		 */
		if ((lane_out->lstate & VSW_RDX_ACK_SENT) &&
		    (lane_in->lstate & VSW_RDX_ACK_RECV)) {

			D2(vswp, "%s: (chan %lld) leaving milestone 3",
			    __func__, ldcp->ldc_id);
			D2(vswp, "%s: ** handshake complete (0x%llx : "
			    "0x%llx) **", __func__, lane_in->lstate,
			    lane_out->lstate);
			if (lane_out->dring_mode == VIO_RX_DRING_DATA) {
				lane_in->lstate |= VSW_LANE_ACTIVE;
			} else {
				lane_out->lstate |= VSW_LANE_ACTIVE;
			}
			ldcp->hphase = VSW_MILESTONE4;
			ldcp->hcnt = 0;
			DISPLAY_STATE();
			/* Start HIO if enabled and capable */
			if ((portp->p_hio_enabled) && (portp->p_hio_capable)) {
				D2(vswp, "%s: start HybridIO setup", __func__);
				vsw_hio_start(vswp, ldcp);
			}

			if (ldcp->pls_negotiated == B_TRUE) {
				/*
				 * The vnet device has negotiated to get phys
				 * link updates. Now that the handshake with
				 * the vnet device is complete, send an initial
				 * update with the current physical link state.
				 */
				vsw_send_physlink_msg(ldcp,
				    vswp->phys_link_state);
			}

		} else {
			D2(vswp, "%s: still in milestone 3 (0x%llx : 0x%llx)",
			    __func__, lane_in->lstate,
			    lane_out->lstate);
		}
		break;

	case VSW_MILESTONE4:
		D2(vswp, "%s: (chan %lld) in milestone 4", __func__,
		    ldcp->ldc_id);
		break;

	default:
		DERR(vswp, "%s: (chan %lld) Unknown Phase %x", __func__,
		    ldcp->ldc_id, ldcp->hphase);
	}

	D1(vswp, "%s (chan %lld): exit (phase %ld)", __func__, ldcp->ldc_id,
	    ldcp->hphase);
}
/*
 * Check if major version is supported.
 *
 * Returns 0 if it finds a supported major number, and if necessary
 * adjusts the minor field.
 *
 * Returns 1 if it can't match the major number exactly. Sets major/minor
 * to the next lowest supported values, or to zero if no other values are
 * possible.
 */
static int
vsw_supported_version(vio_ver_msg_t *vp)
{
	int	i;

	D1(NULL, "vsw_supported_version: enter");

	for (i = 0; i < VSW_NUM_VER; i++) {
		if (vsw_versions[i].ver_major == vp->ver_major) {
			/*
			 * Matching or lower major version found. Update
			 * minor number if necessary.
			 */
			if (vp->ver_minor > vsw_versions[i].ver_minor) {
				D2(NULL, "%s: adjusting minor value from %d "
				    "to %d", __func__, vp->ver_minor,
				    vsw_versions[i].ver_minor);
				vp->ver_minor = vsw_versions[i].ver_minor;
			}

			return (0);
		}

		/*
		 * If the message contains a higher major version number, set
		 * the message's major/minor versions to the current values
		 * and return false, so this message will get resent with
		 * these values.
		 */
		if (vsw_versions[i].ver_major < vp->ver_major) {
			D2(NULL, "%s: adjusting major and minor "
			    "values to %d, %d\n",
			    __func__, vsw_versions[i].ver_major,
			    vsw_versions[i].ver_minor);
			vp->ver_major = vsw_versions[i].ver_major;
			vp->ver_minor = vsw_versions[i].ver_minor;
			return (1);
		}
	}

	/* No match was possible, zero out fields */
	vp->ver_major = 0;
	vp->ver_minor = 0;

	D1(NULL, "vsw_supported_version: exit");

	return (1);
}
/*
 * Set vnet-protocol-version dependent functions based on version.
 */
static void
vsw_set_vnet_proto_ops(vsw_ldc_t *ldcp)
{
	vsw_t	*vswp = ldcp->ldc_vswp;
	lane_t	*lp = &ldcp->lane_out;

	/*
	 * Setup the appropriate dring data processing routine and any
	 * associated thread based on the version.
	 *
	 * In versions < 1.6, we support only TxDring mode. In this mode, the
	 * msg worker thread processes all types of VIO msgs (ctrl and data).
	 *
	 * In versions >= 1.6, we also support RxDringData mode. In this mode,
	 * the rcv worker thread processes dring data messages (msgtype:
	 * VIO_TYPE_DATA, subtype: VIO_SUBTYPE_INFO, env: VIO_DRING_DATA). The
	 * rest of the data messages (including acks) and ctrl messages are
	 * handled directly by the callback (intr) thread.
	 *
	 * However, for versions >= 1.6, we could still fall back to TxDring
	 * mode. This could happen if RxDringData mode has been disabled (see
	 * vsw_dring_mode) on this guest or on the peer guest. This info is
	 * determined as part of attr exchange phase of handshake. Hence, we
	 * setup these pointers for v1.6 after attr msg phase completes during
	 * handshake.
	 */
	if (VSW_VER_GTEQ(ldcp, 1, 6)) {
		/*
		 * Set data dring mode for vsw_send_attr(). We setup msg worker
		 * thread in TxDring mode or rcv worker thread in RxDringData
		 * mode when attr phase of handshake completes.
		 */
		if (vsw_dring_mode == VIO_RX_DRING_DATA) {
			lp->dring_mode = (VIO_RX_DRING_DATA | VIO_TX_DRING);
		} else {
			lp->dring_mode = VIO_TX_DRING;
		}
	} else {
		lp->dring_mode = VIO_TX_DRING;
	}

	/*
	 * Setup the MTU for attribute negotiation based on the version.
	 */
	if (VSW_VER_GTEQ(ldcp, 1, 4)) {
		/*
		 * If the version negotiated with peer is >= 1.4 (Jumbo Frame
		 * Support), set the mtu in our attributes to max_frame_size.
		 */
		lp->mtu = vswp->max_frame_size;
	} else if (VSW_VER_EQ(ldcp, 1, 3)) {
		/*
		 * If the version negotiated with peer is == 1.3 (VLAN Tag
		 * Support) set the attr.mtu to ETHERMAX + VLAN_TAGSZ.
		 */
		lp->mtu = ETHERMAX + VLAN_TAGSZ;
	} else {
		vsw_port_t	*portp = ldcp->ldc_port;
		/*
		 * Pre-1.3 peers expect max frame size of ETHERMAX.
		 * We can negotiate that size with those peers provided only
		 * pvid is defined for our peer and there are no vids. Then we
		 * can send/recv only untagged frames of max size ETHERMAX.
		 * Note that pvid of the peer can be different, as vsw has to
		 * serve the vnet in that vlan even if itself is not assigned
		 * to that vlan.
		 */
		if (portp->nvids == 0) {
			lp->mtu = ETHERMAX;
		}
	}

	/*
	 * Setup version dependent data processing functions.
	 */
	if (VSW_VER_GTEQ(ldcp, 1, 2)) {
		/* Versions >= 1.2 */

		if (VSW_PRI_ETH_DEFINED(vswp)) {
			/*
			 * enable priority routines and pkt mode only if
			 * at least one pri-eth-type is specified in MD.
			 */
			ldcp->tx = vsw_ldctx_pri;
			ldcp->rx_pktdata = vsw_process_pkt_data;

			/* set xfer mode for vsw_send_attr() */
			lp->xfer_mode = VIO_PKT_MODE | VIO_DRING_MODE_V1_2;
		} else {
			/* no priority eth types defined in MD */

			ldcp->tx = vsw_ldctx;
			ldcp->rx_pktdata = vsw_process_pkt_data_nop;

			/* set xfer mode for vsw_send_attr() */
			lp->xfer_mode = VIO_DRING_MODE_V1_2;
		}

	} else {
		/* Versions prior to 1.2 */

		vsw_reset_vnet_proto_ops(ldcp);
	}
}
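/*
 * Negotiated-MTU summary for vsw_set_vnet_proto_ops() above:
 *
 *	version >= 1.4	lp->mtu = vswp->max_frame_size (jumbo frames)
 *	version == 1.3	lp->mtu = ETHERMAX + VLAN_TAGSZ (tagged frames)
 *	version <  1.3	lp->mtu = ETHERMAX, but only if the port has no
 *			vids configured (untagged traffic only); see
 *			vsw_vlan_unaware_port_reset() for the reset that
 *			is triggered when vids exist against such a peer.
 */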
1970 */ 1971 if (VSW_VER_GTEQ(ldcp, 1, 4)) { 1972 /* 1973 * If the version negotiated with peer is >= 1.4(Jumbo Frame 1974 * Support), set the mtu in our attributes to max_frame_size. 1975 */ 1976 lp->mtu = vswp->max_frame_size; 1977 } else if (VSW_VER_EQ(ldcp, 1, 3)) { 1978 /* 1979 * If the version negotiated with peer is == 1.3 (Vlan Tag 1980 * Support) set the attr.mtu to ETHERMAX + VLAN_TAGSZ. 1981 */ 1982 lp->mtu = ETHERMAX + VLAN_TAGSZ; 1983 } else { 1984 vsw_port_t *portp = ldcp->ldc_port; 1985 /* 1986 * Pre-1.3 peers expect max frame size of ETHERMAX. 1987 * We can negotiate that size with those peers provided only 1988 * pvid is defined for our peer and there are no vids. Then we 1989 * can send/recv only untagged frames of max size ETHERMAX. 1990 * Note that pvid of the peer can be different, as vsw has to 1991 * serve the vnet in that vlan even if itself is not assigned 1992 * to that vlan. 1993 */ 1994 if (portp->nvids == 0) { 1995 lp->mtu = ETHERMAX; 1996 } 1997 } 1998 1999 /* 2000 * Setup version dependent data processing functions. 2001 */ 2002 if (VSW_VER_GTEQ(ldcp, 1, 2)) { 2003 /* Versions >= 1.2 */ 2004 2005 if (VSW_PRI_ETH_DEFINED(vswp)) { 2006 /* 2007 * enable priority routines and pkt mode only if 2008 * at least one pri-eth-type is specified in MD. 2009 */ 2010 ldcp->tx = vsw_ldctx_pri; 2011 ldcp->rx_pktdata = vsw_process_pkt_data; 2012 2013 /* set xfer mode for vsw_send_attr() */ 2014 lp->xfer_mode = VIO_PKT_MODE | VIO_DRING_MODE_V1_2; 2015 } else { 2016 /* no priority eth types defined in MD */ 2017 2018 ldcp->tx = vsw_ldctx; 2019 ldcp->rx_pktdata = vsw_process_pkt_data_nop; 2020 2021 /* set xfer mode for vsw_send_attr() */ 2022 lp->xfer_mode = VIO_DRING_MODE_V1_2; 2023 } 2024 2025 } else { 2026 /* Versions prior to 1.2 */ 2027 2028 vsw_reset_vnet_proto_ops(ldcp); 2029 } 2030 } 2031 2032 /* 2033 * Reset vnet-protocol-version dependent functions to v1.0. 2034 */ 2035 static void 2036 vsw_reset_vnet_proto_ops(vsw_ldc_t *ldcp) 2037 { 2038 lane_t *lp = &ldcp->lane_out; 2039 2040 ldcp->tx = vsw_ldctx; 2041 ldcp->rx_pktdata = vsw_process_pkt_data_nop; 2042 2043 /* set xfer mode for vsw_send_attr() */ 2044 lp->xfer_mode = VIO_DRING_MODE_V1_0; 2045 } 2046 2047 static void 2048 vsw_process_evt_read(vsw_ldc_t *ldcp) 2049 { 2050 if (ldcp->msg_thread != NULL) { 2051 /* 2052 * TxDring mode; wakeup message worker 2053 * thread to process the VIO messages. 2054 */ 2055 mutex_exit(&ldcp->ldc_cblock); 2056 mutex_enter(&ldcp->msg_thr_lock); 2057 if (!(ldcp->msg_thr_flags & VSW_WTHR_DATARCVD)) { 2058 ldcp->msg_thr_flags |= VSW_WTHR_DATARCVD; 2059 cv_signal(&ldcp->msg_thr_cv); 2060 } 2061 mutex_exit(&ldcp->msg_thr_lock); 2062 mutex_enter(&ldcp->ldc_cblock); 2063 } else { 2064 /* 2065 * We invoke vsw_process_pkt() in the context of the LDC 2066 * callback (vsw_ldc_cb()) during handshake, until the dring 2067 * mode is negotiated. After the dring mode is negotiated, the 2068 * msgs are processed by the msg worker thread (above case) if 2069 * the dring mode is TxDring. Otherwise (in RxDringData mode) 2070 * we continue to process the msgs directly in the callback 2071 * context. 2072 */ 2073 vsw_process_pkt(ldcp); 2074 } 2075 } 2076 2077 /* 2078 * Main routine for processing messages received over LDC. 
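*
* The receive loop below follows the usual ldc_read() pattern
* (sketched here for reference; debug and error paths elided):
*
*	do {
*		msglen = ldcp->msglen;
*		rv = ldc_read(ldcp->ldc_handle, buf, &msglen);
*		if (rv == ECONNRESET)
*			break;		(channel was reset)
*		if (msglen == 0)
*			break;		(no more queued messages)
*		dispatch on ((vio_msg_tag_t *)buf)->vio_msgtype;
*	} while (msglen);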
2079 */
2080 void
2081 vsw_process_pkt(void *arg)
2082 {
2083 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg;
2084 vsw_t *vswp = ldcp->ldc_vswp;
2085 size_t msglen;
2086 vio_msg_tag_t *tagp;
2087 uint64_t *ldcmsg;
2088 int rv = 0;
2089
2090
2091 D1(vswp, "%s enter: ldcid (%lld)\n", __func__, ldcp->ldc_id);
2092
2093 ASSERT(MUTEX_HELD(&ldcp->ldc_cblock));
2094
2095 ldcmsg = ldcp->ldcmsg;
2096 /*
2097 * If the channel is up, read messages until the channel is empty.
2098 */
2099 do {
2100 msglen = ldcp->msglen;
2101 rv = ldc_read(ldcp->ldc_handle, (caddr_t)ldcmsg, &msglen);
2102
2103 if (rv != 0) {
2104 DERR(vswp, "%s :ldc_read err id(%lld) rv(%d) len(%d)\n",
2105 __func__, ldcp->ldc_id, rv, msglen);
2106 }
2107
2108 /* channel has been reset */
2109 if (rv == ECONNRESET) {
2110 vsw_process_conn_evt(ldcp, VSW_CONN_RESET);
2111 break;
2112 }
2113
2114 if (msglen == 0) {
2115 D2(vswp, "%s: ldc_read id(%lld) NODATA", __func__,
2116 ldcp->ldc_id);
2117 break;
2118 }
2119
2120 D2(vswp, "%s: ldc_read id(%lld): msglen(%d)", __func__,
2121 ldcp->ldc_id, msglen);
2122
2123 /*
2124 * Figure out what sort of packet we have received by
2125 * examining the msg tag, and then switch on it appropriately.
2126 */
2127 tagp = (vio_msg_tag_t *)ldcmsg;
2128
2129 switch (tagp->vio_msgtype) {
2130 case VIO_TYPE_CTRL:
2131 vsw_dispatch_ctrl_task(ldcp, ldcmsg, tagp, msglen);
2132 break;
2133 case VIO_TYPE_DATA:
2134 vsw_process_data_pkt(ldcp, ldcmsg, tagp, msglen);
2135 break;
2136 case VIO_TYPE_ERR:
2137 vsw_process_err_pkt(ldcp, ldcmsg, tagp);
2138 break;
2139 default:
2140 DERR(vswp, "%s: Unknown tag(%lx) id(%lx)\n", __func__,
2141 tagp->vio_msgtype, ldcp->ldc_id);
2142 break;
2143 }
2144 } while (msglen);
2145
2146 D1(vswp, "%s exit: ldcid (%lld)\n", __func__, ldcp->ldc_id);
2147 }
2148
2149 /*
2150 * Dispatch a task to process a VIO control message.
2151 */
2152 static void
2153 vsw_dispatch_ctrl_task(vsw_ldc_t *ldcp, void *cpkt, vio_msg_tag_t *tagp,
2154 int msglen)
2155 {
2156 vsw_ctrl_task_t *ctaskp = NULL;
2157 vsw_port_t *port = ldcp->ldc_port;
2158 vsw_t *vswp = port->p_vswp;
2159
2160 D1(vswp, "%s: enter", __func__);
2161
2162 /*
2163 * We need to handle RDX ACK messages in-band as once they
2164 * are exchanged it is possible that we will get an
2165 * immediate (legitimate) data packet.
2166 */
2167 if ((tagp->vio_subtype_env == VIO_RDX) &&
2168 (tagp->vio_subtype == VIO_SUBTYPE_ACK)) {
2169
2170 if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_ACK_RECV))
2171 return;
2172
2173 ldcp->lane_in.lstate |= VSW_RDX_ACK_RECV;
2174 D2(vswp, "%s (%ld) handling RDX_ACK in place "
2175 "(ostate 0x%llx : hphase %d)", __func__,
2176 ldcp->ldc_id, ldcp->lane_in.lstate, ldcp->hphase);
2177 vsw_next_milestone(ldcp);
2178 return;
2179 }
2180
2181 ctaskp = kmem_alloc(sizeof (vsw_ctrl_task_t), KM_NOSLEEP);
2182
2183 if (ctaskp == NULL) {
2184 DERR(vswp, "%s: unable to alloc space for ctrl msg", __func__);
2185 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
2186 return;
2187 }
2188
2189 ctaskp->ldcp = ldcp;
2190 bcopy((def_msg_t *)cpkt, &ctaskp->pktp, msglen);
2191 ctaskp->hss_id = ldcp->hss_id;
2192
2193 /*
2194 * Dispatch the task to the processing taskq if the port is not in
2195 * the process of being detached.
2196 */
2197 mutex_enter(&port->state_lock);
2198 if (port->state == VSW_PORT_INIT) {
2199 if ((vswp->taskq_p == NULL) ||
2200 (ddi_taskq_dispatch(vswp->taskq_p, vsw_process_ctrl_pkt,
2201 ctaskp, DDI_NOSLEEP) != DDI_SUCCESS)) {
2202 mutex_exit(&port->state_lock);
2203 DERR(vswp, "%s: unable to dispatch task to taskq",
2204 __func__);
2205 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
2206 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t));
2207 return;
2208 }
2209 } else {
2210 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t));
2211 DWARN(vswp, "%s: port %d detaching, not dispatching "
2212 "task", __func__, port->p_instance);
2213 }
2214
2215 mutex_exit(&port->state_lock);
2216
2217 D2(vswp, "%s: dispatched task to taskq for chan %d", __func__,
2218 ldcp->ldc_id);
2219 D1(vswp, "%s: exit", __func__);
2220 }
2221
2222 /*
2223 * Process a VIO ctrl message. Invoked from taskq.
2224 */
2225 static void
2226 vsw_process_ctrl_pkt(void *arg)
2227 {
2228 vsw_ctrl_task_t *ctaskp = (vsw_ctrl_task_t *)arg;
2229 vsw_ldc_t *ldcp = ctaskp->ldcp;
2230 vsw_t *vswp = ldcp->ldc_vswp;
2231 vio_msg_tag_t tag;
2232 uint16_t env;
2233
2234 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
2235
2236 bcopy(&ctaskp->pktp, &tag, sizeof (vio_msg_tag_t));
2237 env = tag.vio_subtype_env;
2238
2239 /* stale pkt check */
2240 if (ctaskp->hss_id < ldcp->hss_id) {
2241 DWARN(vswp, "%s: discarding stale packet belonging to an earlier"
2242 " (%ld) handshake session", __func__, ctaskp->hss_id);
2243 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t));
2244 return;
2245 }
2246
2247 /* session id check */
2248 if (ldcp->session_status & VSW_PEER_SESSION) {
2249 if (ldcp->peer_session != tag.vio_sid) {
2250 DERR(vswp, "%s (chan %d): invalid session id (%llx)",
2251 __func__, ldcp->ldc_id, tag.vio_sid);
2252 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t));
2253 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
2254 return;
2255 }
2256 }
2257
2258 /*
2259 * Switch on the vio_subtype envelope, then let the lower routines
2260 * decide if it's an INFO, ACK or NACK packet.
2261 */
2262 switch (env) {
2263 case VIO_VER_INFO:
2264 vsw_process_ctrl_ver_pkt(ldcp, &ctaskp->pktp);
2265 break;
2266 case VIO_DRING_REG:
2267 vsw_process_ctrl_dring_reg_pkt(ldcp, &ctaskp->pktp);
2268 break;
2269 case VIO_DRING_UNREG:
2270 vsw_process_ctrl_dring_unreg_pkt(ldcp, &ctaskp->pktp);
2271 break;
2272 case VIO_ATTR_INFO:
2273 vsw_process_ctrl_attr_pkt(ldcp, &ctaskp->pktp);
2274 break;
2275 case VNET_MCAST_INFO:
2276 vsw_process_ctrl_mcst_pkt(ldcp, &ctaskp->pktp);
2277 break;
2278 case VIO_RDX:
2279 vsw_process_ctrl_rdx_pkt(ldcp, &ctaskp->pktp);
2280 break;
2281 case VIO_DDS_INFO:
2282 vsw_process_dds_msg(vswp, ldcp, &ctaskp->pktp);
2283 break;
2284
2285 case VNET_PHYSLINK_INFO:
2286 vsw_process_physlink_msg(ldcp, &ctaskp->pktp);
2287 break;
2288 default:
2289 DERR(vswp, "%s: unknown vio_subtype_env (%x)\n", __func__, env);
2290 }
2291
2292 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t));
2293 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
2294 }
2295
2296 /*
2297 * Version negotiation. We can end up here either because our peer
2298 * has responded to a handshake message we have sent it, or because our
2299 * peer has initiated a handshake with us. If it's the former it can only
2300 * be an ACK or NACK; if it's the latter it can only be an INFO.
2301 *
2302 * If it's an ACK we move to the next stage of the handshake, namely
2303 * attribute exchange. If it's a NACK we see if we can specify another
2304 * version; if we can't, we stop.
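*
* For example (an illustrative trace, not a fixed sequence):
*
*	peer -> INFO ver 2.0	we NACK, counter-proposing 1.6
*	peer -> INFO ver 1.6	we ACK; 1.6 recorded in lane_in
*	we   -> INFO ver 1.6	peer ACKs; 1.6 recorded in lane_out
*
* after which both simplex lanes agree on the version and the
* attribute phase can begin.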
2305 *
2306 * If it is an INFO we reset all params associated with communication
2307 * in that direction over this channel (remember the connection is
2308 * essentially 2 independent simplex channels).
2309 */
2310 void
2311 vsw_process_ctrl_ver_pkt(vsw_ldc_t *ldcp, void *pkt)
2312 {
2313 vio_ver_msg_t *ver_pkt;
2314 vsw_t *vswp = ldcp->ldc_vswp;
2315
2316 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
2317
2318 /*
2319 * We know this is a ctrl/version packet so
2320 * cast it into the correct structure.
2321 */
2322 ver_pkt = (vio_ver_msg_t *)pkt;
2323
2324 switch (ver_pkt->tag.vio_subtype) {
2325 case VIO_SUBTYPE_INFO:
2326 D2(vswp, "vsw_process_ctrl_ver_pkt: VIO_SUBTYPE_INFO\n");
2327
2328 /*
2329 * Record the session id, which we will use from now
2330 * until we see another VER_INFO msg. Even then the
2331 * session id in most cases will be unchanged, except
2332 * if the channel was reset.
2333 */
2334 if ((ldcp->session_status & VSW_PEER_SESSION) &&
2335 (ldcp->peer_session != ver_pkt->tag.vio_sid)) {
2336 DERR(vswp, "%s: updating session id for chan %lld "
2337 "from %llx to %llx", __func__, ldcp->ldc_id,
2338 ldcp->peer_session, ver_pkt->tag.vio_sid);
2339 }
2340
2341 ldcp->peer_session = ver_pkt->tag.vio_sid;
2342 ldcp->session_status |= VSW_PEER_SESSION;
2343
2344 /* Legal message at this time ? */
2345 if (vsw_check_flag(ldcp, INBOUND, VSW_VER_INFO_RECV))
2346 return;
2347
2348 /*
2349 * First check the device class. Currently we only expect
2350 * to be talking to a network device. In the future we may
2351 * also talk to another switch.
2352 */
2353 if (ver_pkt->dev_class != VDEV_NETWORK) {
2354 DERR(vswp, "%s: illegal device class %d", __func__,
2355 ver_pkt->dev_class);
2356
2357 ver_pkt->tag.vio_sid = ldcp->local_session;
2358 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;
2359
2360 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt);
2361
2362 (void) vsw_send_msg(ldcp, (void *)ver_pkt,
2363 sizeof (vio_ver_msg_t), B_TRUE);
2364
2365 ldcp->lane_in.lstate |= VSW_VER_NACK_SENT;
2366 vsw_next_milestone(ldcp);
2367 return;
2368 } else {
2369 ldcp->dev_class = ver_pkt->dev_class;
2370 }
2371
2372 /*
2373 * Now check the version.
2374 */
2375 if (vsw_supported_version(ver_pkt) == 0) {
2376 /*
2377 * We support this major version and possibly an
2378 * adjusted minor version.
2379 */
2380
2381 D2(vswp, "%s: accepted ver %d:%d", __func__,
2382 ver_pkt->ver_major, ver_pkt->ver_minor);
2383
2384 /* Store accepted values */
2385 ldcp->lane_in.ver_major = ver_pkt->ver_major;
2386 ldcp->lane_in.ver_minor = ver_pkt->ver_minor;
2387
2388 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
2389
2390 ldcp->lane_in.lstate |= VSW_VER_ACK_SENT;
2391
2392 if (vsw_obp_ver_proto_workaround == B_TRUE) {
2393 /*
2394 * Send a version info message
2395 * using the accepted version that
2396 * we are about to ack. Also note that
2397 * we send our ver info before we ack.
2398 * Otherwise, as soon as it receives the
2399 * ack, obp sends an attr info msg, which
2400 * breaks vsw_check_flag() invoked
2401 * from vsw_process_ctrl_attr_pkt();
2402 * as we also need VSW_VER_ACK_RECV to
2403 * be set in lane_out.lstate, before
2404 * we can receive attr info.
2405 */
2406 vsw_send_ver(ldcp);
2407 }
2408 } else {
2409 /*
2410 * NACK back with the next lower major/minor
2411 * pairing we support (if we don't support any more
2412 * versions then they will be set to zero).
2413
2413 */ 2414 2415 D2(vswp, "%s: replying with ver %d:%d", __func__, 2416 ver_pkt->ver_major, ver_pkt->ver_minor); 2417 2418 /* Store updated values */ 2419 ldcp->lane_in.ver_major = ver_pkt->ver_major; 2420 ldcp->lane_in.ver_minor = ver_pkt->ver_minor; 2421 2422 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2423 2424 ldcp->lane_in.lstate |= VSW_VER_NACK_SENT; 2425 } 2426 2427 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 2428 ver_pkt->tag.vio_sid = ldcp->local_session; 2429 (void) vsw_send_msg(ldcp, (void *)ver_pkt, 2430 sizeof (vio_ver_msg_t), B_TRUE); 2431 2432 vsw_next_milestone(ldcp); 2433 break; 2434 2435 case VIO_SUBTYPE_ACK: 2436 D2(vswp, "%s: VIO_SUBTYPE_ACK\n", __func__); 2437 2438 if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_ACK_RECV)) 2439 return; 2440 2441 /* Store updated values */ 2442 ldcp->lane_out.ver_major = ver_pkt->ver_major; 2443 ldcp->lane_out.ver_minor = ver_pkt->ver_minor; 2444 2445 ldcp->lane_out.lstate |= VSW_VER_ACK_RECV; 2446 vsw_next_milestone(ldcp); 2447 2448 break; 2449 2450 case VIO_SUBTYPE_NACK: 2451 D2(vswp, "%s: VIO_SUBTYPE_NACK\n", __func__); 2452 2453 if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_NACK_RECV)) 2454 return; 2455 2456 /* 2457 * If our peer sent us a NACK with the ver fields set to 2458 * zero then there is nothing more we can do. Otherwise see 2459 * if we support either the version suggested, or a lesser 2460 * one. 2461 */ 2462 if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) { 2463 DERR(vswp, "%s: peer unable to negotiate any " 2464 "further.", __func__); 2465 ldcp->lane_out.lstate |= VSW_VER_NACK_RECV; 2466 vsw_next_milestone(ldcp); 2467 return; 2468 } 2469 2470 /* 2471 * Check to see if we support this major version or 2472 * a lower one. If we don't then maj/min will be set 2473 * to zero. 2474 */ 2475 (void) vsw_supported_version(ver_pkt); 2476 if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) { 2477 /* Nothing more we can do */ 2478 DERR(vswp, "%s: version negotiation failed.\n", 2479 __func__); 2480 ldcp->lane_out.lstate |= VSW_VER_NACK_RECV; 2481 vsw_next_milestone(ldcp); 2482 } else { 2483 /* found a supported major version */ 2484 ldcp->lane_out.ver_major = ver_pkt->ver_major; 2485 ldcp->lane_out.ver_minor = ver_pkt->ver_minor; 2486 2487 D2(vswp, "%s: resending with updated values (%x, %x)", 2488 __func__, ver_pkt->ver_major, ver_pkt->ver_minor); 2489 2490 ldcp->lane_out.lstate |= VSW_VER_INFO_SENT; 2491 ver_pkt->tag.vio_sid = ldcp->local_session; 2492 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO; 2493 2494 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 2495 2496 (void) vsw_send_msg(ldcp, (void *)ver_pkt, 2497 sizeof (vio_ver_msg_t), B_TRUE); 2498 2499 vsw_next_milestone(ldcp); 2500 2501 } 2502 break; 2503 2504 default: 2505 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 2506 ver_pkt->tag.vio_subtype); 2507 } 2508 2509 D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id); 2510 } 2511 2512 static int 2513 vsw_process_attr_info(vsw_ldc_t *ldcp, vnet_attr_msg_t *msg) 2514 { 2515 vsw_t *vswp = ldcp->ldc_vswp; 2516 vsw_port_t *port = ldcp->ldc_port; 2517 struct ether_addr ea; 2518 uint64_t macaddr = 0; 2519 lane_t *lane_out = &ldcp->lane_out; 2520 lane_t *lane_in = &ldcp->lane_in; 2521 uint32_t mtu; 2522 int i; 2523 uint8_t dring_mode; 2524 2525 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 2526 2527 if (vsw_check_flag(ldcp, INBOUND, VSW_ATTR_INFO_RECV)) { 2528 return (1); 2529 } 2530 2531 if ((msg->xfer_mode != VIO_DESC_MODE) && 2532 (msg->xfer_mode != lane_out->xfer_mode)) { 2533 D2(NULL, "%s: unknown mode %x\n", __func__, 
msg->xfer_mode); 2534 return (1); 2535 } 2536 2537 /* Only support MAC addresses at moment. */ 2538 if ((msg->addr_type != ADDR_TYPE_MAC) || (msg->addr == 0)) { 2539 D2(NULL, "%s: invalid addr_type %x, or address 0x%llx\n", 2540 __func__, msg->addr_type, msg->addr); 2541 return (1); 2542 } 2543 2544 /* 2545 * MAC address supplied by device should match that stored 2546 * in the vsw-port OBP node. Need to decide what to do if they 2547 * don't match, for the moment just warn but don't fail. 2548 */ 2549 vnet_macaddr_ultostr(msg->addr, ea.ether_addr_octet); 2550 if (ether_cmp(&ea, &port->p_macaddr) != 0) { 2551 DERR(NULL, "%s: device supplied address " 2552 "0x%llx doesn't match node address 0x%llx\n", 2553 __func__, msg->addr, port->p_macaddr); 2554 } 2555 2556 /* 2557 * Ack freq only makes sense in pkt mode, in shared 2558 * mode the ring descriptors say whether or not to 2559 * send back an ACK. 2560 */ 2561 if ((VSW_VER_GTEQ(ldcp, 1, 2) && 2562 (msg->xfer_mode & VIO_DRING_MODE_V1_2)) || 2563 (VSW_VER_LT(ldcp, 1, 2) && 2564 (msg->xfer_mode == VIO_DRING_MODE_V1_0))) { 2565 if (msg->ack_freq > 0) { 2566 D2(NULL, "%s: non zero ack freq in SHM mode\n", 2567 __func__); 2568 return (1); 2569 } 2570 } 2571 2572 /* 2573 * Process dring mode attribute. 2574 */ 2575 if (VSW_VER_GTEQ(ldcp, 1, 6)) { 2576 /* 2577 * Versions >= 1.6: 2578 * Though we are operating in v1.6 mode, it is possible that 2579 * RxDringData mode has been disabled either on this guest or 2580 * on the peer guest. If so, we revert to pre v1.6 behavior of 2581 * TxDring mode. But this must be agreed upon in both 2582 * directions of attr exchange. We first determine the mode 2583 * that can be negotiated. 2584 */ 2585 if ((msg->options & VIO_RX_DRING_DATA) != 0 && 2586 vsw_dring_mode == VIO_RX_DRING_DATA) { 2587 /* 2588 * The peer is capable of handling RxDringData AND we 2589 * are also capable of it; we enable RxDringData mode 2590 * on this channel. 2591 */ 2592 dring_mode = VIO_RX_DRING_DATA; 2593 } else if ((msg->options & VIO_TX_DRING) != 0) { 2594 /* 2595 * If the peer is capable of TxDring mode, we 2596 * negotiate TxDring mode on this channel. 2597 */ 2598 dring_mode = VIO_TX_DRING; 2599 } else { 2600 /* 2601 * We support only VIO_TX_DRING and VIO_RX_DRING_DATA 2602 * modes. We don't support VIO_RX_DRING mode. 2603 */ 2604 return (1); 2605 } 2606 2607 /* 2608 * If we have received an ack for the attr info that we sent, 2609 * then check if the dring mode matches what the peer had ack'd 2610 * (saved in lane_out). If they don't match, we fail the 2611 * handshake. 2612 */ 2613 if (lane_out->lstate & VSW_ATTR_ACK_RECV) { 2614 if (msg->options != lane_out->dring_mode) { 2615 /* send NACK */ 2616 return (1); 2617 } 2618 } else { 2619 /* 2620 * Save the negotiated dring mode in our attr 2621 * parameters, so it gets sent in the attr info from us 2622 * to the peer. 2623 */ 2624 lane_out->dring_mode = dring_mode; 2625 } 2626 2627 /* save the negotiated dring mode in the msg to be replied */ 2628 msg->options = dring_mode; 2629 } 2630 2631 /* 2632 * Process MTU attribute. 2633 */ 2634 if (VSW_VER_GTEQ(ldcp, 1, 4)) { 2635 /* 2636 * Versions >= 1.4: 2637 * Validate mtu of the peer is at least ETHERMAX. Then, the mtu 2638 * is negotiated down to the minimum of our mtu and peer's mtu. 
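*
* For example, if our max_frame_size is 1600 and the peer
* advertises an mtu of 9216, the value negotiated below is
* MIN(9216, 1600) == 1600; it is written back into the msg so
* the peer sees the same result in our reply. (The numbers are
* illustrative only.)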
2639 */ 2640 if (msg->mtu < ETHERMAX) { 2641 return (1); 2642 } 2643 2644 mtu = MIN(msg->mtu, vswp->max_frame_size); 2645 2646 /* 2647 * If we have received an ack for the attr info 2648 * that we sent, then check if the mtu computed 2649 * above matches the mtu that the peer had ack'd 2650 * (saved in local hparams). If they don't 2651 * match, we fail the handshake. 2652 */ 2653 if (lane_out->lstate & VSW_ATTR_ACK_RECV) { 2654 if (mtu != lane_out->mtu) { 2655 /* send NACK */ 2656 return (1); 2657 } 2658 } else { 2659 /* 2660 * Save the mtu computed above in our 2661 * attr parameters, so it gets sent in 2662 * the attr info from us to the peer. 2663 */ 2664 lane_out->mtu = mtu; 2665 } 2666 2667 /* save the MIN mtu in the msg to be replied */ 2668 msg->mtu = mtu; 2669 } else { 2670 /* Versions < 1.4, mtu must match */ 2671 if (msg->mtu != lane_out->mtu) { 2672 D2(NULL, "%s: invalid MTU (0x%llx)\n", 2673 __func__, msg->mtu); 2674 return (1); 2675 } 2676 } 2677 2678 /* 2679 * Otherwise store attributes for this lane and update 2680 * lane state. 2681 */ 2682 lane_in->mtu = msg->mtu; 2683 lane_in->addr = msg->addr; 2684 lane_in->addr_type = msg->addr_type; 2685 lane_in->xfer_mode = msg->xfer_mode; 2686 lane_in->ack_freq = msg->ack_freq; 2687 lane_in->physlink_update = msg->physlink_update; 2688 lane_in->dring_mode = msg->options; 2689 2690 /* 2691 * Check if the client has requested physlink state updates. 2692 * If there is a physical device bound to this vswitch (L2 2693 * mode), set the ack bits to indicate it is supported. 2694 * Otherwise, set the nack bits. 2695 */ 2696 if (VSW_VER_GTEQ(ldcp, 1, 5)) { /* Protocol ver >= 1.5 */ 2697 2698 /* Does the vnet need phys link state updates ? */ 2699 if ((lane_in->physlink_update & 2700 PHYSLINK_UPDATE_STATE_MASK) == 2701 PHYSLINK_UPDATE_STATE) { 2702 2703 if (vswp->smode & VSW_LAYER2) { 2704 /* is a net-dev assigned to us ? */ 2705 msg->physlink_update = 2706 PHYSLINK_UPDATE_STATE_ACK; 2707 ldcp->pls_negotiated = B_TRUE; 2708 } else { 2709 /* not in L2 mode */ 2710 msg->physlink_update = 2711 PHYSLINK_UPDATE_STATE_NACK; 2712 ldcp->pls_negotiated = B_FALSE; 2713 } 2714 2715 } else { 2716 msg->physlink_update = 2717 PHYSLINK_UPDATE_NONE; 2718 ldcp->pls_negotiated = B_FALSE; 2719 } 2720 2721 } else { 2722 /* 2723 * physlink_update bits are ignored 2724 * if set by clients < v1.5 protocol. 2725 */ 2726 msg->physlink_update = PHYSLINK_UPDATE_NONE; 2727 ldcp->pls_negotiated = B_FALSE; 2728 } 2729 2730 macaddr = lane_in->addr; 2731 for (i = ETHERADDRL - 1; i >= 0; i--) { 2732 port->p_macaddr.ether_addr_octet[i] = macaddr & 0xFF; 2733 macaddr >>= 8; 2734 } 2735 2736 /* create the fdb entry for this port/mac address */ 2737 vsw_fdbe_add(vswp, port); 2738 2739 /* add the port to the specified vlans */ 2740 vsw_vlan_add_ids(port, VSW_VNETPORT); 2741 2742 /* 2743 * Setup device specific xmit routines. Note this could be changed 2744 * further in vsw_send_dring_info() for versions >= 1.6 if operating in 2745 * RxDringData mode. 
2746 */
2747 mutex_enter(&port->tx_lock);
2748
2749 if ((VSW_VER_GTEQ(ldcp, 1, 2) &&
2750 (lane_in->xfer_mode & VIO_DRING_MODE_V1_2)) ||
2751 (VSW_VER_LT(ldcp, 1, 2) &&
2752 (lane_in->xfer_mode == VIO_DRING_MODE_V1_0))) {
2753 D2(vswp, "%s: mode = VIO_DRING_MODE", __func__);
2754 port->transmit = vsw_dringsend;
2755 } else if (lane_in->xfer_mode == VIO_DESC_MODE) {
2756 D2(vswp, "%s: mode = VIO_DESC_MODE", __func__);
2757 vsw_create_privring(ldcp);
2758 port->transmit = vsw_descrsend;
2759 lane_out->xfer_mode = VIO_DESC_MODE;
2760 }
2761
2762 /*
2763 * HybridIO is supported only by vnet, not by OBP.
2764 * So, set hio_capable to true only when in DRING mode.
2765 */
2766 if (VSW_VER_GTEQ(ldcp, 1, 3) &&
2767 (lane_in->xfer_mode != VIO_DESC_MODE)) {
2768 (void) atomic_swap_32(&port->p_hio_capable, B_TRUE);
2769 } else {
2770 (void) atomic_swap_32(&port->p_hio_capable, B_FALSE);
2771 }
2772
2773 mutex_exit(&port->tx_lock);
2774
2775 return (0);
2776 }
2777
2778 static int
2779 vsw_process_attr_ack(vsw_ldc_t *ldcp, vnet_attr_msg_t *msg)
2780 {
2781 vsw_t *vswp = ldcp->ldc_vswp;
2782 lane_t *lane_out = &ldcp->lane_out;
2783 lane_t *lane_in = &ldcp->lane_in;
2784
2785 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__);
2786
2787 if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_ACK_RECV)) {
2788 return (1);
2789 }
2790
2791 /*
2792 * Process dring mode attribute.
2793 */
2794 if (VSW_VER_GTEQ(ldcp, 1, 6)) {
2795 /*
2796 * Versions >= 1.6:
2797 * The ack msg sent by the peer contains the negotiated dring
2798 * mode between our capability (that we had sent in our attr
2799 * info) and the peer's capability.
2800 */
2801 if (lane_in->lstate & VSW_ATTR_ACK_SENT) {
2802 /*
2803 * If we have sent an ack for the attr info msg from
2804 * the peer, check if the dring mode that was
2805 * negotiated then (saved in lane_out) matches the
2806 * mode that the peer has ack'd. If they don't match,
2807 * we fail the handshake.
2808 */
2809 if (lane_out->dring_mode != msg->options) {
2810 return (1);
2811 }
2812 } else {
2813 if ((msg->options & lane_out->dring_mode) == 0) {
2814 /*
2815 * Peer ack'd with a mode that we don't
2816 * support; we fail the handshake.
2817 */
2818 return (1);
2819 }
2820 if ((msg->options & (VIO_TX_DRING|VIO_RX_DRING_DATA))
2821 == (VIO_TX_DRING|VIO_RX_DRING_DATA)) {
2822 /*
2823 * Peer must ack with only one negotiated mode.
2824 * Otherwise fail handshake.
2825 */
2826 return (1);
2827 }
2828
2829 /*
2830 * Save the negotiated mode, so we can validate it when
2831 * we receive attr info from the peer.
2832 */
2833 lane_out->dring_mode = msg->options;
2834 }
2835 }
2836
2837 /*
2838 * Process MTU attribute.
2839 */
2840 if (VSW_VER_GTEQ(ldcp, 1, 4)) {
2841 /*
2842 * Versions >= 1.4:
2843 * The ack msg sent by the peer contains the minimum of
2844 * our mtu (that we had sent in our attr info) and the
2845 * peer's mtu.
2846 *
2847 * If we have sent an ack for the attr info msg from
2848 * the peer, check if the mtu that was computed then
2849 * (saved in lane_out params) matches the mtu that the
2850 * peer has ack'd. If they don't match, we fail the
2851 * handshake.
2852 */
2853 if (lane_in->lstate & VSW_ATTR_ACK_SENT) {
2854 if (lane_out->mtu != msg->mtu) {
2855 return (1);
2856 }
2857 } else {
2858 /*
2859 * If the mtu ack'd by the peer is > our mtu
2860 * fail handshake. Otherwise, save the mtu, so
2861 * we can validate it when we receive attr info
2862 * from our peer.
2863
2863 */ 2864 if (msg->mtu <= lane_out->mtu) { 2865 lane_out->mtu = msg->mtu; 2866 } else { 2867 return (1); 2868 } 2869 } 2870 } 2871 2872 return (0); 2873 } 2874 2875 /* 2876 * Process an attribute packet. We can end up here either because our peer 2877 * has ACK/NACK'ed back to an earlier ATTR msg we had sent it, or our 2878 * peer has sent us an attribute INFO message 2879 * 2880 * If its an ACK we then move to the next stage of the handshake which 2881 * is to send our descriptor ring info to our peer. If its a NACK then 2882 * there is nothing more we can (currently) do. 2883 * 2884 * If we get a valid/acceptable INFO packet (and we have already negotiated 2885 * a version) we ACK back and set channel state to ATTR_RECV, otherwise we 2886 * NACK back and reset channel state to INACTIV. 2887 * 2888 * FUTURE: in time we will probably negotiate over attributes, but for 2889 * the moment unacceptable attributes are regarded as a fatal error. 2890 * 2891 */ 2892 void 2893 vsw_process_ctrl_attr_pkt(vsw_ldc_t *ldcp, void *pkt) 2894 { 2895 vnet_attr_msg_t *attr_pkt; 2896 vsw_t *vswp = ldcp->ldc_vswp; 2897 lane_t *lane_out = &ldcp->lane_out; 2898 lane_t *lane_in = &ldcp->lane_in; 2899 int rv; 2900 2901 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 2902 2903 /* 2904 * We know this is a ctrl/attr packet so 2905 * cast it into the correct structure. 2906 */ 2907 attr_pkt = (vnet_attr_msg_t *)pkt; 2908 2909 switch (attr_pkt->tag.vio_subtype) { 2910 case VIO_SUBTYPE_INFO: 2911 2912 rv = vsw_process_attr_info(ldcp, attr_pkt); 2913 if (rv != 0) { 2914 vsw_free_lane_resources(ldcp, INBOUND); 2915 attr_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2916 ldcp->lane_in.lstate |= VSW_ATTR_NACK_SENT; 2917 } else { 2918 attr_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 2919 lane_in->lstate |= VSW_ATTR_ACK_SENT; 2920 } 2921 attr_pkt->tag.vio_sid = ldcp->local_session; 2922 DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt); 2923 (void) vsw_send_msg(ldcp, (void *)attr_pkt, 2924 sizeof (vnet_attr_msg_t), B_TRUE); 2925 vsw_next_milestone(ldcp); 2926 break; 2927 2928 case VIO_SUBTYPE_ACK: 2929 2930 rv = vsw_process_attr_ack(ldcp, attr_pkt); 2931 if (rv != 0) { 2932 return; 2933 } 2934 lane_out->lstate |= VSW_ATTR_ACK_RECV; 2935 vsw_next_milestone(ldcp); 2936 break; 2937 2938 case VIO_SUBTYPE_NACK: 2939 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 2940 2941 if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_NACK_RECV)) 2942 return; 2943 2944 lane_out->lstate |= VSW_ATTR_NACK_RECV; 2945 vsw_next_milestone(ldcp); 2946 break; 2947 2948 default: 2949 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 2950 attr_pkt->tag.vio_subtype); 2951 } 2952 2953 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 2954 } 2955 2956 static int 2957 vsw_process_dring_reg_info(vsw_ldc_t *ldcp, vio_msg_tag_t *tagp) 2958 { 2959 int rv; 2960 vsw_t *vswp = ldcp->ldc_vswp; 2961 lane_t *lp = &ldcp->lane_out; 2962 dring_info_t *dp = NULL; 2963 2964 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 2965 2966 rv = vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV); 2967 if (rv != 0) { 2968 return (1); 2969 } 2970 2971 if (VSW_VER_GTEQ(ldcp, 1, 6) && 2972 (lp->dring_mode != ((vio_dring_reg_msg_t *)tagp)->options)) { 2973 /* 2974 * The earlier version of Solaris vnet driver doesn't set the 2975 * option (VIO_TX_DRING in its case) correctly in its dring reg 2976 * message. We workaround that here by doing the check only 2977 * for versions >= v1.6. 
2978 */ 2979 DWARN(vswp, "%s(%lld): Rcvd dring reg option (%d), " 2980 "negotiated mode (%d)\n", __func__, ldcp->ldc_id, 2981 ((vio_dring_reg_msg_t *)tagp)->options, lp->dring_mode); 2982 return (1); 2983 } 2984 2985 /* 2986 * Map dring exported by the peer. 2987 */ 2988 dp = vsw_map_dring(ldcp, (void *)tagp); 2989 if (dp == NULL) { 2990 return (1); 2991 } 2992 2993 /* 2994 * Map data buffers exported by the peer if we are in RxDringData mode. 2995 */ 2996 if (lp->dring_mode == VIO_RX_DRING_DATA) { 2997 rv = vsw_map_data(ldcp, dp, (void *)tagp); 2998 if (rv != 0) { 2999 vsw_unmap_dring(ldcp); 3000 return (1); 3001 } 3002 } 3003 3004 return (0); 3005 } 3006 3007 static int 3008 vsw_process_dring_reg_ack(vsw_ldc_t *ldcp, vio_msg_tag_t *tagp) 3009 { 3010 vsw_t *vswp = ldcp->ldc_vswp; 3011 dring_info_t *dp; 3012 3013 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3014 3015 if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_ACK_RECV)) { 3016 return (1); 3017 } 3018 3019 dp = ldcp->lane_out.dringp; 3020 3021 /* save dring_ident acked by peer */ 3022 dp->ident = ((vio_dring_reg_msg_t *)tagp)->dring_ident; 3023 3024 return (0); 3025 } 3026 3027 /* 3028 * Process a dring info packet. We can end up here either because our peer 3029 * has ACK/NACK'ed back to an earlier DRING msg we had sent it, or our 3030 * peer has sent us a dring INFO message. 3031 * 3032 * If we get a valid/acceptable INFO packet (and we have already negotiated 3033 * a version) we ACK back and update the lane state, otherwise we NACK back. 3034 * 3035 * FUTURE: nothing to stop client from sending us info on multiple dring's 3036 * but for the moment we will just use the first one we are given. 3037 * 3038 */ 3039 void 3040 vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *ldcp, void *pkt) 3041 { 3042 int rv; 3043 int msgsize; 3044 dring_info_t *dp; 3045 vio_msg_tag_t *tagp = (vio_msg_tag_t *)pkt; 3046 vsw_t *vswp = ldcp->ldc_vswp; 3047 lane_t *lane_out = &ldcp->lane_out; 3048 lane_t *lane_in = &ldcp->lane_in; 3049 3050 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 3051 3052 switch (tagp->vio_subtype) { 3053 case VIO_SUBTYPE_INFO: 3054 rv = vsw_process_dring_reg_info(ldcp, tagp); 3055 if (rv != 0) { 3056 vsw_free_lane_resources(ldcp, INBOUND); 3057 tagp->vio_subtype = VIO_SUBTYPE_NACK; 3058 lane_in->lstate |= VSW_DRING_NACK_SENT; 3059 } else { 3060 tagp->vio_subtype = VIO_SUBTYPE_ACK; 3061 lane_in->lstate |= VSW_DRING_ACK_SENT; 3062 } 3063 tagp->vio_sid = ldcp->local_session; 3064 DUMP_TAG_PTR(tagp); 3065 if (lane_out->dring_mode == VIO_RX_DRING_DATA) { 3066 dp = lane_in->dringp; 3067 msgsize = 3068 VNET_DRING_REG_EXT_MSG_SIZE(dp->data_ncookies); 3069 } else { 3070 msgsize = sizeof (vio_dring_reg_msg_t); 3071 } 3072 (void) vsw_send_msg(ldcp, (void *)tagp, msgsize, B_TRUE); 3073 vsw_next_milestone(ldcp); 3074 break; 3075 3076 case VIO_SUBTYPE_ACK: 3077 rv = vsw_process_dring_reg_ack(ldcp, tagp); 3078 if (rv != 0) { 3079 return; 3080 } 3081 lane_out->lstate |= VSW_DRING_ACK_RECV; 3082 vsw_next_milestone(ldcp); 3083 break; 3084 3085 case VIO_SUBTYPE_NACK: 3086 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3087 3088 if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_NACK_RECV)) 3089 return; 3090 3091 lane_out->lstate |= VSW_DRING_NACK_RECV; 3092 vsw_next_milestone(ldcp); 3093 break; 3094 3095 default: 3096 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 3097 tagp->vio_subtype); 3098 } 3099 3100 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 3101 } 3102 3103 /* 3104 * Process a request from peer to unregister a dring. 
3105 * 3106 * For the moment we just restart the handshake if our 3107 * peer endpoint attempts to unregister a dring. 3108 */ 3109 void 3110 vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *ldcp, void *pkt) 3111 { 3112 vsw_t *vswp = ldcp->ldc_vswp; 3113 vio_dring_unreg_msg_t *dring_pkt; 3114 3115 /* 3116 * We know this is a ctrl/dring packet so 3117 * cast it into the correct structure. 3118 */ 3119 dring_pkt = (vio_dring_unreg_msg_t *)pkt; 3120 3121 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3122 3123 switch (dring_pkt->tag.vio_subtype) { 3124 case VIO_SUBTYPE_INFO: 3125 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3126 3127 DWARN(vswp, "%s: restarting handshake..", __func__); 3128 break; 3129 3130 case VIO_SUBTYPE_ACK: 3131 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3132 3133 DWARN(vswp, "%s: restarting handshake..", __func__); 3134 break; 3135 3136 case VIO_SUBTYPE_NACK: 3137 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3138 3139 DWARN(vswp, "%s: restarting handshake..", __func__); 3140 break; 3141 3142 default: 3143 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 3144 dring_pkt->tag.vio_subtype); 3145 } 3146 3147 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3148 3149 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3150 } 3151 3152 #define SND_MCST_NACK(ldcp, pkt) \ 3153 pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \ 3154 pkt->tag.vio_sid = ldcp->local_session; \ 3155 (void) vsw_send_msg(ldcp, (void *)pkt, \ 3156 sizeof (vnet_mcast_msg_t), B_TRUE); 3157 3158 /* 3159 * Process a multicast request from a vnet. 3160 * 3161 * Vnet's specify a multicast address that they are interested in. This 3162 * address is used as a key into the hash table which forms the multicast 3163 * forwarding database (mFDB). 3164 * 3165 * The table keys are the multicast addresses, while the table entries 3166 * are pointers to lists of ports which wish to receive packets for the 3167 * specified multicast address. 3168 * 3169 * When a multicast packet is being switched we use the address as a key 3170 * into the hash table, and then walk the appropriate port list forwarding 3171 * the pkt to each port in turn. 3172 * 3173 * If a vnet is no longer interested in a particular multicast grouping 3174 * we simply find the correct location in the hash table and then delete 3175 * the relevant port from the port list. 3176 * 3177 * To deal with the case whereby a port is being deleted without first 3178 * removing itself from the lists in the hash table, we maintain a list 3179 * of multicast addresses the port has registered an interest in, within 3180 * the port structure itself. We then simply walk that list of addresses 3181 * using them as keys into the hash table and remove the port from the 3182 * appropriate lists. 3183 */ 3184 static void 3185 vsw_process_ctrl_mcst_pkt(vsw_ldc_t *ldcp, void *pkt) 3186 { 3187 vnet_mcast_msg_t *mcst_pkt; 3188 vsw_port_t *port = ldcp->ldc_port; 3189 vsw_t *vswp = ldcp->ldc_vswp; 3190 int i; 3191 3192 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3193 3194 /* 3195 * We know this is a ctrl/mcast packet so 3196 * cast it into the correct structure. 3197 */ 3198 mcst_pkt = (vnet_mcast_msg_t *)pkt; 3199 3200 switch (mcst_pkt->tag.vio_subtype) { 3201 case VIO_SUBTYPE_INFO: 3202 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3203 3204 /* 3205 * Check if in correct state to receive a multicast 3206 * message (i.e. handshake complete). If not reset 3207 * the handshake. 
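*
* The addresses themselves are then validated by testing the
* Ethernet group (multicast) bit, in effect (a sketch of the
* check performed below):
*
*	if ((mca->ether_addr_octet[0] & 0x01) == 0)
*		NACK: not a multicast address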
3208 */ 3209 if (vsw_check_flag(ldcp, INBOUND, VSW_MCST_INFO_RECV)) 3210 return; 3211 3212 /* 3213 * Before attempting to add or remove address check 3214 * that they are valid multicast addresses. 3215 * If not, then NACK back. 3216 */ 3217 for (i = 0; i < mcst_pkt->count; i++) { 3218 if ((mcst_pkt->mca[i].ether_addr_octet[0] & 01) != 1) { 3219 DERR(vswp, "%s: invalid multicast address", 3220 __func__); 3221 SND_MCST_NACK(ldcp, mcst_pkt); 3222 return; 3223 } 3224 } 3225 3226 /* 3227 * Now add/remove the addresses. If this fails we 3228 * NACK back. 3229 */ 3230 if (vsw_add_rem_mcst(mcst_pkt, port) != 0) { 3231 SND_MCST_NACK(ldcp, mcst_pkt); 3232 return; 3233 } 3234 3235 mcst_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3236 mcst_pkt->tag.vio_sid = ldcp->local_session; 3237 3238 DUMP_TAG_PTR((vio_msg_tag_t *)mcst_pkt); 3239 3240 (void) vsw_send_msg(ldcp, (void *)mcst_pkt, 3241 sizeof (vnet_mcast_msg_t), B_TRUE); 3242 break; 3243 3244 case VIO_SUBTYPE_ACK: 3245 DWARN(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3246 3247 /* 3248 * We shouldn't ever get a multicast ACK message as 3249 * at the moment we never request multicast addresses 3250 * to be set on some other device. This may change in 3251 * the future if we have cascading switches. 3252 */ 3253 if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_ACK_RECV)) 3254 return; 3255 3256 /* Do nothing */ 3257 break; 3258 3259 case VIO_SUBTYPE_NACK: 3260 DWARN(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3261 3262 /* 3263 * We shouldn't get a multicast NACK packet for the 3264 * same reasons as we shouldn't get a ACK packet. 3265 */ 3266 if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_NACK_RECV)) 3267 return; 3268 3269 /* Do nothing */ 3270 break; 3271 3272 default: 3273 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 3274 mcst_pkt->tag.vio_subtype); 3275 } 3276 3277 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3278 } 3279 3280 static void 3281 vsw_process_ctrl_rdx_pkt(vsw_ldc_t *ldcp, void *pkt) 3282 { 3283 vio_rdx_msg_t *rdx_pkt; 3284 vsw_t *vswp = ldcp->ldc_vswp; 3285 3286 /* 3287 * We know this is a ctrl/rdx packet so 3288 * cast it into the correct structure. 3289 */ 3290 rdx_pkt = (vio_rdx_msg_t *)pkt; 3291 3292 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 3293 3294 switch (rdx_pkt->tag.vio_subtype) { 3295 case VIO_SUBTYPE_INFO: 3296 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3297 3298 if (vsw_check_flag(ldcp, OUTBOUND, VSW_RDX_INFO_RECV)) 3299 return; 3300 3301 rdx_pkt->tag.vio_sid = ldcp->local_session; 3302 rdx_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3303 3304 DUMP_TAG_PTR((vio_msg_tag_t *)rdx_pkt); 3305 3306 ldcp->lane_out.lstate |= VSW_RDX_ACK_SENT; 3307 3308 (void) vsw_send_msg(ldcp, (void *)rdx_pkt, 3309 sizeof (vio_rdx_msg_t), B_TRUE); 3310 3311 vsw_next_milestone(ldcp); 3312 break; 3313 3314 case VIO_SUBTYPE_ACK: 3315 /* 3316 * Should be handled in-band by callback handler. 
3317 */
3318 DERR(vswp, "%s: Unexpected VIO_SUBTYPE_ACK", __func__);
3319 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
3320 break;
3321
3322 case VIO_SUBTYPE_NACK:
3323 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__);
3324
3325 if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_NACK_RECV))
3326 return;
3327
3328 ldcp->lane_in.lstate |= VSW_RDX_NACK_RECV;
3329 vsw_next_milestone(ldcp);
3330 break;
3331
3332 default:
3333 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__,
3334 rdx_pkt->tag.vio_subtype);
3335 }
3336
3337 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
3338 }
3339
3340 static void
3341 vsw_process_physlink_msg(vsw_ldc_t *ldcp, void *pkt)
3342 {
3343 vnet_physlink_msg_t *msgp;
3344 vsw_t *vswp = ldcp->ldc_vswp;
3345
3346 msgp = (vnet_physlink_msg_t *)pkt;
3347
3348 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id);
3349
3350 switch (msgp->tag.vio_subtype) {
3351 case VIO_SUBTYPE_INFO:
3352
3353 /* vsw shouldn't recv physlink info */
3354 DWARN(vswp, "%s: Unexpected VIO_SUBTYPE_INFO", __func__);
3355 break;
3356
3357 case VIO_SUBTYPE_ACK:
3358
3359 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__);
3360 break;
3361
3362 case VIO_SUBTYPE_NACK:
3363
3364 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__);
3365 break;
3366
3367 default:
3368 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__,
3369 msgp->tag.vio_subtype);
3370 }
3371
3372 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
3373 }
3374
3375 static void
3376 vsw_process_data_pkt(vsw_ldc_t *ldcp, void *dpkt, vio_msg_tag_t *tagp,
3377 uint32_t msglen)
3378 {
3379 uint16_t env = tagp->vio_subtype_env;
3380 vsw_t *vswp = ldcp->ldc_vswp;
3381 lane_t *lp = &ldcp->lane_out;
3382 uint8_t dring_mode = lp->dring_mode;
3383
3384 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
3385
3386 /* session id check */
3387 if (ldcp->session_status & VSW_PEER_SESSION) {
3388 if (ldcp->peer_session != tagp->vio_sid) {
3389 DERR(vswp, "%s (chan %d): invalid session id (%llx)",
3390 __func__, ldcp->ldc_id, tagp->vio_sid);
3391 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
3392 return;
3393 }
3394 }
3395
3396 /*
3397 * It is an error for us to be getting data packets
3398 * before the handshake has completed.
3399 */
3400 if (ldcp->hphase != VSW_MILESTONE4) {
3401 DERR(vswp, "%s: got data packet before handshake complete "
3402 "hphase %d (%x: %x)", __func__, ldcp->hphase,
3403 ldcp->lane_in.lstate, ldcp->lane_out.lstate);
3404 DUMP_FLAGS(ldcp->lane_in.lstate);
3405 DUMP_FLAGS(ldcp->lane_out.lstate);
3406 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
3407 return;
3408 }
3409 if (dring_mode == VIO_TX_DRING) {
3410 /*
3411 * To reduce the locking contention, release the ldc_cblock
3412 * here and re-acquire it once we are done receiving packets.
3413 * We do this only in TxDring mode to allow further callbacks to
3414 * continue while the msg worker thread processes the messages.
3415 * In RxDringData mode, we process the messages in the callback
3416 * itself and wake up the rcv worker thread to process only data
3417 * info messages.
3418 */
3419 mutex_exit(&ldcp->ldc_cblock);
3420 mutex_enter(&ldcp->ldc_rxlock);
3421 }
3422
3423 /*
3424 * Switch on the vio_subtype envelope, then let the lower routines
3425 * decide if it's an INFO, ACK or NACK packet.
3426
3426 */ 3427 if (env == VIO_DRING_DATA) { 3428 ldcp->rx_dringdata(ldcp, dpkt); 3429 } else if (env == VIO_PKT_DATA) { 3430 ldcp->rx_pktdata(ldcp, dpkt, msglen); 3431 } else if (env == VIO_DESC_DATA) { 3432 vsw_process_data_ibnd_pkt(ldcp, dpkt); 3433 } else { 3434 DERR(vswp, "%s: unknown vio_subtype_env (%x)\n", 3435 __func__, env); 3436 } 3437 3438 if (dring_mode == VIO_TX_DRING) { 3439 mutex_exit(&ldcp->ldc_rxlock); 3440 mutex_enter(&ldcp->ldc_cblock); 3441 } 3442 3443 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3444 } 3445 3446 /* 3447 * dummy pkt data handler function for vnet protocol version 1.0 3448 */ 3449 static void 3450 vsw_process_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen) 3451 { 3452 _NOTE(ARGUNUSED(arg1, arg2, msglen)) 3453 } 3454 3455 /* 3456 * This function handles raw pkt data messages received over the channel. 3457 * Currently, only priority-eth-type frames are received through this mechanism. 3458 * In this case, the frame(data) is present within the message itself which 3459 * is copied into an mblk before switching it. 3460 */ 3461 static void 3462 vsw_process_pkt_data(void *arg1, void *arg2, uint32_t msglen) 3463 { 3464 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg1; 3465 vio_raw_data_msg_t *dpkt = (vio_raw_data_msg_t *)arg2; 3466 uint32_t size; 3467 mblk_t *mp; 3468 vio_mblk_t *vmp; 3469 vsw_t *vswp = ldcp->ldc_vswp; 3470 vgen_stats_t *statsp = &ldcp->ldc_stats; 3471 lane_t *lp = &ldcp->lane_out; 3472 3473 size = msglen - VIO_PKT_DATA_HDRSIZE; 3474 if (size < ETHERMIN || size > lp->mtu) { 3475 (void) atomic_inc_32(&statsp->rx_pri_fail); 3476 DWARN(vswp, "%s(%lld) invalid size(%d)\n", __func__, 3477 ldcp->ldc_id, size); 3478 return; 3479 } 3480 3481 vmp = vio_multipool_allocb(&ldcp->vmp, size + VLAN_TAGSZ); 3482 if (vmp == NULL) { 3483 mp = allocb(size + VLAN_TAGSZ, BPRI_MED); 3484 if (mp == NULL) { 3485 (void) atomic_inc_32(&statsp->rx_pri_fail); 3486 DWARN(vswp, "%s(%lld) allocb failure, " 3487 "unable to process priority frame\n", __func__, 3488 ldcp->ldc_id); 3489 return; 3490 } 3491 } else { 3492 mp = vmp->mp; 3493 } 3494 3495 /* skip over the extra space for vlan tag */ 3496 mp->b_rptr += VLAN_TAGSZ; 3497 3498 /* copy the frame from the payload of raw data msg into the mblk */ 3499 bcopy(dpkt->data, mp->b_rptr, size); 3500 mp->b_wptr = mp->b_rptr + size; 3501 3502 if (vmp != NULL) { 3503 vmp->state = VIO_MBLK_HAS_DATA; 3504 } 3505 3506 /* update stats */ 3507 (void) atomic_inc_64(&statsp->rx_pri_packets); 3508 (void) atomic_add_64(&statsp->rx_pri_bytes, size); 3509 3510 /* 3511 * VLAN_TAGSZ of extra space has been pre-alloc'd if tag is needed. 3512 */ 3513 (void) vsw_vlan_frame_pretag(ldcp->ldc_port, VSW_VNETPORT, mp); 3514 3515 /* switch the frame to destination */ 3516 vswp->vsw_switch_frame(vswp, mp, VSW_VNETPORT, ldcp->ldc_port, NULL); 3517 } 3518 3519 /* 3520 * Process an in-band descriptor message (most likely from 3521 * OBP). 
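*
* Note on sizing (worked example): the payload is rounded up to an
* 8-byte multiple before the LDC copy-in, e.g. for datalen = 61:
*
*	nbytes = datalen;
*	if (nbytes & 0x7)
*		nbytes += 8 - (nbytes & 0x7);	(61 -> 64)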
3522 */
3523 static void
3524 vsw_process_data_ibnd_pkt(vsw_ldc_t *ldcp, void *pkt)
3525 {
3526 vnet_ibnd_desc_t *ibnd_desc;
3527 dring_info_t *dp = NULL;
3528 vsw_private_desc_t *priv_addr = NULL;
3529 vsw_t *vswp = ldcp->ldc_vswp;
3530 mblk_t *mp = NULL;
3531 size_t nbytes = 0;
3532 size_t off = 0;
3533 uint64_t idx = 0;
3534 uint32_t num = 1, len, datalen = 0;
3535 uint64_t ncookies = 0;
3536 int i, rv;
3537 int j = 0;
3538
3539 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
3540
3541 ibnd_desc = (vnet_ibnd_desc_t *)pkt;
3542
3543 switch (ibnd_desc->hdr.tag.vio_subtype) {
3544 case VIO_SUBTYPE_INFO:
3545 D1(vswp, "%s: VIO_SUBTYPE_INFO", __func__);
3546
3547 if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV))
3548 return;
3549
3550 /*
3551 * Data is padded to align on an 8 byte boundary;
3552 * nbytes is the actual data length, i.e. minus that
3553 * padding.
3554 */
3555 datalen = ibnd_desc->nbytes;
3556
3557 D2(vswp, "%s(%lld): processing inband desc : "
3558 ": datalen 0x%lx", __func__, ldcp->ldc_id, datalen);
3559
3560 ncookies = ibnd_desc->ncookies;
3561
3562 /*
3563 * allocb(9F) returns an aligned data block. We
3564 * need to ensure that we ask ldc for an aligned
3565 * number of bytes also.
3566 */
3567 nbytes = datalen;
3568 if (nbytes & 0x7) {
3569 off = 8 - (nbytes & 0x7);
3570 nbytes += off;
3571 }
3572
3573 /* alloc extra space for VLAN_TAG */
3574 mp = allocb(datalen + 8, BPRI_MED);
3575 if (mp == NULL) {
3576 DERR(vswp, "%s(%lld): allocb failed",
3577 __func__, ldcp->ldc_id);
3578 ldcp->ldc_stats.rx_allocb_fail++;
3579 return;
3580 }
3581
3582 /* skip over the extra space for VLAN_TAG */
3583 mp->b_rptr += 8;
3584
3585 rv = ldc_mem_copy(ldcp->ldc_handle, (caddr_t)mp->b_rptr,
3586 0, &nbytes, ibnd_desc->memcookie, (uint64_t)ncookies,
3587 LDC_COPY_IN);
3588
3589 if (rv != 0) {
3590 DERR(vswp, "%s(%d): unable to copy in data from "
3591 "%d cookie(s)", __func__, ldcp->ldc_id, ncookies);
3592 freemsg(mp);
3593 ldcp->ldc_stats.ierrors++;
3594 return;
3595 }
3596
3597 D2(vswp, "%s(%d): copied in %ld bytes using %d cookies",
3598 __func__, ldcp->ldc_id, nbytes, ncookies);
3599
3600 /* point to the actual end of data */
3601 mp->b_wptr = mp->b_rptr + datalen;
3602 ldcp->ldc_stats.ipackets++;
3603 ldcp->ldc_stats.rbytes += datalen;
3604
3605 /*
3606 * We ACK back every in-band descriptor message we process
3607 */
3608 ibnd_desc->hdr.tag.vio_subtype = VIO_SUBTYPE_ACK;
3609 ibnd_desc->hdr.tag.vio_sid = ldcp->local_session;
3610 (void) vsw_send_msg(ldcp, (void *)ibnd_desc,
3611 sizeof (vnet_ibnd_desc_t), B_TRUE);
3612
3613 /*
3614 * there is extra space alloc'd for VLAN_TAG
3615 */
3616 (void) vsw_vlan_frame_pretag(ldcp->ldc_port, VSW_VNETPORT, mp);
3617
3618 /* send the packet to be switched */
3619 vswp->vsw_switch_frame(vswp, mp, VSW_VNETPORT,
3620 ldcp->ldc_port, NULL);
3621
3622 break;
3623
3624 case VIO_SUBTYPE_ACK:
3625 D1(vswp, "%s: VIO_SUBTYPE_ACK", __func__);
3626
3627 /* Verify the ACK is valid */
3628 idx = ibnd_desc->hdr.desc_handle;
3629
3630 if (idx >= vsw_num_descriptors) {
3631 cmn_err(CE_WARN, "!vsw%d: corrupted ACK received "
3632 "(idx %ld)", vswp->instance, idx);
3633 return;
3634 }
3635
3636 if ((dp = ldcp->lane_out.dringp) == NULL) {
3637 DERR(vswp, "%s: no dring found", __func__);
3638 return;
3639 }
3640
3641 len = dp->num_descriptors;
3642 /*
3643 * If the descriptor we are being ACK'ed for is not the
3644 * one we expected, then pkts were lost somewhere, either
3645 * when we tried to send a msg, or a previous ACK msg from
3646 * our peer. In either case we now reclaim the descriptors
3647 * in the range from the last ACK we received up to the
3648 * current ACK.
3649 */
3650 if (idx != dp->last_ack_recv) {
3651 DWARN(vswp, "%s: dropped pkts detected, (%ld, %ld)",
3652 __func__, dp->last_ack_recv, idx);
3653 num = idx >= dp->last_ack_recv ?
3654 idx - dp->last_ack_recv + 1:
3655 (len - dp->last_ack_recv + 1) + idx;
3656 }
3657
3658 /*
3659 * When we sent the in-band message to our peer we
3660 * marked the copy in our private ring as READY. We now
3661 * check that the descriptor we are being ACK'ed for is in
3662 * fact READY, i.e. it is one we have shared with our peer.
3663 *
3664 * If it's not we flag an error, but still reset the descriptor
3665 * back to FREE.
3666 */
3667 for (i = dp->last_ack_recv; j < num; i = (i + 1) % len, j++) {
3668 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i;
3669 mutex_enter(&priv_addr->dstate_lock);
3670 if (priv_addr->dstate != VIO_DESC_READY) {
3671 DERR(vswp, "%s: (%ld) desc at index %ld not "
3672 "READY (0x%lx)", __func__,
3673 ldcp->ldc_id, idx, priv_addr->dstate);
3674 DERR(vswp, "%s: bound %d: ncookies %ld : "
3675 "datalen %ld", __func__,
3676 priv_addr->bound, priv_addr->ncookies,
3677 priv_addr->datalen);
3678 }
3679 D2(vswp, "%s: (%lld) freeing descp at %lld", __func__,
3680 ldcp->ldc_id, idx);
3681 /* release resources associated with sent msg */
3682 priv_addr->datalen = 0;
3683 priv_addr->dstate = VIO_DESC_FREE;
3684 mutex_exit(&priv_addr->dstate_lock);
3685 }
3686 /* update to next expected value */
3687 dp->last_ack_recv = (idx + 1) % dp->num_descriptors;
3688
3689 break;
3690
3691 case VIO_SUBTYPE_NACK:
3692 DERR(vswp, "%s: VIO_SUBTYPE_NACK", __func__);
3693
3694 /*
3695 * We should only get a NACK if our peer doesn't like
3696 * something about a message we have sent it. If this
3697 * happens we just release the resources associated with
3698 * the message. (We are relying on higher layers to decide
3699 * whether or not to resend.)
3700 */
3701
3702 /* limit check */
3703 idx = ibnd_desc->hdr.desc_handle;
3704
3705 if (idx >= vsw_num_descriptors) {
3706 DERR(vswp, "%s: corrupted NACK received (idx %lld)",
3707 __func__, idx);
3708 return;
3709 }
3710
3711 if ((dp = ldcp->lane_out.dringp) == NULL) {
3712 DERR(vswp, "%s: no dring found", __func__);
3713 return;
3714 }
3715
3716 priv_addr = (vsw_private_desc_t *)dp->priv_addr;
3717
3718 /* move to correct location in ring */
3719 priv_addr += idx;
3720
3721 /* release resources associated with sent msg */
3722 mutex_enter(&priv_addr->dstate_lock);
3723 priv_addr->datalen = 0;
3724 priv_addr->dstate = VIO_DESC_FREE;
3725 mutex_exit(&priv_addr->dstate_lock);
3726
3727 break;
3728
3729 default:
3730 DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__,
3731 ldcp->ldc_id, ibnd_desc->hdr.tag.vio_subtype);
3732 }
3733
3734 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id);
3735 }
3736
3737 static void
3738 vsw_process_err_pkt(vsw_ldc_t *ldcp, void *epkt, vio_msg_tag_t *tagp)
3739 {
3740 _NOTE(ARGUNUSED(epkt))
3741
3742 vsw_t *vswp = ldcp->ldc_vswp;
3743 uint16_t env = tagp->vio_subtype_env;
3744
3745 D1(vswp, "%s (%lld): enter\n", __func__, ldcp->ldc_id);
3746
3747 /*
3748 * Error vio_subtypes have yet to be defined. So for
3749 * the moment we can't do anything.
3750 */
3751 D2(vswp, "%s: (%x) vio_subtype env", __func__, env);
3752
3753 D1(vswp, "%s (%lld): exit\n", __func__, ldcp->ldc_id);
3754 }
3755
3756 /* transmit the packet over the given port */
3757 int
3758 vsw_portsend(vsw_port_t *port, mblk_t *mp)
3759 {
3760 mblk_t *mpt;
3761 int count;
3762 vsw_ldc_t *ldcp = port->ldcp;
3763 int status = 0;
3764
3765 count = vsw_vlan_frame_untag(port, VSW_VNETPORT, &mp, &mpt);
3766 if (count != 0) {
3767 status = ldcp->tx(ldcp, mp, mpt, count);
3768 }
3769 return (status);
3770 }
3771
3772 /*
3773 * Break up frames into 2 separate chains: normal and
3774 * priority, based on the frame type. The number of
3775 * priority frames is also counted and returned.
3776 *
3777 * Params:
3778 * vswp: pointer to the instance of vsw
3779 * np: head of packet chain to be broken
3780 * npt: tail of packet chain to be broken
3781 *
3782 * Returns:
3783 * np: head of normal data packets
3784 * npt: tail of normal data packets
3785 * hp: head of high priority packets
3786 * hpt: tail of high priority packets
3787 */
3788 static uint32_t
3789 vsw_get_pri_packets(vsw_t *vswp, mblk_t **np, mblk_t **npt,
3790 mblk_t **hp, mblk_t **hpt)
3791 {
3792 mblk_t *tmp = NULL;
3793 mblk_t *smp = NULL;
3794 mblk_t *hmp = NULL; /* high prio pkts head */
3795 mblk_t *hmpt = NULL; /* high prio pkts tail */
3796 mblk_t *nmp = NULL; /* normal pkts head */
3797 mblk_t *nmpt = NULL; /* normal pkts tail */
3798 uint32_t count = 0;
3799 int i;
3800 struct ether_header *ehp;
3801 uint32_t num_types;
3802 uint16_t *types;
3803
3804 tmp = *np;
3805 while (tmp != NULL) {
3806
3807 smp = tmp;
3808 tmp = tmp->b_next;
3809 smp->b_next = NULL;
3810 smp->b_prev = NULL;
3811
3812 ehp = (struct ether_header *)smp->b_rptr;
3813 num_types = vswp->pri_num_types;
3814 types = vswp->pri_types;
3815 for (i = 0; i < num_types; i++) {
3816 if (ehp->ether_type == types[i]) {
3817 /* high priority frame */
3818
3819 if (hmp != NULL) {
3820 hmpt->b_next = smp;
3821 hmpt = smp;
3822 } else {
3823 hmp = hmpt = smp;
3824 }
3825 count++;
3826 break;
3827 }
3828 }
3829 if (i == num_types) {
3830 /* normal data frame */
3831
3832 if (nmp != NULL) {
3833 nmpt->b_next = smp;
3834 nmpt = smp;
3835 } else {
3836 nmp = nmpt = smp;
3837 }
3838 }
3839 }
3840
3841 *hp = hmp;
3842 *hpt = hmpt;
3843 *np = nmp;
3844 *npt = nmpt;
3845
3846 return (count);
3847 }
3848
3849 /*
3850 * Wrapper function to transmit normal and/or priority frames over the channel.
3851 */
3852 static int
3853 vsw_ldctx_pri(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count)
3854 {
3855 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg;
3856 mblk_t *tmp;
3857 mblk_t *smp;
3858 mblk_t *hmp; /* high prio pkts head */
3859 mblk_t *hmpt; /* high prio pkts tail */
3860 mblk_t *nmp; /* normal pkts head */
3861 mblk_t *nmpt; /* normal pkts tail */
3862 uint32_t n = 0;
3863 vsw_t *vswp = ldcp->ldc_vswp;
3864
3865 ASSERT(VSW_PRI_ETH_DEFINED(vswp));
3866 ASSERT(count != 0);
3867
3868 nmp = mp;
3869 nmpt = mpt;
3870
3871 /* gather any priority frames from the chain of packets */
3872 n = vsw_get_pri_packets(vswp, &nmp, &nmpt, &hmp, &hmpt);
3873
3874 /* transmit priority frames */
3875 tmp = hmp;
3876 while (tmp != NULL) {
3877 smp = tmp;
3878 tmp = tmp->b_next;
3879 smp->b_next = NULL;
3880 vsw_ldcsend_pkt(ldcp, smp);
3881 }
3882
3883 count -= n;
3884
3885 if (count == 0) {
3886 /* no normal data frames to process */
3887 return (0);
3888 }
3889
3890 return (vsw_ldctx(ldcp, nmp, nmpt, count));
3891 }
3892
3893 /*
3894 * Wrapper function to transmit normal frames over the channel.
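*
* When the tx worker thread is running, frames are queued rather
* than sent inline; roughly (a sketch of the logic below):
*
*	if (tx_cnt + count >= vsw_max_tx_qcount)
*		drop the chain and count it in tx_qfull;
*	else
*		append to tx_mhead/tx_mtail, tx_cnt += count,
*		cv_signal(&tx_thr_cv);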
3895 */ 3896 static int 3897 vsw_ldctx(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count) 3898 { 3899 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 3900 mblk_t *tmp = NULL; 3901 3902 ASSERT(count != 0); 3903 /* 3904 * If the TX thread is enabled, then queue the 3905 * ordinary frames and signal the tx thread. 3906 */ 3907 if (ldcp->tx_thread != NULL) { 3908 3909 mutex_enter(&ldcp->tx_thr_lock); 3910 3911 if ((ldcp->tx_cnt + count) >= vsw_max_tx_qcount) { 3912 /* 3913 * If we reached queue limit, 3914 * do not queue new packets, 3915 * drop them. 3916 */ 3917 ldcp->ldc_stats.tx_qfull += count; 3918 mutex_exit(&ldcp->tx_thr_lock); 3919 freemsgchain(mp); 3920 goto exit; 3921 } 3922 if (ldcp->tx_mhead == NULL) { 3923 ldcp->tx_mhead = mp; 3924 ldcp->tx_mtail = mpt; 3925 cv_signal(&ldcp->tx_thr_cv); 3926 } else { 3927 ldcp->tx_mtail->b_next = mp; 3928 ldcp->tx_mtail = mpt; 3929 } 3930 ldcp->tx_cnt += count; 3931 mutex_exit(&ldcp->tx_thr_lock); 3932 } else { 3933 while (mp != NULL) { 3934 tmp = mp->b_next; 3935 mp->b_next = mp->b_prev = NULL; 3936 (void) vsw_ldcsend(ldcp, mp, 1); 3937 mp = tmp; 3938 } 3939 } 3940 3941 exit: 3942 return (0); 3943 } 3944 3945 /* 3946 * This function transmits the frame in the payload of a raw data 3947 * (VIO_PKT_DATA) message. Thus, it provides an Out-Of-Band path to 3948 * send special frames with high priorities, without going through 3949 * the normal data path which uses descriptor ring mechanism. 3950 */ 3951 static void 3952 vsw_ldcsend_pkt(vsw_ldc_t *ldcp, mblk_t *mp) 3953 { 3954 vio_raw_data_msg_t *pkt; 3955 mblk_t *bp; 3956 mblk_t *nmp = NULL; 3957 vio_mblk_t *vmp; 3958 caddr_t dst; 3959 uint32_t mblksz; 3960 uint32_t size; 3961 uint32_t nbytes; 3962 int rv; 3963 vsw_t *vswp = ldcp->ldc_vswp; 3964 vgen_stats_t *statsp = &ldcp->ldc_stats; 3965 3966 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 3967 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 3968 (void) atomic_inc_32(&statsp->tx_pri_fail); 3969 DWARN(vswp, "%s(%lld) status(%d) lstate(0x%llx), dropping " 3970 "packet\n", __func__, ldcp->ldc_id, ldcp->ldc_status, 3971 ldcp->lane_out.lstate); 3972 goto send_pkt_exit; 3973 } 3974 3975 size = msgsize(mp); 3976 3977 /* frame size bigger than available payload len of raw data msg ? 
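*
* e.g. with a message buffer of ldcp->msglen bytes and a header of
* VIO_PKT_DATA_HDRSIZE bytes, the largest frame that fits is
* (msglen - VIO_PKT_DATA_HDRSIZE); anything larger is counted in
* tx_pri_fail and dropped.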

/*
 * This function transmits the frame in the payload of a raw data
 * (VIO_PKT_DATA) message. Thus, it provides an Out-Of-Band path to
 * send special frames with high priorities, without going through
 * the normal data path which uses the descriptor ring mechanism.
 */
static void
vsw_ldcsend_pkt(vsw_ldc_t *ldcp, mblk_t *mp)
{
	vio_raw_data_msg_t *pkt;
	mblk_t *bp;
	mblk_t *nmp = NULL;
	vio_mblk_t *vmp;
	caddr_t dst;
	uint32_t mblksz;
	uint32_t size;
	uint32_t nbytes;
	int rv;
	vsw_t *vswp = ldcp->ldc_vswp;
	vgen_stats_t *statsp = &ldcp->ldc_stats;

	if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) ||
	    (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) {
		(void) atomic_inc_32(&statsp->tx_pri_fail);
		DWARN(vswp, "%s(%lld) status(%d) lstate(0x%llx), dropping "
		    "packet\n", __func__, ldcp->ldc_id, ldcp->ldc_status,
		    ldcp->lane_out.lstate);
		goto send_pkt_exit;
	}

	size = msgsize(mp);

	/* frame size bigger than available payload len of raw data msg? */
	if (size > (size_t)(ldcp->msglen - VIO_PKT_DATA_HDRSIZE)) {
		(void) atomic_inc_32(&statsp->tx_pri_fail);
		DWARN(vswp, "%s(%lld) invalid size(%d)\n", __func__,
		    ldcp->ldc_id, size);
		goto send_pkt_exit;
	}

	if (size < ETHERMIN)
		size = ETHERMIN;

	/* alloc space for a raw data message */
	vmp = vio_allocb(vswp->pri_tx_vmp);
	if (vmp == NULL) {
		(void) atomic_inc_32(&statsp->tx_pri_fail);
		DWARN(vswp, "vio_allocb failed\n");
		goto send_pkt_exit;
	} else {
		nmp = vmp->mp;
	}
	pkt = (vio_raw_data_msg_t *)nmp->b_rptr;

	/* copy frame into the payload of raw data message */
	dst = (caddr_t)pkt->data;
	for (bp = mp; bp != NULL; bp = bp->b_cont) {
		mblksz = MBLKL(bp);
		bcopy(bp->b_rptr, dst, mblksz);
		dst += mblksz;
	}

	vmp->state = VIO_MBLK_HAS_DATA;

	/* setup the raw data msg */
	pkt->tag.vio_msgtype = VIO_TYPE_DATA;
	pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;
	pkt->tag.vio_subtype_env = VIO_PKT_DATA;
	pkt->tag.vio_sid = ldcp->local_session;
	nbytes = VIO_PKT_DATA_HDRSIZE + size;

	/* send the msg over ldc */
	rv = vsw_send_msg(ldcp, (void *)pkt, nbytes, B_TRUE);
	if (rv != 0) {
		(void) atomic_inc_32(&statsp->tx_pri_fail);
		DWARN(vswp, "%s(%lld) Error sending priority frame\n",
		    __func__, ldcp->ldc_id);
		goto send_pkt_exit;
	}

	/* update stats */
	(void) atomic_inc_64(&statsp->tx_pri_packets);
	(void) atomic_add_64(&statsp->tx_pri_bytes, size);

send_pkt_exit:
	if (nmp != NULL)
		freemsg(nmp);
	freemsg(mp);
}
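
/*
 * Illustrative sketch (not part of the driver): the framing step in
 * vsw_ldcsend_pkt() above -- flattening a scattered buffer chain into
 * the payload that follows a fixed message header. The buf_t and
 * raw_msg_t types and the 64-byte payload bound are hypothetical
 * stand-ins for the mblk chain, vio_raw_data_msg_t, and
 * (msglen - VIO_PKT_DATA_HDRSIZE).
 *
 *	#include <string.h>
 *	#include <stddef.h>
 *
 *	typedef struct buf {
 *	    const void *data;
 *	    size_t len;
 *	    struct buf *next;
 *	} buf_t;
 *
 *	typedef struct raw_msg {
 *	    unsigned char hdr[16];	// fixed header, set by caller
 *	    unsigned char payload[64];
 *	} raw_msg_t;
 *
 *	// Copy the chain into msg->payload; return total bytes copied,
 *	// or 0 if the frame would overflow the payload area.
 *	static size_t
 *	raw_msg_fill(raw_msg_t *msg, const buf_t *bp)
 *	{
 *	    size_t off = 0;
 *
 *	    for (; bp != NULL; bp = bp->next) {
 *	        if (off + bp->len > sizeof (msg->payload))
 *	            return (0);
 *	        memcpy(msg->payload + off, bp->data, bp->len);
 *	        off += bp->len;
 *	    }
 *	    return (off);
 *	}
 */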

/*
 * Transmit the packet over the given LDC channel.
 *
 * The 'retries' argument indicates how many times a packet
 * is retried before it is dropped. Note, the retry is done
 * only for a resource related failure; for all other failures
 * the packet is dropped immediately.
 */
static int
vsw_ldcsend(vsw_ldc_t *ldcp, mblk_t *mp, uint32_t retries)
{
	int i;
	int rc = 0;	/* no descriptors reclaimed yet */
	int status = 0;
	vsw_port_t *port = ldcp->ldc_port;
	dring_info_t *dp = NULL;
	lane_t *lp = &ldcp->lane_out;

	for (i = 0; i < retries; ) {
		/*
		 * Send the message out using the appropriate
		 * transmit function which will free the mblk when it
		 * is finished with it.
		 */
		mutex_enter(&port->tx_lock);
		if (port->transmit != NULL) {
			status = (*port->transmit)(ldcp, mp);
		}
		if (status == LDC_TX_SUCCESS) {
			mutex_exit(&port->tx_lock);
			break;
		}
		i++;	/* increment the counter here */

		/* If it's the last retry, update the oerrors count */
		if ((i == retries) && (status == LDC_TX_NORESOURCES)) {
			ldcp->ldc_stats.oerrors++;
		}
		mutex_exit(&port->tx_lock);

		if (status != LDC_TX_NORESOURCES) {
			/*
			 * No retrying required for errors unrelated
			 * to resources.
			 */
			break;
		}
		if (((dp = ldcp->lane_out.dringp) != NULL) &&
		    ((VSW_VER_GTEQ(ldcp, 1, 2) &&
		    (ldcp->lane_out.xfer_mode & VIO_DRING_MODE_V1_2)) ||
		    ((VSW_VER_LT(ldcp, 1, 2) &&
		    (ldcp->lane_out.xfer_mode == VIO_DRING_MODE_V1_0))))) {

			/* Need to reclaim in TxDring mode. */
			if (lp->dring_mode == VIO_TX_DRING) {
				rc = vsw_reclaim_dring(dp, dp->end_idx);
			}

		} else {
			/*
			 * If there is no dring or the xfer_mode is
			 * set to DESC_MODE (i.e., OBP), then simply
			 * break here.
			 */
			break;
		}

		/*
		 * Delay only if none were reclaimed
		 * and it's not the last retry.
		 */
		if ((rc == 0) && (i < retries)) {
			delay(drv_usectohz(vsw_ldc_tx_delay));
		}
	}
	freemsg(mp);
	return (status);
}
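
/*
 * Illustrative sketch (not part of the driver): the retry policy of
 * vsw_ldcsend() above -- retry only on a resource shortage, attempt a
 * reclaim between tries, and sleep only when the reclaim freed
 * nothing and more attempts remain. The xmit(), reclaim(), and
 * msleep() callbacks are hypothetical.
 *
 *	enum { TX_OK, TX_NORESOURCES, TX_FAILURE };
 *
 *	static int
 *	send_with_retry(void *pkt, unsigned retries,
 *	    int (*xmit)(void *), int (*reclaim)(void),
 *	    void (*msleep)(unsigned))
 *	{
 *	    int status = TX_FAILURE;
 *	    unsigned i;
 *
 *	    for (i = 0; i < retries; ) {
 *	        status = xmit(pkt);
 *	        if (status == TX_OK)
 *	            break;
 *	        i++;
 *	        if (status != TX_NORESOURCES)
 *	            break;		// hard error: drop now
 *	        if (reclaim() == 0 && i < retries)
 *	            msleep(10);	// nothing freed; back off
 *	    }
 *	    return (status);
 *	}
 */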
4204 */ 4205 for (i = 0; i < priv_desc->ncookies; i++) { 4206 bcopy(&priv_desc->memcookie[i], &ibnd_msg.memcookie[i], 4207 sizeof (ldc_mem_cookie_t)); 4208 } 4209 4210 ibnd_msg.hdr.desc_handle = idx; 4211 ibnd_msg.ncookies = priv_desc->ncookies; 4212 ibnd_msg.nbytes = size; 4213 4214 ldcp->ldc_stats.opackets++; 4215 ldcp->ldc_stats.obytes += size; 4216 4217 (void) vsw_send_msg(ldcp, (void *)&ibnd_msg, 4218 sizeof (vnet_ibnd_desc_t), B_TRUE); 4219 4220 vsw_descrsend_free_exit: 4221 4222 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 4223 return (status); 4224 } 4225 4226 static void 4227 vsw_send_ver(void *arg) 4228 { 4229 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 4230 vsw_t *vswp = ldcp->ldc_vswp; 4231 lane_t *lp = &ldcp->lane_out; 4232 vio_ver_msg_t ver_msg; 4233 4234 D1(vswp, "%s enter", __func__); 4235 4236 ver_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 4237 ver_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 4238 ver_msg.tag.vio_subtype_env = VIO_VER_INFO; 4239 ver_msg.tag.vio_sid = ldcp->local_session; 4240 4241 if (vsw_obp_ver_proto_workaround == B_FALSE) { 4242 ver_msg.ver_major = vsw_versions[0].ver_major; 4243 ver_msg.ver_minor = vsw_versions[0].ver_minor; 4244 } else { 4245 /* use the major,minor that we've ack'd */ 4246 lane_t *lpi = &ldcp->lane_in; 4247 ver_msg.ver_major = lpi->ver_major; 4248 ver_msg.ver_minor = lpi->ver_minor; 4249 } 4250 ver_msg.dev_class = VDEV_NETWORK_SWITCH; 4251 4252 lp->lstate |= VSW_VER_INFO_SENT; 4253 lp->ver_major = ver_msg.ver_major; 4254 lp->ver_minor = ver_msg.ver_minor; 4255 4256 DUMP_TAG(ver_msg.tag); 4257 4258 (void) vsw_send_msg(ldcp, &ver_msg, sizeof (vio_ver_msg_t), B_TRUE); 4259 4260 D1(vswp, "%s (%d): exit", __func__, ldcp->ldc_id); 4261 } 4262 4263 static void 4264 vsw_send_attr(vsw_ldc_t *ldcp) 4265 { 4266 vsw_t *vswp = ldcp->ldc_vswp; 4267 lane_t *lp = &ldcp->lane_out; 4268 vnet_attr_msg_t attr_msg; 4269 4270 D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id); 4271 4272 /* 4273 * Subtype is set to INFO by default 4274 */ 4275 attr_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 4276 attr_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 4277 attr_msg.tag.vio_subtype_env = VIO_ATTR_INFO; 4278 attr_msg.tag.vio_sid = ldcp->local_session; 4279 4280 /* payload copied from default settings for lane */ 4281 attr_msg.mtu = lp->mtu; 4282 attr_msg.addr_type = lp->addr_type; 4283 attr_msg.xfer_mode = lp->xfer_mode; 4284 attr_msg.ack_freq = lp->xfer_mode; 4285 attr_msg.options = lp->dring_mode; 4286 4287 READ_ENTER(&vswp->if_lockrw); 4288 attr_msg.addr = vnet_macaddr_strtoul((vswp->if_addr).ether_addr_octet); 4289 RW_EXIT(&vswp->if_lockrw); 4290 4291 ldcp->lane_out.lstate |= VSW_ATTR_INFO_SENT; 4292 4293 DUMP_TAG(attr_msg.tag); 4294 4295 (void) vsw_send_msg(ldcp, &attr_msg, sizeof (vnet_attr_msg_t), B_TRUE); 4296 4297 D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id); 4298 } 4299 4300 static void 4301 vsw_send_dring_info(vsw_ldc_t *ldcp) 4302 { 4303 int msgsize; 4304 void *msg; 4305 vsw_t *vswp = ldcp->ldc_vswp; 4306 vsw_port_t *port = ldcp->ldc_port; 4307 lane_t *lp = &ldcp->lane_out; 4308 vgen_stats_t *statsp = &ldcp->ldc_stats; 4309 4310 D1(vswp, "%s: (%ld) enter", __func__, ldcp->ldc_id); 4311 4312 /* dring mode has been negotiated in attr phase; save in stats */ 4313 statsp->dring_mode = lp->dring_mode; 4314 4315 if (lp->dring_mode == VIO_RX_DRING_DATA) { 4316 /* 4317 * Change the transmit routine for RxDringData mode. 
4318 */ 4319 port->transmit = vsw_dringsend_shm; 4320 msg = (void *) vsw_create_rx_dring_info(ldcp); 4321 if (msg == NULL) { 4322 return; 4323 } 4324 msgsize = 4325 VNET_DRING_REG_EXT_MSG_SIZE(lp->dringp->data_ncookies); 4326 ldcp->rcv_thread = thread_create(NULL, 2 * DEFAULTSTKSZ, 4327 vsw_ldc_rcv_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri); 4328 ldcp->rx_dringdata = vsw_process_dringdata_shm; 4329 } else { 4330 msg = (void *) vsw_create_tx_dring_info(ldcp); 4331 if (msg == NULL) { 4332 return; 4333 } 4334 msgsize = sizeof (vio_dring_reg_msg_t); 4335 ldcp->msg_thread = thread_create(NULL, 2 * DEFAULTSTKSZ, 4336 vsw_ldc_msg_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri); 4337 ldcp->rx_dringdata = vsw_process_dringdata; 4338 } 4339 4340 lp->lstate |= VSW_DRING_INFO_SENT; 4341 DUMP_TAG_PTR((vio_msg_tag_t *)msg); 4342 (void) vsw_send_msg(ldcp, msg, msgsize, B_TRUE); 4343 kmem_free(msg, msgsize); 4344 4345 D1(vswp, "%s: (%ld) exit", __func__, ldcp->ldc_id); 4346 } 4347 4348 static void 4349 vsw_send_rdx(vsw_ldc_t *ldcp) 4350 { 4351 vsw_t *vswp = ldcp->ldc_vswp; 4352 vio_rdx_msg_t rdx_msg; 4353 4354 D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id); 4355 4356 rdx_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 4357 rdx_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 4358 rdx_msg.tag.vio_subtype_env = VIO_RDX; 4359 rdx_msg.tag.vio_sid = ldcp->local_session; 4360 4361 ldcp->lane_in.lstate |= VSW_RDX_INFO_SENT; 4362 4363 DUMP_TAG(rdx_msg.tag); 4364 4365 (void) vsw_send_msg(ldcp, &rdx_msg, sizeof (vio_rdx_msg_t), B_TRUE); 4366 4367 D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id); 4368 } 4369 4370 /* 4371 * Remove the specified address from the list of address maintained 4372 * in this port node. 4373 */ 4374 mcst_addr_t * 4375 vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr) 4376 { 4377 vsw_t *vswp = NULL; 4378 vsw_port_t *port = NULL; 4379 mcst_addr_t *prev_p = NULL; 4380 mcst_addr_t *curr_p = NULL; 4381 4382 D1(NULL, "%s: enter : devtype %d : addr 0x%llx", 4383 __func__, devtype, addr); 4384 4385 if (devtype == VSW_VNETPORT) { 4386 port = (vsw_port_t *)arg; 4387 mutex_enter(&port->mca_lock); 4388 prev_p = curr_p = port->mcap; 4389 } else { 4390 vswp = (vsw_t *)arg; 4391 mutex_enter(&vswp->mca_lock); 4392 prev_p = curr_p = vswp->mcap; 4393 } 4394 4395 while (curr_p != NULL) { 4396 if (curr_p->addr == addr) { 4397 D2(NULL, "%s: address found", __func__); 4398 /* match found */ 4399 if (prev_p == curr_p) { 4400 /* list head */ 4401 if (devtype == VSW_VNETPORT) 4402 port->mcap = curr_p->nextp; 4403 else 4404 vswp->mcap = curr_p->nextp; 4405 } else { 4406 prev_p->nextp = curr_p->nextp; 4407 } 4408 break; 4409 } else { 4410 prev_p = curr_p; 4411 curr_p = curr_p->nextp; 4412 } 4413 } 4414 4415 if (devtype == VSW_VNETPORT) 4416 mutex_exit(&port->mca_lock); 4417 else 4418 mutex_exit(&vswp->mca_lock); 4419 4420 D1(NULL, "%s: exit", __func__); 4421 4422 return (curr_p); 4423 } 4424 4425 /* 4426 * Create a ring consisting of just a private portion and link 4427 * it into the list of rings for the outbound lane. 4428 * 4429 * These type of rings are used primarily for temporary data 4430 * storage (i.e. as data buffers). 
4431 */ 4432 void 4433 vsw_create_privring(vsw_ldc_t *ldcp) 4434 { 4435 dring_info_t *dp; 4436 vsw_t *vswp = ldcp->ldc_vswp; 4437 4438 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 4439 4440 dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 4441 mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL); 4442 mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL); 4443 ldcp->lane_out.dringp = dp; 4444 4445 /* no public section */ 4446 dp->pub_addr = NULL; 4447 dp->priv_addr = kmem_zalloc( 4448 (sizeof (vsw_private_desc_t) * vsw_num_descriptors), KM_SLEEP); 4449 dp->num_descriptors = vsw_num_descriptors; 4450 4451 if (vsw_setup_tx_dring(ldcp, dp)) { 4452 DERR(vswp, "%s: setup of ring failed", __func__); 4453 vsw_destroy_tx_dring(ldcp); 4454 return; 4455 } 4456 4457 /* haven't used any descriptors yet */ 4458 dp->end_idx = 0; 4459 dp->restart_reqd = B_TRUE; 4460 4461 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 4462 } 4463 4464 /* 4465 * Set the default lane attributes. These are copied into 4466 * the attr msg we send to our peer. If they are not acceptable 4467 * then (currently) the handshake ends. 4468 */ 4469 static void 4470 vsw_set_lane_attr(vsw_t *vswp, lane_t *lp) 4471 { 4472 bzero(lp, sizeof (lane_t)); 4473 4474 READ_ENTER(&vswp->if_lockrw); 4475 ether_copy(&(vswp->if_addr), &(lp->addr)); 4476 RW_EXIT(&vswp->if_lockrw); 4477 4478 lp->mtu = vswp->max_frame_size; 4479 lp->addr_type = ADDR_TYPE_MAC; 4480 lp->xfer_mode = VIO_DRING_MODE_V1_0; 4481 lp->ack_freq = 0; /* for shared mode */ 4482 lp->seq_num = VNET_ISS; 4483 } 4484 4485 /* 4486 * Map the descriptor ring exported by the peer. 4487 */ 4488 static dring_info_t * 4489 vsw_map_dring(vsw_ldc_t *ldcp, void *pkt) 4490 { 4491 dring_info_t *dp = NULL; 4492 lane_t *lp = &ldcp->lane_out; 4493 4494 if (lp->dring_mode == VIO_RX_DRING_DATA) { 4495 /* 4496 * In RxDringData mode, dring that we map in 4497 * becomes our transmit descriptor ring. 4498 */ 4499 dp = vsw_map_tx_dring(ldcp, pkt); 4500 } else { 4501 /* 4502 * In TxDring mode, dring that we map in 4503 * becomes our receive descriptor ring. 4504 */ 4505 dp = vsw_map_rx_dring(ldcp, pkt); 4506 } 4507 return (dp); 4508 } 4509 4510 /* 4511 * Common dring mapping function used in both TxDring and RxDringData modes. 4512 */ 4513 dring_info_t * 4514 vsw_map_dring_cmn(vsw_ldc_t *ldcp, vio_dring_reg_msg_t *dring_pkt) 4515 { 4516 int rv; 4517 dring_info_t *dp; 4518 ldc_mem_info_t minfo; 4519 vsw_t *vswp = ldcp->ldc_vswp; 4520 4521 /* 4522 * If the dring params are unacceptable then we NACK back. 4523 */ 4524 if ((dring_pkt->num_descriptors == 0) || 4525 (dring_pkt->descriptor_size == 0) || 4526 (dring_pkt->ncookies != 1)) { 4527 DERR(vswp, "%s (%lld): invalid dring info", 4528 __func__, ldcp->ldc_id); 4529 return (NULL); 4530 } 4531 4532 dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 4533 4534 dp->num_descriptors = dring_pkt->num_descriptors; 4535 dp->descriptor_size = dring_pkt->descriptor_size; 4536 dp->options = dring_pkt->options; 4537 dp->dring_ncookies = dring_pkt->ncookies; 4538 4539 /* 4540 * Note: should only get one cookie. Enforced in 4541 * the ldc layer. 
4542 */ 4543 bcopy(&dring_pkt->cookie[0], &dp->dring_cookie[0], 4544 sizeof (ldc_mem_cookie_t)); 4545 4546 rv = ldc_mem_dring_map(ldcp->ldc_handle, &dp->dring_cookie[0], 4547 dp->dring_ncookies, dp->num_descriptors, dp->descriptor_size, 4548 LDC_DIRECT_MAP, &(dp->dring_handle)); 4549 if (rv != 0) { 4550 goto fail; 4551 } 4552 4553 rv = ldc_mem_dring_info(dp->dring_handle, &minfo); 4554 if (rv != 0) { 4555 goto fail; 4556 } 4557 /* store the address of the ring */ 4558 dp->pub_addr = minfo.vaddr; 4559 4560 /* cache the dring mtype */ 4561 dp->dring_mtype = minfo.mtype; 4562 4563 /* no private section as we are importing */ 4564 dp->priv_addr = NULL; 4565 4566 /* 4567 * Using simple mono increasing int for ident at the moment. 4568 */ 4569 dp->ident = ldcp->next_ident; 4570 ldcp->next_ident++; 4571 4572 /* 4573 * Acknowledge it; we send back a unique dring identifier that 4574 * the sending side will use in future to refer to this 4575 * descriptor ring. 4576 */ 4577 dring_pkt->dring_ident = dp->ident; 4578 4579 return (dp); 4580 fail: 4581 if (dp->dring_handle != NULL) { 4582 (void) ldc_mem_dring_unmap(dp->dring_handle); 4583 } 4584 kmem_free(dp, sizeof (*dp)); 4585 return (NULL); 4586 } 4587 4588 /* 4589 * Unmap the descriptor ring exported by the peer. 4590 */ 4591 static void 4592 vsw_unmap_dring(vsw_ldc_t *ldcp) 4593 { 4594 lane_t *lane_out = &ldcp->lane_out; 4595 4596 if (lane_out->dring_mode == VIO_RX_DRING_DATA) { 4597 vsw_unmap_tx_dring(ldcp); 4598 } else { 4599 vsw_unmap_rx_dring(ldcp); 4600 } 4601 } 4602 4603 /* 4604 * Map the shared memory data buffer area exported by the peer. 4605 * Used in RxDringData mode only. 4606 */ 4607 static int 4608 vsw_map_data(vsw_ldc_t *ldcp, dring_info_t *dp, void *pkt) 4609 { 4610 int rv; 4611 vio_dring_reg_ext_msg_t *emsg; 4612 vio_dring_reg_msg_t *msg = pkt; 4613 uint8_t *buf = (uint8_t *)msg->cookie; 4614 vsw_t *vswp = ldcp->ldc_vswp; 4615 4616 /* skip over dring cookies */ 4617 ASSERT(msg->ncookies == 1); 4618 buf += (msg->ncookies * sizeof (ldc_mem_cookie_t)); 4619 4620 emsg = (vio_dring_reg_ext_msg_t *)buf; 4621 if (emsg->data_ncookies > VNET_DATA_AREA_COOKIES) { 4622 return (1); 4623 } 4624 4625 /* save # of data area cookies */ 4626 dp->data_ncookies = emsg->data_ncookies; 4627 4628 /* save data area size */ 4629 dp->data_sz = emsg->data_area_size; 4630 4631 /* allocate ldc mem handle for data area */ 4632 rv = ldc_mem_alloc_handle(ldcp->ldc_handle, &dp->data_handle); 4633 if (rv != 0) { 4634 cmn_err(CE_WARN, "ldc_mem_alloc_handle failed\n"); 4635 DWARN(vswp, "%s (%lld) ldc_mem_alloc_handle() failed: %d\n", 4636 __func__, ldcp->ldc_id, rv); 4637 return (1); 4638 } 4639 4640 /* map the data area */ 4641 rv = ldc_mem_map(dp->data_handle, emsg->data_cookie, 4642 emsg->data_ncookies, LDC_DIRECT_MAP, LDC_MEM_R, 4643 (caddr_t *)&dp->data_addr, NULL); 4644 if (rv != 0) { 4645 cmn_err(CE_WARN, "ldc_mem_map failed\n"); 4646 DWARN(vswp, "%s (%lld) ldc_mem_map() failed: %d\n", 4647 __func__, ldcp->ldc_id, rv); 4648 return (1); 4649 } 4650 4651 /* allocate memory for data area cookies */ 4652 dp->data_cookie = kmem_zalloc(emsg->data_ncookies * 4653 sizeof (ldc_mem_cookie_t), KM_SLEEP); 4654 4655 /* save data area cookies */ 4656 bcopy(emsg->data_cookie, dp->data_cookie, 4657 emsg->data_ncookies * sizeof (ldc_mem_cookie_t)); 4658 4659 return (0); 4660 } 4661 4662 /* 4663 * Reset and free all the resources associated with the channel. 
4664 */ 4665 static void 4666 vsw_free_lane_resources(vsw_ldc_t *ldcp, uint64_t dir) 4667 { 4668 lane_t *lp; 4669 4670 D1(ldcp->ldc_vswp, "%s (%lld): enter", __func__, ldcp->ldc_id); 4671 4672 if (dir == INBOUND) { 4673 D2(ldcp->ldc_vswp, "%s: freeing INBOUND lane" 4674 " of channel %lld", __func__, ldcp->ldc_id); 4675 lp = &ldcp->lane_in; 4676 } else { 4677 D2(ldcp->ldc_vswp, "%s: freeing OUTBOUND lane" 4678 " of channel %lld", __func__, ldcp->ldc_id); 4679 lp = &ldcp->lane_out; 4680 } 4681 4682 lp->lstate = VSW_LANE_INACTIV; 4683 lp->seq_num = VNET_ISS; 4684 4685 if (dir == INBOUND) { 4686 /* Unmap the remote dring which is imported from the peer */ 4687 vsw_unmap_dring(ldcp); 4688 } else { 4689 /* Destroy the local dring which is exported to the peer */ 4690 vsw_destroy_dring(ldcp); 4691 } 4692 4693 D1(ldcp->ldc_vswp, "%s (%lld): exit", __func__, ldcp->ldc_id); 4694 } 4695 4696 /* 4697 * Destroy the descriptor ring. 4698 */ 4699 static void 4700 vsw_destroy_dring(vsw_ldc_t *ldcp) 4701 { 4702 lane_t *lp = &ldcp->lane_out; 4703 4704 if (lp->dring_mode == VIO_RX_DRING_DATA) { 4705 vsw_destroy_rx_dring(ldcp); 4706 } else { 4707 vsw_destroy_tx_dring(ldcp); 4708 } 4709 } 4710 4711 /* 4712 * vsw_ldc_tx_worker -- A per LDC worker thread to transmit data. 4713 * This thread is woken up by the vsw_portsend to transmit 4714 * packets. 4715 */ 4716 static void 4717 vsw_ldc_tx_worker(void *arg) 4718 { 4719 callb_cpr_t cprinfo; 4720 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 4721 vsw_t *vswp = ldcp->ldc_vswp; 4722 mblk_t *mp; 4723 mblk_t *tmp; 4724 4725 D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id); 4726 CALLB_CPR_INIT(&cprinfo, &ldcp->tx_thr_lock, callb_generic_cpr, 4727 "vnet_tx_thread"); 4728 mutex_enter(&ldcp->tx_thr_lock); 4729 while (!(ldcp->tx_thr_flags & VSW_WTHR_STOP)) { 4730 4731 CALLB_CPR_SAFE_BEGIN(&cprinfo); 4732 /* 4733 * Wait until the data is received or a stop 4734 * request is received. 4735 */ 4736 while (!(ldcp->tx_thr_flags & VSW_WTHR_STOP) && 4737 (ldcp->tx_mhead == NULL)) { 4738 cv_wait(&ldcp->tx_thr_cv, &ldcp->tx_thr_lock); 4739 } 4740 CALLB_CPR_SAFE_END(&cprinfo, &ldcp->tx_thr_lock) 4741 4742 /* 4743 * First process the stop request. 4744 */ 4745 if (ldcp->tx_thr_flags & VSW_WTHR_STOP) { 4746 D2(vswp, "%s(%lld):tx thread stopped\n", 4747 __func__, ldcp->ldc_id); 4748 break; 4749 } 4750 mp = ldcp->tx_mhead; 4751 ldcp->tx_mhead = ldcp->tx_mtail = NULL; 4752 ldcp->tx_cnt = 0; 4753 mutex_exit(&ldcp->tx_thr_lock); 4754 D2(vswp, "%s(%lld):calling vsw_ldcsend\n", 4755 __func__, ldcp->ldc_id); 4756 while (mp != NULL) { 4757 tmp = mp->b_next; 4758 mp->b_next = mp->b_prev = NULL; 4759 (void) vsw_ldcsend(ldcp, mp, vsw_ldc_tx_retries); 4760 mp = tmp; 4761 } 4762 mutex_enter(&ldcp->tx_thr_lock); 4763 } 4764 4765 /* 4766 * Update the run status and wakeup the thread that 4767 * has sent the stop request. 4768 */ 4769 ldcp->tx_thr_flags &= ~VSW_WTHR_STOP; 4770 ldcp->tx_thread = NULL; 4771 CALLB_CPR_EXIT(&cprinfo); 4772 D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id); 4773 thread_exit(); 4774 } 4775 4776 /* vsw_stop_tx_thread -- Co-ordinate with receive thread to stop it */ 4777 static void 4778 vsw_stop_tx_thread(vsw_ldc_t *ldcp) 4779 { 4780 kt_did_t tid = 0; 4781 vsw_t *vswp = ldcp->ldc_vswp; 4782 4783 D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id); 4784 /* 4785 * Send a stop request by setting the stop flag and 4786 * wait until the receive thread stops. 
4787 */ 4788 mutex_enter(&ldcp->tx_thr_lock); 4789 if (ldcp->tx_thread != NULL) { 4790 tid = ldcp->tx_thread->t_did; 4791 ldcp->tx_thr_flags |= VSW_WTHR_STOP; 4792 cv_signal(&ldcp->tx_thr_cv); 4793 } 4794 mutex_exit(&ldcp->tx_thr_lock); 4795 4796 if (tid != 0) { 4797 thread_join(tid); 4798 } 4799 4800 D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id); 4801 } 4802 4803 /* 4804 * Debugging routines 4805 */ 4806 static void 4807 display_state(void) 4808 { 4809 vsw_t *vswp; 4810 vsw_port_list_t *plist; 4811 vsw_port_t *port; 4812 vsw_ldc_t *ldcp; 4813 extern vsw_t *vsw_head; 4814 4815 cmn_err(CE_NOTE, "***** system state *****"); 4816 4817 for (vswp = vsw_head; vswp; vswp = vswp->next) { 4818 plist = &vswp->plist; 4819 READ_ENTER(&plist->lockrw); 4820 cmn_err(CE_CONT, "vsw instance %d has %d ports attached\n", 4821 vswp->instance, plist->num_ports); 4822 4823 for (port = plist->head; port != NULL; port = port->p_next) { 4824 cmn_err(CE_CONT, "port %d : %d ldcs attached\n", 4825 port->p_instance, port->num_ldcs); 4826 ldcp = port->ldcp; 4827 cmn_err(CE_CONT, "chan %lu : dev %d : " 4828 "status %d : phase %u\n", 4829 ldcp->ldc_id, ldcp->dev_class, 4830 ldcp->ldc_status, ldcp->hphase); 4831 cmn_err(CE_CONT, "chan %lu : lsession %lu : " 4832 "psession %lu\n", ldcp->ldc_id, 4833 ldcp->local_session, ldcp->peer_session); 4834 4835 cmn_err(CE_CONT, "Inbound lane:\n"); 4836 display_lane(&ldcp->lane_in); 4837 cmn_err(CE_CONT, "Outbound lane:\n"); 4838 display_lane(&ldcp->lane_out); 4839 } 4840 RW_EXIT(&plist->lockrw); 4841 } 4842 cmn_err(CE_NOTE, "***** system state *****"); 4843 } 4844 4845 static void 4846 display_lane(lane_t *lp) 4847 { 4848 dring_info_t *drp = lp->dringp; 4849 4850 cmn_err(CE_CONT, "ver 0x%x:0x%x : state %lx : mtu 0x%lx\n", 4851 lp->ver_major, lp->ver_minor, lp->lstate, lp->mtu); 4852 cmn_err(CE_CONT, "addr_type %d : addr 0x%lx : xmode %d\n", 4853 lp->addr_type, lp->addr, lp->xfer_mode); 4854 cmn_err(CE_CONT, "dringp 0x%lx\n", (uint64_t)lp->dringp); 4855 4856 cmn_err(CE_CONT, "Dring info:\n"); 4857 cmn_err(CE_CONT, "\tnum_desc %u : dsize %u\n", 4858 drp->num_descriptors, drp->descriptor_size); 4859 cmn_err(CE_CONT, "\thandle 0x%lx\n", drp->dring_handle); 4860 cmn_err(CE_CONT, "\tpub_addr 0x%lx : priv_addr 0x%lx\n", 4861 (uint64_t)drp->pub_addr, (uint64_t)drp->priv_addr); 4862 cmn_err(CE_CONT, "\tident 0x%lx : end_idx %lu\n", 4863 drp->ident, drp->end_idx); 4864 display_ring(drp); 4865 } 4866 4867 static void 4868 display_ring(dring_info_t *dringp) 4869 { 4870 uint64_t i; 4871 uint64_t priv_count = 0; 4872 uint64_t pub_count = 0; 4873 vnet_public_desc_t *pub_addr = NULL; 4874 vsw_private_desc_t *priv_addr = NULL; 4875 4876 for (i = 0; i < vsw_num_descriptors; i++) { 4877 if (dringp->pub_addr != NULL) { 4878 pub_addr = (vnet_public_desc_t *)dringp->pub_addr + i; 4879 4880 if (pub_addr->hdr.dstate == VIO_DESC_FREE) 4881 pub_count++; 4882 } 4883 4884 if (dringp->priv_addr != NULL) { 4885 priv_addr = (vsw_private_desc_t *)dringp->priv_addr + i; 4886 4887 if (priv_addr->dstate == VIO_DESC_FREE) 4888 priv_count++; 4889 } 4890 } 4891 cmn_err(CE_CONT, "\t%lu elements: %lu priv free: %lu pub free\n", 4892 i, priv_count, pub_count); 4893 } 4894 4895 static void 4896 dump_flags(uint64_t state) 4897 { 4898 int i; 4899 4900 typedef struct flag_name { 4901 int flag_val; 4902 char *flag_name; 4903 } flag_name_t; 4904 4905 flag_name_t flags[] = { 4906 VSW_VER_INFO_SENT, "VSW_VER_INFO_SENT", 4907 VSW_VER_INFO_RECV, "VSW_VER_INFO_RECV", 4908 VSW_VER_ACK_RECV, "VSW_VER_ACK_RECV", 4909 

static void
dump_flags(uint64_t state)
{
	int i;

	typedef struct flag_name {
		int flag_val;
		char *flag_name;
	} flag_name_t;

	flag_name_t flags[] = {
		{ VSW_VER_INFO_SENT, "VSW_VER_INFO_SENT" },
		{ VSW_VER_INFO_RECV, "VSW_VER_INFO_RECV" },
		{ VSW_VER_ACK_RECV, "VSW_VER_ACK_RECV" },
		{ VSW_VER_ACK_SENT, "VSW_VER_ACK_SENT" },
		{ VSW_VER_NACK_RECV, "VSW_VER_NACK_RECV" },
		{ VSW_VER_NACK_SENT, "VSW_VER_NACK_SENT" },
		{ VSW_ATTR_INFO_SENT, "VSW_ATTR_INFO_SENT" },
		{ VSW_ATTR_INFO_RECV, "VSW_ATTR_INFO_RECV" },
		{ VSW_ATTR_ACK_SENT, "VSW_ATTR_ACK_SENT" },
		{ VSW_ATTR_ACK_RECV, "VSW_ATTR_ACK_RECV" },
		{ VSW_ATTR_NACK_SENT, "VSW_ATTR_NACK_SENT" },
		{ VSW_ATTR_NACK_RECV, "VSW_ATTR_NACK_RECV" },
		{ VSW_DRING_INFO_SENT, "VSW_DRING_INFO_SENT" },
		{ VSW_DRING_INFO_RECV, "VSW_DRING_INFO_RECV" },
		{ VSW_DRING_ACK_SENT, "VSW_DRING_ACK_SENT" },
		{ VSW_DRING_ACK_RECV, "VSW_DRING_ACK_RECV" },
		{ VSW_DRING_NACK_SENT, "VSW_DRING_NACK_SENT" },
		{ VSW_DRING_NACK_RECV, "VSW_DRING_NACK_RECV" },
		{ VSW_RDX_INFO_SENT, "VSW_RDX_INFO_SENT" },
		{ VSW_RDX_INFO_RECV, "VSW_RDX_INFO_RECV" },
		{ VSW_RDX_ACK_SENT, "VSW_RDX_ACK_SENT" },
		{ VSW_RDX_ACK_RECV, "VSW_RDX_ACK_RECV" },
		{ VSW_RDX_NACK_SENT, "VSW_RDX_NACK_SENT" },
		{ VSW_RDX_NACK_RECV, "VSW_RDX_NACK_RECV" },
		{ VSW_MCST_INFO_SENT, "VSW_MCST_INFO_SENT" },
		{ VSW_MCST_INFO_RECV, "VSW_MCST_INFO_RECV" },
		{ VSW_MCST_ACK_SENT, "VSW_MCST_ACK_SENT" },
		{ VSW_MCST_ACK_RECV, "VSW_MCST_ACK_RECV" },
		{ VSW_MCST_NACK_SENT, "VSW_MCST_NACK_SENT" },
		{ VSW_MCST_NACK_RECV, "VSW_MCST_NACK_RECV" },
		{ VSW_LANE_ACTIVE, "VSW_LANE_ACTIVE" }
	};

	DERR(NULL, "DUMP_FLAGS: %llx\n", state);
	for (i = 0; i < sizeof (flags) / sizeof (flag_name_t); i++) {
		if (state & flags[i].flag_val)
			DERR(NULL, "DUMP_FLAGS %s", flags[i].flag_name);
	}
}