/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/debug.h>
#include <sys/time.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/user.h>
#include <sys/stropts.h>
#include <sys/stream.h>
#include <sys/strlog.h>
#include <sys/strsubr.h>
#include <sys/cmn_err.h>
#include <sys/cpu.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ksynch.h>
#include <sys/stat.h>
#include <sys/kstat.h>
#include <sys/vtrace.h>
#include <sys/strsun.h>
#include <sys/dlpi.h>
#include <sys/ethernet.h>
#include <net/if.h>
#include <sys/varargs.h>
#include <sys/machsystm.h>
#include <sys/modctl.h>
#include <sys/modhash.h>
#include <sys/mac.h>
#include <sys/mac_ether.h>
#include <sys/taskq.h>
#include <sys/note.h>
#include <sys/mach_descrip.h>
#include <sys/mdeg.h>
#include <sys/ldc.h>
#include <sys/vsw_fdb.h>
#include <sys/vsw.h>
#include <sys/vio_mailbox.h>
#include <sys/vnet_mailbox.h>
#include <sys/vnet_common.h>
#include <sys/vio_util.h>
#include <sys/sdt.h>
#include <sys/atomic.h>
#include <sys/callb.h>
#include <sys/vlan.h>

/* Port add/deletion/etc routines */
static void vsw_port_delete(vsw_port_t *port);
static int vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id);
static void vsw_ldc_detach(vsw_ldc_t *ldcp);
static int vsw_ldc_init(vsw_ldc_t *ldcp);
static void vsw_ldc_uninit(vsw_ldc_t *ldcp);
static void vsw_ldc_drain(vsw_ldc_t *ldcp);
static void vsw_drain_port_taskq(vsw_port_t *port);
static void vsw_marker_task(void *);
static int vsw_plist_del_node(vsw_t *, vsw_port_t *port);
void vsw_detach_ports(vsw_t *vswp);
int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node);
mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr);
int vsw_port_detach(vsw_t *vswp, int p_instance);
int vsw_portsend(vsw_port_t *port, mblk_t *mp);
int vsw_port_attach(vsw_port_t *portp);
vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance);
void vsw_vlan_unaware_port_reset(vsw_port_t *portp);
void vsw_hio_port_reset(vsw_port_t *portp, boolean_t immediate);
void vsw_reset_ports(vsw_t *vswp);
void vsw_port_reset(vsw_port_t *portp);
void vsw_physlink_update_ports(vsw_t *vswp);
static void vsw_port_physlink_update(vsw_port_t *portp);

/* Interrupt routines */
static uint_t vsw_ldc_cb(uint64_t cb, caddr_t arg);

/* Handshake routines */
static void vsw_ldc_reinit(vsw_ldc_t *);
static void vsw_conn_task(void *);
static int vsw_check_flag(vsw_ldc_t *, int, uint64_t);
static void vsw_next_milestone(vsw_ldc_t *);
static int vsw_supported_version(vio_ver_msg_t *);
static void vsw_set_vnet_proto_ops(vsw_ldc_t *ldcp);
static void vsw_reset_vnet_proto_ops(vsw_ldc_t *ldcp);
void vsw_process_conn_evt(vsw_ldc_t *, uint16_t);

/* Data processing routines */
void vsw_process_pkt(void *);
static void vsw_dispatch_ctrl_task(vsw_ldc_t *, void *, vio_msg_tag_t *, int);
static void vsw_process_ctrl_pkt(void *);
static void vsw_process_ctrl_ver_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_attr_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_mcst_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_rdx_pkt(vsw_ldc_t *, void *);
static void vsw_process_physlink_msg(vsw_ldc_t *, void *);
static void vsw_process_data_pkt(vsw_ldc_t *, void *, vio_msg_tag_t *,
    uint32_t);
static void vsw_process_pkt_data_nop(void *, void *, uint32_t);
static void vsw_process_pkt_data(void *, void *, uint32_t);
static void vsw_process_data_ibnd_pkt(vsw_ldc_t *, void *);
static void vsw_process_err_pkt(vsw_ldc_t *, void *, vio_msg_tag_t *);
static void vsw_process_evt_read(vsw_ldc_t *ldcp);
static void vsw_ldc_rcv(vsw_ldc_t *ldcp);

/* Switching/data transmit routines */
static int vsw_descrsend(vsw_ldc_t *, mblk_t *);
static void vsw_ldcsend_pkt(vsw_ldc_t *ldcp, mblk_t *mp);
static int vsw_ldcsend(vsw_ldc_t *ldcp, mblk_t *mp, uint32_t retries);
static int vsw_ldctx_pri(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count);
static int vsw_ldctx(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count);

/* Packet creation routines */
static void vsw_send_ver(void *);
static void vsw_send_attr(vsw_ldc_t *);
static void vsw_send_dring_info(vsw_ldc_t *);
static void vsw_send_rdx(vsw_ldc_t *);
static void vsw_send_physlink_msg(vsw_ldc_t *ldcp, link_state_t plink_state);

/* Dring routines */
static void vsw_create_privring(vsw_ldc_t *);
static dring_info_t *vsw_map_dring(vsw_ldc_t *ldcp, void *pkt);
static void vsw_unmap_dring(vsw_ldc_t *ldcp);
static void vsw_destroy_dring(vsw_ldc_t *ldcp);
static void vsw_free_lane_resources(vsw_ldc_t *, uint64_t);
static int vsw_map_data(vsw_ldc_t *ldcp, dring_info_t *dp, void *pkt);
static void vsw_set_lane_attr(vsw_t *, lane_t *);
dring_info_t *vsw_map_dring_cmn(vsw_ldc_t *ldcp,
    vio_dring_reg_msg_t *dring_pkt);
static int vsw_mapin_avail(vsw_ldc_t *ldcp);

/* tx/msg/rcv thread routines */
static void vsw_stop_tx_thread(vsw_ldc_t *ldcp);
static void vsw_ldc_tx_worker(void *arg);

/* Misc support routines */
static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr);
static int vsw_get_same_dest_list(struct ether_header *ehp,
    mblk_t **rhead, mblk_t **rtail, mblk_t **mpp);
static mblk_t *vsw_dupmsgchain(mblk_t *mp);

/* Debugging routines */
static void dump_flags(uint64_t);
static void display_state(void);
static void display_lane(lane_t *);
static void display_ring(dring_info_t *);

/*
 * Functions imported from other files.
 */
extern int vsw_set_hw(vsw_t *, vsw_port_t *, int);
extern void vsw_unset_hw(vsw_t *, vsw_port_t *, int);
extern int vsw_add_rem_mcst(vnet_mcast_msg_t *mcst_pkt, vsw_port_t *port);
extern void vsw_del_mcst_port(vsw_port_t *port);
extern int vsw_add_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg);
extern int vsw_del_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg);
extern void vsw_fdbe_add(vsw_t *vswp, void *port);
extern void vsw_fdbe_del(vsw_t *vswp, struct ether_addr *eaddr);
extern void vsw_create_vlans(void *arg, int type);
extern void vsw_destroy_vlans(void *arg, int type);
extern void vsw_vlan_add_ids(void *arg, int type);
extern void vsw_vlan_remove_ids(void *arg, int type);
extern boolean_t vsw_frame_lookup_vid(void *arg, int caller,
    struct ether_header *ehp, uint16_t *vidp);
extern mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp);
extern uint32_t vsw_vlan_frame_untag(void *arg, int type, mblk_t **np,
    mblk_t **npt);
extern boolean_t vsw_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid);
extern void vsw_hio_start(vsw_t *vswp, vsw_ldc_t *ldcp);
extern void vsw_hio_stop(vsw_t *vswp, vsw_ldc_t *ldcp);
extern void vsw_process_dds_msg(vsw_t *vswp, vsw_ldc_t *ldcp, void *msg);
extern void vsw_hio_stop_port(vsw_port_t *portp);
extern void vsw_publish_macaddr(vsw_t *vswp, vsw_port_t *portp);
extern int vsw_mac_client_init(vsw_t *vswp, vsw_port_t *port, int type);
extern void vsw_mac_client_cleanup(vsw_t *vswp, vsw_port_t *port, int type);
extern void vsw_destroy_rxpools(void *arg);
extern void vsw_stop_msg_thread(vsw_ldc_t *ldcp);
extern int vsw_send_msg(vsw_ldc_t *, void *, int, boolean_t);
extern int vsw_dringsend(vsw_ldc_t *, mblk_t *);
extern int vsw_reclaim_dring(dring_info_t *dp, int start);
extern int vsw_dring_find_free_desc(dring_info_t *, vsw_private_desc_t **,
    int *);
extern vio_dring_reg_msg_t *vsw_create_tx_dring_info(vsw_ldc_t *);
extern int vsw_setup_tx_dring(vsw_ldc_t *ldcp, dring_info_t *dp);
extern void vsw_destroy_tx_dring(vsw_ldc_t *ldcp);
extern dring_info_t *vsw_map_rx_dring(vsw_ldc_t *ldcp, void *pkt);
extern void vsw_unmap_rx_dring(vsw_ldc_t *ldcp);
extern void vsw_ldc_msg_worker(void *arg);
extern void vsw_process_dringdata(void *, void *);
extern vio_dring_reg_msg_t *vsw_create_rx_dring_info(vsw_ldc_t *);
extern void vsw_destroy_rx_dring(vsw_ldc_t *ldcp);
extern dring_info_t *vsw_map_tx_dring(vsw_ldc_t *ldcp, void *pkt);
extern void vsw_unmap_tx_dring(vsw_ldc_t *ldcp);
extern void vsw_ldc_rcv_worker(void *arg);
extern void vsw_stop_rcv_thread(vsw_ldc_t *ldcp);
extern int vsw_dringsend_shm(vsw_ldc_t *, mblk_t *);
extern void vsw_process_dringdata_shm(void *, void *);

/*
 * Tunables used in this file.
 */
extern int vsw_num_handshakes;
extern int vsw_ldc_tx_delay;
extern int vsw_ldc_tx_retries;
extern int vsw_ldc_retries;
extern int vsw_ldc_delay;
extern boolean_t vsw_ldc_rxthr_enabled;
extern boolean_t vsw_ldc_txthr_enabled;
extern uint32_t vsw_num_descriptors;
extern uint8_t vsw_dring_mode;
extern uint32_t vsw_max_tx_qcount;
extern boolean_t vsw_obp_ver_proto_workaround;
extern uint32_t vsw_publish_macaddr_count;
extern uint32_t vsw_nrbufs_factor;

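/*
 * Channel locks are always acquired in the order ldc_cblock, then
 * ldc_rxlock, then ldc_txlock, and released in the reverse order.
 */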
#define	LDC_ENTER_LOCK(ldcp)	\
	mutex_enter(&((ldcp)->ldc_cblock));\
	mutex_enter(&((ldcp)->ldc_rxlock));\
	mutex_enter(&((ldcp)->ldc_txlock));
#define	LDC_EXIT_LOCK(ldcp)	\
	mutex_exit(&((ldcp)->ldc_txlock));\
	mutex_exit(&((ldcp)->ldc_rxlock));\
	mutex_exit(&((ldcp)->ldc_cblock));

#define	VSW_VER_EQ(ldcp, major, minor)	\
	((ldcp)->lane_out.ver_major == (major) &&	\
	    (ldcp)->lane_out.ver_minor == (minor))

#define	VSW_VER_LT(ldcp, major, minor)	\
	(((ldcp)->lane_out.ver_major < (major)) ||	\
	    ((ldcp)->lane_out.ver_major == (major) &&	\
	    (ldcp)->lane_out.ver_minor < (minor)))

#define	VSW_VER_GTEQ(ldcp, major, minor)	\
	(((ldcp)->lane_out.ver_major > (major)) ||	\
	    ((ldcp)->lane_out.ver_major == (major) &&	\
	    (ldcp)->lane_out.ver_minor >= (minor)))

#define	VSW_VER_LTEQ(ldcp, major, minor)	\
	(((ldcp)->lane_out.ver_major < (major)) ||	\
	    ((ldcp)->lane_out.ver_major == (major) &&	\
	    (ldcp)->lane_out.ver_minor <= (minor)))

/*
 * VIO Protocol Version Info:
 *
 * The version specified below represents the version of protocol currently
 * supported in the driver. It means the driver can negotiate with peers with
 * versions <= this version. Here is a summary of the feature(s) that are
 * supported at each version of the protocol:
 *
 * 1.0	Basic VIO protocol.
 * 1.1	vDisk protocol update (no virtual network update).
 * 1.2	Support for priority frames (priority-ether-types).
 * 1.3	VLAN and HybridIO support.
 * 1.4	Jumbo Frame support.
 * 1.5	Link State Notification support with optional support
 *	for Physical Link information.
 * 1.6	Support for RxDringData mode.
 */
static ver_sup_t vsw_versions[] = { {1, 6} };

/*
 * For the moment the state dump routines have their own
 * private flag.
 */
#define	DUMP_STATE	0

#if DUMP_STATE

#define	DUMP_TAG(tag) \
{ \
	D1(NULL, "DUMP_TAG: type 0x%llx", (tag).vio_msgtype); \
	D1(NULL, "DUMP_TAG: stype 0x%llx", (tag).vio_subtype); \
	D1(NULL, "DUMP_TAG: senv 0x%llx", (tag).vio_subtype_env); \
}

#define	DUMP_TAG_PTR(tag) \
{ \
	D1(NULL, "DUMP_TAG: type 0x%llx", (tag)->vio_msgtype); \
	D1(NULL, "DUMP_TAG: stype 0x%llx", (tag)->vio_subtype); \
	D1(NULL, "DUMP_TAG: senv 0x%llx", (tag)->vio_subtype_env); \
}

#define	DUMP_FLAGS(flags) dump_flags(flags);
#define	DISPLAY_STATE()	display_state()

#else

#define	DUMP_TAG(tag)
#define	DUMP_TAG_PTR(tag)
#define	DUMP_FLAGS(state)
#define	DISPLAY_STATE()

#endif	/* DUMP_STATE */

/*
 * Attach the specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
int
vsw_port_attach(vsw_port_t *port)
{
	vsw_t *vswp = port->p_vswp;
	vsw_port_list_t *plist = &vswp->plist;
	vsw_port_t *p, **pp;
	int nids = port->num_ldcs;
	uint64_t *ldcids;
	int rv;

	D1(vswp, "%s: enter : port %d", __func__, port->p_instance);

	/* port already exists? */
	READ_ENTER(&plist->lockrw);
	for (p = plist->head; p != NULL; p = p->p_next) {
		if (p->p_instance == port->p_instance) {
			DWARN(vswp, "%s: port instance %d already attached",
			    __func__, p->p_instance);
			RW_EXIT(&plist->lockrw);
			return (1);
		}
	}
	RW_EXIT(&plist->lockrw);

	mutex_init(&port->tx_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&port->mca_lock, NULL, MUTEX_DRIVER, NULL);
	rw_init(&port->maccl_rwlock, NULL, RW_DRIVER, NULL);

	mutex_init(&port->state_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&port->state_cv, NULL, CV_DRIVER, NULL);
	port->state = VSW_PORT_INIT;

	D2(vswp, "%s: %d nids", __func__, nids);
	ldcids = port->ldc_ids;
	D2(vswp, "%s: ldcid (%llx)", __func__, (uint64_t)ldcids[0]);
	if (vsw_ldc_attach(port, (uint64_t)ldcids[0]) != 0) {
		DERR(vswp, "%s: ldc_attach failed", __func__);
		goto exit_error;
	}

	if (vswp->switching_setup_done == B_TRUE) {
		/*
		 * If the underlying network device has been set up,
		 * then open a mac client and program the mac address
		 * for this port.
		 */
		rv = vsw_mac_client_init(vswp, port, VSW_VNETPORT);
		if (rv != 0) {
			goto exit_error;
		}
	}

	/* create the fdb entry for this port/mac address */
	vsw_fdbe_add(vswp, port);

	vsw_create_vlans(port, VSW_VNETPORT);

	WRITE_ENTER(&plist->lockrw);

	/* link it into the list of ports for this vsw instance */
	pp = (vsw_port_t **)(&plist->head);
	port->p_next = *pp;
	*pp = port;
	plist->num_ports++;

	RW_EXIT(&plist->lockrw);

	/*
	 * Initialise the port and any ldc's under it.
	 */
	(void) vsw_ldc_init(port->ldcp);

	/* announce macaddr of vnet to the physical switch */
	if (vsw_publish_macaddr_count != 0) {	/* enabled */
		vsw_publish_macaddr(vswp, port);
	}

	D1(vswp, "%s: exit", __func__);
	return (0);

exit_error:

	cv_destroy(&port->state_cv);
	mutex_destroy(&port->state_lock);

	rw_destroy(&port->maccl_rwlock);
	mutex_destroy(&port->tx_lock);
	mutex_destroy(&port->mca_lock);
	kmem_free(port, sizeof (vsw_port_t));
	return (1);
}

/*
 * Detach the specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
int
vsw_port_detach(vsw_t *vswp, int p_instance)
{
	vsw_port_t *port = NULL;
	vsw_port_list_t *plist = &vswp->plist;

	D1(vswp, "%s: enter: port id %d", __func__, p_instance);

	WRITE_ENTER(&plist->lockrw);

	if ((port = vsw_lookup_port(vswp, p_instance)) == NULL) {
		RW_EXIT(&plist->lockrw);
		return (1);
	}

	if (vsw_plist_del_node(vswp, port)) {
		RW_EXIT(&plist->lockrw);
		return (1);
	}

	/* cleanup any HybridIO for this port */
	vsw_hio_stop_port(port);

	/*
	 * No longer need to hold writer lock on port list now
	 * that we have unlinked the target port from the list.
	 */
	RW_EXIT(&plist->lockrw);

	/* Cleanup and close the mac client */
	vsw_mac_client_cleanup(vswp, port, VSW_VNETPORT);

	/* Remove the fdb entry for this port/mac address */
	vsw_fdbe_del(vswp, &(port->p_macaddr));
	vsw_destroy_vlans(port, VSW_VNETPORT);

	/* Remove any multicast addresses.. */
	vsw_del_mcst_port(port);

	vsw_port_delete(port);

	D1(vswp, "%s: exit: p_instance(%d)", __func__, p_instance);
	return (0);
}

/*
 * Detach all active ports.
 */
void
vsw_detach_ports(vsw_t *vswp)
{
	vsw_port_list_t *plist = &vswp->plist;
	vsw_port_t *port = NULL;

	D1(vswp, "%s: enter", __func__);

	WRITE_ENTER(&plist->lockrw);

	while ((port = plist->head) != NULL) {
		(void) vsw_plist_del_node(vswp, port);

		/* cleanup any HybridIO for this port */
		vsw_hio_stop_port(port);

		/* Cleanup and close the mac client */
		vsw_mac_client_cleanup(vswp, port, VSW_VNETPORT);

		/* Remove the fdb entry for this port/mac address */
		vsw_fdbe_del(vswp, &(port->p_macaddr));
		vsw_destroy_vlans(port, VSW_VNETPORT);

		/* Remove any multicast addresses.. */
		vsw_del_mcst_port(port);

		/*
		 * No longer need to hold the lock on the port list
		 * now that we have unlinked the target port from the
		 * list.
		 */
		RW_EXIT(&plist->lockrw);
		vsw_port_delete(port);
		WRITE_ENTER(&plist->lockrw);
	}
	RW_EXIT(&plist->lockrw);

	D1(vswp, "%s: exit", __func__);
}

/*
 * Delete the specified port.
 */
static void
vsw_port_delete(vsw_port_t *port)
{
	vsw_t *vswp = port->p_vswp;

	D1(vswp, "%s: enter : port id %d", __func__, port->p_instance);

	vsw_ldc_uninit(port->ldcp);

	/*
	 * Wait for any pending ctrl msg tasks which reference this
	 * port to finish.
	 */
	vsw_drain_port_taskq(port);

	/*
	 * Wait for any active callbacks to finish
	 */
	vsw_ldc_drain(port->ldcp);

	vsw_ldc_detach(port->ldcp);

	rw_destroy(&port->maccl_rwlock);
	mutex_destroy(&port->mca_lock);
	mutex_destroy(&port->tx_lock);

	cv_destroy(&port->state_cv);
	mutex_destroy(&port->state_lock);

	if (port->num_ldcs != 0) {
		kmem_free(port->ldc_ids, port->num_ldcs * sizeof (uint64_t));
		port->num_ldcs = 0;
	}

	if (port->nvids != 0) {
		kmem_free(port->vids, sizeof (vsw_vlanid_t) * port->nvids);
	}

	kmem_free(port, sizeof (vsw_port_t));

	D1(vswp, "%s: exit", __func__);
}

/*
 * Attach a logical domain channel (ldc) under a specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id)
{
	vsw_t *vswp = port->p_vswp;
	vsw_ldc_t *ldcp = NULL;
	ldc_attr_t attr;
	ldc_status_t istatus;
	int status = DDI_FAILURE;
	char kname[MAXNAMELEN];
	enum	{ PROG_init = 0x0,
		    PROG_callback = 0x1,
		    PROG_tx_thread = 0x2}
		progress;

	progress = PROG_init;

	D1(vswp, "%s: enter", __func__);

	ldcp = kmem_zalloc(sizeof (vsw_ldc_t), KM_NOSLEEP);
	if (ldcp == NULL) {
		DERR(vswp, "%s: kmem_zalloc failed", __func__);
		return (1);
	}
	ldcp->ldc_id = ldc_id;

	mutex_init(&ldcp->ldc_txlock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->ldc_rxlock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->ldc_cblock, NULL, MUTEX_DRIVER, NULL);
	ldcp->msg_thr_flags = 0;
	mutex_init(&ldcp->msg_thr_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&ldcp->msg_thr_cv, NULL, CV_DRIVER, NULL);
	ldcp->rcv_thr_flags = 0;
	mutex_init(&ldcp->rcv_thr_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&ldcp->rcv_thr_cv, NULL, CV_DRIVER, NULL);
	mutex_init(&ldcp->drain_cv_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&ldcp->drain_cv, NULL, CV_DRIVER, NULL);

	/* required for handshake with peer */
	ldcp->local_session = (uint64_t)ddi_get_lbolt();
	ldcp->peer_session = 0;
	ldcp->session_status = 0;
	ldcp->hss_id = 1;	/* Initial handshake session id */
	ldcp->hphase = VSW_MILESTONE0;

	(void) atomic_swap_32(&port->p_hio_capable, B_FALSE);

	/* only set for outbound lane, inbound set by peer */
	vsw_set_lane_attr(vswp, &ldcp->lane_out);

	attr.devclass = LDC_DEV_NT_SVC;
	attr.instance = ddi_get_instance(vswp->dip);
	attr.mode = LDC_MODE_UNRELIABLE;
	attr.mtu = VSW_LDC_MTU;
	status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
	if (status != 0) {
		DERR(vswp, "%s(%lld): ldc_init failed, rv (%d)",
		    __func__, ldc_id, status);
		goto ldc_attach_fail;
	}

	if (vsw_ldc_txthr_enabled) {
		ldcp->tx_thr_flags = 0;
		ldcp->tx_mhead = ldcp->tx_mtail = NULL;

		mutex_init(&ldcp->tx_thr_lock, NULL, MUTEX_DRIVER, NULL);
		cv_init(&ldcp->tx_thr_cv, NULL, CV_DRIVER, NULL);
		ldcp->tx_thread = thread_create(NULL, 2 * DEFAULTSTKSZ,
		    vsw_ldc_tx_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri);

		progress |= PROG_tx_thread;
		if (ldcp->tx_thread == NULL) {
			DWARN(vswp, "%s(%lld): Failed to create worker thread",
			    __func__, ldc_id);
			goto ldc_attach_fail;
		}
	}

	status = ldc_reg_callback(ldcp->ldc_handle, vsw_ldc_cb, (caddr_t)ldcp);
	if (status != 0) {
		DERR(vswp, "%s(%lld): ldc_reg_callback failed, rv (%d)",
		    __func__, ldc_id, status);
		(void) ldc_fini(ldcp->ldc_handle);
		goto ldc_attach_fail;
	}
	/*
	 * allocate a message for ldc_read()s, big enough to hold ctrl and
	 * data msgs, including raw data msgs used to recv priority frames.
	 */
	ldcp->msglen = VIO_PKT_DATA_HDRSIZE + vswp->max_frame_size;
	ldcp->ldcmsg = kmem_alloc(ldcp->msglen, KM_SLEEP);

	progress |= PROG_callback;

	mutex_init(&ldcp->status_lock, NULL, MUTEX_DRIVER, NULL);

	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
		DERR(vswp, "%s: ldc_status failed", __func__);
		mutex_destroy(&ldcp->status_lock);
		goto ldc_attach_fail;
	}

	ldcp->ldc_status = istatus;
	ldcp->ldc_port = port;
	ldcp->ldc_vswp = vswp;

	vsw_reset_vnet_proto_ops(ldcp);

	(void) sprintf(kname, "%sldc0x%lx", DRV_NAME, ldcp->ldc_id);
	ldcp->ksp = vgen_setup_kstats(DRV_NAME, vswp->instance,
	    kname, &ldcp->ldc_stats);
	if (ldcp->ksp == NULL) {
		DERR(vswp, "%s: kstats setup failed", __func__);
		goto ldc_attach_fail;
	}

	/* link it into this port */
	port->ldcp = ldcp;

	D1(vswp, "%s: exit", __func__);
	return (0);

ldc_attach_fail:

	if (progress & PROG_callback) {
		(void) ldc_unreg_callback(ldcp->ldc_handle);
		kmem_free(ldcp->ldcmsg, ldcp->msglen);
	}

	if (progress & PROG_tx_thread) {
		if (ldcp->tx_thread != NULL) {
			vsw_stop_tx_thread(ldcp);
		}
		mutex_destroy(&ldcp->tx_thr_lock);
		cv_destroy(&ldcp->tx_thr_cv);
	}
	if (ldcp->ksp != NULL) {
		vgen_destroy_kstats(ldcp->ksp);
	}
	mutex_destroy(&ldcp->msg_thr_lock);
	mutex_destroy(&ldcp->rcv_thr_lock);
	mutex_destroy(&ldcp->ldc_txlock);
	mutex_destroy(&ldcp->ldc_rxlock);
	mutex_destroy(&ldcp->ldc_cblock);
	mutex_destroy(&ldcp->drain_cv_lock);
	cv_destroy(&ldcp->msg_thr_cv);
	cv_destroy(&ldcp->rcv_thr_cv);
	cv_destroy(&ldcp->drain_cv);

	kmem_free(ldcp, sizeof (vsw_ldc_t));

	return (1);
}

/*
 * Detach a logical domain channel (ldc) belonging to a
 * particular port.
 */
static void
vsw_ldc_detach(vsw_ldc_t *ldcp)
{
	int rv;
	vsw_t *vswp = ldcp->ldc_port->p_vswp;
	int retries = 0;

	D2(vswp, "%s: detaching channel %lld", __func__, ldcp->ldc_id);

	/* Stop msg/rcv thread */
	if (ldcp->rcv_thread != NULL) {
		vsw_stop_rcv_thread(ldcp);
	} else if (ldcp->msg_thread != NULL) {
		vsw_stop_msg_thread(ldcp);
	}
	kmem_free(ldcp->ldcmsg, ldcp->msglen);

	/* Stop the tx thread */
	if (ldcp->tx_thread != NULL) {
		vsw_stop_tx_thread(ldcp);
		mutex_destroy(&ldcp->tx_thr_lock);
		cv_destroy(&ldcp->tx_thr_cv);
		if (ldcp->tx_mhead != NULL) {
			freemsgchain(ldcp->tx_mhead);
			ldcp->tx_mhead = ldcp->tx_mtail = NULL;
			ldcp->tx_cnt = 0;
		}
	}

	/* Destroy kstats */
	vgen_destroy_kstats(ldcp->ksp);

	/*
	 * Before we can close the channel we must release any mapped
	 * resources (e.g. drings).
	 */
	vsw_free_lane_resources(ldcp, INBOUND);
	vsw_free_lane_resources(ldcp, OUTBOUND);

	/*
	 * Close the channel, retrying on EAGAIN.
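	 * Wait vsw_ldc_delay microseconds between attempts and give up
	 * after vsw_ldc_retries retries.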
	 */
	while ((rv = ldc_close(ldcp->ldc_handle)) == EAGAIN) {
		if (++retries > vsw_ldc_retries) {
			break;
		}
		drv_usecwait(vsw_ldc_delay);
	}
	if (rv != 0) {
		cmn_err(CE_NOTE,
		    "!vsw%d: Error(%d) closing the channel(0x%lx)\n",
		    vswp->instance, rv, ldcp->ldc_id);
	}

	(void) ldc_fini(ldcp->ldc_handle);

	ldcp->ldc_status = LDC_INIT;
	ldcp->ldc_handle = NULL;
	ldcp->ldc_vswp = NULL;

	mutex_destroy(&ldcp->msg_thr_lock);
	mutex_destroy(&ldcp->rcv_thr_lock);
	mutex_destroy(&ldcp->ldc_txlock);
	mutex_destroy(&ldcp->ldc_rxlock);
	mutex_destroy(&ldcp->ldc_cblock);
	mutex_destroy(&ldcp->drain_cv_lock);
	mutex_destroy(&ldcp->status_lock);
	cv_destroy(&ldcp->msg_thr_cv);
	cv_destroy(&ldcp->rcv_thr_cv);
	cv_destroy(&ldcp->drain_cv);

	kmem_free(ldcp, sizeof (vsw_ldc_t));
}

/*
 * Open and attempt to bring up the channel. Note that the channel
 * can only be brought up if the peer has also opened the channel.
 *
 * Returns 0 if the channel can be opened and brought up; otherwise
 * returns 1.
 */
static int
vsw_ldc_init(vsw_ldc_t *ldcp)
{
	vsw_t *vswp = ldcp->ldc_vswp;
	ldc_status_t istatus = 0;
	int rv;

	D1(vswp, "%s: enter", __func__);

	LDC_ENTER_LOCK(ldcp);

	/* don't start at 0 in case clients don't like that */
	ldcp->next_ident = 1;

	rv = ldc_open(ldcp->ldc_handle);
	if (rv != 0) {
		DERR(vswp, "%s: ldc_open failed: id(%lld) rv(%d)",
		    __func__, ldcp->ldc_id, rv);
		LDC_EXIT_LOCK(ldcp);
		return (1);
	}

	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
		DERR(vswp, "%s: unable to get status", __func__);
		LDC_EXIT_LOCK(ldcp);
		return (1);

	} else if (istatus != LDC_OPEN && istatus != LDC_READY) {
		DERR(vswp, "%s: id (%lld) status(%d) is not OPEN/READY",
		    __func__, ldcp->ldc_id, istatus);
		LDC_EXIT_LOCK(ldcp);
		return (1);
	}

	mutex_enter(&ldcp->status_lock);
	ldcp->ldc_status = istatus;
	mutex_exit(&ldcp->status_lock);

	rv = ldc_up(ldcp->ldc_handle);
	if (rv != 0) {
		/*
		 * Not a fatal error for ldc_up() to fail, as peer
		 * end point may simply not be ready yet.
		 */
		D2(vswp, "%s: ldc_up err id(%lld) rv(%d)", __func__,
		    ldcp->ldc_id, rv);
		LDC_EXIT_LOCK(ldcp);
		return (1);
	}

	/*
	 * The ldc_up() call is non-blocking, so we need to explicitly
	 * check the channel status to see whether the channel is in
	 * fact UP.
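	 * If it is already UP, deliver VSW_CONN_UP ourselves, since an
	 * LDC_EVT_UP callback may never arrive (see the comments in
	 * vsw_process_conn_evt()).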
	 */
	mutex_enter(&ldcp->status_lock);
	if (ldc_status(ldcp->ldc_handle, &ldcp->ldc_status) != 0) {
		DERR(vswp, "%s: unable to get status", __func__);
		mutex_exit(&ldcp->status_lock);
		LDC_EXIT_LOCK(ldcp);
		return (1);

	}

	if (ldcp->ldc_status == LDC_UP) {
		D2(vswp, "%s: channel %ld now UP (%ld)", __func__,
		    ldcp->ldc_id, istatus);
		mutex_exit(&ldcp->status_lock);
		LDC_EXIT_LOCK(ldcp);

		vsw_process_conn_evt(ldcp, VSW_CONN_UP);
		return (0);
	}

	mutex_exit(&ldcp->status_lock);
	LDC_EXIT_LOCK(ldcp);

	D1(vswp, "%s: exit", __func__);
	return (0);
}

/* disable callbacks on the channel */
static void
vsw_ldc_uninit(vsw_ldc_t *ldcp)
{
	vsw_t *vswp = ldcp->ldc_vswp;
	int rv;

	D1(vswp, "vsw_ldc_uninit: enter: id(%lx)\n", ldcp->ldc_id);

	LDC_ENTER_LOCK(ldcp);

	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
	if (rv != 0) {
		cmn_err(CE_NOTE, "!vsw_ldc_uninit(%ld): error disabling "
		    "interrupts (rv = %d)\n", ldcp->ldc_id, rv);
	}

	mutex_enter(&ldcp->status_lock);
	ldcp->ldc_status = LDC_INIT;
	mutex_exit(&ldcp->status_lock);

	LDC_EXIT_LOCK(ldcp);

	D1(vswp, "vsw_ldc_uninit: exit: id(%lx)", ldcp->ldc_id);
}

/*
 * Wait until the callback(s) associated with the ldcs under the specified
 * port have completed.
 *
 * Prior to this function being invoked each channel under this port
 * should have been quiesced via ldc_set_cb_mode(DISABLE).
 *
 * A short explanation of what we are doing below:
 *
 * The simplest approach would be to have a reference counter in
 * the ldc structure which is incremented/decremented by the callbacks as
 * they use the channel. The drain function could then simply disable any
 * further callbacks and do a cv_wait for the ref to hit zero. Unfortunately
 * there is a tiny window here - before the callback is able to get the lock
 * on the channel it is interrupted and this function gets to execute. It
 * sees that the ref count is zero and believes it is free to delete the
 * associated data structures.
 *
 * We get around this by taking advantage of the fact that before the ldc
 * framework invokes a callback it sets a flag to indicate that there is a
 * callback active (or about to become active). If we attempt to
 * unregister a callback while this active flag is set then the unregister
 * will fail with EWOULDBLOCK.
 *
 * If the unregister fails we do a cv_timedwait. We will either be signaled
 * by the callback as it is exiting (note we have to wait a short period to
 * allow the callback to return fully to the ldc framework and it to clear
 * the active flag), or by the timer expiring. In either case we again attempt
 * the unregister. We repeat this until we can successfully unregister the
 * callback.
 *
 * The reason we use a cv_timedwait rather than a simple cv_wait is to catch
 * the case where the callback has finished but the ldc framework has not yet
 * cleared the active flag. In this case we would never get a cv_signal.
 */
static void
vsw_ldc_drain(vsw_ldc_t *ldcp)
{
	vsw_t *vswp = ldcp->ldc_port->p_vswp;

	D1(vswp, "%s: enter", __func__);

	/*
	 * If we can unregister the channel callback then we
	 * know that there is no callback either running or
	 * scheduled to run for this channel so move on to next
	 * channel in the list.
	 */
	mutex_enter(&ldcp->drain_cv_lock);

	/* prompt active callbacks to quit */
	ldcp->drain_state = VSW_LDC_DRAINING;

	if ((ldc_unreg_callback(ldcp->ldc_handle)) == 0) {
		D2(vswp, "%s: unreg callback for chan %ld", __func__,
		    ldcp->ldc_id);
		mutex_exit(&ldcp->drain_cv_lock);
	} else {
		/*
		 * If we end up here we know that either 1) a callback
		 * is currently executing, 2) is about to start (i.e.
		 * the ldc framework has set the active flag but
		 * has not actually invoked the callback yet), or 3)
		 * has finished and has returned to the ldc framework
		 * but the ldc framework has not yet cleared the
		 * active bit.
		 *
		 * Wait for it to finish.
		 */
		while (ldc_unreg_callback(ldcp->ldc_handle) == EWOULDBLOCK) {
			(void) cv_timedwait(&ldcp->drain_cv,
			    &ldcp->drain_cv_lock, ddi_get_lbolt() + hz);
		}

		mutex_exit(&ldcp->drain_cv_lock);
		D2(vswp, "%s: unreg callback for chan %ld after "
		    "timeout", __func__, ldcp->ldc_id);
	}

	D1(vswp, "%s: exit", __func__);
}

/*
 * Wait until all tasks which reference this port have completed.
 *
 * Prior to this function being invoked each channel under this port
 * should have been quiesced via ldc_set_cb_mode(DISABLE).
 */
static void
vsw_drain_port_taskq(vsw_port_t *port)
{
	vsw_t *vswp = port->p_vswp;

	D1(vswp, "%s: enter", __func__);

	/*
	 * Mark the port as in the process of being detached, and
	 * dispatch a marker task to the queue so we know when all
	 * relevant tasks have completed.
	 */
	mutex_enter(&port->state_lock);
	port->state = VSW_PORT_DETACHING;

	if ((vswp->taskq_p == NULL) ||
	    (ddi_taskq_dispatch(vswp->taskq_p, vsw_marker_task,
	    port, DDI_NOSLEEP) != DDI_SUCCESS)) {
		cmn_err(CE_NOTE, "!vsw%d: unable to dispatch marker task",
		    vswp->instance);
		mutex_exit(&port->state_lock);
		return;
	}

	/*
	 * Wait for the marker task to finish.
	 */
	while (port->state != VSW_PORT_DETACHABLE)
		cv_wait(&port->state_cv, &port->state_lock);

	mutex_exit(&port->state_lock);

	D1(vswp, "%s: exit", __func__);
}

static void
vsw_marker_task(void *arg)
{
	vsw_port_t *port = arg;
	vsw_t *vswp = port->p_vswp;

	D1(vswp, "%s: enter", __func__);

	mutex_enter(&port->state_lock);

	/*
	 * No further tasks should be dispatched which reference
	 * this port so ok to mark it as safe to detach.
	 */
	port->state = VSW_PORT_DETACHABLE;

	cv_signal(&port->state_cv);

	mutex_exit(&port->state_lock);

	D1(vswp, "%s: exit", __func__);
}

vsw_port_t *
vsw_lookup_port(vsw_t *vswp, int p_instance)
{
	vsw_port_list_t *plist = &vswp->plist;
	vsw_port_t *port;

	for (port = plist->head; port != NULL; port = port->p_next) {
		if (port->p_instance == p_instance) {
			D2(vswp, "vsw_lookup_port: found p_instance\n");
			return (port);
		}
	}

	return (NULL);
}

void
vsw_vlan_unaware_port_reset(vsw_port_t *portp)
{
	vsw_ldc_t *ldcp = portp->ldcp;

	mutex_enter(&ldcp->ldc_cblock);

	/*
	 * If the peer is vlan-unaware (ver < 1.3), reset the channel and
	 * terminate the connection. See comments in
	 * vsw_set_vnet_proto_ops().
	 */
	if (ldcp->hphase == VSW_MILESTONE4 && VSW_VER_LT(ldcp, 1, 3) &&
	    portp->nvids != 0) {
		vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
	}

	mutex_exit(&ldcp->ldc_cblock);
}

void
vsw_hio_port_reset(vsw_port_t *portp, boolean_t immediate)
{
	vsw_ldc_t *ldcp = portp->ldcp;

	mutex_enter(&ldcp->ldc_cblock);

	/*
	 * If the peer is HybridIO capable (ver >= 1.3), reset the channel
	 * to trigger re-negotiation, which in turn triggers HybridIO
	 * setup/cleanup.
	 */
	if ((ldcp->hphase == VSW_MILESTONE4) &&
	    (portp->p_hio_capable == B_TRUE)) {
		if (immediate == B_TRUE) {
			(void) ldc_down(ldcp->ldc_handle);
		} else {
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
		}
	}

	mutex_exit(&ldcp->ldc_cblock);
}

void
vsw_port_reset(vsw_port_t *portp)
{
	vsw_ldc_t *ldcp = portp->ldcp;

	mutex_enter(&ldcp->ldc_cblock);

	/*
	 * reset channel and terminate the connection.
	 */
	vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);

	mutex_exit(&ldcp->ldc_cblock);
}

void
vsw_reset_ports(vsw_t *vswp)
{
	vsw_port_list_t *plist = &vswp->plist;
	vsw_port_t *portp;

	READ_ENTER(&plist->lockrw);
	for (portp = plist->head; portp != NULL; portp = portp->p_next) {
		if ((portp->p_hio_capable) && (portp->p_hio_enabled)) {
			vsw_hio_stop_port(portp);
		}
		vsw_port_reset(portp);
	}
	RW_EXIT(&plist->lockrw);
}

static void
vsw_send_physlink_msg(vsw_ldc_t *ldcp, link_state_t plink_state)
{
	vnet_physlink_msg_t msg;
	vnet_physlink_msg_t *msgp = &msg;
	uint32_t physlink_info = 0;

	if (plink_state == LINK_STATE_UP) {
		physlink_info |= VNET_PHYSLINK_STATE_UP;
	} else {
		physlink_info |= VNET_PHYSLINK_STATE_DOWN;
	}

	msgp->tag.vio_msgtype = VIO_TYPE_CTRL;
	msgp->tag.vio_subtype = VIO_SUBTYPE_INFO;
	msgp->tag.vio_subtype_env = VNET_PHYSLINK_INFO;
	msgp->tag.vio_sid = ldcp->local_session;
	msgp->physlink_info = physlink_info;

	(void) vsw_send_msg(ldcp, msgp, sizeof (msg), B_TRUE);
}

static void
vsw_port_physlink_update(vsw_port_t *portp)
{
	vsw_ldc_t *ldcp;
	vsw_t *vswp;

	vswp = portp->p_vswp;
	ldcp = portp->ldcp;

	mutex_enter(&ldcp->ldc_cblock);

	/*
	 * If handshake has completed successfully and if the vnet device
	 * has negotiated to get physical link state updates, send a message
	 * with the current state.
	 */
	if (ldcp->hphase == VSW_MILESTONE4 && ldcp->pls_negotiated == B_TRUE) {
		vsw_send_physlink_msg(ldcp, vswp->phys_link_state);
	}

	mutex_exit(&ldcp->ldc_cblock);
}

void
vsw_physlink_update_ports(vsw_t *vswp)
{
	vsw_port_list_t *plist = &vswp->plist;
	vsw_port_t *portp;

	READ_ENTER(&plist->lockrw);
	for (portp = plist->head; portp != NULL; portp = portp->p_next) {
		vsw_port_physlink_update(portp);
	}
	RW_EXIT(&plist->lockrw);
}

/*
 * Search for and remove the specified port from the port
 * list. Returns 0 if able to locate and remove the port; otherwise
 * returns 1.
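 *
 * Caller must hold the port list lock (plist->lockrw) as writer.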
 */
static int
vsw_plist_del_node(vsw_t *vswp, vsw_port_t *port)
{
	vsw_port_list_t *plist = &vswp->plist;
	vsw_port_t *curr_p, *prev_p;

	if (plist->head == NULL)
		return (1);

	curr_p = prev_p = plist->head;

	while (curr_p != NULL) {
		if (curr_p == port) {
			if (prev_p == curr_p) {
				plist->head = curr_p->p_next;
			} else {
				prev_p->p_next = curr_p->p_next;
			}
			plist->num_ports--;
			break;
		} else {
			prev_p = curr_p;
			curr_p = curr_p->p_next;
		}
	}
	return (0);
}

/*
 * Interrupt handler for ldc messages.
 */
static uint_t
vsw_ldc_cb(uint64_t event, caddr_t arg)
{
	vsw_ldc_t *ldcp = (vsw_ldc_t *)arg;
	vsw_t *vswp = ldcp->ldc_vswp;

	D1(vswp, "%s: enter: ldcid (%lld)\n", __func__, ldcp->ldc_id);

	mutex_enter(&ldcp->ldc_cblock);
	ldcp->ldc_stats.callbacks++;

	mutex_enter(&ldcp->status_lock);
	if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) {
		mutex_exit(&ldcp->status_lock);
		mutex_exit(&ldcp->ldc_cblock);
		return (LDC_SUCCESS);
	}
	mutex_exit(&ldcp->status_lock);

	if (event & LDC_EVT_UP) {
		/*
		 * Channel has come up.
		 */
		D2(vswp, "%s: id(%ld) event(%llx) UP: status(%ld)",
		    __func__, ldcp->ldc_id, event, ldcp->ldc_status);

		vsw_process_conn_evt(ldcp, VSW_CONN_UP);

		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
	}

	if (event & LDC_EVT_READ) {
		/*
		 * Data available for reading.
		 */
		D2(vswp, "%s: id(%ld) event(%llx) data READ",
		    __func__, ldcp->ldc_id, event);

		vsw_process_evt_read(ldcp);

		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);

		goto vsw_cb_exit;
	}

	if (event & (LDC_EVT_DOWN | LDC_EVT_RESET)) {
		D2(vswp, "%s: id(%ld) event (%lx) DOWN/RESET: status(%ld)",
		    __func__, ldcp->ldc_id, event, ldcp->ldc_status);

		vsw_process_conn_evt(ldcp, VSW_CONN_RESET);
	}

	/*
	 * Catch either LDC_EVT_WRITE which we don't support or any
	 * unknown event.
	 */
	if (event &
	    ~(LDC_EVT_UP | LDC_EVT_RESET | LDC_EVT_DOWN | LDC_EVT_READ)) {
		DERR(vswp, "%s: id(%ld) Unexpected event=(%llx) status(%ld)",
		    __func__, ldcp->ldc_id, event, ldcp->ldc_status);
	}

vsw_cb_exit:
	mutex_exit(&ldcp->ldc_cblock);

	/*
	 * Let the drain function know we are finishing if it
	 * is waiting.
	 */
	mutex_enter(&ldcp->drain_cv_lock);
	if (ldcp->drain_state == VSW_LDC_DRAINING)
		cv_signal(&ldcp->drain_cv);
	mutex_exit(&ldcp->drain_cv_lock);

	return (LDC_SUCCESS);
}

/*
 * Reinitialise data structures associated with the channel.
 */
static void
vsw_ldc_reinit(vsw_ldc_t *ldcp)
{
	vsw_t *vswp = ldcp->ldc_vswp;
	vsw_port_t *port;

	D1(vswp, "%s: enter", __func__);

	port = ldcp->ldc_port;

	D2(vswp, "%s: in 0x%llx : out 0x%llx", __func__,
	    ldcp->lane_in.lstate, ldcp->lane_out.lstate);

	vsw_free_lane_resources(ldcp, INBOUND);
	vsw_free_lane_resources(ldcp, OUTBOUND);

	ldcp->lane_in.lstate = 0;
	ldcp->lane_out.lstate = 0;

	/*
	 * Remove parent port from any multicast groups
	 * it may have registered with. Client must resend
	 * multicast add command after handshake completes.
	 */
	vsw_del_mcst_port(port);

	ldcp->peer_session = 0;
	ldcp->session_status = 0;
	ldcp->hcnt = 0;
	ldcp->hphase = VSW_MILESTONE0;

	vsw_reset_vnet_proto_ops(ldcp);

	D1(vswp, "%s: exit", __func__);
}

/*
 * Process a connection event.
 */
void
vsw_process_conn_evt(vsw_ldc_t *ldcp, uint16_t evt)
{
	vsw_t *vswp = ldcp->ldc_vswp;
	vsw_conn_evt_t *conn = NULL;

	D1(vswp, "%s: enter", __func__);

	/*
	 * Check if either a reset or restart event is pending
	 * or in progress. If so just return.
	 *
	 * A VSW_CONN_RESET event originates either with an LDC_EVT_RESET
	 * being received by the callback handler, or an ECONNRESET error
	 * code being returned from an ldc_read() or ldc_write() call.
	 *
	 * A VSW_CONN_RESTART event occurs when some error checking code
	 * decides that there is a problem with data from the channel,
	 * and that the handshake should be restarted.
	 */
	if (((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART)) &&
	    (ldstub((uint8_t *)&ldcp->reset_active)))
		return;

	/*
	 * If it is an LDC_UP event we first check the recorded
	 * state of the channel. If this is UP then we know that
	 * the channel moving to the UP state has already been dealt
	 * with and don't need to dispatch a new task.
	 *
	 * The reason for this check is that when we do an ldc_up(),
	 * depending on the state of the peer, we may or may not get
	 * an LDC_UP event. As we can't depend on getting an LDC_UP evt
	 * every time we do ldc_up(), we explicitly check the channel
	 * status to see whether it has come up (ldc_up() is asynchronous
	 * and will complete at some undefined time), and take the
	 * appropriate action.
	 *
	 * The flip side of this is that we may get an LDC_UP event
	 * when we have already seen that the channel is up and have
	 * dealt with that.
	 */
	mutex_enter(&ldcp->status_lock);
	if (evt == VSW_CONN_UP) {
		if ((ldcp->ldc_status == LDC_UP) || (ldcp->reset_active != 0)) {
			mutex_exit(&ldcp->status_lock);
			return;
		}
	}
	mutex_exit(&ldcp->status_lock);

	/*
	 * The transaction group id allows us to identify and discard
	 * any tasks which are still pending on the taskq and refer
	 * to the handshake session we are about to restart or reset.
	 * These stale messages no longer have any real meaning.
	 */
	(void) atomic_inc_32(&ldcp->hss_id);

	ASSERT(vswp->taskq_p != NULL);

	if ((conn = kmem_zalloc(sizeof (vsw_conn_evt_t), KM_NOSLEEP)) == NULL) {
		cmn_err(CE_WARN, "!vsw%d: unable to allocate memory for"
		    " connection event", vswp->instance);
		goto err_exit;
	}

	conn->evt = evt;
	conn->ldcp = ldcp;

	if (ddi_taskq_dispatch(vswp->taskq_p, vsw_conn_task, conn,
	    DDI_NOSLEEP) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "!vsw%d: Can't dispatch connection task",
		    vswp->instance);

		kmem_free(conn, sizeof (vsw_conn_evt_t));
		goto err_exit;
	}

	D1(vswp, "%s: exit", __func__);
	return;

err_exit:
	/*
	 * We have most likely failed due to a memory shortage. Clear the
	 * flag so that future requests will at least be attempted and will
	 * hopefully succeed.
	 */
	if ((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART))
		ldcp->reset_active = 0;
}

/*
 * Deal with events relating to a connection. Invoked from a taskq.
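 * The vsw_conn_evt_t argument is allocated by vsw_process_conn_evt()
 * and freed here once its contents have been copied out.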
 */
static void
vsw_conn_task(void *arg)
{
	vsw_conn_evt_t *conn = (vsw_conn_evt_t *)arg;
	vsw_ldc_t *ldcp = NULL;
	vsw_port_t *portp;
	vsw_t *vswp = NULL;
	uint16_t evt;
	ldc_status_t curr_status;

	ldcp = conn->ldcp;
	evt = conn->evt;
	vswp = ldcp->ldc_vswp;
	portp = ldcp->ldc_port;

	D1(vswp, "%s: enter", __func__);

	/* can safely free now have copied out data */
	kmem_free(conn, sizeof (vsw_conn_evt_t));

	if (ldcp->rcv_thread != NULL) {
		vsw_stop_rcv_thread(ldcp);
	} else if (ldcp->msg_thread != NULL) {
		vsw_stop_msg_thread(ldcp);
	}

	mutex_enter(&ldcp->status_lock);
	if (ldc_status(ldcp->ldc_handle, &curr_status) != 0) {
		cmn_err(CE_WARN, "!vsw%d: Unable to read status of "
		    "channel %ld", vswp->instance, ldcp->ldc_id);
		mutex_exit(&ldcp->status_lock);
		return;
	}

	/*
	 * If we wish to restart the handshake on this channel, then if
	 * the channel is UP we bring it DOWN to flush the underlying
	 * ldc queue.
	 */
	if ((evt == VSW_CONN_RESTART) && (curr_status == LDC_UP))
		(void) ldc_down(ldcp->ldc_handle);

	if ((portp->p_hio_capable) && (portp->p_hio_enabled)) {
		vsw_hio_stop(vswp, ldcp);
	}

	/*
	 * re-init all the associated data structures.
	 */
	vsw_ldc_reinit(ldcp);

	/*
	 * Bring the channel back up (it does no harm to do this even
	 * if the channel is already UP; it just becomes effectively
	 * a no-op).
	 */
	(void) ldc_up(ldcp->ldc_handle);

	/*
	 * Check if channel is now UP. This will only happen if
	 * peer has also done an ldc_up().
	 */
	if (ldc_status(ldcp->ldc_handle, &curr_status) != 0) {
		cmn_err(CE_WARN, "!vsw%d: Unable to read status of "
		    "channel %ld", vswp->instance, ldcp->ldc_id);
		mutex_exit(&ldcp->status_lock);
		return;
	}

	ldcp->ldc_status = curr_status;

	/* channel UP so restart handshake by sending version info */
	if (curr_status == LDC_UP) {
		if (ldcp->hcnt++ > vsw_num_handshakes) {
			cmn_err(CE_WARN, "!vsw%d: exceeded number of permitted"
			    " handshake attempts (%d) on channel %ld",
			    vswp->instance, ldcp->hcnt, ldcp->ldc_id);
			mutex_exit(&ldcp->status_lock);
			return;
		}

		if (vsw_obp_ver_proto_workaround == B_FALSE &&
		    (ddi_taskq_dispatch(vswp->taskq_p, vsw_send_ver, ldcp,
		    DDI_NOSLEEP) != DDI_SUCCESS)) {
			cmn_err(CE_WARN, "!vsw%d: Can't dispatch version task",
			    vswp->instance);

			/*
			 * Don't count as valid restart attempt if couldn't
			 * send version msg.
			 */
			if (ldcp->hcnt > 0)
				ldcp->hcnt--;
		}
	}

	/*
	 * Mark that the process is complete by clearing the flag.
	 *
	 * Note that it is possible the taskq dispatch above failed, most
	 * likely due to a memory shortage. We still clear the flag so
	 * future attempts will at least be attempted and will hopefully
	 * succeed.
	 */
	if ((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART))
		ldcp->reset_active = 0;

	mutex_exit(&ldcp->status_lock);

	D1(vswp, "%s: exit", __func__);
}

/*
 * Returns 0 if it was legal for the event signified by the flag to have
 * occurred at the time it did. Otherwise returns 1.
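 * On failure the handshake is restarted by calling
 * vsw_process_conn_evt() with VSW_CONN_RESTART.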
 */
int
vsw_check_flag(vsw_ldc_t *ldcp, int dir, uint64_t flag)
{
	vsw_t *vswp = ldcp->ldc_vswp;
	uint64_t state;
	uint64_t phase;

	if (dir == INBOUND)
		state = ldcp->lane_in.lstate;
	else
		state = ldcp->lane_out.lstate;

	phase = ldcp->hphase;

	switch (flag) {
	case VSW_VER_INFO_RECV:
		if (phase > VSW_MILESTONE0) {
			DERR(vswp, "vsw_check_flag (%d): VER_INFO_RECV"
			    " when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		}
		break;

	case VSW_VER_ACK_RECV:
	case VSW_VER_NACK_RECV:
		if (!(state & VSW_VER_INFO_SENT)) {
			DERR(vswp, "vsw_check_flag (%d): spurious VER_ACK or "
			    "VER_NACK when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		} else
			state &= ~VSW_VER_INFO_SENT;
		break;

	case VSW_ATTR_INFO_RECV:
		if ((phase < VSW_MILESTONE1) || (phase >= VSW_MILESTONE2)) {
			DERR(vswp, "vsw_check_flag (%d): ATTR_INFO_RECV"
			    " when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		}
		break;

	case VSW_ATTR_ACK_RECV:
	case VSW_ATTR_NACK_RECV:
		if (!(state & VSW_ATTR_INFO_SENT)) {
			DERR(vswp, "vsw_check_flag (%d): spurious ATTR_ACK"
			    " or ATTR_NACK when in state %d\n",
			    ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		} else
			state &= ~VSW_ATTR_INFO_SENT;
		break;

	case VSW_DRING_INFO_RECV:
		if (phase < VSW_MILESTONE1) {
			DERR(vswp, "vsw_check_flag (%d): DRING_INFO_RECV"
			    " when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		}
		break;

	case VSW_DRING_ACK_RECV:
	case VSW_DRING_NACK_RECV:
		if (!(state & VSW_DRING_INFO_SENT)) {
			DERR(vswp, "vsw_check_flag (%d): spurious DRING_ACK "
			    " or DRING_NACK when in state %d\n",
			    ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		} else
			state &= ~VSW_DRING_INFO_SENT;
		break;

	case VSW_RDX_INFO_RECV:
		if (phase < VSW_MILESTONE3) {
			DERR(vswp, "vsw_check_flag (%d): RDX_INFO_RECV"
			    " when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		}
		break;

	case VSW_RDX_ACK_RECV:
	case VSW_RDX_NACK_RECV:
		if (!(state & VSW_RDX_INFO_SENT)) {
			DERR(vswp, "vsw_check_flag (%d): spurious RDX_ACK or "
			    "RDX_NACK when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		} else
			state &= ~VSW_RDX_INFO_SENT;
		break;

	case VSW_MCST_INFO_RECV:
		if (phase < VSW_MILESTONE3) {
			DERR(vswp, "vsw_check_flag (%d): VSW_MCST_INFO_RECV"
			    " when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		}
		break;

	default:
		DERR(vswp, "vsw_check_flag (%lld): unknown flag (%llx)",
		    ldcp->ldc_id, flag);
		return (1);
	}

	if (dir == INBOUND)
		ldcp->lane_in.lstate = state;
	else
		ldcp->lane_out.lstate = state;

	D1(vswp, "vsw_check_flag (chan %lld): exit", ldcp->ldc_id);

	return (0);
}

void
vsw_next_milestone(vsw_ldc_t *ldcp)
{
	vsw_t *vswp = ldcp->ldc_vswp;
	vsw_port_t *portp = ldcp->ldc_port;
	lane_t *lane_out = &ldcp->lane_out;
	lane_t *lane_in = &ldcp->lane_in;

	D1(vswp, "%s (chan %lld): enter (phase %ld)", __func__,
	    ldcp->ldc_id, ldcp->hphase);

	DUMP_FLAGS(lane_in->lstate);
	DUMP_FLAGS(lane_out->lstate);

	switch (ldcp->hphase) {

	case VSW_MILESTONE0:
		/*
		 * If we haven't started to handshake with our peer,
		 * start to do so now.
		 */
		if (lane_out->lstate == 0) {
			D2(vswp, "%s: (chan %lld) starting handshake "
			    "with peer", __func__, ldcp->ldc_id);
			vsw_process_conn_evt(ldcp, VSW_CONN_UP);
		}

		/*
		 * Only way to pass this milestone is to have successfully
		 * negotiated version info.
		 */
		if ((lane_in->lstate & VSW_VER_ACK_SENT) &&
		    (lane_out->lstate & VSW_VER_ACK_RECV)) {

			D2(vswp, "%s: (chan %lld) leaving milestone 0",
			    __func__, ldcp->ldc_id);

			vsw_set_vnet_proto_ops(ldcp);

			/*
			 * Next milestone is passed when attribute
			 * information has been successfully exchanged.
			 */
			ldcp->hphase = VSW_MILESTONE1;
			vsw_send_attr(ldcp);

		}
		break;

	case VSW_MILESTONE1:
		/*
		 * Only way to pass this milestone is to have successfully
		 * negotiated attribute information, in both directions.
		 */
		if (!((lane_in->lstate & VSW_ATTR_ACK_SENT) &&
		    (lane_out->lstate & VSW_ATTR_ACK_RECV))) {
			break;
		}

		ldcp->hphase = VSW_MILESTONE2;

		/*
		 * If the peer device has said it wishes to
		 * use descriptor rings then we send it our ring
		 * info, otherwise we just set up a private ring
		 * which uses an internal buffer.
		 */
		if ((VSW_VER_GTEQ(ldcp, 1, 2) &&
		    (lane_in->xfer_mode & VIO_DRING_MODE_V1_2)) ||
		    (VSW_VER_LT(ldcp, 1, 2) &&
		    (lane_in->xfer_mode == VIO_DRING_MODE_V1_0))) {
			vsw_send_dring_info(ldcp);
			break;
		}

		/*
		 * The peer doesn't operate in dring mode; we
		 * can simply fallthru to the RDX phase from
		 * here.
		 */
		/*FALLTHRU*/

	case VSW_MILESTONE2:
		/*
		 * If peer has indicated in its attribute message that
		 * it wishes to use descriptor rings then the only way
		 * to pass this milestone is for us to have received
		 * valid dring info.
		 *
		 * If peer is not using descriptor rings then just fall
		 * through.
		 */
		if ((VSW_VER_GTEQ(ldcp, 1, 2) &&
		    (lane_in->xfer_mode & VIO_DRING_MODE_V1_2)) ||
		    (VSW_VER_LT(ldcp, 1, 2) &&
		    (lane_in->xfer_mode ==
		    VIO_DRING_MODE_V1_0))) {
			if (!(lane_in->lstate & VSW_DRING_ACK_SENT))
				break;
		}

		D2(vswp, "%s: (chan %lld) leaving milestone 2",
		    __func__, ldcp->ldc_id);

		ldcp->hphase = VSW_MILESTONE3;
		vsw_send_rdx(ldcp);
		break;

	case VSW_MILESTONE3:
		/*
		 * Pass this milestone when all parameters have been
		 * successfully exchanged and RDX sent in both directions.
		 *
		 * Mark the relevant lane as available to transmit data. In
		 * RxDringData mode, lane_in is associated with transmit and
		 * lane_out is associated with receive. It is the reverse in
		 * TxDring mode.
		 */
		if ((lane_out->lstate & VSW_RDX_ACK_SENT) &&
		    (lane_in->lstate & VSW_RDX_ACK_RECV)) {

			D2(vswp, "%s: (chan %lld) leaving milestone 3",
			    __func__, ldcp->ldc_id);
			D2(vswp, "%s: ** handshake complete (0x%llx : "
			    "0x%llx) **", __func__, lane_in->lstate,
			    lane_out->lstate);
			if (lane_out->dring_mode == VIO_RX_DRING_DATA) {
				lane_in->lstate |= VSW_LANE_ACTIVE;
			} else {
				lane_out->lstate |= VSW_LANE_ACTIVE;
			}
			ldcp->hphase = VSW_MILESTONE4;
			ldcp->hcnt = 0;
			DISPLAY_STATE();
			/* Start HIO if enabled and capable */
			if ((portp->p_hio_enabled) && (portp->p_hio_capable)) {
				D2(vswp, "%s: start HybridIO setup", __func__);
				vsw_hio_start(vswp, ldcp);
			}

			if (ldcp->pls_negotiated == B_TRUE) {
				/*
				 * The vnet device has negotiated to get phys
				 * link updates. Now that the handshake with
				 * the vnet device is complete, send an initial
				 * update with the current physical link state.
				 */
				vsw_send_physlink_msg(ldcp,
				    vswp->phys_link_state);
			}

		} else {
			D2(vswp, "%s: still in milestone 3 (0x%llx : 0x%llx)",
			    __func__, lane_in->lstate,
			    lane_out->lstate);
		}
		break;

	case VSW_MILESTONE4:
		D2(vswp, "%s: (chan %lld) in milestone 4", __func__,
		    ldcp->ldc_id);
		break;

	default:
		DERR(vswp, "%s: (chan %lld) Unknown Phase %x", __func__,
		    ldcp->ldc_id, ldcp->hphase);
	}

	D1(vswp, "%s (chan %lld): exit (phase %ld)", __func__, ldcp->ldc_id,
	    ldcp->hphase);
}

/*
 * Check if major version is supported.
 *
 * Returns 0 if it finds a supported major number, and if necessary
 * adjusts the minor field.
 *
 * Returns 1 if it can't match the major number exactly. Sets major/minor
 * to the next lowest supported values, or to zero if no other values are
 * possible.
 */
static int
vsw_supported_version(vio_ver_msg_t *vp)
{
	int i;

	D1(NULL, "vsw_supported_version: enter");

	for (i = 0; i < VSW_NUM_VER; i++) {
		if (vsw_versions[i].ver_major == vp->ver_major) {
			/*
			 * Matching or lower major version found. Update
			 * minor number if necessary.
			 */
			if (vp->ver_minor > vsw_versions[i].ver_minor) {
				D2(NULL, "%s: adjusting minor value from %d "
				    "to %d", __func__, vp->ver_minor,
				    vsw_versions[i].ver_minor);
				vp->ver_minor = vsw_versions[i].ver_minor;
			}

			return (0);
		}

		/*
		 * If the message contains a higher major version number, set
		 * the message's major/minor versions to the current values
		 * and return false, so this message will get resent with
		 * these values.
		 */
		if (vsw_versions[i].ver_major < vp->ver_major) {
			D2(NULL, "%s: adjusting major and minor "
			    "values to %d, %d\n",
			    __func__, vsw_versions[i].ver_major,
			    vsw_versions[i].ver_minor);
			vp->ver_major = vsw_versions[i].ver_major;
			vp->ver_minor = vsw_versions[i].ver_minor;
			return (1);
		}
	}

	/* No match was possible, zero out fields */
	vp->ver_major = 0;
	vp->ver_minor = 0;

	D1(NULL, "vsw_supported_version: exit");

	return (1);
}

/*
 * Set vnet-protocol-version dependent functions based on version.
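 * Called from vsw_next_milestone() once version negotiation with the
 * peer has completed.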
1921 */ 1922 static void 1923 vsw_set_vnet_proto_ops(vsw_ldc_t *ldcp) 1924 { 1925 vsw_t *vswp = ldcp->ldc_vswp; 1926 lane_t *lp = &ldcp->lane_out; 1927 1928 /* 1929 * Setup the appropriate dring data processing routine and any 1930 * associated thread based on the version. 1931 * 1932 * In versions < 1.6, we support only TxDring mode. In this mode, the 1933 * msg worker thread processes all types of VIO msgs (ctrl and data). 1934 * 1935 * In versions >= 1.6, we also support RxDringData mode. In this mode, 1936 * the rcv worker thread processes dring data messages (msgtype: 1937 * VIO_TYPE_DATA, subtype: VIO_SUBTYPE_INFO, env: VIO_DRING_DATA). The 1938 * rest of the data messages (including acks) and ctrl messages are 1939 * handled directly by the callback (intr) thread. 1940 * 1941 * However, for versions >= 1.6, we could still fall back to TxDring 1942 * mode. This could happen if RxDringData mode has been disabled (see 1943 * below) on this guest or on the peer guest. This info is determined 1944 * as part of attr exchange phase of handshake. Hence, we setup these 1945 * pointers for v1.6 after attr msg phase completes during handshake. 1946 */ 1947 if (VSW_VER_GTEQ(ldcp, 1, 6)) { 1948 /* 1949 * Set data dring mode for vsw_send_attr(). We setup msg worker 1950 * thread in TxDring mode or rcv worker thread in RxDringData 1951 * mode when attr phase of handshake completes. 1952 */ 1953 if (vsw_mapin_avail(ldcp) == B_TRUE) { 1954 lp->dring_mode = (VIO_RX_DRING_DATA | VIO_TX_DRING); 1955 } else { 1956 lp->dring_mode = VIO_TX_DRING; 1957 } 1958 } else { 1959 lp->dring_mode = VIO_TX_DRING; 1960 } 1961 1962 /* 1963 * Setup the MTU for attribute negotiation based on the version. 1964 */ 1965 if (VSW_VER_GTEQ(ldcp, 1, 4)) { 1966 /* 1967 * If the version negotiated with peer is >= 1.4 (Jumbo Frame 1968 * Support), set the mtu in our attributes to max_frame_size. 1969 */ 1970 lp->mtu = vswp->max_frame_size; 1971 } else if (VSW_VER_EQ(ldcp, 1, 3)) { 1972 /* 1973 * If the version negotiated with peer is == 1.3 (Vlan Tag 1974 * Support), set the attr.mtu to ETHERMAX + VLAN_TAGSZ. 1975 */ 1976 lp->mtu = ETHERMAX + VLAN_TAGSZ; 1977 } else { 1978 vsw_port_t *portp = ldcp->ldc_port; 1979 /* 1980 * Pre-1.3 peers expect max frame size of ETHERMAX. 1981 * We can negotiate that size with those peers provided only 1982 * pvid is defined for our peer and there are no vids. Then we 1983 * can send/recv only untagged frames of max size ETHERMAX. 1984 * Note that the pvid of the peer can be different, as vsw has to 1985 * serve the vnet in that vlan even if it is not itself assigned 1986 * to that vlan. 1987 */ 1988 if (portp->nvids == 0) { 1989 lp->mtu = ETHERMAX; 1990 } 1991 } 1992 1993 /* 1994 * Setup version dependent data processing functions. 1995 */ 1996 if (VSW_VER_GTEQ(ldcp, 1, 2)) { 1997 /* Versions >= 1.2 */ 1998 1999 if (VSW_PRI_ETH_DEFINED(vswp)) { 2000 /* 2001 * enable priority routines and pkt mode only if 2002 * at least one pri-eth-type is specified in MD.
2003 */ 2004 ldcp->tx = vsw_ldctx_pri; 2005 ldcp->rx_pktdata = vsw_process_pkt_data; 2006 2007 /* set xfer mode for vsw_send_attr() */ 2008 lp->xfer_mode = VIO_PKT_MODE | VIO_DRING_MODE_V1_2; 2009 } else { 2010 /* no priority eth types defined in MD */ 2011 2012 ldcp->tx = vsw_ldctx; 2013 ldcp->rx_pktdata = vsw_process_pkt_data_nop; 2014 2015 /* set xfer mode for vsw_send_attr() */ 2016 lp->xfer_mode = VIO_DRING_MODE_V1_2; 2017 } 2018 2019 } else { 2020 /* Versions prior to 1.2 */ 2021 2022 vsw_reset_vnet_proto_ops(ldcp); 2023 } 2024 } 2025 2026 /* 2027 * Reset vnet-protocol-version dependent functions to v1.0. 2028 */ 2029 static void 2030 vsw_reset_vnet_proto_ops(vsw_ldc_t *ldcp) 2031 { 2032 lane_t *lp = &ldcp->lane_out; 2033 2034 ldcp->tx = vsw_ldctx; 2035 ldcp->rx_pktdata = vsw_process_pkt_data_nop; 2036 2037 /* set xfer mode for vsw_send_attr() */ 2038 lp->xfer_mode = VIO_DRING_MODE_V1_0; 2039 } 2040 2041 static void 2042 vsw_process_evt_read(vsw_ldc_t *ldcp) 2043 { 2044 if (ldcp->msg_thread != NULL) { 2045 /* 2046 * TxDring mode; wakeup message worker 2047 * thread to process the VIO messages. 2048 */ 2049 mutex_exit(&ldcp->ldc_cblock); 2050 mutex_enter(&ldcp->msg_thr_lock); 2051 if (!(ldcp->msg_thr_flags & VSW_WTHR_DATARCVD)) { 2052 ldcp->msg_thr_flags |= VSW_WTHR_DATARCVD; 2053 cv_signal(&ldcp->msg_thr_cv); 2054 } 2055 mutex_exit(&ldcp->msg_thr_lock); 2056 mutex_enter(&ldcp->ldc_cblock); 2057 } else { 2058 /* 2059 * We invoke vsw_process_pkt() in the context of the LDC 2060 * callback (vsw_ldc_cb()) during handshake, until the dring 2061 * mode is negotiated. After the dring mode is negotiated, the 2062 * msgs are processed by the msg worker thread (above case) if 2063 * the dring mode is TxDring. Otherwise (in RxDringData mode) 2064 * we continue to process the msgs directly in the callback 2065 * context. 2066 */ 2067 vsw_process_pkt(ldcp); 2068 } 2069 } 2070 2071 /* 2072 * Main routine for processing messages received over LDC. 2073 */ 2074 void 2075 vsw_process_pkt(void *arg) 2076 { 2077 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 2078 vsw_t *vswp = ldcp->ldc_vswp; 2079 size_t msglen; 2080 vio_msg_tag_t *tagp; 2081 uint64_t *ldcmsg; 2082 int rv = 0; 2083 2084 2085 D1(vswp, "%s enter: ldcid (%lld)\n", __func__, ldcp->ldc_id); 2086 2087 ASSERT(MUTEX_HELD(&ldcp->ldc_cblock)); 2088 2089 ldcmsg = ldcp->ldcmsg; 2090 /* 2091 * If channel is up read messages until channel is empty. 2092 */ 2093 do { 2094 msglen = ldcp->msglen; 2095 rv = ldc_read(ldcp->ldc_handle, (caddr_t)ldcmsg, &msglen); 2096 2097 if (rv != 0) { 2098 DERR(vswp, "%s :ldc_read err id(%lld) rv(%d) len(%d)\n", 2099 __func__, ldcp->ldc_id, rv, msglen); 2100 } 2101 2102 /* channel has been reset */ 2103 if (rv == ECONNRESET) { 2104 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 2105 break; 2106 } 2107 2108 if (msglen == 0) { 2109 D2(vswp, "%s: ldc_read id(%lld) NODATA", __func__, 2110 ldcp->ldc_id); 2111 break; 2112 } 2113 2114 D2(vswp, "%s: ldc_read id(%lld): msglen(%d)", __func__, 2115 ldcp->ldc_id, msglen); 2116 2117 /* 2118 * Figure out what sort of packet we have gotten by 2119 * examining the msg tag, and then switch it appropriately. 
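* * As a sketch, the dispatch below is: * VIO_TYPE_CTRL -> vsw_dispatch_ctrl_task() (taskq, except in-band RDX ACKs) * VIO_TYPE_DATA -> vsw_process_data_pkt() * VIO_TYPE_ERR -> vsw_process_err_pkt()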
2120 */ 2121 tagp = (vio_msg_tag_t *)ldcmsg; 2122 2123 switch (tagp->vio_msgtype) { 2124 case VIO_TYPE_CTRL: 2125 vsw_dispatch_ctrl_task(ldcp, ldcmsg, tagp, msglen); 2126 break; 2127 case VIO_TYPE_DATA: 2128 vsw_process_data_pkt(ldcp, ldcmsg, tagp, msglen); 2129 break; 2130 case VIO_TYPE_ERR: 2131 vsw_process_err_pkt(ldcp, ldcmsg, tagp); 2132 break; 2133 default: 2134 DERR(vswp, "%s: Unknown tag(%lx) " 2135 "id(%lx)\n", __func__, tagp->vio_msgtype, ldcp->ldc_id); 2136 break; 2137 } 2138 } while (msglen); 2139 2140 D1(vswp, "%s exit: ldcid (%lld)\n", __func__, ldcp->ldc_id); 2141 } 2142 2143 /* 2144 * Dispatch a task to process a VIO control message. 2145 */ 2146 static void 2147 vsw_dispatch_ctrl_task(vsw_ldc_t *ldcp, void *cpkt, vio_msg_tag_t *tagp, 2148 int msglen) 2149 { 2150 vsw_ctrl_task_t *ctaskp = NULL; 2151 vsw_port_t *port = ldcp->ldc_port; 2152 vsw_t *vswp = port->p_vswp; 2153 2154 D1(vswp, "%s: enter", __func__); 2155 2156 /* 2157 * We need to handle RDX ACK messages in-band as once they 2158 * are exchanged it is possible that we will get an 2159 * immediate (legitimate) data packet. 2160 */ 2161 if ((tagp->vio_subtype_env == VIO_RDX) && 2162 (tagp->vio_subtype == VIO_SUBTYPE_ACK)) { 2163 2164 if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_ACK_RECV)) 2165 return; 2166 2167 ldcp->lane_in.lstate |= VSW_RDX_ACK_RECV; 2168 D2(vswp, "%s (%ld) handling RDX_ACK in place " 2169 "(ostate 0x%llx : hphase %d)", __func__, 2170 ldcp->ldc_id, ldcp->lane_in.lstate, ldcp->hphase); 2171 vsw_next_milestone(ldcp); 2172 return; 2173 } 2174 2175 ctaskp = kmem_alloc(sizeof (vsw_ctrl_task_t), KM_NOSLEEP); 2176 2177 if (ctaskp == NULL) { 2178 DERR(vswp, "%s: unable to alloc space for ctrl msg", __func__); 2179 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2180 return; 2181 } 2182 2183 ctaskp->ldcp = ldcp; 2184 bcopy((def_msg_t *)cpkt, &ctaskp->pktp, msglen); 2185 ctaskp->hss_id = ldcp->hss_id; 2186 2187 /* 2188 * Dispatch task to processing taskq if port is not in 2189 * the process of being detached. 2190 */ 2191 mutex_enter(&port->state_lock); 2192 if (port->state == VSW_PORT_INIT) { 2193 if ((vswp->taskq_p == NULL) || 2194 (ddi_taskq_dispatch(vswp->taskq_p, vsw_process_ctrl_pkt, 2195 ctaskp, DDI_NOSLEEP) != DDI_SUCCESS)) { 2196 mutex_exit(&port->state_lock); 2197 DERR(vswp, "%s: unable to dispatch task to taskq", 2198 __func__); 2199 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2200 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2201 return; 2202 } 2203 } else { 2204 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2205 DWARN(vswp, "%s: port %d detaching, not dispatching " 2206 "task", __func__, port->p_instance); 2207 } 2208 2209 mutex_exit(&port->state_lock); 2210 2211 D2(vswp, "%s: dispatched task to taskq for chan %d", __func__, 2212 ldcp->ldc_id); 2213 D1(vswp, "%s: exit", __func__); 2214 } 2215 2216 /* 2217 * Process a VIO ctrl message. Invoked from taskq.
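* * Note on ownership: the vsw_ctrl_task_t processed here was allocated by * vsw_dispatch_ctrl_task() and is freed at the bottom of this function, as * well as on the stale-packet and bad-session-id early returns below.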
2218 */ 2219 static void 2220 vsw_process_ctrl_pkt(void *arg) 2221 { 2222 vsw_ctrl_task_t *ctaskp = (vsw_ctrl_task_t *)arg; 2223 vsw_ldc_t *ldcp = ctaskp->ldcp; 2224 vsw_t *vswp = ldcp->ldc_vswp; 2225 vio_msg_tag_t tag; 2226 uint16_t env; 2227 2228 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 2229 2230 bcopy(&ctaskp->pktp, &tag, sizeof (vio_msg_tag_t)); 2231 env = tag.vio_subtype_env; 2232 2233 /* stale pkt check */ 2234 if (ctaskp->hss_id < ldcp->hss_id) { 2235 DWARN(vswp, "%s: discarding stale packet belonging to earlier" 2236 " (%ld) handshake session", __func__, ctaskp->hss_id); 2237 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2238 return; 2239 } 2240 2241 /* session id check */ 2242 if (ldcp->session_status & VSW_PEER_SESSION) { 2243 if (ldcp->peer_session != tag.vio_sid) { 2244 DERR(vswp, "%s (chan %d): invalid session id (%llx)", 2245 __func__, ldcp->ldc_id, tag.vio_sid); 2246 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2247 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2248 return; 2249 } 2250 } 2251 2252 /* 2253 * Switch on vio_subtype envelope, then let lower routines 2254 * decide if it's an INFO, ACK or NACK packet. 2255 */ 2256 switch (env) { 2257 case VIO_VER_INFO: 2258 vsw_process_ctrl_ver_pkt(ldcp, &ctaskp->pktp); 2259 break; 2260 case VIO_DRING_REG: 2261 vsw_process_ctrl_dring_reg_pkt(ldcp, &ctaskp->pktp); 2262 break; 2263 case VIO_DRING_UNREG: 2264 vsw_process_ctrl_dring_unreg_pkt(ldcp, &ctaskp->pktp); 2265 break; 2266 case VIO_ATTR_INFO: 2267 vsw_process_ctrl_attr_pkt(ldcp, &ctaskp->pktp); 2268 break; 2269 case VNET_MCAST_INFO: 2270 vsw_process_ctrl_mcst_pkt(ldcp, &ctaskp->pktp); 2271 break; 2272 case VIO_RDX: 2273 vsw_process_ctrl_rdx_pkt(ldcp, &ctaskp->pktp); 2274 break; 2275 case VIO_DDS_INFO: 2276 vsw_process_dds_msg(vswp, ldcp, &ctaskp->pktp); 2277 break; 2278 2279 case VNET_PHYSLINK_INFO: 2280 vsw_process_physlink_msg(ldcp, &ctaskp->pktp); 2281 break; 2282 default: 2283 DERR(vswp, "%s: unknown vio_subtype_env (%x)\n", __func__, env); 2284 } 2285 2286 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2287 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 2288 } 2289 2290 /* 2291 * Version negotiation. We can end up here either because our peer 2292 * has responded to a handshake message we have sent it, or our peer 2293 * has initiated a handshake with us. If it's the former it can only 2294 * be an ACK or NACK; if it's the latter it can only be an INFO. 2295 * 2296 * If it's an ACK we move to the next stage of the handshake, namely 2297 * attribute exchange. If it's a NACK we see if we can specify another 2298 * version; if we can't we stop. 2299 * 2300 * If it is an INFO we reset all params associated with communication 2301 * in that direction over this channel (remember connection is 2302 * essentially 2 independent simplex channels). 2303 */ 2304 void 2305 vsw_process_ctrl_ver_pkt(vsw_ldc_t *ldcp, void *pkt) 2306 { 2307 vio_ver_msg_t *ver_pkt; 2308 vsw_t *vswp = ldcp->ldc_vswp; 2309 2310 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 2311 2312 /* 2313 * We know this is a ctrl/version packet so 2314 * cast it into the correct structure. 2315 */ 2316 ver_pkt = (vio_ver_msg_t *)pkt; 2317 2318 switch (ver_pkt->tag.vio_subtype) { 2319 case VIO_SUBTYPE_INFO: 2320 D2(vswp, "vsw_process_ctrl_ver_pkt: VIO_SUBTYPE_INFO\n"); 2321 2322 /* 2323 * Record the session id, which we will use from now 2324 * until we see another VER_INFO msg. Even then the 2325 * session id in most cases will be unchanged, except 2326 * if channel was reset.
2327 */ 2328 if ((ldcp->session_status & VSW_PEER_SESSION) && 2329 (ldcp->peer_session != ver_pkt->tag.vio_sid)) { 2330 DERR(vswp, "%s: updating session id for chan %lld " 2331 "from %llx to %llx", __func__, ldcp->ldc_id, 2332 ldcp->peer_session, ver_pkt->tag.vio_sid); 2333 } 2334 2335 ldcp->peer_session = ver_pkt->tag.vio_sid; 2336 ldcp->session_status |= VSW_PEER_SESSION; 2337 2338 /* Legal message at this time ? */ 2339 if (vsw_check_flag(ldcp, INBOUND, VSW_VER_INFO_RECV)) 2340 return; 2341 2342 /* 2343 * First check the device class. Currently only expect 2344 * to be talking to a network device. In the future may 2345 * also talk to another switch. 2346 */ 2347 if (ver_pkt->dev_class != VDEV_NETWORK) { 2348 DERR(vswp, "%s: illegal device class %d", __func__, 2349 ver_pkt->dev_class); 2350 2351 ver_pkt->tag.vio_sid = ldcp->local_session; 2352 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2353 2354 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 2355 2356 (void) vsw_send_msg(ldcp, (void *)ver_pkt, 2357 sizeof (vio_ver_msg_t), B_TRUE); 2358 2359 ldcp->lane_in.lstate |= VSW_VER_NACK_SENT; 2360 vsw_next_milestone(ldcp); 2361 return; 2362 } else { 2363 ldcp->dev_class = ver_pkt->dev_class; 2364 } 2365 2366 /* 2367 * Now check the version. 2368 */ 2369 if (vsw_supported_version(ver_pkt) == 0) { 2370 /* 2371 * Support this major version and possibly 2372 * adjusted minor version. 2373 */ 2374 2375 D2(vswp, "%s: accepted ver %d:%d", __func__, 2376 ver_pkt->ver_major, ver_pkt->ver_minor); 2377 2378 /* Store accepted values */ 2379 ldcp->lane_in.ver_major = ver_pkt->ver_major; 2380 ldcp->lane_in.ver_minor = ver_pkt->ver_minor; 2381 2382 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 2383 2384 ldcp->lane_in.lstate |= VSW_VER_ACK_SENT; 2385 2386 if (vsw_obp_ver_proto_workaround == B_TRUE) { 2387 /* 2388 * Send a version info message 2389 * using the accepted version that 2390 * we are about to ack. Also note that 2391 * we send our ver info before we ack. 2392 * Otherwise, as soon as receiving the 2393 * ack, obp sends attr info msg, which 2394 * breaks vsw_check_flag() invoked 2395 * from vsw_process_ctrl_attr_pkt(); 2396 * as we also need VSW_VER_ACK_RECV to 2397 * be set in lane_out.lstate, before 2398 * we can receive attr info. 2399 */ 2400 vsw_send_ver(ldcp); 2401 } 2402 } else { 2403 /* 2404 * NACK back with the next lower major/minor 2405 * pairing we support (if we don't support any more 2406 * versions they will be set to zero).
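* * A hypothetical exchange (version numbers purely illustrative): the peer * sends VER INFO with major 2 while our highest supported pairing is, say, * 1.6; vsw_supported_version() rewrites the message to 1.6 and we NACK with * it; the peer can then resend VER INFO 1.6, which we ACK.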
2407 */ 2408 2409 D2(vswp, "%s: replying with ver %d:%d", __func__, 2410 ver_pkt->ver_major, ver_pkt->ver_minor); 2411 2412 /* Store updated values */ 2413 ldcp->lane_in.ver_major = ver_pkt->ver_major; 2414 ldcp->lane_in.ver_minor = ver_pkt->ver_minor; 2415 2416 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2417 2418 ldcp->lane_in.lstate |= VSW_VER_NACK_SENT; 2419 } 2420 2421 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 2422 ver_pkt->tag.vio_sid = ldcp->local_session; 2423 (void) vsw_send_msg(ldcp, (void *)ver_pkt, 2424 sizeof (vio_ver_msg_t), B_TRUE); 2425 2426 vsw_next_milestone(ldcp); 2427 break; 2428 2429 case VIO_SUBTYPE_ACK: 2430 D2(vswp, "%s: VIO_SUBTYPE_ACK\n", __func__); 2431 2432 if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_ACK_RECV)) 2433 return; 2434 2435 /* Store updated values */ 2436 ldcp->lane_out.ver_major = ver_pkt->ver_major; 2437 ldcp->lane_out.ver_minor = ver_pkt->ver_minor; 2438 2439 ldcp->lane_out.lstate |= VSW_VER_ACK_RECV; 2440 vsw_next_milestone(ldcp); 2441 2442 break; 2443 2444 case VIO_SUBTYPE_NACK: 2445 D2(vswp, "%s: VIO_SUBTYPE_NACK\n", __func__); 2446 2447 if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_NACK_RECV)) 2448 return; 2449 2450 /* 2451 * If our peer sent us a NACK with the ver fields set to 2452 * zero then there is nothing more we can do. Otherwise see 2453 * if we support either the version suggested, or a lesser 2454 * one. 2455 */ 2456 if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) { 2457 DERR(vswp, "%s: peer unable to negotiate any " 2458 "further.", __func__); 2459 ldcp->lane_out.lstate |= VSW_VER_NACK_RECV; 2460 vsw_next_milestone(ldcp); 2461 return; 2462 } 2463 2464 /* 2465 * Check to see if we support this major version or 2466 * a lower one. If we don't then maj/min will be set 2467 * to zero. 2468 */ 2469 (void) vsw_supported_version(ver_pkt); 2470 if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) { 2471 /* Nothing more we can do */ 2472 DERR(vswp, "%s: version negotiation failed.\n", 2473 __func__); 2474 ldcp->lane_out.lstate |= VSW_VER_NACK_RECV; 2475 vsw_next_milestone(ldcp); 2476 } else { 2477 /* found a supported major version */ 2478 ldcp->lane_out.ver_major = ver_pkt->ver_major; 2479 ldcp->lane_out.ver_minor = ver_pkt->ver_minor; 2480 2481 D2(vswp, "%s: resending with updated values (%x, %x)", 2482 __func__, ver_pkt->ver_major, ver_pkt->ver_minor); 2483 2484 ldcp->lane_out.lstate |= VSW_VER_INFO_SENT; 2485 ver_pkt->tag.vio_sid = ldcp->local_session; 2486 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO; 2487 2488 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 2489 2490 (void) vsw_send_msg(ldcp, (void *)ver_pkt, 2491 sizeof (vio_ver_msg_t), B_TRUE); 2492 2493 vsw_next_milestone(ldcp); 2494 2495 } 2496 break; 2497 2498 default: 2499 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 2500 ver_pkt->tag.vio_subtype); 2501 } 2502 2503 D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id); 2504 } 2505 2506 static int 2507 vsw_process_attr_info(vsw_ldc_t *ldcp, vnet_attr_msg_t *msg) 2508 { 2509 vsw_t *vswp = ldcp->ldc_vswp; 2510 vsw_port_t *port = ldcp->ldc_port; 2511 struct ether_addr ea; 2512 uint64_t macaddr = 0; 2513 lane_t *lane_out = &ldcp->lane_out; 2514 lane_t *lane_in = &ldcp->lane_in; 2515 uint32_t mtu; 2516 int i; 2517 uint8_t dring_mode; 2518 2519 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 2520 2521 if (vsw_check_flag(ldcp, INBOUND, VSW_ATTR_INFO_RECV)) { 2522 return (1); 2523 } 2524 2525 if ((msg->xfer_mode != VIO_DESC_MODE) && 2526 (msg->xfer_mode != lane_out->xfer_mode)) { 2527 D2(NULL, "%s: unknown mode %x\n", __func__, 
msg->xfer_mode); 2528 return (1); 2529 } 2530 2531 /* Only MAC addresses are supported at the moment. */ 2532 if ((msg->addr_type != ADDR_TYPE_MAC) || (msg->addr == 0)) { 2533 D2(NULL, "%s: invalid addr_type %x, or address 0x%llx\n", 2534 __func__, msg->addr_type, msg->addr); 2535 return (1); 2536 } 2537 2538 /* 2539 * MAC address supplied by device should match that stored 2540 * in the vsw-port OBP node. Need to decide what to do if they 2541 * don't match; for the moment just warn but don't fail. 2542 */ 2543 vnet_macaddr_ultostr(msg->addr, ea.ether_addr_octet); 2544 if (ether_cmp(&ea, &port->p_macaddr) != 0) { 2545 DERR(NULL, "%s: device supplied address " 2546 "0x%llx doesn't match node address 0x%llx\n", 2547 __func__, msg->addr, port->p_macaddr); 2548 } 2549 2550 /* 2551 * Ack freq only makes sense in pkt mode; in shared 2552 * mode the ring descriptors say whether or not to 2553 * send back an ACK. 2554 */ 2555 if ((VSW_VER_GTEQ(ldcp, 1, 2) && 2556 (msg->xfer_mode & VIO_DRING_MODE_V1_2)) || 2557 (VSW_VER_LT(ldcp, 1, 2) && 2558 (msg->xfer_mode == VIO_DRING_MODE_V1_0))) { 2559 if (msg->ack_freq > 0) { 2560 D2(NULL, "%s: non zero ack freq in SHM mode\n", 2561 __func__); 2562 return (1); 2563 } 2564 } 2565 2566 /* 2567 * Process dring mode attribute. 2568 */ 2569 if (VSW_VER_GTEQ(ldcp, 1, 6)) { 2570 /* 2571 * Versions >= 1.6: 2572 * Though we are operating in v1.6 mode, it is possible that 2573 * RxDringData mode has been disabled either on this guest or 2574 * on the peer guest. If so, we revert to pre v1.6 behavior of 2575 * TxDring mode. But this must be agreed upon in both 2576 * directions of attr exchange. We first determine the mode 2577 * that can be negotiated. 2578 */ 2579 if ((msg->options & VIO_RX_DRING_DATA) != 0 && 2580 vsw_mapin_avail(ldcp) == B_TRUE) { 2581 /* 2582 * The peer is capable of handling RxDringData AND we 2583 * are also capable of it; we enable RxDringData mode 2584 * on this channel. 2585 */ 2586 dring_mode = VIO_RX_DRING_DATA; 2587 } else if ((msg->options & VIO_TX_DRING) != 0) { 2588 /* 2589 * If the peer is capable of TxDring mode, we 2590 * negotiate TxDring mode on this channel. 2591 */ 2592 dring_mode = VIO_TX_DRING; 2593 } else { 2594 /* 2595 * We support only VIO_TX_DRING and VIO_RX_DRING_DATA 2596 * modes. We don't support VIO_RX_DRING mode. 2597 */ 2598 return (1); 2599 } 2600 2601 /* 2602 * If we have received an ack for the attr info that we sent, 2603 * then check if the dring mode matches what the peer had ack'd 2604 * (saved in lane_out). If they don't match, we fail the 2605 * handshake. 2606 */ 2607 if (lane_out->lstate & VSW_ATTR_ACK_RECV) { 2608 if (msg->options != lane_out->dring_mode) { 2609 /* send NACK */ 2610 return (1); 2611 } 2612 } else { 2613 /* 2614 * Save the negotiated dring mode in our attr 2615 * parameters, so it gets sent in the attr info from us 2616 * to the peer. 2617 */ 2618 lane_out->dring_mode = dring_mode; 2619 } 2620 2621 /* save the negotiated dring mode in the msg to be replied */ 2622 msg->options = dring_mode; 2623 } 2624 2625 /* 2626 * Process MTU attribute. 2627 */ 2628 if (VSW_VER_GTEQ(ldcp, 1, 4)) { 2629 /* 2630 * Versions >= 1.4: 2631 * Validate mtu of the peer is at least ETHERMAX. Then, the mtu 2632 * is negotiated down to the minimum of our mtu and peer's mtu.
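* * For example (hypothetical sizes): with our max_frame_size at 9216 and a * peer mtu of 1600, the negotiated value is MIN(1600, 9216) = 1600; a peer * mtu below ETHERMAX is rejected outright.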
2633 */ 2634 if (msg->mtu < ETHERMAX) { 2635 return (1); 2636 } 2637 2638 mtu = MIN(msg->mtu, vswp->max_frame_size); 2639 2640 /* 2641 * If we have received an ack for the attr info 2642 * that we sent, then check if the mtu computed 2643 * above matches the mtu that the peer had ack'd 2644 * (saved in lane_out params). If they don't 2645 * match, we fail the handshake. 2646 */ 2647 if (lane_out->lstate & VSW_ATTR_ACK_RECV) { 2648 if (mtu != lane_out->mtu) { 2649 /* send NACK */ 2650 return (1); 2651 } 2652 } else { 2653 /* 2654 * Save the mtu computed above in our 2655 * attr parameters, so it gets sent in 2656 * the attr info from us to the peer. 2657 */ 2658 lane_out->mtu = mtu; 2659 } 2660 2661 /* save the MIN mtu in the msg to be replied */ 2662 msg->mtu = mtu; 2663 } else { 2664 /* Versions < 1.4, mtu must match */ 2665 if (msg->mtu != lane_out->mtu) { 2666 D2(NULL, "%s: invalid MTU (0x%llx)\n", 2667 __func__, msg->mtu); 2668 return (1); 2669 } 2670 } 2671 2672 /* 2673 * Otherwise store attributes for this lane and update 2674 * lane state. 2675 */ 2676 lane_in->mtu = msg->mtu; 2677 lane_in->addr = msg->addr; 2678 lane_in->addr_type = msg->addr_type; 2679 lane_in->xfer_mode = msg->xfer_mode; 2680 lane_in->ack_freq = msg->ack_freq; 2681 lane_in->physlink_update = msg->physlink_update; 2682 lane_in->dring_mode = msg->options; 2683 2684 /* 2685 * Check if the client has requested physlink state updates. 2686 * If there is a physical device bound to this vswitch (L2 2687 * mode), set the ack bits to indicate it is supported. 2688 * Otherwise, set the nack bits. 2689 */ 2690 if (VSW_VER_GTEQ(ldcp, 1, 5)) { /* Protocol ver >= 1.5 */ 2691 2692 /* Does the vnet need phys link state updates ? */ 2693 if ((lane_in->physlink_update & 2694 PHYSLINK_UPDATE_STATE_MASK) == 2695 PHYSLINK_UPDATE_STATE) { 2696 2697 if (vswp->smode & VSW_LAYER2) { 2698 /* is a net-dev assigned to us ? */ 2699 msg->physlink_update = 2700 PHYSLINK_UPDATE_STATE_ACK; 2701 ldcp->pls_negotiated = B_TRUE; 2702 } else { 2703 /* not in L2 mode */ 2704 msg->physlink_update = 2705 PHYSLINK_UPDATE_STATE_NACK; 2706 ldcp->pls_negotiated = B_FALSE; 2707 } 2708 2709 } else { 2710 msg->physlink_update = 2711 PHYSLINK_UPDATE_NONE; 2712 ldcp->pls_negotiated = B_FALSE; 2713 } 2714 2715 } else { 2716 /* 2717 * physlink_update bits are ignored 2718 * if set by clients < v1.5 protocol. 2719 */ 2720 msg->physlink_update = PHYSLINK_UPDATE_NONE; 2721 ldcp->pls_negotiated = B_FALSE; 2722 } 2723 2724 macaddr = lane_in->addr; 2725 for (i = ETHERADDRL - 1; i >= 0; i--) { 2726 port->p_macaddr.ether_addr_octet[i] = macaddr & 0xFF; 2727 macaddr >>= 8; 2728 } 2729 2730 /* 2731 * Setup device specific xmit routines. Note this could be changed 2732 * further in vsw_send_dring_info() for versions >= 1.6 if operating in 2733 * RxDringData mode. 2734 */ 2735 mutex_enter(&port->tx_lock); 2736 2737 if ((VSW_VER_GTEQ(ldcp, 1, 2) && 2738 (lane_in->xfer_mode & VIO_DRING_MODE_V1_2)) || 2739 (VSW_VER_LT(ldcp, 1, 2) && 2740 (lane_in->xfer_mode == VIO_DRING_MODE_V1_0))) { 2741 D2(vswp, "%s: mode = VIO_DRING_MODE", __func__); 2742 port->transmit = vsw_dringsend; 2743 } else if (lane_in->xfer_mode == VIO_DESC_MODE) { 2744 D2(vswp, "%s: mode = VIO_DESC_MODE", __func__); 2745 vsw_create_privring(ldcp); 2746 port->transmit = vsw_descrsend; 2747 lane_out->xfer_mode = VIO_DESC_MODE; 2748 } 2749 2750 /* 2751 * HybridIO is supported only by vnet, not by OBP. 2752 * So, set hio_capable to true only when in DRING mode.
2753 */ 2754 if (VSW_VER_GTEQ(ldcp, 1, 3) && 2755 (lane_in->xfer_mode != VIO_DESC_MODE)) { 2756 (void) atomic_swap_32(&port->p_hio_capable, B_TRUE); 2757 } else { 2758 (void) atomic_swap_32(&port->p_hio_capable, B_FALSE); 2759 } 2760 2761 mutex_exit(&port->tx_lock); 2762 2763 return (0); 2764 } 2765 2766 static int 2767 vsw_process_attr_ack(vsw_ldc_t *ldcp, vnet_attr_msg_t *msg) 2768 { 2769 vsw_t *vswp = ldcp->ldc_vswp; 2770 lane_t *lane_out = &ldcp->lane_out; 2771 lane_t *lane_in = &ldcp->lane_in; 2772 2773 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 2774 2775 if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_ACK_RECV)) { 2776 return (1); 2777 } 2778 2779 /* 2780 * Process dring mode attribute. 2781 */ 2782 if (VSW_VER_GTEQ(ldcp, 1, 6)) { 2783 /* 2784 * Versions >= 1.6: 2785 * The ack msg sent by the peer contains the negotiated dring 2786 * mode between our capability (that we had sent in our attr 2787 * info) and the peer's capability. 2788 */ 2789 if (lane_in->lstate & VSW_ATTR_ACK_SENT) { 2790 /* 2791 * If we have sent an ack for the attr info msg from 2792 * the peer, check if the dring mode that was 2793 * negotiated then (saved in lane_out) matches the 2794 * mode that the peer has ack'd. If they don't match, 2795 * we fail the handshake. 2796 */ 2797 if (lane_out->dring_mode != msg->options) { 2798 return (1); 2799 } 2800 } else { 2801 if ((msg->options & lane_out->dring_mode) == 0) { 2802 /* 2803 * Peer ack'd with a mode that we don't 2804 * support; we fail the handshake. 2805 */ 2806 return (1); 2807 } 2808 if ((msg->options & (VIO_TX_DRING|VIO_RX_DRING_DATA)) 2809 == (VIO_TX_DRING|VIO_RX_DRING_DATA)) { 2810 /* 2811 * Peer must ack with only one negotiated mode. 2812 * Otherwise fail handshake. 2813 */ 2814 return (1); 2815 } 2816 2817 /* 2818 * Save the negotiated mode, so we can validate it when 2819 * we receive attr info from the peer. 2820 */ 2821 lane_out->dring_mode = msg->options; 2822 } 2823 } 2824 2825 /* 2826 * Process MTU attribute. 2827 */ 2828 if (VSW_VER_GTEQ(ldcp, 1, 4)) { 2829 /* 2830 * Versions >= 1.4: 2831 * The ack msg sent by the peer contains the minimum of 2832 * our mtu (that we had sent in our attr info) and the 2833 * peer's mtu. 2834 * 2835 * If we have sent an ack for the attr info msg from 2836 * the peer, check if the mtu that was computed then 2837 * (saved in lane_out params) matches the mtu that the 2838 * peer has ack'd. If they don't match, we fail the 2839 * handshake. 2840 */ 2841 if (lane_in->lstate & VSW_ATTR_ACK_SENT) { 2842 if (lane_out->mtu != msg->mtu) { 2843 return (1); 2844 } 2845 } else { 2846 /* 2847 * If the mtu ack'd by the peer is > our mtu 2848 * fail handshake. Otherwise, save the mtu, so 2849 * we can validate it when we receive attr info 2850 * from our peer. 2851 */ 2852 if (msg->mtu <= lane_out->mtu) { 2853 lane_out->mtu = msg->mtu; 2854 } else { 2855 return (1); 2856 } 2857 } 2858 } 2859 2860 return (0); 2861 } 2862 2863 /* 2864 * Process an attribute packet. We can end up here either because our peer 2865 * has ACK/NACK'ed back to an earlier ATTR msg we had sent it, or our 2866 * peer has sent us an attribute INFO message. 2867 * 2868 * If it's an ACK we then move to the next stage of the handshake which 2869 * is to send our descriptor ring info to our peer. If it's a NACK then 2870 * there is nothing more we can (currently) do.
2871 * 2872 * If we get a valid/acceptable INFO packet (and we have already negotiated 2873 * a version) we ACK back and set channel state to ATTR_RECV, otherwise we 2874 * NACK back and reset channel state to INACTIV. 2875 * 2876 * FUTURE: in time we will probably negotiate over attributes, but for 2877 * the moment unacceptable attributes are regarded as a fatal error. 2878 * 2879 */ 2880 void 2881 vsw_process_ctrl_attr_pkt(vsw_ldc_t *ldcp, void *pkt) 2882 { 2883 vnet_attr_msg_t *attr_pkt; 2884 vsw_t *vswp = ldcp->ldc_vswp; 2885 lane_t *lane_out = &ldcp->lane_out; 2886 lane_t *lane_in = &ldcp->lane_in; 2887 int rv; 2888 2889 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 2890 2891 /* 2892 * We know this is a ctrl/attr packet so 2893 * cast it into the correct structure. 2894 */ 2895 attr_pkt = (vnet_attr_msg_t *)pkt; 2896 2897 switch (attr_pkt->tag.vio_subtype) { 2898 case VIO_SUBTYPE_INFO: 2899 2900 rv = vsw_process_attr_info(ldcp, attr_pkt); 2901 if (rv != 0) { 2902 vsw_free_lane_resources(ldcp, INBOUND); 2903 attr_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2904 lane_in->lstate |= VSW_ATTR_NACK_SENT; 2905 } else { 2906 attr_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 2907 lane_in->lstate |= VSW_ATTR_ACK_SENT; 2908 } 2909 attr_pkt->tag.vio_sid = ldcp->local_session; 2910 DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt); 2911 (void) vsw_send_msg(ldcp, (void *)attr_pkt, 2912 sizeof (vnet_attr_msg_t), B_TRUE); 2913 vsw_next_milestone(ldcp); 2914 break; 2915 2916 case VIO_SUBTYPE_ACK: 2917 2918 rv = vsw_process_attr_ack(ldcp, attr_pkt); 2919 if (rv != 0) { 2920 return; 2921 } 2922 lane_out->lstate |= VSW_ATTR_ACK_RECV; 2923 vsw_next_milestone(ldcp); 2924 break; 2925 2926 case VIO_SUBTYPE_NACK: 2927 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 2928 2929 if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_NACK_RECV)) 2930 return; 2931 2932 lane_out->lstate |= VSW_ATTR_NACK_RECV; 2933 vsw_next_milestone(ldcp); 2934 break; 2935 2936 default: 2937 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 2938 attr_pkt->tag.vio_subtype); 2939 } 2940 2941 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 2942 } 2943 2944 static int 2945 vsw_process_dring_reg_info(vsw_ldc_t *ldcp, vio_msg_tag_t *tagp) 2946 { 2947 int rv; 2948 vsw_t *vswp = ldcp->ldc_vswp; 2949 lane_t *lp = &ldcp->lane_out; 2950 dring_info_t *dp = NULL; 2951 2952 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 2953 2954 rv = vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV); 2955 if (rv != 0) { 2956 return (1); 2957 } 2958 2959 if (VSW_VER_GTEQ(ldcp, 1, 6) && 2960 (lp->dring_mode != ((vio_dring_reg_msg_t *)tagp)->options)) { 2961 /* 2962 * The earlier version of the Solaris vnet driver doesn't set the 2963 * option (VIO_TX_DRING in its case) correctly in its dring reg 2964 * message. We work around that here by doing the check only 2965 * for versions >= v1.6. 2966 */ 2967 DWARN(vswp, "%s(%lld): Rcvd dring reg option (%d), " 2968 "negotiated mode (%d)\n", __func__, ldcp->ldc_id, 2969 ((vio_dring_reg_msg_t *)tagp)->options, lp->dring_mode); 2970 return (1); 2971 } 2972 2973 /* 2974 * Map dring exported by the peer. 2975 */ 2976 dp = vsw_map_dring(ldcp, (void *)tagp); 2977 if (dp == NULL) { 2978 return (1); 2979 } 2980 2981 /* 2982 * Map data buffers exported by the peer if we are in RxDringData mode.
2983 */ 2984 if (lp->dring_mode == VIO_RX_DRING_DATA) { 2985 rv = vsw_map_data(ldcp, dp, (void *)tagp); 2986 if (rv != 0) { 2987 vsw_unmap_dring(ldcp); 2988 return (1); 2989 } 2990 } 2991 2992 return (0); 2993 } 2994 2995 static int 2996 vsw_process_dring_reg_ack(vsw_ldc_t *ldcp, vio_msg_tag_t *tagp) 2997 { 2998 vsw_t *vswp = ldcp->ldc_vswp; 2999 dring_info_t *dp; 3000 3001 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3002 3003 if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_ACK_RECV)) { 3004 return (1); 3005 } 3006 3007 dp = ldcp->lane_out.dringp; 3008 3009 /* save dring_ident acked by peer */ 3010 dp->ident = ((vio_dring_reg_msg_t *)tagp)->dring_ident; 3011 3012 return (0); 3013 } 3014 3015 /* 3016 * Process a dring info packet. We can end up here either because our peer 3017 * has ACK/NACK'ed back to an earlier DRING msg we had sent it, or our 3018 * peer has sent us a dring INFO message. 3019 * 3020 * If we get a valid/acceptable INFO packet (and we have already negotiated 3021 * a version) we ACK back and update the lane state, otherwise we NACK back. 3022 * 3023 * FUTURE: nothing to stop the client from sending us info on multiple drings, 3024 * but for the moment we will just use the first one we are given. 3025 * 3026 */ 3027 void 3028 vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *ldcp, void *pkt) 3029 { 3030 int rv; 3031 int msgsize; 3032 dring_info_t *dp; 3033 vio_msg_tag_t *tagp = (vio_msg_tag_t *)pkt; 3034 vsw_t *vswp = ldcp->ldc_vswp; 3035 lane_t *lane_out = &ldcp->lane_out; 3036 lane_t *lane_in = &ldcp->lane_in; 3037 3038 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 3039 3040 switch (tagp->vio_subtype) { 3041 case VIO_SUBTYPE_INFO: 3042 rv = vsw_process_dring_reg_info(ldcp, tagp); 3043 if (rv != 0) { 3044 vsw_free_lane_resources(ldcp, INBOUND); 3045 tagp->vio_subtype = VIO_SUBTYPE_NACK; 3046 lane_in->lstate |= VSW_DRING_NACK_SENT; 3047 } else { 3048 tagp->vio_subtype = VIO_SUBTYPE_ACK; 3049 lane_in->lstate |= VSW_DRING_ACK_SENT; 3050 } 3051 tagp->vio_sid = ldcp->local_session; 3052 DUMP_TAG_PTR(tagp); 3053 if (lane_out->dring_mode == VIO_RX_DRING_DATA) { 3054 dp = lane_in->dringp; 3055 msgsize = 3056 VNET_DRING_REG_EXT_MSG_SIZE(dp->data_ncookies); 3057 } else { 3058 msgsize = sizeof (vio_dring_reg_msg_t); 3059 } 3060 (void) vsw_send_msg(ldcp, (void *)tagp, msgsize, B_TRUE); 3061 vsw_next_milestone(ldcp); 3062 break; 3063 3064 case VIO_SUBTYPE_ACK: 3065 rv = vsw_process_dring_reg_ack(ldcp, tagp); 3066 if (rv != 0) { 3067 return; 3068 } 3069 lane_out->lstate |= VSW_DRING_ACK_RECV; 3070 vsw_next_milestone(ldcp); 3071 break; 3072 3073 case VIO_SUBTYPE_NACK: 3074 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3075 3076 if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_NACK_RECV)) 3077 return; 3078 3079 lane_out->lstate |= VSW_DRING_NACK_RECV; 3080 vsw_next_milestone(ldcp); 3081 break; 3082 3083 default: 3084 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 3085 tagp->vio_subtype); 3086 } 3087 3088 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 3089 } 3090 3091 /* 3092 * Process a request from peer to unregister a dring. 3093 * 3094 * For the moment we just restart the handshake if our 3095 * peer endpoint attempts to unregister a dring. 3096 */ 3097 void 3098 vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *ldcp, void *pkt) 3099 { 3100 vsw_t *vswp = ldcp->ldc_vswp; 3101 vio_dring_unreg_msg_t *dring_pkt; 3102 3103 /* 3104 * We know this is a ctrl/dring packet so 3105 * cast it into the correct structure.
3106 */ 3107 dring_pkt = (vio_dring_unreg_msg_t *)pkt; 3108 3109 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3110 3111 switch (dring_pkt->tag.vio_subtype) { 3112 case VIO_SUBTYPE_INFO: 3113 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3114 3115 DWARN(vswp, "%s: restarting handshake..", __func__); 3116 break; 3117 3118 case VIO_SUBTYPE_ACK: 3119 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3120 3121 DWARN(vswp, "%s: restarting handshake..", __func__); 3122 break; 3123 3124 case VIO_SUBTYPE_NACK: 3125 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3126 3127 DWARN(vswp, "%s: restarting handshake..", __func__); 3128 break; 3129 3130 default: 3131 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 3132 dring_pkt->tag.vio_subtype); 3133 } 3134 3135 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3136 3137 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3138 } 3139 3140 #define SND_MCST_NACK(ldcp, pkt) \ 3141 pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \ 3142 pkt->tag.vio_sid = ldcp->local_session; \ 3143 (void) vsw_send_msg(ldcp, (void *)pkt, \ 3144 sizeof (vnet_mcast_msg_t), B_TRUE); 3145 3146 /* 3147 * Process a multicast request from a vnet. 3148 * 3149 * Vnets specify a multicast address that they are interested in. This 3150 * address is used as a key into the hash table which forms the multicast 3151 * forwarding database (mFDB). 3152 * 3153 * The table keys are the multicast addresses, while the table entries 3154 * are pointers to lists of ports which wish to receive packets for the 3155 * specified multicast address. 3156 * 3157 * When a multicast packet is being switched we use the address as a key 3158 * into the hash table, and then walk the appropriate port list forwarding 3159 * the pkt to each port in turn. 3160 * 3161 * If a vnet is no longer interested in a particular multicast grouping 3162 * we simply find the correct location in the hash table and then delete 3163 * the relevant port from the port list. 3164 * 3165 * To deal with the case whereby a port is being deleted without first 3166 * removing itself from the lists in the hash table, we maintain a list 3167 * of multicast addresses the port has registered an interest in, within 3168 * the port structure itself. We then simply walk that list of addresses 3169 * using them as keys into the hash table and remove the port from the 3170 * appropriate lists. 3171 */ 3172 static void 3173 vsw_process_ctrl_mcst_pkt(vsw_ldc_t *ldcp, void *pkt) 3174 { 3175 vnet_mcast_msg_t *mcst_pkt; 3176 vsw_port_t *port = ldcp->ldc_port; 3177 vsw_t *vswp = ldcp->ldc_vswp; 3178 int i; 3179 3180 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3181 3182 /* 3183 * We know this is a ctrl/mcast packet so 3184 * cast it into the correct structure. 3185 */ 3186 mcst_pkt = (vnet_mcast_msg_t *)pkt; 3187 3188 switch (mcst_pkt->tag.vio_subtype) { 3189 case VIO_SUBTYPE_INFO: 3190 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3191 3192 /* 3193 * Check if in correct state to receive a multicast 3194 * message (i.e. handshake complete). If not reset 3195 * the handshake. 3196 */ 3197 if (vsw_check_flag(ldcp, INBOUND, VSW_MCST_INFO_RECV)) 3198 return; 3199 3200 /* 3201 * Before attempting to add or remove addresses, check 3202 * that they are valid multicast addresses. 3203 * If not, then NACK back.
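* * The check below tests the multicast group bit (the least significant bit * of the first octet): e.g. 01:00:5e:00:00:01 passes, while a unicast * address such as 00:14:4f:00:00:01 is NACKed.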
3204 */ 3205 for (i = 0; i < mcst_pkt->count; i++) { 3206 if ((mcst_pkt->mca[i].ether_addr_octet[0] & 01) != 1) { 3207 DERR(vswp, "%s: invalid multicast address", 3208 __func__); 3209 SND_MCST_NACK(ldcp, mcst_pkt); 3210 return; 3211 } 3212 } 3213 3214 /* 3215 * Now add/remove the addresses. If this fails we 3216 * NACK back. 3217 */ 3218 if (vsw_add_rem_mcst(mcst_pkt, port) != 0) { 3219 SND_MCST_NACK(ldcp, mcst_pkt); 3220 return; 3221 } 3222 3223 mcst_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3224 mcst_pkt->tag.vio_sid = ldcp->local_session; 3225 3226 DUMP_TAG_PTR((vio_msg_tag_t *)mcst_pkt); 3227 3228 (void) vsw_send_msg(ldcp, (void *)mcst_pkt, 3229 sizeof (vnet_mcast_msg_t), B_TRUE); 3230 break; 3231 3232 case VIO_SUBTYPE_ACK: 3233 DWARN(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3234 3235 /* 3236 * We shouldn't ever get a multicast ACK message as 3237 * at the moment we never request multicast addresses 3238 * to be set on some other device. This may change in 3239 * the future if we have cascading switches. 3240 */ 3241 if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_ACK_RECV)) 3242 return; 3243 3244 /* Do nothing */ 3245 break; 3246 3247 case VIO_SUBTYPE_NACK: 3248 DWARN(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3249 3250 /* 3251 * We shouldn't get a multicast NACK packet for the 3252 * same reasons as we shouldn't get an ACK packet. 3253 */ 3254 if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_NACK_RECV)) 3255 return; 3256 3257 /* Do nothing */ 3258 break; 3259 3260 default: 3261 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 3262 mcst_pkt->tag.vio_subtype); 3263 } 3264 3265 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3266 } 3267 3268 static void 3269 vsw_process_ctrl_rdx_pkt(vsw_ldc_t *ldcp, void *pkt) 3270 { 3271 vio_rdx_msg_t *rdx_pkt; 3272 vsw_t *vswp = ldcp->ldc_vswp; 3273 3274 /* 3275 * We know this is a ctrl/rdx packet so 3276 * cast it into the correct structure. 3277 */ 3278 rdx_pkt = (vio_rdx_msg_t *)pkt; 3279 3280 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 3281 3282 switch (rdx_pkt->tag.vio_subtype) { 3283 case VIO_SUBTYPE_INFO: 3284 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3285 3286 if (vsw_check_flag(ldcp, OUTBOUND, VSW_RDX_INFO_RECV)) 3287 return; 3288 3289 rdx_pkt->tag.vio_sid = ldcp->local_session; 3290 rdx_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3291 3292 DUMP_TAG_PTR((vio_msg_tag_t *)rdx_pkt); 3293 3294 ldcp->lane_out.lstate |= VSW_RDX_ACK_SENT; 3295 3296 (void) vsw_send_msg(ldcp, (void *)rdx_pkt, 3297 sizeof (vio_rdx_msg_t), B_TRUE); 3298 3299 vsw_next_milestone(ldcp); 3300 break; 3301 3302 case VIO_SUBTYPE_ACK: 3303 /* 3304 * Should be handled in-band by callback handler.
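* (RDX ACKs are consumed in-band by vsw_dispatch_ctrl_task() before any * taskq dispatch, so reaching this case indicates a protocol error and we * restart the connection.)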
3305 */ 3306 DERR(vswp, "%s: Unexpected VIO_SUBTYPE_ACK", __func__); 3307 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3308 break; 3309 3310 case VIO_SUBTYPE_NACK: 3311 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3312 3313 if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_NACK_RECV)) 3314 return; 3315 3316 ldcp->lane_in.lstate |= VSW_RDX_NACK_RECV; 3317 vsw_next_milestone(ldcp); 3318 break; 3319 3320 default: 3321 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 3322 rdx_pkt->tag.vio_subtype); 3323 } 3324 3325 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3326 } 3327 3328 static void 3329 vsw_process_physlink_msg(vsw_ldc_t *ldcp, void *pkt) 3330 { 3331 vnet_physlink_msg_t *msgp; 3332 vsw_t *vswp = ldcp->ldc_vswp; 3333 3334 msgp = (vnet_physlink_msg_t *)pkt; 3335 3336 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 3337 3338 switch (msgp->tag.vio_subtype) { 3339 case VIO_SUBTYPE_INFO: 3340 3341 /* vsw shouldn't recv physlink info */ 3342 DWARN(vswp, "%s: Unexpected VIO_SUBTYPE_INFO", __func__); 3343 break; 3344 3345 case VIO_SUBTYPE_ACK: 3346 3347 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3348 break; 3349 3350 case VIO_SUBTYPE_NACK: 3351 3352 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3353 break; 3354 3355 default: 3356 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 3357 msgp->tag.vio_subtype); 3358 } 3359 3360 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3361 } 3362 3363 static void 3364 vsw_process_data_pkt(vsw_ldc_t *ldcp, void *dpkt, vio_msg_tag_t *tagp, 3365 uint32_t msglen) 3366 { 3367 uint16_t env = tagp->vio_subtype_env; 3368 vsw_t *vswp = ldcp->ldc_vswp; 3369 lane_t *lp = &ldcp->lane_out; 3370 uint8_t dring_mode = lp->dring_mode; 3371 3372 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3373 3374 /* session id check */ 3375 if (ldcp->session_status & VSW_PEER_SESSION) { 3376 if (ldcp->peer_session != tagp->vio_sid) { 3377 DERR(vswp, "%s (chan %d): invalid session id (%llx)", 3378 __func__, ldcp->ldc_id, tagp->vio_sid); 3379 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3380 return; 3381 } 3382 } 3383 3384 /* 3385 * It is an error for us to be getting data packets 3386 * before the handshake has completed. 3387 */ 3388 if (ldcp->hphase != VSW_MILESTONE4) { 3389 DERR(vswp, "%s: got data packet before handshake complete " 3390 "hphase %d (%x: %x)", __func__, ldcp->hphase, 3391 ldcp->lane_in.lstate, ldcp->lane_out.lstate); 3392 DUMP_FLAGS(ldcp->lane_in.lstate); 3393 DUMP_FLAGS(ldcp->lane_out.lstate); 3394 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3395 return; 3396 } 3397 if (dring_mode == VIO_TX_DRING) { 3398 /* 3399 * To reduce the locking contention, release the ldc_cblock 3400 * here and re-acquire it once we are done receiving packets. 3401 * We do this only in TxDring mode to allow further callbacks to 3402 * continue while the msg worker thread processes the messages. 3403 * In RxDringData mode, we process the messages in the callback 3404 * itself and wake up rcv worker thread to process only data 3405 * info messages. 3406 */ 3407 mutex_exit(&ldcp->ldc_cblock); 3408 mutex_enter(&ldcp->ldc_rxlock); 3409 } 3410 3411 /* 3412 * Switch on vio_subtype envelope, then let lower routines 3413 * decide if it's an INFO, ACK or NACK packet.
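* * In sketch form, the envelope dispatch below is: * VIO_DRING_DATA -> ldcp->rx_dringdata() (dring modes) * VIO_PKT_DATA -> ldcp->rx_pktdata() (priority/raw frames) * VIO_DESC_DATA -> vsw_process_data_ibnd_pkt() (in-band, e.g. OBP)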
3414 */ 3415 if (env == VIO_DRING_DATA) { 3416 ldcp->rx_dringdata(ldcp, dpkt); 3417 } else if (env == VIO_PKT_DATA) { 3418 ldcp->rx_pktdata(ldcp, dpkt, msglen); 3419 } else if (env == VIO_DESC_DATA) { 3420 vsw_process_data_ibnd_pkt(ldcp, dpkt); 3421 } else { 3422 DERR(vswp, "%s: unknown vio_subtype_env (%x)\n", 3423 __func__, env); 3424 } 3425 3426 if (dring_mode == VIO_TX_DRING) { 3427 mutex_exit(&ldcp->ldc_rxlock); 3428 mutex_enter(&ldcp->ldc_cblock); 3429 } 3430 3431 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3432 } 3433 3434 /* 3435 * dummy pkt data handler function for vnet protocol version 1.0 3436 */ 3437 static void 3438 vsw_process_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen) 3439 { 3440 _NOTE(ARGUNUSED(arg1, arg2, msglen)) 3441 } 3442 3443 /* 3444 * This function handles raw pkt data messages received over the channel. 3445 * Currently, only priority-eth-type frames are received through this mechanism. 3446 * In this case, the frame(data) is present within the message itself which 3447 * is copied into an mblk before switching it. 3448 */ 3449 static void 3450 vsw_process_pkt_data(void *arg1, void *arg2, uint32_t msglen) 3451 { 3452 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg1; 3453 vio_raw_data_msg_t *dpkt = (vio_raw_data_msg_t *)arg2; 3454 uint32_t size; 3455 mblk_t *mp; 3456 vio_mblk_t *vmp; 3457 vsw_t *vswp = ldcp->ldc_vswp; 3458 vgen_stats_t *statsp = &ldcp->ldc_stats; 3459 lane_t *lp = &ldcp->lane_out; 3460 3461 size = msglen - VIO_PKT_DATA_HDRSIZE; 3462 if (size < ETHERMIN || size > lp->mtu) { 3463 (void) atomic_inc_32(&statsp->rx_pri_fail); 3464 DWARN(vswp, "%s(%lld) invalid size(%d)\n", __func__, 3465 ldcp->ldc_id, size); 3466 return; 3467 } 3468 3469 vmp = vio_multipool_allocb(&ldcp->vmp, size + VLAN_TAGSZ); 3470 if (vmp == NULL) { 3471 mp = allocb(size + VLAN_TAGSZ, BPRI_MED); 3472 if (mp == NULL) { 3473 (void) atomic_inc_32(&statsp->rx_pri_fail); 3474 DWARN(vswp, "%s(%lld) allocb failure, " 3475 "unable to process priority frame\n", __func__, 3476 ldcp->ldc_id); 3477 return; 3478 } 3479 } else { 3480 mp = vmp->mp; 3481 } 3482 3483 /* skip over the extra space for vlan tag */ 3484 mp->b_rptr += VLAN_TAGSZ; 3485 3486 /* copy the frame from the payload of raw data msg into the mblk */ 3487 bcopy(dpkt->data, mp->b_rptr, size); 3488 mp->b_wptr = mp->b_rptr + size; 3489 3490 if (vmp != NULL) { 3491 vmp->state = VIO_MBLK_HAS_DATA; 3492 } 3493 3494 /* update stats */ 3495 (void) atomic_inc_64(&statsp->rx_pri_packets); 3496 (void) atomic_add_64(&statsp->rx_pri_bytes, size); 3497 3498 /* 3499 * VLAN_TAGSZ of extra space has been pre-alloc'd if tag is needed. 3500 */ 3501 (void) vsw_vlan_frame_pretag(ldcp->ldc_port, VSW_VNETPORT, mp); 3502 3503 /* switch the frame to destination */ 3504 vswp->vsw_switch_frame(vswp, mp, VSW_VNETPORT, ldcp->ldc_port, NULL); 3505 } 3506 3507 /* 3508 * Process an in-band descriptor message (most likely from 3509 * OBP). 
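* * Worked example of the padding arithmetic below (illustrative numbers): a * datalen of 60 gives nbytes = 64 for the 8-byte aligned ldc_mem_copy(), * while b_wptr is still set from datalen so the 4 pad bytes are never * passed upstream.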
3510 */ 3511 static void 3512 vsw_process_data_ibnd_pkt(vsw_ldc_t *ldcp, void *pkt) 3513 { 3514 vnet_ibnd_desc_t *ibnd_desc; 3515 dring_info_t *dp = NULL; 3516 vsw_private_desc_t *priv_addr = NULL; 3517 vsw_t *vswp = ldcp->ldc_vswp; 3518 mblk_t *mp = NULL; 3519 size_t nbytes = 0; 3520 size_t off = 0; 3521 uint64_t idx = 0; 3522 uint32_t num = 1, len, datalen = 0; 3523 uint64_t ncookies = 0; 3524 int i, rv; 3525 int j = 0; 3526 3527 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3528 3529 ibnd_desc = (vnet_ibnd_desc_t *)pkt; 3530 3531 switch (ibnd_desc->hdr.tag.vio_subtype) { 3532 case VIO_SUBTYPE_INFO: 3533 D1(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3534 3535 if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV)) 3536 return; 3537 3538 /* 3539 * Data is padded to align on an 8 byte boundary, 3540 * nbytes is actual data length, i.e. minus that 3541 * padding. 3542 */ 3543 datalen = ibnd_desc->nbytes; 3544 3545 D2(vswp, "%s(%lld): processing inband desc : " 3546 ": datalen 0x%lx", __func__, ldcp->ldc_id, datalen); 3547 3548 ncookies = ibnd_desc->ncookies; 3549 3550 /* 3551 * allocb(9F) returns an aligned data block. We 3552 * need to ensure that we ask ldc for an aligned 3553 * number of bytes also. 3554 */ 3555 nbytes = datalen; 3556 if (nbytes & 0x7) { 3557 off = 8 - (nbytes & 0x7); 3558 nbytes += off; 3559 } 3560 3561 /* alloc extra space for VLAN_TAG */ 3562 mp = allocb(datalen + 8, BPRI_MED); 3563 if (mp == NULL) { 3564 DERR(vswp, "%s(%lld): allocb failed", 3565 __func__, ldcp->ldc_id); 3566 ldcp->ldc_stats.rx_allocb_fail++; 3567 return; 3568 } 3569 3570 /* skip over the extra space for VLAN_TAG */ 3571 mp->b_rptr += 8; 3572 3573 rv = ldc_mem_copy(ldcp->ldc_handle, (caddr_t)mp->b_rptr, 3574 0, &nbytes, ibnd_desc->memcookie, (uint64_t)ncookies, 3575 LDC_COPY_IN); 3576 3577 if (rv != 0) { 3578 DERR(vswp, "%s(%d): unable to copy in data from " 3579 "%d cookie(s)", __func__, ldcp->ldc_id, ncookies); 3580 freemsg(mp); 3581 ldcp->ldc_stats.ierrors++; 3582 return; 3583 } 3584 3585 D2(vswp, "%s(%d): copied in %ld bytes using %d cookies", 3586 __func__, ldcp->ldc_id, nbytes, ncookies); 3587 3588 /* point to the actual end of data */ 3589 mp->b_wptr = mp->b_rptr + datalen; 3590 ldcp->ldc_stats.ipackets++; 3591 ldcp->ldc_stats.rbytes += datalen; 3592 3593 /* 3594 * We ACK back every in-band descriptor message we process 3595 */ 3596 ibnd_desc->hdr.tag.vio_subtype = VIO_SUBTYPE_ACK; 3597 ibnd_desc->hdr.tag.vio_sid = ldcp->local_session; 3598 (void) vsw_send_msg(ldcp, (void *)ibnd_desc, 3599 sizeof (vnet_ibnd_desc_t), B_TRUE); 3600 3601 /* 3602 * there is extra space alloc'd for VLAN_TAG 3603 */ 3604 (void) vsw_vlan_frame_pretag(ldcp->ldc_port, VSW_VNETPORT, mp); 3605 3606 /* send the packet to be switched */ 3607 vswp->vsw_switch_frame(vswp, mp, VSW_VNETPORT, 3608 ldcp->ldc_port, NULL); 3609 3610 break; 3611 3612 case VIO_SUBTYPE_ACK: 3613 D1(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3614 3615 /* Verify the ACK is valid */ 3616 idx = ibnd_desc->hdr.desc_handle; 3617 3618 if (idx >= vsw_num_descriptors) { 3619 cmn_err(CE_WARN, "!vsw%d: corrupted ACK received " 3620 "(idx %ld)", vswp->instance, idx); 3621 return; 3622 } 3623 3624 if ((dp = ldcp->lane_out.dringp) == NULL) { 3625 DERR(vswp, "%s: no dring found", __func__); 3626 return; 3627 } 3628 3629 len = dp->num_descriptors; 3630 /* 3631 * If the descriptor we are being ACK'ed for is not the 3632 * one we expected, then pkts were lost somewhere, either 3633 * when we tried to send a msg, or a previous ACK msg from 3634 * our peer.
In either case we now reclaim the descriptors 3635 * in the range from the last ACK we received up to the 3636 * current ACK. 3637 */ 3638 if (idx != dp->last_ack_recv) { 3639 DWARN(vswp, "%s: dropped pkts detected, (%ld, %ld)", 3640 __func__, dp->last_ack_recv, idx); 3641 num = idx >= dp->last_ack_recv ? 3642 idx - dp->last_ack_recv + 1: 3643 (len - dp->last_ack_recv + 1) + idx; 3644 } 3645 3646 /* 3647 * When we sent the in-band message to our peer we 3648 * marked the copy in our private ring as READY. We now 3649 * check that the descriptor we are being ACK'ed for is in 3650 * fact READY, i.e. it is one we have shared with our peer. 3651 * 3652 * If it's not we flag an error, but still reset the descriptor 3653 * back to FREE. 3654 */ 3655 for (i = dp->last_ack_recv; j < num; i = (i + 1) % len, j++) { 3656 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i; 3657 mutex_enter(&priv_addr->dstate_lock); 3658 if (priv_addr->dstate != VIO_DESC_READY) { 3659 DERR(vswp, "%s: (%ld) desc at index %ld not " 3660 "READY (0x%lx)", __func__, 3661 ldcp->ldc_id, idx, priv_addr->dstate); 3662 DERR(vswp, "%s: bound %d: ncookies %ld : " 3663 "datalen %ld", __func__, 3664 priv_addr->bound, priv_addr->ncookies, 3665 priv_addr->datalen); 3666 } 3667 D2(vswp, "%s: (%lld) freeing descp at %lld", __func__, 3668 ldcp->ldc_id, idx); 3669 /* release resources associated with sent msg */ 3670 priv_addr->datalen = 0; 3671 priv_addr->dstate = VIO_DESC_FREE; 3672 mutex_exit(&priv_addr->dstate_lock); 3673 } 3674 /* update to next expected value */ 3675 dp->last_ack_recv = (idx + 1) % dp->num_descriptors; 3676 3677 break; 3678 3679 case VIO_SUBTYPE_NACK: 3680 DERR(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3681 3682 /* 3683 * We should only get a NACK if our peer doesn't like 3684 * something about a message we have sent it. If this 3685 * happens we just release the resources associated with 3686 * the message. (We are relying on higher layers to decide 3687 * whether or not to resend.) 3688 */ 3689 3690 /* limit check */ 3691 idx = ibnd_desc->hdr.desc_handle; 3692 3693 if (idx >= vsw_num_descriptors) { 3694 DERR(vswp, "%s: corrupted NACK received (idx %lld)", 3695 __func__, idx); 3696 return; 3697 } 3698 3699 if ((dp = ldcp->lane_out.dringp) == NULL) { 3700 DERR(vswp, "%s: no dring found", __func__); 3701 return; 3702 } 3703 3704 priv_addr = (vsw_private_desc_t *)dp->priv_addr; 3705 3706 /* move to correct location in ring */ 3707 priv_addr += idx; 3708 3709 /* release resources associated with sent msg */ 3710 mutex_enter(&priv_addr->dstate_lock); 3711 priv_addr->datalen = 0; 3712 priv_addr->dstate = VIO_DESC_FREE; 3713 mutex_exit(&priv_addr->dstate_lock); 3714 3715 break; 3716 3717 default: 3718 DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__, 3719 ldcp->ldc_id, ibnd_desc->hdr.tag.vio_subtype); 3720 } 3721 3722 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 3723 } 3724 3725 static void 3726 vsw_process_err_pkt(vsw_ldc_t *ldcp, void *epkt, vio_msg_tag_t *tagp) 3727 { 3728 _NOTE(ARGUNUSED(epkt)) 3729 3730 vsw_t *vswp = ldcp->ldc_vswp; 3731 uint16_t env = tagp->vio_subtype_env; 3732 3733 D1(vswp, "%s (%lld): enter\n", __func__, ldcp->ldc_id); 3734 3735 /* 3736 * Error vio_subtypes have yet to be defined. So for 3737 * the moment we can't do anything.
3738 */ 3739 D2(vswp, "%s: (%x) vio_subtype env", __func__, env); 3740 3741 D1(vswp, "%s (%lld): exit\n", __func__, ldcp->ldc_id); 3742 } 3743 3744 /* transmit the packet over the given port */ 3745 int 3746 vsw_portsend(vsw_port_t *port, mblk_t *mp) 3747 { 3748 mblk_t *mpt; 3749 int count; 3750 vsw_ldc_t *ldcp = port->ldcp; 3751 int status = 0; 3752 3753 count = vsw_vlan_frame_untag(port, VSW_VNETPORT, &mp, &mpt); 3754 if (count != 0) { 3755 status = ldcp->tx(ldcp, mp, mpt, count); 3756 } 3757 return (status); 3758 } 3759 3760 /* 3761 * Break up frames into 2 separate chains: normal and 3762 * priority, based on the frame type. The number of 3763 * priority frames is also counted and returned. 3764 * 3765 * Params: 3766 * vswp: pointer to the instance of vsw 3767 * np: head of packet chain to be broken 3768 * npt: tail of packet chain to be broken 3769 * 3770 * Returns: 3771 * np: head of normal data packets 3772 * npt: tail of normal data packets 3773 * hp: head of high priority packets 3774 * hpt: tail of high priority packets 3775 */ 3776 static uint32_t 3777 vsw_get_pri_packets(vsw_t *vswp, mblk_t **np, mblk_t **npt, 3778 mblk_t **hp, mblk_t **hpt) 3779 { 3780 mblk_t *tmp = NULL; 3781 mblk_t *smp = NULL; 3782 mblk_t *hmp = NULL; /* high prio pkts head */ 3783 mblk_t *hmpt = NULL; /* high prio pkts tail */ 3784 mblk_t *nmp = NULL; /* normal pkts head */ 3785 mblk_t *nmpt = NULL; /* normal pkts tail */ 3786 uint32_t count = 0; 3787 int i; 3788 struct ether_header *ehp; 3789 uint32_t num_types; 3790 uint16_t *types; 3791 3792 tmp = *np; 3793 while (tmp != NULL) { 3794 3795 smp = tmp; 3796 tmp = tmp->b_next; 3797 smp->b_next = NULL; 3798 smp->b_prev = NULL; 3799 3800 ehp = (struct ether_header *)smp->b_rptr; 3801 num_types = vswp->pri_num_types; 3802 types = vswp->pri_types; 3803 for (i = 0; i < num_types; i++) { 3804 if (ehp->ether_type == types[i]) { 3805 /* high priority frame */ 3806 3807 if (hmp != NULL) { 3808 hmpt->b_next = smp; 3809 hmpt = smp; 3810 } else { 3811 hmp = hmpt = smp; 3812 } 3813 count++; 3814 break; 3815 } 3816 } 3817 if (i == num_types) { 3818 /* normal data frame */ 3819 3820 if (nmp != NULL) { 3821 nmpt->b_next = smp; 3822 nmpt = smp; 3823 } else { 3824 nmp = nmpt = smp; 3825 } 3826 } 3827 } 3828 3829 *hp = hmp; 3830 *hpt = hmpt; 3831 *np = nmp; 3832 *npt = nmpt; 3833 3834 return (count); 3835 } 3836 3837 /* 3838 * Wrapper function to transmit normal and/or priority frames over the channel. 3839 */ 3840 static int 3841 vsw_ldctx_pri(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count) 3842 { 3843 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 3844 mblk_t *tmp; 3845 mblk_t *smp; 3846 mblk_t *hmp; /* high prio pkts head */ 3847 mblk_t *hmpt; /* high prio pkts tail */ 3848 mblk_t *nmp; /* normal pkts head */ 3849 mblk_t *nmpt; /* normal pkts tail */ 3850 uint32_t n = 0; 3851 vsw_t *vswp = ldcp->ldc_vswp; 3852 3853 ASSERT(VSW_PRI_ETH_DEFINED(vswp)); 3854 ASSERT(count != 0); 3855 3856 nmp = mp; 3857 nmpt = mpt; 3858 3859 /* gather any priority frames from the chain of packets */ 3860 n = vsw_get_pri_packets(vswp, &nmp, &nmpt, &hmp, &hmpt); 3861 3862 /* transmit priority frames */ 3863 tmp = hmp; 3864 while (tmp != NULL) { 3865 smp = tmp; 3866 tmp = tmp->b_next; 3867 smp->b_next = NULL; 3868 vsw_ldcsend_pkt(ldcp, smp); 3869 } 3870 3871 count -= n; 3872 3873 if (count == 0) { 3874 /* no normal data frames to process */ 3875 return (0); 3876 } 3877 3878 return (vsw_ldctx(ldcp, nmp, nmpt, count)); 3879 } 3880 3881 /* 3882 * Wrapper function to transmit normal frames over the channel.
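* * In outline: if a dedicated tx thread is running, the chain is appended to * tx_mhead/tx_mtail under tx_thr_lock and the thread is signalled; otherwise * each mblk is sent synchronously via vsw_ldcsend() with a single try.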
 */
static int
vsw_ldctx(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count)
{
	vsw_ldc_t	*ldcp = (vsw_ldc_t *)arg;
	mblk_t		*tmp = NULL;

	ASSERT(count != 0);
	/*
	 * If the TX thread is enabled, then queue the
	 * ordinary frames and signal the tx thread.
	 */
	if (ldcp->tx_thread != NULL) {

		mutex_enter(&ldcp->tx_thr_lock);

		if ((ldcp->tx_cnt + count) >= vsw_max_tx_qcount) {
			/*
			 * If we have reached the queue limit,
			 * do not queue new packets; drop them.
			 */
			ldcp->ldc_stats.tx_qfull += count;
			mutex_exit(&ldcp->tx_thr_lock);
			freemsgchain(mp);
			goto exit;
		}
		if (ldcp->tx_mhead == NULL) {
			ldcp->tx_mhead = mp;
			ldcp->tx_mtail = mpt;
			cv_signal(&ldcp->tx_thr_cv);
		} else {
			ldcp->tx_mtail->b_next = mp;
			ldcp->tx_mtail = mpt;
		}
		ldcp->tx_cnt += count;
		mutex_exit(&ldcp->tx_thr_lock);
	} else {
		while (mp != NULL) {
			tmp = mp->b_next;
			mp->b_next = mp->b_prev = NULL;
			(void) vsw_ldcsend(ldcp, mp, 1);
			mp = tmp;
		}
	}

exit:
	return (0);
}

/*
 * This function transmits the frame in the payload of a raw data
 * (VIO_PKT_DATA) message.  Thus, it provides an Out-Of-Band path to
 * send special frames with high priorities, without going through
 * the normal data path which uses the descriptor ring mechanism.
 */
static void
vsw_ldcsend_pkt(vsw_ldc_t *ldcp, mblk_t *mp)
{
	vio_raw_data_msg_t	*pkt;
	mblk_t			*bp;
	mblk_t			*nmp = NULL;
	vio_mblk_t		*vmp;
	caddr_t			dst;
	uint32_t		mblksz;
	uint32_t		size;
	uint32_t		nbytes;
	int			rv;
	vsw_t			*vswp = ldcp->ldc_vswp;
	vgen_stats_t		*statsp = &ldcp->ldc_stats;

	if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) ||
	    (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) {
		(void) atomic_inc_32(&statsp->tx_pri_fail);
		DWARN(vswp, "%s(%lld) status(%d) lstate(0x%llx), dropping "
		    "packet\n", __func__, ldcp->ldc_id, ldcp->ldc_status,
		    ldcp->lane_out.lstate);
		goto send_pkt_exit;
	}

	size = msgsize(mp);

	/*
	 * Is the frame larger than the available payload length of a
	 * raw data message?
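	 *
	 * The available payload is (ldcp->msglen - VIO_PKT_DATA_HDRSIZE).
	 * Purely illustrative arithmetic, with hypothetical values: a
	 * 4096-byte channel message length and a 48-byte raw-data header
	 * would allow frames of up to 4096 - 48 = 4048 bytes here.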
	 */
	if (size > (size_t)(ldcp->msglen - VIO_PKT_DATA_HDRSIZE)) {
		(void) atomic_inc_32(&statsp->tx_pri_fail);
		DWARN(vswp, "%s(%lld) invalid size(%d)\n", __func__,
		    ldcp->ldc_id, size);
		goto send_pkt_exit;
	}

	if (size < ETHERMIN)
		size = ETHERMIN;

	/* alloc space for a raw data message */
	vmp = vio_allocb(vswp->pri_tx_vmp);
	if (vmp == NULL) {
		(void) atomic_inc_32(&statsp->tx_pri_fail);
		DWARN(vswp, "vio_allocb failed\n");
		goto send_pkt_exit;
	} else {
		nmp = vmp->mp;
	}
	pkt = (vio_raw_data_msg_t *)nmp->b_rptr;

	/* copy frame into the payload of raw data message */
	dst = (caddr_t)pkt->data;
	for (bp = mp; bp != NULL; bp = bp->b_cont) {
		mblksz = MBLKL(bp);
		bcopy(bp->b_rptr, dst, mblksz);
		dst += mblksz;
	}

	vmp->state = VIO_MBLK_HAS_DATA;

	/* setup the raw data msg */
	pkt->tag.vio_msgtype = VIO_TYPE_DATA;
	pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;
	pkt->tag.vio_subtype_env = VIO_PKT_DATA;
	pkt->tag.vio_sid = ldcp->local_session;
	nbytes = VIO_PKT_DATA_HDRSIZE + size;

	/* send the msg over ldc */
	rv = vsw_send_msg(ldcp, (void *)pkt, nbytes, B_TRUE);
	if (rv != 0) {
		(void) atomic_inc_32(&statsp->tx_pri_fail);
		DWARN(vswp, "%s(%lld) Error sending priority frame\n",
		    __func__, ldcp->ldc_id);
		goto send_pkt_exit;
	}

	/* update stats */
	(void) atomic_inc_64(&statsp->tx_pri_packets);
	(void) atomic_add_64(&statsp->tx_pri_bytes, size);

send_pkt_exit:
	if (nmp != NULL)
		freemsg(nmp);
	freemsg(mp);
}

/*
 * Transmit the packet over the given LDC channel.
 *
 * The 'retries' argument indicates how many times a packet
 * is retried before it is dropped.  Note, the retry is done
 * only for a resource related failure; for all other failures
 * the packet is dropped immediately.
 */
static int
vsw_ldcsend(vsw_ldc_t *ldcp, mblk_t *mp, uint32_t retries)
{
	int		i;
	int		rc = 0;	/* no descriptors reclaimed yet */
	int		status = 0;
	vsw_port_t	*port = ldcp->ldc_port;
	dring_info_t	*dp = NULL;
	lane_t		*lp = &ldcp->lane_out;

	for (i = 0; i < retries; ) {
		/*
		 * Send the message out using the appropriate
		 * transmit function, which will free the mblk when it
		 * is finished with it.
		 */
		mutex_enter(&port->tx_lock);
		if (port->transmit != NULL) {
			status = (*port->transmit)(ldcp, mp);
		}
		if (status == LDC_TX_SUCCESS) {
			mutex_exit(&port->tx_lock);
			break;
		}
		i++;	/* increment the counter here */

		/* If it's the last retry, then update the oerror */
		if ((i == retries) && (status == LDC_TX_NORESOURCES)) {
			ldcp->ldc_stats.oerrors++;
		}
		mutex_exit(&port->tx_lock);

		if (status != LDC_TX_NORESOURCES) {
			/*
			 * No retrying required for errors unrelated
			 * to resources.
			 */
			break;
		}
		if (((dp = ldcp->lane_out.dringp) != NULL) &&
		    ((VSW_VER_GTEQ(ldcp, 1, 2) &&
		    (ldcp->lane_out.xfer_mode & VIO_DRING_MODE_V1_2)) ||
		    ((VSW_VER_LT(ldcp, 1, 2) &&
		    (ldcp->lane_out.xfer_mode == VIO_DRING_MODE_V1_0))))) {

			/* Need to reclaim in TxDring mode. */
			if (lp->dring_mode == VIO_TX_DRING) {
				rc = vsw_reclaim_dring(dp, dp->end_idx);
			}

		} else {
			/*
			 * If there is no dring or the xfer_mode is
			 * set to DESC_MODE (i.e., OBP), then simply
			 * break here.
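			 *
			 * For reference, the retry contract at the call
			 * sites in this file: the synchronous path in
			 * vsw_ldctx() makes a single attempt, while the
			 * tx worker thread allows resource failures to
			 * be retried:
			 *
			 *	(void) vsw_ldcsend(ldcp, mp, 1);
			 *	(void) vsw_ldcsend(ldcp, mp,
			 *	    vsw_ldc_tx_retries);
			 *
			 * Only LDC_TX_NORESOURCES sends this loop around
			 * for another pass.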
			 */
			break;
		}

		/*
		 * Delay only if none were reclaimed
		 * and it's not the last retry.
		 */
		if ((rc == 0) && (i < retries)) {
			delay(drv_usectohz(vsw_ldc_tx_delay));
		}
	}
	freemsg(mp);
	return (status);
}

/*
 * Send an in-band descriptor message over ldc.
 */
static int
vsw_descrsend(vsw_ldc_t *ldcp, mblk_t *mp)
{
	vsw_t		*vswp = ldcp->ldc_vswp;
	vnet_ibnd_desc_t	ibnd_msg;
	vsw_private_desc_t	*priv_desc = NULL;
	dring_info_t	*dp = NULL;
	size_t		n, size = 0;
	caddr_t		bufp;
	mblk_t		*bp;
	int		idx, i;
	int		status = LDC_TX_SUCCESS;
	static int	warn_msg = 1;
	lane_t		*lp = &ldcp->lane_out;

	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);

	ASSERT(mp != NULL);

	if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) ||
	    (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) {
		DERR(vswp, "%s(%lld) status(%d) state (0x%llx), dropping pkt",
		    __func__, ldcp->ldc_id, ldcp->ldc_status,
		    ldcp->lane_out.lstate);
		ldcp->ldc_stats.oerrors++;
		return (LDC_TX_FAILURE);
	}

	/*
	 * The dring here is used as an internal buffer,
	 * rather than a transfer channel.
	 */
	if ((dp = ldcp->lane_out.dringp) == NULL) {
		DERR(vswp, "%s(%lld): no dring for outbound lane",
		    __func__, ldcp->ldc_id);
		DERR(vswp, "%s(%lld) status(%d) state (0x%llx)", __func__,
		    ldcp->ldc_id, ldcp->ldc_status, ldcp->lane_out.lstate);
		ldcp->ldc_stats.oerrors++;
		return (LDC_TX_FAILURE);
	}

	size = msgsize(mp);
	if (size > (size_t)lp->mtu) {
		DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__,
		    ldcp->ldc_id, size);
		ldcp->ldc_stats.oerrors++;
		return (LDC_TX_FAILURE);
	}

	/*
	 * Find a free descriptor in our buffer ring
	 */
	if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) {
		if (warn_msg) {
			DERR(vswp, "%s(%lld): no descriptor available "
			    "for ring at 0x%llx", __func__, ldcp->ldc_id, dp);
			warn_msg = 0;
		}

		/* nothing more we can do */
		status = LDC_TX_NORESOURCES;
		goto vsw_descrsend_free_exit;
	} else {
		D2(vswp, "%s(%lld): free private descriptor found at pos "
		    "%ld addr 0x%x\n", __func__, ldcp->ldc_id, idx, priv_desc);
		warn_msg = 1;
	}

	/* copy data into the descriptor */
	bufp = priv_desc->datap;
	for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) {
		n = MBLKL(bp);
		bcopy(bp->b_rptr, bufp, n);
		bufp += n;
	}

	priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size;

	/* create and send the in-band descp msg */
	ibnd_msg.hdr.tag.vio_msgtype = VIO_TYPE_DATA;
	ibnd_msg.hdr.tag.vio_subtype = VIO_SUBTYPE_INFO;
	ibnd_msg.hdr.tag.vio_subtype_env = VIO_DESC_DATA;
	ibnd_msg.hdr.tag.vio_sid = ldcp->local_session;

	/*
	 * Copy the mem cookies describing the data from the
	 * private region of the descriptor ring into the inband
	 * descriptor.
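	 *
	 * The completed message sent below has this shape (all fields
	 * set by the assignments that follow):
	 *
	 *	ibnd_msg.hdr.tag		VIO_TYPE_DATA / VIO_SUBTYPE_INFO /
	 *					VIO_DESC_DATA, sid = local_session
	 *	ibnd_msg.memcookie[]		copies of priv_desc->memcookie[]
	 *	ibnd_msg.hdr.desc_handle	idx of the private descriptor
	 *	ibnd_msg.ncookies		priv_desc->ncookies
	 *	ibnd_msg.nbytes			frame size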
	 */
	for (i = 0; i < priv_desc->ncookies; i++) {
		bcopy(&priv_desc->memcookie[i], &ibnd_msg.memcookie[i],
		    sizeof (ldc_mem_cookie_t));
	}

	ibnd_msg.hdr.desc_handle = idx;
	ibnd_msg.ncookies = priv_desc->ncookies;
	ibnd_msg.nbytes = size;

	ldcp->ldc_stats.opackets++;
	ldcp->ldc_stats.obytes += size;

	(void) vsw_send_msg(ldcp, (void *)&ibnd_msg,
	    sizeof (vnet_ibnd_desc_t), B_TRUE);

vsw_descrsend_free_exit:

	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
	return (status);
}

static void
vsw_send_ver(void *arg)
{
	vsw_ldc_t	*ldcp = (vsw_ldc_t *)arg;
	vsw_t		*vswp = ldcp->ldc_vswp;
	lane_t		*lp = &ldcp->lane_out;
	vio_ver_msg_t	ver_msg;

	D1(vswp, "%s enter", __func__);

	ver_msg.tag.vio_msgtype = VIO_TYPE_CTRL;
	ver_msg.tag.vio_subtype = VIO_SUBTYPE_INFO;
	ver_msg.tag.vio_subtype_env = VIO_VER_INFO;
	ver_msg.tag.vio_sid = ldcp->local_session;

	if (vsw_obp_ver_proto_workaround == B_FALSE) {
		ver_msg.ver_major = vsw_versions[0].ver_major;
		ver_msg.ver_minor = vsw_versions[0].ver_minor;
	} else {
		/* use the major,minor that we've ack'd */
		lane_t	*lpi = &ldcp->lane_in;
		ver_msg.ver_major = lpi->ver_major;
		ver_msg.ver_minor = lpi->ver_minor;
	}
	ver_msg.dev_class = VDEV_NETWORK_SWITCH;

	lp->lstate |= VSW_VER_INFO_SENT;
	lp->ver_major = ver_msg.ver_major;
	lp->ver_minor = ver_msg.ver_minor;

	DUMP_TAG(ver_msg.tag);

	(void) vsw_send_msg(ldcp, &ver_msg, sizeof (vio_ver_msg_t), B_TRUE);

	D1(vswp, "%s (%lld): exit", __func__, ldcp->ldc_id);
}

static void
vsw_send_attr(vsw_ldc_t *ldcp)
{
	vsw_t			*vswp = ldcp->ldc_vswp;
	lane_t			*lp = &ldcp->lane_out;
	vnet_attr_msg_t		attr_msg;

	D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id);

	/*
	 * Subtype is set to INFO by default
	 */
	attr_msg.tag.vio_msgtype = VIO_TYPE_CTRL;
	attr_msg.tag.vio_subtype = VIO_SUBTYPE_INFO;
	attr_msg.tag.vio_subtype_env = VIO_ATTR_INFO;
	attr_msg.tag.vio_sid = ldcp->local_session;

	/* payload copied from default settings for lane */
	attr_msg.mtu = lp->mtu;
	attr_msg.addr_type = lp->addr_type;
	attr_msg.xfer_mode = lp->xfer_mode;
	attr_msg.ack_freq = lp->ack_freq;
	attr_msg.options = lp->dring_mode;

	READ_ENTER(&vswp->if_lockrw);
	attr_msg.addr = vnet_macaddr_strtoul((vswp->if_addr).ether_addr_octet);
	RW_EXIT(&vswp->if_lockrw);

	ldcp->lane_out.lstate |= VSW_ATTR_INFO_SENT;

	DUMP_TAG(attr_msg.tag);

	(void) vsw_send_msg(ldcp, &attr_msg, sizeof (vnet_attr_msg_t), B_TRUE);

	D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id);
}

static void
vsw_send_dring_info(vsw_ldc_t *ldcp)
{
	int		msgsize;
	void		*msg;
	vsw_t		*vswp = ldcp->ldc_vswp;
	vsw_port_t	*port = ldcp->ldc_port;
	lane_t		*lp = &ldcp->lane_out;
	vgen_stats_t	*statsp = &ldcp->ldc_stats;

	D1(vswp, "%s: (%ld) enter", __func__, ldcp->ldc_id);

	/* dring mode has been negotiated in attr phase; save in stats */
	statsp->dring_mode = lp->dring_mode;

	if (lp->dring_mode == VIO_RX_DRING_DATA) {
		/*
		 * Change the transmit routine for RxDringData mode.
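		 *
		 * Summary of the per-mode wiring done below:
		 *
		 *	VIO_RX_DRING_DATA:
		 *		port->transmit = vsw_dringsend_shm
		 *		ldcp->rx_dringdata = vsw_process_dringdata_shm
		 *		worker: vsw_ldc_rcv_worker
		 *	TxDring (default):
		 *		ldcp->rx_dringdata = vsw_process_dringdata
		 *		worker: vsw_ldc_msg_worker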
		 */
		port->transmit = vsw_dringsend_shm;
		msg = (void *) vsw_create_rx_dring_info(ldcp);
		if (msg == NULL) {
			return;
		}
		msgsize =
		    VNET_DRING_REG_EXT_MSG_SIZE(lp->dringp->data_ncookies);
		ldcp->rcv_thread = thread_create(NULL, 2 * DEFAULTSTKSZ,
		    vsw_ldc_rcv_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri);
		ldcp->rx_dringdata = vsw_process_dringdata_shm;
	} else {
		msg = (void *) vsw_create_tx_dring_info(ldcp);
		if (msg == NULL) {
			return;
		}
		msgsize = sizeof (vio_dring_reg_msg_t);
		ldcp->msg_thread = thread_create(NULL, 2 * DEFAULTSTKSZ,
		    vsw_ldc_msg_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri);
		ldcp->rx_dringdata = vsw_process_dringdata;
	}

	lp->lstate |= VSW_DRING_INFO_SENT;
	DUMP_TAG_PTR((vio_msg_tag_t *)msg);
	(void) vsw_send_msg(ldcp, msg, msgsize, B_TRUE);
	kmem_free(msg, msgsize);

	D1(vswp, "%s: (%ld) exit", __func__, ldcp->ldc_id);
}

static void
vsw_send_rdx(vsw_ldc_t *ldcp)
{
	vsw_t		*vswp = ldcp->ldc_vswp;
	vio_rdx_msg_t	rdx_msg;

	D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id);

	rdx_msg.tag.vio_msgtype = VIO_TYPE_CTRL;
	rdx_msg.tag.vio_subtype = VIO_SUBTYPE_INFO;
	rdx_msg.tag.vio_subtype_env = VIO_RDX;
	rdx_msg.tag.vio_sid = ldcp->local_session;

	ldcp->lane_in.lstate |= VSW_RDX_INFO_SENT;

	DUMP_TAG(rdx_msg.tag);

	(void) vsw_send_msg(ldcp, &rdx_msg, sizeof (vio_rdx_msg_t), B_TRUE);

	D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id);
}

/*
 * Remove the specified address from the list of addresses maintained
 * for the given port or vsw instance (depending on devtype).
 */
mcst_addr_t *
vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr)
{
	vsw_t		*vswp = NULL;
	vsw_port_t	*port = NULL;
	mcst_addr_t	*prev_p = NULL;
	mcst_addr_t	*curr_p = NULL;

	D1(NULL, "%s: enter : devtype %d : addr 0x%llx",
	    __func__, devtype, addr);

	if (devtype == VSW_VNETPORT) {
		port = (vsw_port_t *)arg;
		mutex_enter(&port->mca_lock);
		prev_p = curr_p = port->mcap;
	} else {
		vswp = (vsw_t *)arg;
		mutex_enter(&vswp->mca_lock);
		prev_p = curr_p = vswp->mcap;
	}

	while (curr_p != NULL) {
		if (curr_p->addr == addr) {
			D2(NULL, "%s: address found", __func__);
			/* match found */
			if (prev_p == curr_p) {
				/* list head */
				if (devtype == VSW_VNETPORT)
					port->mcap = curr_p->nextp;
				else
					vswp->mcap = curr_p->nextp;
			} else {
				prev_p->nextp = curr_p->nextp;
			}
			break;
		} else {
			prev_p = curr_p;
			curr_p = curr_p->nextp;
		}
	}

	if (devtype == VSW_VNETPORT)
		mutex_exit(&port->mca_lock);
	else
		mutex_exit(&vswp->mca_lock);

	D1(NULL, "%s: exit", __func__);

	return (curr_p);
}

/*
 * Create a ring consisting of just a private portion and link
 * it into the list of rings for the outbound lane.
 *
 * This type of ring is used primarily for temporary data
 * storage (i.e. as data buffers).
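 *
 * Usage sketch (see vsw_descrsend() above): the ring serves purely as a
 * pool of pre-allocated transmit buffers:
 *
 *	if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) == 0) {
 *		(copy the frame into priv_desc->datap, then describe
 *		it to the peer with an in-band descriptor message)
 *	}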
 */
void
vsw_create_privring(vsw_ldc_t *ldcp)
{
	dring_info_t	*dp;
	vsw_t		*vswp = ldcp->ldc_vswp;

	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);

	dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP);
	mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL);
	ldcp->lane_out.dringp = dp;

	/* no public section */
	dp->pub_addr = NULL;
	dp->priv_addr = kmem_zalloc(
	    (sizeof (vsw_private_desc_t) * vsw_num_descriptors), KM_SLEEP);
	dp->num_descriptors = vsw_num_descriptors;

	if (vsw_setup_tx_dring(ldcp, dp)) {
		DERR(vswp, "%s: setup of ring failed", __func__);
		vsw_destroy_tx_dring(ldcp);
		return;
	}

	/* haven't used any descriptors yet */
	dp->end_idx = 0;
	dp->restart_reqd = B_TRUE;

	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
}

/*
 * Set the default lane attributes.  These are copied into
 * the attr msg we send to our peer.  If they are not acceptable
 * then (currently) the handshake ends.
 */
static void
vsw_set_lane_attr(vsw_t *vswp, lane_t *lp)
{
	bzero(lp, sizeof (lane_t));

	READ_ENTER(&vswp->if_lockrw);
	ether_copy(&(vswp->if_addr), &(lp->addr));
	RW_EXIT(&vswp->if_lockrw);

	lp->mtu = vswp->max_frame_size;
	lp->addr_type = ADDR_TYPE_MAC;
	lp->xfer_mode = VIO_DRING_MODE_V1_0;
	lp->ack_freq = 0;	/* for shared mode */
	lp->seq_num = VNET_ISS;
}

/*
 * Map the descriptor ring exported by the peer.
 */
static dring_info_t *
vsw_map_dring(vsw_ldc_t *ldcp, void *pkt)
{
	dring_info_t	*dp = NULL;
	lane_t		*lp = &ldcp->lane_out;

	if (lp->dring_mode == VIO_RX_DRING_DATA) {
		/*
		 * In RxDringData mode, the dring that we map in
		 * becomes our transmit descriptor ring.
		 */
		dp = vsw_map_tx_dring(ldcp, pkt);
	} else {
		/*
		 * In TxDring mode, the dring that we map in
		 * becomes our receive descriptor ring.
		 */
		dp = vsw_map_rx_dring(ldcp, pkt);
	}
	return (dp);
}

/*
 * Common dring mapping function used in both TxDring and RxDringData modes.
 */
dring_info_t *
vsw_map_dring_cmn(vsw_ldc_t *ldcp, vio_dring_reg_msg_t *dring_pkt)
{
	int		rv;
	dring_info_t	*dp;
	ldc_mem_info_t	minfo;
	vsw_t		*vswp = ldcp->ldc_vswp;

	/*
	 * If the dring params are unacceptable then we NACK back.
	 */
	if ((dring_pkt->num_descriptors == 0) ||
	    (dring_pkt->descriptor_size == 0) ||
	    (dring_pkt->ncookies != 1)) {
		DERR(vswp, "%s (%lld): invalid dring info",
		    __func__, ldcp->ldc_id);
		return (NULL);
	}

	dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP);

	dp->num_descriptors = dring_pkt->num_descriptors;
	dp->descriptor_size = dring_pkt->descriptor_size;
	dp->options = dring_pkt->options;
	dp->dring_ncookies = dring_pkt->ncookies;

	/*
	 * Note: should only get one cookie.  Enforced in
	 * the ldc layer.
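	 *
	 * Import sequence from here on:
	 *
	 *	ldc_mem_dring_map()	map the peer's ring (LDC_DIRECT_MAP)
	 *	ldc_mem_dring_info()	fetch vaddr/mtype of the mapping
	 *	dp->pub_addr, dp->dring_mtype
	 *				cached for use by the data path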
	 */
	bcopy(&dring_pkt->cookie[0], &dp->dring_cookie[0],
	    sizeof (ldc_mem_cookie_t));

	rv = ldc_mem_dring_map(ldcp->ldc_handle, &dp->dring_cookie[0],
	    dp->dring_ncookies, dp->num_descriptors, dp->descriptor_size,
	    LDC_DIRECT_MAP, &(dp->dring_handle));
	if (rv != 0) {
		goto fail;
	}

	rv = ldc_mem_dring_info(dp->dring_handle, &minfo);
	if (rv != 0) {
		goto fail;
	}
	/* store the address of the ring */
	dp->pub_addr = minfo.vaddr;

	/* cache the dring mtype */
	dp->dring_mtype = minfo.mtype;

	/* no private section as we are importing */
	dp->priv_addr = NULL;

	/*
	 * Using a simple monotonically increasing int for the ident
	 * at the moment.
	 */
	dp->ident = ldcp->next_ident;
	ldcp->next_ident++;

	/*
	 * Acknowledge it; we send back a unique dring identifier that
	 * the sending side will use in the future to refer to this
	 * descriptor ring.
	 */
	dring_pkt->dring_ident = dp->ident;

	return (dp);
fail:
	if (dp->dring_handle != NULL) {
		(void) ldc_mem_dring_unmap(dp->dring_handle);
	}
	kmem_free(dp, sizeof (*dp));
	return (NULL);
}

/*
 * Unmap the descriptor ring exported by the peer.
 */
static void
vsw_unmap_dring(vsw_ldc_t *ldcp)
{
	lane_t	*lane_out = &ldcp->lane_out;

	if (lane_out->dring_mode == VIO_RX_DRING_DATA) {
		vsw_unmap_tx_dring(ldcp);
	} else {
		vsw_unmap_rx_dring(ldcp);
	}
}

/*
 * Map the shared memory data buffer area exported by the peer.
 * Used in RxDringData mode only.
 */
static int
vsw_map_data(vsw_ldc_t *ldcp, dring_info_t *dp, void *pkt)
{
	int			rv;
	vio_dring_reg_ext_msg_t	*emsg;
	vio_dring_reg_msg_t	*msg = pkt;
	uint8_t			*buf = (uint8_t *)msg->cookie;
	vsw_t			*vswp = ldcp->ldc_vswp;
	ldc_mem_info_t		minfo;

	/* skip over dring cookies */
	ASSERT(msg->ncookies == 1);
	buf += (msg->ncookies * sizeof (ldc_mem_cookie_t));

	emsg = (vio_dring_reg_ext_msg_t *)buf;
	if (emsg->data_ncookies > VNET_DATA_AREA_COOKIES) {
		return (1);
	}

	/* save # of data area cookies */
	dp->data_ncookies = emsg->data_ncookies;

	/* save data area size */
	dp->data_sz = emsg->data_area_size;

	/* allocate ldc mem handle for data area */
	rv = ldc_mem_alloc_handle(ldcp->ldc_handle, &dp->data_handle);
	if (rv != 0) {
		cmn_err(CE_WARN, "ldc_mem_alloc_handle failed\n");
		DWARN(vswp, "%s (%lld) ldc_mem_alloc_handle() failed: %d\n",
		    __func__, ldcp->ldc_id, rv);
		return (1);
	}

	/* map the data area */
	rv = ldc_mem_map(dp->data_handle, emsg->data_cookie,
	    emsg->data_ncookies, LDC_DIRECT_MAP, LDC_MEM_R,
	    (caddr_t *)&dp->data_addr, NULL);
	if (rv != 0) {
		cmn_err(CE_WARN, "ldc_mem_map failed\n");
		DWARN(vswp, "%s (%lld) ldc_mem_map() failed: %d\n",
		    __func__, ldcp->ldc_id, rv);
		return (1);
	}

	/* get the map info */
	rv = ldc_mem_info(dp->data_handle, &minfo);
	if (rv != 0) {
		cmn_err(CE_WARN, "ldc_mem_info failed\n");
		DWARN(vswp, "%s (%lld) ldc_mem_info() failed: %d\n",
		    __func__, ldcp->ldc_id, rv);
		return (1);
	}

	if (minfo.mtype != LDC_DIRECT_MAP) {
		DWARN(vswp, "%s (%lld) mtype(%d) is not direct map\n",
		    __func__, ldcp->ldc_id, minfo.mtype);
		return (1);
	}
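
	/*
	 * At this point the peer's entire data buffer area is mapped
	 * directly (LDC_DIRECT_MAP) into our address space, spanning
	 *
	 *	dp->data_addr .. dp->data_addr + dp->data_sz
	 *
	 * The cookie list describing that area is saved below.
	 */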

	/* allocate memory for data area cookies */
	dp->data_cookie = kmem_zalloc(emsg->data_ncookies *
	    sizeof (ldc_mem_cookie_t), KM_SLEEP);

	/* save data area cookies */
	bcopy(emsg->data_cookie, dp->data_cookie,
	    emsg->data_ncookies * sizeof (ldc_mem_cookie_t));

	return (0);
}

/*
 * Reset and free all the resources associated with the channel.
 */
static void
vsw_free_lane_resources(vsw_ldc_t *ldcp, uint64_t dir)
{
	lane_t	*lp;

	D1(ldcp->ldc_vswp, "%s (%lld): enter", __func__, ldcp->ldc_id);

	if (dir == INBOUND) {
		D2(ldcp->ldc_vswp, "%s: freeing INBOUND lane"
		    " of channel %lld", __func__, ldcp->ldc_id);
		lp = &ldcp->lane_in;
	} else {
		D2(ldcp->ldc_vswp, "%s: freeing OUTBOUND lane"
		    " of channel %lld", __func__, ldcp->ldc_id);
		lp = &ldcp->lane_out;
	}

	lp->lstate = VSW_LANE_INACTIV;
	lp->seq_num = VNET_ISS;

	if (dir == INBOUND) {
		/* Unmap the remote dring which is imported from the peer */
		vsw_unmap_dring(ldcp);
	} else {
		/* Destroy the local dring which is exported to the peer */
		vsw_destroy_dring(ldcp);
	}

	D1(ldcp->ldc_vswp, "%s (%lld): exit", __func__, ldcp->ldc_id);
}

/*
 * Destroy the descriptor ring.
 */
static void
vsw_destroy_dring(vsw_ldc_t *ldcp)
{
	lane_t	*lp = &ldcp->lane_out;

	if (lp->dring_mode == VIO_RX_DRING_DATA) {
		vsw_destroy_rx_dring(ldcp);
	} else {
		vsw_destroy_tx_dring(ldcp);
	}
}

/*
 * vsw_ldc_tx_worker -- A per-LDC worker thread to transmit data.
 * This thread is woken up by vsw_ldctx() (invoked via vsw_portsend())
 * when packets are queued for transmission.
 */
static void
vsw_ldc_tx_worker(void *arg)
{
	callb_cpr_t	cprinfo;
	vsw_ldc_t	*ldcp = (vsw_ldc_t *)arg;
	vsw_t		*vswp = ldcp->ldc_vswp;
	mblk_t		*mp;
	mblk_t		*tmp;

	D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id);
	CALLB_CPR_INIT(&cprinfo, &ldcp->tx_thr_lock, callb_generic_cpr,
	    "vnet_tx_thread");
	mutex_enter(&ldcp->tx_thr_lock);
	while (!(ldcp->tx_thr_flags & VSW_WTHR_STOP)) {

		CALLB_CPR_SAFE_BEGIN(&cprinfo);
		/*
		 * Wait until data is received or a stop
		 * request is received.
		 */
		while (!(ldcp->tx_thr_flags & VSW_WTHR_STOP) &&
		    (ldcp->tx_mhead == NULL)) {
			cv_wait(&ldcp->tx_thr_cv, &ldcp->tx_thr_lock);
		}
		CALLB_CPR_SAFE_END(&cprinfo, &ldcp->tx_thr_lock)

		/*
		 * First process the stop request.
		 */
		if (ldcp->tx_thr_flags & VSW_WTHR_STOP) {
			D2(vswp, "%s(%lld):tx thread stopped\n",
			    __func__, ldcp->ldc_id);
			break;
		}
		mp = ldcp->tx_mhead;
		ldcp->tx_mhead = ldcp->tx_mtail = NULL;
		ldcp->tx_cnt = 0;
		mutex_exit(&ldcp->tx_thr_lock);
		D2(vswp, "%s(%lld):calling vsw_ldcsend\n",
		    __func__, ldcp->ldc_id);
		while (mp != NULL) {
			tmp = mp->b_next;
			mp->b_next = mp->b_prev = NULL;
			(void) vsw_ldcsend(ldcp, mp, vsw_ldc_tx_retries);
			mp = tmp;
		}
		mutex_enter(&ldcp->tx_thr_lock);
	}

	/*
	 * Update the run status and wake up the thread that
	 * sent the stop request.
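	 *
	 * Stop handshake sketch (the requesting side is
	 * vsw_stop_tx_thread() below):
	 *
	 *	requester			worker (this thread)
	 *	---------			--------------------
	 *	set VSW_WTHR_STOP		wake up, see the flag,
	 *	cv_signal(&tx_thr_cv)		break out of the loop
	 *	thread_join(tid)		clear flag and tx_thread,
	 *					then thread_exit()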
	 */
	ldcp->tx_thr_flags &= ~VSW_WTHR_STOP;
	ldcp->tx_thread = NULL;
	CALLB_CPR_EXIT(&cprinfo);
	D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id);
	thread_exit();
}

/* vsw_stop_tx_thread -- Co-ordinate with the transmit thread to stop it */
static void
vsw_stop_tx_thread(vsw_ldc_t *ldcp)
{
	kt_did_t	tid = 0;
	vsw_t		*vswp = ldcp->ldc_vswp;

	D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id);
	/*
	 * Send a stop request by setting the stop flag and
	 * wait until the transmit thread stops.
	 */
	mutex_enter(&ldcp->tx_thr_lock);
	if (ldcp->tx_thread != NULL) {
		tid = ldcp->tx_thread->t_did;
		ldcp->tx_thr_flags |= VSW_WTHR_STOP;
		cv_signal(&ldcp->tx_thr_cv);
	}
	mutex_exit(&ldcp->tx_thr_lock);

	if (tid != 0) {
		thread_join(tid);
	}

	D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id);
}

static int
vsw_mapin_avail(vsw_ldc_t *ldcp)
{
	int		rv;
	ldc_info_t	info;
	uint64_t	mapin_sz_req;
	uint64_t	dblk_sz;
	vsw_t		*vswp = ldcp->ldc_vswp;

	rv = ldc_info(ldcp->ldc_handle, &info);
	if (rv != 0) {
		return (B_FALSE);
	}

	dblk_sz = RXDRING_DBLK_SZ(vswp->max_frame_size);
	mapin_sz_req = (VSW_RXDRING_NRBUFS * dblk_sz);

	if (info.direct_map_size_max >= mapin_sz_req) {
		return (B_TRUE);
	}

	return (B_FALSE);
}

/*
 * Debugging routines
 */
static void
display_state(void)
{
	vsw_t		*vswp;
	vsw_port_list_t	*plist;
	vsw_port_t	*port;
	vsw_ldc_t	*ldcp;
	extern vsw_t	*vsw_head;

	cmn_err(CE_NOTE, "***** system state *****");

	for (vswp = vsw_head; vswp; vswp = vswp->next) {
		plist = &vswp->plist;
		READ_ENTER(&plist->lockrw);
		cmn_err(CE_CONT, "vsw instance %d has %d ports attached\n",
		    vswp->instance, plist->num_ports);

		for (port = plist->head; port != NULL; port = port->p_next) {
			cmn_err(CE_CONT, "port %d : %d ldcs attached\n",
			    port->p_instance, port->num_ldcs);
			ldcp = port->ldcp;
			cmn_err(CE_CONT, "chan %lu : dev %d : "
			    "status %d : phase %u\n",
			    ldcp->ldc_id, ldcp->dev_class,
			    ldcp->ldc_status, ldcp->hphase);
			cmn_err(CE_CONT, "chan %lu : lsession %lu : "
			    "psession %lu\n", ldcp->ldc_id,
			    ldcp->local_session, ldcp->peer_session);

			cmn_err(CE_CONT, "Inbound lane:\n");
			display_lane(&ldcp->lane_in);
			cmn_err(CE_CONT, "Outbound lane:\n");
			display_lane(&ldcp->lane_out);
		}
		RW_EXIT(&plist->lockrw);
	}
	cmn_err(CE_NOTE, "***** system state *****");
}

static void
display_lane(lane_t *lp)
{
	dring_info_t	*drp = lp->dringp;

	cmn_err(CE_CONT, "ver 0x%x:0x%x : state %lx : mtu 0x%lx\n",
	    lp->ver_major, lp->ver_minor, lp->lstate, lp->mtu);
	cmn_err(CE_CONT, "addr_type %d : addr 0x%lx : xmode %d\n",
	    lp->addr_type, lp->addr, lp->xfer_mode);
	cmn_err(CE_CONT, "dringp 0x%lx\n", (uint64_t)lp->dringp);

	cmn_err(CE_CONT, "Dring info:\n");
	cmn_err(CE_CONT, "\tnum_desc %u : dsize %u\n",
	    drp->num_descriptors, drp->descriptor_size);
	cmn_err(CE_CONT, "\thandle 0x%lx\n", drp->dring_handle);
	cmn_err(CE_CONT, "\tpub_addr 0x%lx : priv_addr 0x%lx\n",
	    (uint64_t)drp->pub_addr, (uint64_t)drp->priv_addr);
	cmn_err(CE_CONT, "\tident 0x%lx : end_idx %lu\n",
	    drp->ident, drp->end_idx);
	display_ring(drp);
}

static void
display_ring(dring_info_t *dringp)
{
	uint64_t		i;
	uint64_t		priv_count = 0;
	uint64_t		pub_count = 0;
	vnet_public_desc_t	*pub_addr = NULL;
	vsw_private_desc_t	*priv_addr = NULL;

	for (i = 0; i < vsw_num_descriptors; i++) {
		if (dringp->pub_addr != NULL) {
			pub_addr = (vnet_public_desc_t *)dringp->pub_addr + i;

			if (pub_addr->hdr.dstate == VIO_DESC_FREE)
				pub_count++;
		}

		if (dringp->priv_addr != NULL) {
			priv_addr =
			    (vsw_private_desc_t *)dringp->priv_addr + i;

			if (priv_addr->dstate == VIO_DESC_FREE)
				priv_count++;
		}
	}
	cmn_err(CE_CONT, "\t%lu elements: %lu priv free: %lu pub free\n",
	    i, priv_count, pub_count);
}

static void
dump_flags(uint64_t state)
{
	int	i;

	typedef struct flag_name {
		int	flag_val;
		char	*flag_name;
	} flag_name_t;

	flag_name_t	flags[] = {
		VSW_VER_INFO_SENT, "VSW_VER_INFO_SENT",
		VSW_VER_INFO_RECV, "VSW_VER_INFO_RECV",
		VSW_VER_ACK_RECV, "VSW_VER_ACK_RECV",
		VSW_VER_ACK_SENT, "VSW_VER_ACK_SENT",
		VSW_VER_NACK_RECV, "VSW_VER_NACK_RECV",
		VSW_VER_NACK_SENT, "VSW_VER_NACK_SENT",
		VSW_ATTR_INFO_SENT, "VSW_ATTR_INFO_SENT",
		VSW_ATTR_INFO_RECV, "VSW_ATTR_INFO_RECV",
		VSW_ATTR_ACK_SENT, "VSW_ATTR_ACK_SENT",
		VSW_ATTR_ACK_RECV, "VSW_ATTR_ACK_RECV",
		VSW_ATTR_NACK_SENT, "VSW_ATTR_NACK_SENT",
		VSW_ATTR_NACK_RECV, "VSW_ATTR_NACK_RECV",
		VSW_DRING_INFO_SENT, "VSW_DRING_INFO_SENT",
		VSW_DRING_INFO_RECV, "VSW_DRING_INFO_RECV",
		VSW_DRING_ACK_SENT, "VSW_DRING_ACK_SENT",
		VSW_DRING_ACK_RECV, "VSW_DRING_ACK_RECV",
		VSW_DRING_NACK_SENT, "VSW_DRING_NACK_SENT",
		VSW_DRING_NACK_RECV, "VSW_DRING_NACK_RECV",
		VSW_RDX_INFO_SENT, "VSW_RDX_INFO_SENT",
		VSW_RDX_INFO_RECV, "VSW_RDX_INFO_RECV",
		VSW_RDX_ACK_SENT, "VSW_RDX_ACK_SENT",
		VSW_RDX_ACK_RECV, "VSW_RDX_ACK_RECV",
		VSW_RDX_NACK_SENT, "VSW_RDX_NACK_SENT",
		VSW_RDX_NACK_RECV, "VSW_RDX_NACK_RECV",
		VSW_MCST_INFO_SENT, "VSW_MCST_INFO_SENT",
		VSW_MCST_INFO_RECV, "VSW_MCST_INFO_RECV",
		VSW_MCST_ACK_SENT, "VSW_MCST_ACK_SENT",
		VSW_MCST_ACK_RECV, "VSW_MCST_ACK_RECV",
		VSW_MCST_NACK_SENT, "VSW_MCST_NACK_SENT",
		VSW_MCST_NACK_RECV, "VSW_MCST_NACK_RECV",
		VSW_LANE_ACTIVE, "VSW_LANE_ACTIVE"};

	DERR(NULL, "DUMP_FLAGS: %llx\n", state);
	for (i = 0; i < sizeof (flags) / sizeof (flag_name_t); i++) {
		if (state & flags[i].flag_val)
			DERR(NULL, "DUMP_FLAGS %s", flags[i].flag_name);
	}
}
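
/*
 * Usage sketch for the debugging routines above (illustrative only;
 * these are normally invoked ad hoc, e.g. from temporary
 * instrumentation or a kernel debugger):
 *
 *	display_state();			walk every vsw instance
 *	display_lane(&ldcp->lane_out);		one lane in detail
 *	dump_flags(ldcp->lane_out.lstate);	decode handshake state
 */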