/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/debug.h>
#include <sys/time.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/user.h>
#include <sys/stropts.h>
#include <sys/stream.h>
#include <sys/strlog.h>
#include <sys/strsubr.h>
#include <sys/cmn_err.h>
#include <sys/cpu.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ksynch.h>
#include <sys/stat.h>
#include <sys/kstat.h>
#include <sys/vtrace.h>
#include <sys/strsun.h>
#include <sys/dlpi.h>
#include <sys/ethernet.h>
#include <net/if.h>
#include <sys/varargs.h>
#include <sys/machsystm.h>
#include <sys/modctl.h>
#include <sys/modhash.h>
#include <sys/mac.h>
#include <sys/mac_ether.h>
#include <sys/taskq.h>
#include <sys/note.h>
#include <sys/mach_descrip.h>
#include <sys/mdeg.h>
#include <sys/ldc.h>
#include <sys/vsw_fdb.h>
#include <sys/vsw.h>
#include <sys/vio_mailbox.h>
#include <sys/vnet_mailbox.h>
#include <sys/vnet_common.h>
#include <sys/vio_util.h>
#include <sys/sdt.h>
#include <sys/atomic.h>
#include <sys/callb.h>
#include <sys/vlan.h>

/* Port add/deletion/etc routines */
static void vsw_port_delete(vsw_port_t *port);
static int vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id);
static void vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id);
static int vsw_init_ldcs(vsw_port_t *port);
static void vsw_uninit_ldcs(vsw_port_t *port);
static int vsw_ldc_init(vsw_ldc_t *ldcp);
static void vsw_ldc_uninit(vsw_ldc_t *ldcp);
static void vsw_drain_ldcs(vsw_port_t *port);
static void vsw_drain_port_taskq(vsw_port_t *port);
static void vsw_marker_task(void *);
static int vsw_plist_del_node(vsw_t *, vsw_port_t *port);
void vsw_detach_ports(vsw_t *vswp);
int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node);
mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr);
int vsw_port_detach(vsw_t *vswp, int p_instance);
int vsw_portsend(vsw_port_t *port, mblk_t *mp);
int vsw_port_attach(vsw_port_t *portp);
vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance);
void vsw_vlan_unaware_port_reset(vsw_port_t *portp);
int vsw_send_msg(vsw_ldc_t *, void *, int, boolean_t);
void vsw_hio_port_reset(vsw_port_t *portp, boolean_t immediate);
void vsw_reset_ports(vsw_t *vswp);
void vsw_port_reset(vsw_port_t *portp);

/* Interrupt routines */
static uint_t vsw_ldc_cb(uint64_t cb, caddr_t arg);
/* Handshake routines */
static void vsw_ldc_reinit(vsw_ldc_t *);
static void vsw_process_conn_evt(vsw_ldc_t *, uint16_t);
static void vsw_conn_task(void *);
static int vsw_check_flag(vsw_ldc_t *, int, uint64_t);
static void vsw_next_milestone(vsw_ldc_t *);
static int vsw_supported_version(vio_ver_msg_t *);
static void vsw_set_vnet_proto_ops(vsw_ldc_t *ldcp);
static void vsw_reset_vnet_proto_ops(vsw_ldc_t *ldcp);

/* Data processing routines */
static void vsw_process_pkt(void *);
static void vsw_dispatch_ctrl_task(vsw_ldc_t *, void *, vio_msg_tag_t *);
static void vsw_process_ctrl_pkt(void *);
static void vsw_process_ctrl_ver_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_attr_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_mcst_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *, void *);
static void vsw_process_ctrl_rdx_pkt(vsw_ldc_t *, void *);
static void vsw_process_data_pkt(vsw_ldc_t *, void *, vio_msg_tag_t *,
	uint32_t);
static void vsw_process_data_dring_pkt(vsw_ldc_t *, void *);
static void vsw_process_pkt_data_nop(void *, void *, uint32_t);
static void vsw_process_pkt_data(void *, void *, uint32_t);
static void vsw_process_data_ibnd_pkt(vsw_ldc_t *, void *);
static void vsw_process_err_pkt(vsw_ldc_t *, void *, vio_msg_tag_t *);

/* Switching/data transmit routines */
static int vsw_dringsend(vsw_ldc_t *, mblk_t *);
static int vsw_descrsend(vsw_ldc_t *, mblk_t *);
static void vsw_ldcsend_pkt(vsw_ldc_t *ldcp, mblk_t *mp);
static int vsw_ldcsend(vsw_ldc_t *ldcp, mblk_t *mp, uint32_t retries);
static int vsw_ldctx_pri(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count);
static int vsw_ldctx(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count);

/* Packet creation routines */
static void vsw_send_ver(void *);
static void vsw_send_attr(vsw_ldc_t *);
static vio_dring_reg_msg_t *vsw_create_dring_info_pkt(vsw_ldc_t *);
static void vsw_send_dring_info(vsw_ldc_t *);
static void vsw_send_rdx(vsw_ldc_t *);

/* Dring routines */
static dring_info_t *vsw_create_dring(vsw_ldc_t *);
static void vsw_create_privring(vsw_ldc_t *);
static int vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp);
static int vsw_dring_find_free_desc(dring_info_t *, vsw_private_desc_t **,
	int *);
static dring_info_t *vsw_ident2dring(lane_t *, uint64_t);
static int vsw_reclaim_dring(dring_info_t *dp, int start);

static void vsw_set_lane_attr(vsw_t *, lane_t *);
static int vsw_check_attr(vnet_attr_msg_t *, vsw_ldc_t *);
static int vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg);
static int vsw_mem_cookie_match(ldc_mem_cookie_t *, ldc_mem_cookie_t *);
static int vsw_check_dring_info(vio_dring_reg_msg_t *);

/* Rcv/Tx thread routines */
static void vsw_stop_tx_thread(vsw_ldc_t *ldcp);
static void vsw_ldc_tx_worker(void *arg);
static void vsw_stop_rx_thread(vsw_ldc_t *ldcp);
static void vsw_ldc_rx_worker(void *arg);

/* Misc support routines */
static void vsw_free_lane_resources(vsw_ldc_t *, uint64_t);
static void vsw_free_ring(dring_info_t *);
static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr);
static int vsw_get_same_dest_list(struct ether_header *ehp,
	mblk_t **rhead, mblk_t **rtail, mblk_t **mpp);
static mblk_t *vsw_dupmsgchain(mblk_t *mp);

/* Debugging routines */
static void dump_flags(uint64_t);
static void display_state(void);
static void display_lane(lane_t *);
static void display_ring(dring_info_t *);

/*
 * Functions imported from other files.
 */
extern int vsw_set_hw(vsw_t *, vsw_port_t *, int);
extern void vsw_unset_hw(vsw_t *, vsw_port_t *, int);
extern int vsw_add_rem_mcst(vnet_mcast_msg_t *mcst_pkt, vsw_port_t *port);
extern void vsw_del_mcst_port(vsw_port_t *port);
extern int vsw_add_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg);
extern int vsw_del_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg);
extern void vsw_fdbe_add(vsw_t *vswp, void *port);
extern void vsw_fdbe_del(vsw_t *vswp, struct ether_addr *eaddr);
extern void vsw_create_vlans(void *arg, int type);
extern void vsw_destroy_vlans(void *arg, int type);
extern void vsw_vlan_add_ids(void *arg, int type);
extern void vsw_vlan_remove_ids(void *arg, int type);
extern boolean_t vsw_frame_lookup_vid(void *arg, int caller,
	struct ether_header *ehp, uint16_t *vidp);
extern mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp);
extern uint32_t vsw_vlan_frame_untag(void *arg, int type, mblk_t **np,
	mblk_t **npt);
extern boolean_t vsw_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid);
extern void vsw_hio_start(vsw_t *vswp, vsw_ldc_t *ldcp);
extern void vsw_hio_stop(vsw_t *vswp, vsw_ldc_t *ldcp);
extern void vsw_process_dds_msg(vsw_t *vswp, vsw_ldc_t *ldcp, void *msg);
extern void vsw_hio_stop_port(vsw_port_t *portp);
extern void vsw_publish_macaddr(vsw_t *vswp, vsw_port_t *portp);
extern int vsw_mac_client_init(vsw_t *vswp, vsw_port_t *port, int type);
extern void vsw_mac_client_cleanup(vsw_t *vswp, vsw_port_t *port, int type);


#define	VSW_NUM_VMPOOLS		3	/* number of vio mblk pools */
/*
 * Tunables used in this file.
 */
extern int vsw_num_handshakes;
extern int vsw_wretries;
extern int vsw_desc_delay;
extern int vsw_read_attempts;
extern int vsw_ldc_tx_delay;
extern int vsw_ldc_tx_retries;
extern int vsw_ldc_retries;
extern int vsw_ldc_delay;
extern boolean_t vsw_ldc_rxthr_enabled;
extern boolean_t vsw_ldc_txthr_enabled;
extern uint32_t vsw_ntxds;
extern uint32_t vsw_max_tx_qcount;
extern uint32_t vsw_chain_len;
extern uint32_t vsw_mblk_size1;
extern uint32_t vsw_mblk_size2;
extern uint32_t vsw_mblk_size3;
extern uint32_t vsw_mblk_size4;
extern uint32_t vsw_num_mblks1;
extern uint32_t vsw_num_mblks2;
extern uint32_t vsw_num_mblks3;
extern uint32_t vsw_num_mblks4;
extern boolean_t vsw_obp_ver_proto_workaround;
extern uint32_t vsw_publish_macaddr_count;
extern boolean_t vsw_jumbo_rxpools;

#define	LDC_ENTER_LOCK(ldcp)	\
				mutex_enter(&((ldcp)->ldc_cblock));\
				mutex_enter(&((ldcp)->ldc_rxlock));\
				mutex_enter(&((ldcp)->ldc_txlock));
#define	LDC_EXIT_LOCK(ldcp)	\
				mutex_exit(&((ldcp)->ldc_txlock));\
				mutex_exit(&((ldcp)->ldc_rxlock));\
				mutex_exit(&((ldcp)->ldc_cblock));

#define	VSW_VER_EQ(ldcp, major, minor)	\
	((ldcp)->lane_out.ver_major == (major) &&	\
	    (ldcp)->lane_out.ver_minor == (minor))

#define	VSW_VER_LT(ldcp, major, minor)	\
	(((ldcp)->lane_out.ver_major < (major)) ||	\
	    ((ldcp)->lane_out.ver_major == (major) &&	\
	    (ldcp)->lane_out.ver_minor < (minor)))

#define	VSW_VER_GTEQ(ldcp, major, minor)	\
	(((ldcp)->lane_out.ver_major > (major)) ||	\
	    ((ldcp)->lane_out.ver_major == (major) &&	\
	    (ldcp)->lane_out.ver_minor >= (minor)))

/* supported versions */
static	ver_sup_t	vsw_versions[] = { {1, 4} };

/*
 * For the moment the state dump routines have their own
 * private flag.
 */
#define	DUMP_STATE	0

#if DUMP_STATE

#define	DUMP_TAG(tag) \
{			\
	D1(NULL, "DUMP_TAG: type 0x%llx", (tag).vio_msgtype); \
	D1(NULL, "DUMP_TAG: stype 0x%llx", (tag).vio_subtype);	\
	D1(NULL, "DUMP_TAG: senv 0x%llx", (tag).vio_subtype_env);	\
}

#define	DUMP_TAG_PTR(tag) \
{			\
	D1(NULL, "DUMP_TAG: type 0x%llx", (tag)->vio_msgtype); \
	D1(NULL, "DUMP_TAG: stype 0x%llx", (tag)->vio_subtype);	\
	D1(NULL, "DUMP_TAG: senv 0x%llx", (tag)->vio_subtype_env);	\
}

#define	DUMP_FLAGS(flags) dump_flags(flags);
#define	DISPLAY_STATE()	display_state()

#else

#define	DUMP_TAG(tag)
#define	DUMP_TAG_PTR(tag)
#define	DUMP_FLAGS(state)
#define	DISPLAY_STATE()

#endif	/* DUMP_STATE */
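
/*
 * Illustrative example (not part of the driver logic): the VSW_VER_*
 * macros above compare against the version negotiated on the outbound
 * lane. With vsw_versions[] = { {1, 4} }, a handshake that settles on
 * 1.2 satisfies both VSW_VER_GTEQ(ldcp, 1, 2) and VSW_VER_LT(ldcp, 1, 3),
 * so a feature check might read:
 *
 *	if (VSW_VER_GTEQ(ldcp, 1, 3))
 *		... peer understands VLAN tagged frames ...
 *	else
 *		... pre-1.3 peer, untagged ETHERMAX frames only ...
 *
 * See vsw_set_vnet_proto_ops() below for the actual version-dependent
 * feature selection.
 */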
/*
 * Attach the specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
int
vsw_port_attach(vsw_port_t *port)
{
	vsw_t			*vswp = port->p_vswp;
	vsw_port_list_t		*plist = &vswp->plist;
	vsw_port_t		*p, **pp;
	int			i;
	int			nids = port->num_ldcs;
	uint64_t		*ldcids;
	int			rv;

	D1(vswp, "%s: enter : port %d", __func__, port->p_instance);

	/* port already exists? */
	READ_ENTER(&plist->lockrw);
	for (p = plist->head; p != NULL; p = p->p_next) {
		if (p->p_instance == port->p_instance) {
			DWARN(vswp, "%s: port instance %d already attached",
			    __func__, p->p_instance);
			RW_EXIT(&plist->lockrw);
			return (1);
		}
	}
	RW_EXIT(&plist->lockrw);

	rw_init(&port->p_ldclist.lockrw, NULL, RW_DRIVER, NULL);

	mutex_init(&port->tx_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&port->mca_lock, NULL, MUTEX_DRIVER, NULL);
	rw_init(&port->maccl_rwlock, NULL, RW_DRIVER, NULL);

	mutex_init(&port->state_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&port->state_cv, NULL, CV_DRIVER, NULL);
	port->state = VSW_PORT_INIT;

	D2(vswp, "%s: %d nids", __func__, nids);
	ldcids = port->ldc_ids;
	for (i = 0; i < nids; i++) {
		D2(vswp, "%s: ldcid (%llx)", __func__, (uint64_t)ldcids[i]);
		if (vsw_ldc_attach(port, (uint64_t)ldcids[i]) != 0) {
			DERR(vswp, "%s: ldc_attach failed", __func__);
			goto exit_error;
		}
	}

	if (vswp->switching_setup_done == B_TRUE) {
		/*
		 * If the underlying network device has been setup,
		 * then open a mac client and program the mac address
		 * for this port.
		 */
		rv = vsw_mac_client_init(vswp, port, VSW_VNETPORT);
		if (rv != 0) {
			goto exit_error;
		}
	}

	/* create the fdb entry for this port/mac address */
	vsw_fdbe_add(vswp, port);

	vsw_create_vlans(port, VSW_VNETPORT);

	WRITE_ENTER(&plist->lockrw);

	/* link it into the list of ports for this vsw instance */
	pp = (vsw_port_t **)(&plist->head);
	port->p_next = *pp;
	*pp = port;
	plist->num_ports++;

	RW_EXIT(&plist->lockrw);

	/*
	 * Initialise the port and any ldc's under it.
	 */
	(void) vsw_init_ldcs(port);

	/* announce macaddr of vnet to the physical switch */
	if (vsw_publish_macaddr_count != 0) {	/* enabled */
		vsw_publish_macaddr(vswp, port);
	}

	D1(vswp, "%s: exit", __func__);
	return (0);

exit_error:
	rw_destroy(&port->p_ldclist.lockrw);

	cv_destroy(&port->state_cv);
	mutex_destroy(&port->state_lock);

	rw_destroy(&port->maccl_rwlock);
	mutex_destroy(&port->tx_lock);
	mutex_destroy(&port->mca_lock);
	kmem_free(port, sizeof (vsw_port_t));
	return (1);
}
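
/*
 * Note: on failure vsw_port_attach() frees the port structure itself
 * (see exit_error above), so the caller must not reference the port
 * after a non-zero return.
 */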
/*
 * Detach the specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
int
vsw_port_detach(vsw_t *vswp, int p_instance)
{
	vsw_port_t	*port = NULL;
	vsw_port_list_t	*plist = &vswp->plist;

	D1(vswp, "%s: enter: port id %d", __func__, p_instance);

	WRITE_ENTER(&plist->lockrw);

	if ((port = vsw_lookup_port(vswp, p_instance)) == NULL) {
		RW_EXIT(&plist->lockrw);
		return (1);
	}

	if (vsw_plist_del_node(vswp, port)) {
		RW_EXIT(&plist->lockrw);
		return (1);
	}

	/* cleanup any HybridIO for this port */
	vsw_hio_stop_port(port);

	/*
	 * No longer need to hold writer lock on port list now
	 * that we have unlinked the target port from the list.
	 */
	RW_EXIT(&plist->lockrw);

	/* Cleanup and close the mac client */
	vsw_mac_client_cleanup(vswp, port, VSW_VNETPORT);

	/* Remove the fdb entry for this port/mac address */
	vsw_fdbe_del(vswp, &(port->p_macaddr));
	vsw_destroy_vlans(port, VSW_VNETPORT);

	/* Remove any multicast addresses.. */
	vsw_del_mcst_port(port);

	vsw_port_delete(port);

	D1(vswp, "%s: exit: p_instance(%d)", __func__, p_instance);
	return (0);
}

/*
 * Detach all active ports.
 */
void
vsw_detach_ports(vsw_t *vswp)
{
	vsw_port_list_t		*plist = &vswp->plist;
	vsw_port_t		*port = NULL;

	D1(vswp, "%s: enter", __func__);

	WRITE_ENTER(&plist->lockrw);

	while ((port = plist->head) != NULL) {
		(void) vsw_plist_del_node(vswp, port);

		/* cleanup any HybridIO for this port */
		vsw_hio_stop_port(port);

		/* Cleanup and close the mac client */
		vsw_mac_client_cleanup(vswp, port, VSW_VNETPORT);

		/* Remove the fdb entry for this port/mac address */
		vsw_fdbe_del(vswp, &(port->p_macaddr));
		vsw_destroy_vlans(port, VSW_VNETPORT);

		/* Remove any multicast addresses.. */
		vsw_del_mcst_port(port);

		/*
		 * No longer need to hold the lock on the port list
		 * now that we have unlinked the target port from the
		 * list.
		 */
		RW_EXIT(&plist->lockrw);
		vsw_port_delete(port);
		WRITE_ENTER(&plist->lockrw);
	}
	RW_EXIT(&plist->lockrw);

	D1(vswp, "%s: exit", __func__);
}

/*
 * Delete the specified port.
 */
static void
vsw_port_delete(vsw_port_t *port)
{
	vsw_ldc_list_t	*ldcl;
	vsw_t		*vswp = port->p_vswp;
	int		num_ldcs;

	D1(vswp, "%s: enter : port id %d", __func__, port->p_instance);

	vsw_uninit_ldcs(port);

	/*
	 * Wait for any pending ctrl msg tasks which reference this
	 * port to finish.
	 */
	vsw_drain_port_taskq(port);

	/*
	 * Wait for any active callbacks to finish
	 */
	vsw_drain_ldcs(port);

	ldcl = &port->p_ldclist;
	num_ldcs = port->num_ldcs;
	WRITE_ENTER(&ldcl->lockrw);
	while (num_ldcs > 0) {
		vsw_ldc_detach(port, ldcl->head->ldc_id);
		num_ldcs--;
	}
	RW_EXIT(&ldcl->lockrw);

	rw_destroy(&port->p_ldclist.lockrw);

	rw_destroy(&port->maccl_rwlock);
	mutex_destroy(&port->mca_lock);
	mutex_destroy(&port->tx_lock);

	cv_destroy(&port->state_cv);
	mutex_destroy(&port->state_lock);

	if (port->num_ldcs != 0) {
		kmem_free(port->ldc_ids, port->num_ldcs * sizeof (uint64_t));
		port->num_ldcs = 0;
	}

	if (port->nvids != 0) {
		kmem_free(port->vids, sizeof (vsw_vlanid_t) * port->nvids);
	}

	kmem_free(port, sizeof (vsw_port_t));

	D1(vswp, "%s: exit", __func__);
}
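
/*
 * Teardown ordering in vsw_port_delete() above: channel callbacks are
 * first disabled (vsw_uninit_ldcs), then any control-message tasks and
 * in-flight callbacks referencing the port are drained, and only then
 * are the individual channels detached and the port's locks and memory
 * released.
 */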
static int
vsw_init_multipools(vsw_ldc_t *ldcp, vsw_t *vswp)
{
	size_t		data_sz;
	int		rv;
	uint32_t	sz1 = 0;
	uint32_t	sz2 = 0;
	uint32_t	sz3 = 0;
	uint32_t	sz4 = 0;

	/*
	 * We round up the mtu specified to be a multiple of 2K to limit the
	 * number of rx buffer pools created for a given mtu.
	 */
	data_sz = vswp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN;
	data_sz = VNET_ROUNDUP_2K(data_sz);

	/*
	 * If pool sizes are specified, use them. Note that the presence of
	 * the first tunable will be used as a hint.
	 */
	if (vsw_mblk_size1 != 0) {
		sz1 = vsw_mblk_size1;
		sz2 = vsw_mblk_size2;
		sz3 = vsw_mblk_size3;
		sz4 = vsw_mblk_size4;

		if (sz4 == 0) {	/* need 3 pools */

			ldcp->max_rxpool_size = sz3;
			rv = vio_init_multipools(&ldcp->vmp,
			    VSW_NUM_VMPOOLS, sz1, sz2, sz3,
			    vsw_num_mblks1, vsw_num_mblks2, vsw_num_mblks3);

		} else {

			ldcp->max_rxpool_size = sz4;
			rv = vio_init_multipools(&ldcp->vmp,
			    VSW_NUM_VMPOOLS + 1, sz1, sz2, sz3, sz4,
			    vsw_num_mblks1, vsw_num_mblks2, vsw_num_mblks3,
			    vsw_num_mblks4);

		}

		return (rv);
	}

	/*
	 * Pool sizes are not specified. We select the pool sizes based on the
	 * mtu if vsw_jumbo_rxpools is enabled.
	 */
	if (vsw_jumbo_rxpools == B_FALSE || data_sz == VNET_2K) {
		/*
		 * Receive buffer pool allocation based on mtu is disabled.
		 * Use the default mechanism of standard size pool allocation.
		 */
		sz1 = VSW_MBLK_SZ_128;
		sz2 = VSW_MBLK_SZ_256;
		sz3 = VSW_MBLK_SZ_2048;
		ldcp->max_rxpool_size = sz3;

		rv = vio_init_multipools(&ldcp->vmp, VSW_NUM_VMPOOLS,
		    sz1, sz2, sz3,
		    vsw_num_mblks1, vsw_num_mblks2, vsw_num_mblks3);

		return (rv);
	}

	switch (data_sz) {

	case VNET_4K:

		sz1 = VSW_MBLK_SZ_128;
		sz2 = VSW_MBLK_SZ_256;
		sz3 = VSW_MBLK_SZ_2048;
		sz4 = sz3 << 1;			/* 4K */
		ldcp->max_rxpool_size = sz4;

		rv = vio_init_multipools(&ldcp->vmp, VSW_NUM_VMPOOLS + 1,
		    sz1, sz2, sz3, sz4,
		    vsw_num_mblks1, vsw_num_mblks2, vsw_num_mblks3,
		    vsw_num_mblks4);
		break;

	default:	/* data_sz: 4K+ to 16K */

		sz1 = VSW_MBLK_SZ_256;
		sz2 = VSW_MBLK_SZ_2048;
		sz3 = data_sz >> 1;	/* Jumbo-size/2 */
		sz4 = data_sz;		/* Jumbo-size */
		ldcp->max_rxpool_size = sz4;

		rv = vio_init_multipools(&ldcp->vmp, VSW_NUM_VMPOOLS + 1,
		    sz1, sz2, sz3, sz4,
		    vsw_num_mblks1, vsw_num_mblks2, vsw_num_mblks3,
		    vsw_num_mblks4);
		break;
	}

	return (rv);
}
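
/*
 * Worked example of the sizing above (illustrative): with a standard
 * 1500-byte MTU, data_sz rounds up to VNET_2K and the default three
 * pools of 128, 256 and 2048 byte mblks are created. With jumbo rx
 * pools enabled and an MTU that rounds data_sz up to 16K, the default
 * case creates four pools of 256, 2048, 8K (data_sz >> 1) and 16K
 * (data_sz) byte mblks, and max_rxpool_size is set to 16K.
 */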
/*
 * Attach a logical domain channel (ldc) under a specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id)
{
	vsw_t		*vswp = port->p_vswp;
	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
	vsw_ldc_t	*ldcp = NULL;
	ldc_attr_t	attr;
	ldc_status_t	istatus;
	int		status = DDI_FAILURE;
	char		kname[MAXNAMELEN];
	enum		{ PROG_init = 0x0,
			    PROG_callback = 0x1, PROG_rx_thread = 0x2,
			    PROG_tx_thread = 0x4}
			progress;

	progress = PROG_init;

	D1(vswp, "%s: enter", __func__);

	ldcp = kmem_zalloc(sizeof (vsw_ldc_t), KM_NOSLEEP);
	if (ldcp == NULL) {
		DERR(vswp, "%s: kmem_zalloc failed", __func__);
		return (1);
	}
	ldcp->ldc_id = ldc_id;

	mutex_init(&ldcp->ldc_txlock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->ldc_rxlock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->ldc_cblock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->drain_cv_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&ldcp->drain_cv, NULL, CV_DRIVER, NULL);
	rw_init(&ldcp->lane_in.dlistrw, NULL, RW_DRIVER, NULL);
	rw_init(&ldcp->lane_out.dlistrw, NULL, RW_DRIVER, NULL);

	/* required for handshake with peer */
	ldcp->local_session = (uint64_t)ddi_get_lbolt();
	ldcp->peer_session = 0;
	ldcp->session_status = 0;
	ldcp->hss_id = 1;	/* Initial handshake session id */

	(void) atomic_swap_32(&port->p_hio_capable, B_FALSE);

	/* only set for outbound lane, inbound set by peer */
	vsw_set_lane_attr(vswp, &ldcp->lane_out);

	attr.devclass = LDC_DEV_NT_SVC;
	attr.instance = ddi_get_instance(vswp->dip);
	attr.mode = LDC_MODE_UNRELIABLE;
	attr.mtu = VSW_LDC_MTU;
	status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
	if (status != 0) {
		DERR(vswp, "%s(%lld): ldc_init failed, rv (%d)",
		    __func__, ldc_id, status);
		goto ldc_attach_fail;
	}

	if (vsw_ldc_rxthr_enabled) {
		ldcp->rx_thr_flags = 0;

		mutex_init(&ldcp->rx_thr_lock, NULL, MUTEX_DRIVER, NULL);
		cv_init(&ldcp->rx_thr_cv, NULL, CV_DRIVER, NULL);
		ldcp->rx_thread = thread_create(NULL, 2 * DEFAULTSTKSZ,
		    vsw_ldc_rx_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri);

		progress |= PROG_rx_thread;
		if (ldcp->rx_thread == NULL) {
			DWARN(vswp, "%s(%lld): Failed to create worker thread",
			    __func__, ldc_id);
			goto ldc_attach_fail;
		}
	}

	if (vsw_ldc_txthr_enabled) {
		ldcp->tx_thr_flags = 0;
		ldcp->tx_mhead = ldcp->tx_mtail = NULL;

		mutex_init(&ldcp->tx_thr_lock, NULL, MUTEX_DRIVER, NULL);
		cv_init(&ldcp->tx_thr_cv, NULL, CV_DRIVER, NULL);
		ldcp->tx_thread = thread_create(NULL, 2 * DEFAULTSTKSZ,
		    vsw_ldc_tx_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri);

		progress |= PROG_tx_thread;
		if (ldcp->tx_thread == NULL) {
			DWARN(vswp, "%s(%lld): Failed to create worker thread",
			    __func__, ldc_id);
			goto ldc_attach_fail;
		}
	}

	status = ldc_reg_callback(ldcp->ldc_handle, vsw_ldc_cb, (caddr_t)ldcp);
	if (status != 0) {
		DERR(vswp, "%s(%lld): ldc_reg_callback failed, rv (%d)",
		    __func__, ldc_id, status);
		(void) ldc_fini(ldcp->ldc_handle);
		goto ldc_attach_fail;
	}
	/*
	 * allocate a message for ldc_read()s, big enough to hold ctrl and
	 * data msgs, including raw data msgs used to recv priority frames.
	 */
	ldcp->msglen = VIO_PKT_DATA_HDRSIZE + vswp->max_frame_size;
	ldcp->ldcmsg = kmem_alloc(ldcp->msglen, KM_SLEEP);

	progress |= PROG_callback;

	mutex_init(&ldcp->status_lock, NULL, MUTEX_DRIVER, NULL);

	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
		DERR(vswp, "%s: ldc_status failed", __func__);
		mutex_destroy(&ldcp->status_lock);
		goto ldc_attach_fail;
	}

	ldcp->ldc_status = istatus;
	ldcp->ldc_port = port;
	ldcp->ldc_vswp = vswp;

	vsw_reset_vnet_proto_ops(ldcp);

	(void) sprintf(kname, "%sldc0x%lx", DRV_NAME, ldcp->ldc_id);
	ldcp->ksp = vgen_setup_kstats(DRV_NAME, vswp->instance,
	    kname, &ldcp->ldc_stats);
	if (ldcp->ksp == NULL) {
		DERR(vswp, "%s: kstats setup failed", __func__);
		goto ldc_attach_fail;
	}

	/* link it into the list of channels for this port */
	WRITE_ENTER(&ldcl->lockrw);
	ldcp->ldc_next = ldcl->head;
	ldcl->head = ldcp;
	RW_EXIT(&ldcl->lockrw);

	D1(vswp, "%s: exit", __func__);
	return (0);

ldc_attach_fail:

	if (progress & PROG_callback) {
		(void) ldc_unreg_callback(ldcp->ldc_handle);
		kmem_free(ldcp->ldcmsg, ldcp->msglen);
	}

	if (progress & PROG_rx_thread) {
		if (ldcp->rx_thread != NULL) {
			vsw_stop_rx_thread(ldcp);
		}
		mutex_destroy(&ldcp->rx_thr_lock);
		cv_destroy(&ldcp->rx_thr_cv);
	}

	if (progress & PROG_tx_thread) {
		if (ldcp->tx_thread != NULL) {
			vsw_stop_tx_thread(ldcp);
		}
		mutex_destroy(&ldcp->tx_thr_lock);
		cv_destroy(&ldcp->tx_thr_cv);
	}
	if (ldcp->ksp != NULL) {
		vgen_destroy_kstats(ldcp->ksp);
	}
	mutex_destroy(&ldcp->ldc_txlock);
	mutex_destroy(&ldcp->ldc_rxlock);
	mutex_destroy(&ldcp->ldc_cblock);
	mutex_destroy(&ldcp->drain_cv_lock);

	cv_destroy(&ldcp->drain_cv);

	rw_destroy(&ldcp->lane_in.dlistrw);
	rw_destroy(&ldcp->lane_out.dlistrw);

	kmem_free(ldcp, sizeof (vsw_ldc_t));

	return (1);
}
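
/*
 * The attach path above records each resource it sets up in the
 * 'progress' bitmask (PROG_*), so that the ldc_attach_fail path can
 * tear down exactly those resources which were successfully
 * initialised before the failure.
 */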
/*
 * Detach a logical domain channel (ldc) belonging to a
 * particular port.
 */
static void
vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id)
{
	vsw_t		*vswp = port->p_vswp;
	vsw_ldc_t	*ldcp, **prev_ldcp;
	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
	int		rv;
	int		retries = 0;

	prev_ldcp = (vsw_ldc_t **)(&ldcl->head);
	for (; (ldcp = *prev_ldcp) != NULL; prev_ldcp = &ldcp->ldc_next) {
		if (ldcp->ldc_id == ldc_id) {
			break;
		}
	}

	/* specified ldc id not found */
	ASSERT(ldcp != NULL);

	D2(vswp, "%s: detaching channel %lld", __func__, ldcp->ldc_id);

	/* Stop the receive thread */
	if (ldcp->rx_thread != NULL) {
		vsw_stop_rx_thread(ldcp);
		mutex_destroy(&ldcp->rx_thr_lock);
		cv_destroy(&ldcp->rx_thr_cv);
	}
	kmem_free(ldcp->ldcmsg, ldcp->msglen);

	/* Stop the tx thread */
	if (ldcp->tx_thread != NULL) {
		vsw_stop_tx_thread(ldcp);
		mutex_destroy(&ldcp->tx_thr_lock);
		cv_destroy(&ldcp->tx_thr_cv);
		if (ldcp->tx_mhead != NULL) {
			freemsgchain(ldcp->tx_mhead);
			ldcp->tx_mhead = ldcp->tx_mtail = NULL;
			ldcp->tx_cnt = 0;
		}
	}

	/* Destroy kstats */
	vgen_destroy_kstats(ldcp->ksp);

	/*
	 * Before we can close the channel we must release any mapped
	 * resources (e.g. drings).
	 */
	vsw_free_lane_resources(ldcp, INBOUND);
	vsw_free_lane_resources(ldcp, OUTBOUND);

	/*
	 * Close the channel, retry on EAGAIN.
	 */
	while ((rv = ldc_close(ldcp->ldc_handle)) == EAGAIN) {
		if (++retries > vsw_ldc_retries) {
			break;
		}
		drv_usecwait(vsw_ldc_delay);
	}
	if (rv != 0) {
		cmn_err(CE_NOTE,
		    "!vsw%d: Error(%d) closing the channel(0x%lx)\n",
		    vswp->instance, rv, ldcp->ldc_id);
	}

	(void) ldc_fini(ldcp->ldc_handle);

	ldcp->ldc_status = LDC_INIT;
	ldcp->ldc_handle = NULL;
	ldcp->ldc_vswp = NULL;


	/*
	 * Most likely some mblks are still in use and
	 * have not been returned to the pool. These mblks are
	 * added to the pool that is maintained in the device instance.
	 * Another attempt will be made to destroy the pool
	 * when the device detaches.
	 */
	vio_destroy_multipools(&ldcp->vmp, &vswp->rxh);

	/* unlink it from the list */
	*prev_ldcp = ldcp->ldc_next;

	mutex_destroy(&ldcp->ldc_txlock);
	mutex_destroy(&ldcp->ldc_rxlock);
	mutex_destroy(&ldcp->ldc_cblock);
	cv_destroy(&ldcp->drain_cv);
	mutex_destroy(&ldcp->drain_cv_lock);
	mutex_destroy(&ldcp->status_lock);
	rw_destroy(&ldcp->lane_in.dlistrw);
	rw_destroy(&ldcp->lane_out.dlistrw);

	kmem_free(ldcp, sizeof (vsw_ldc_t));
}
/*
 * Open and attempt to bring up the channel. Note that the channel
 * can only be brought up if the peer has also opened the channel.
 *
 * Returns 0 if the channel can be opened and brought up; otherwise
 * returns 1.
 */
static int
vsw_ldc_init(vsw_ldc_t *ldcp)
{
	vsw_t		*vswp = ldcp->ldc_vswp;
	ldc_status_t	istatus = 0;
	int		rv;

	D1(vswp, "%s: enter", __func__);

	LDC_ENTER_LOCK(ldcp);

	/* don't start at 0 in case clients don't like that */
	ldcp->next_ident = 1;

	rv = ldc_open(ldcp->ldc_handle);
	if (rv != 0) {
		DERR(vswp, "%s: ldc_open failed: id(%lld) rv(%d)",
		    __func__, ldcp->ldc_id, rv);
		LDC_EXIT_LOCK(ldcp);
		return (1);
	}

	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
		DERR(vswp, "%s: unable to get status", __func__);
		LDC_EXIT_LOCK(ldcp);
		return (1);

	} else if (istatus != LDC_OPEN && istatus != LDC_READY) {
		DERR(vswp, "%s: id (%lld) status(%d) is not OPEN/READY",
		    __func__, ldcp->ldc_id, istatus);
		LDC_EXIT_LOCK(ldcp);
		return (1);
	}

	mutex_enter(&ldcp->status_lock);
	ldcp->ldc_status = istatus;
	mutex_exit(&ldcp->status_lock);

	rv = ldc_up(ldcp->ldc_handle);
	if (rv != 0) {
		/*
		 * Not a fatal error for ldc_up() to fail, as peer
		 * end point may simply not be ready yet.
		 */
		D2(vswp, "%s: ldc_up err id(%lld) rv(%d)", __func__,
		    ldcp->ldc_id, rv);
		LDC_EXIT_LOCK(ldcp);
		return (1);
	}

	/*
	 * ldc_up() call is non-blocking so need to explicitly
	 * check channel status to see if in fact the channel
	 * is UP.
	 */
	mutex_enter(&ldcp->status_lock);
	if (ldc_status(ldcp->ldc_handle, &ldcp->ldc_status) != 0) {
		DERR(vswp, "%s: unable to get status", __func__);
		mutex_exit(&ldcp->status_lock);
		LDC_EXIT_LOCK(ldcp);
		return (1);

	}
	if (ldcp->ldc_status == LDC_UP) {
		D2(vswp, "%s: channel %ld now UP (%ld)", __func__,
		    ldcp->ldc_id, istatus);
		mutex_exit(&ldcp->status_lock);
		LDC_EXIT_LOCK(ldcp);

		vsw_process_conn_evt(ldcp, VSW_CONN_UP);
		return (0);
	}

	mutex_exit(&ldcp->status_lock);
	LDC_EXIT_LOCK(ldcp);

	D1(vswp, "%s: exit", __func__);
	return (0);
}

/* disable callbacks on the channel */
static void
vsw_ldc_uninit(vsw_ldc_t *ldcp)
{
	vsw_t	*vswp = ldcp->ldc_vswp;
	int	rv;

	D1(vswp, "vsw_ldc_uninit: enter: id(%lx)\n", ldcp->ldc_id);

	LDC_ENTER_LOCK(ldcp);

	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
	if (rv != 0) {
		cmn_err(CE_NOTE, "!vsw_ldc_uninit(%ld): error disabling "
		    "interrupts (rv = %d)\n", ldcp->ldc_id, rv);
	}

	mutex_enter(&ldcp->status_lock);
	ldcp->ldc_status = LDC_INIT;
	mutex_exit(&ldcp->status_lock);

	LDC_EXIT_LOCK(ldcp);

	D1(vswp, "vsw_ldc_uninit: exit: id(%lx)", ldcp->ldc_id);
}

static int
vsw_init_ldcs(vsw_port_t *port)
{
	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
	vsw_ldc_t	*ldcp;

	READ_ENTER(&ldcl->lockrw);
	ldcp = ldcl->head;
	for (; ldcp != NULL; ldcp = ldcp->ldc_next) {
		(void) vsw_ldc_init(ldcp);
	}
	RW_EXIT(&ldcl->lockrw);

	return (0);
}

static void
vsw_uninit_ldcs(vsw_port_t *port)
{
	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
	vsw_ldc_t	*ldcp;

	D1(NULL, "vsw_uninit_ldcs: enter\n");

	READ_ENTER(&ldcl->lockrw);
	ldcp = ldcl->head;
	for (; ldcp != NULL; ldcp = ldcp->ldc_next) {
		vsw_ldc_uninit(ldcp);
	}
	RW_EXIT(&ldcl->lockrw);

	D1(NULL, "vsw_uninit_ldcs: exit\n");
}
/*
 * Wait until the callback(s) associated with the ldcs under the specified
 * port have completed.
 *
 * Prior to this function being invoked each channel under this port
 * should have been quiesced via ldc_set_cb_mode(DISABLE).
 *
 * A short explanation of what we are doing below:
 *
 * The simplest approach would be to have a reference counter in
 * the ldc structure which is incremented/decremented by the callbacks as
 * they use the channel. The drain function could then simply disable any
 * further callbacks and do a cv_wait for the ref to hit zero. Unfortunately
 * there is a tiny window here - before the callback is able to get the lock
 * on the channel it is interrupted and this function gets to execute. It
 * sees that the ref count is zero and believes it is free to delete the
 * associated data structures.
 *
 * We get around this by taking advantage of the fact that before the ldc
 * framework invokes a callback it sets a flag to indicate that there is a
 * callback active (or about to become active). If we attempt to unregister
 * a callback while this active flag is set then the unregister will fail
 * with EWOULDBLOCK.
 *
 * If the unregister fails we do a cv_timedwait. We will either be signaled
 * by the callback as it is exiting (note we have to wait a short period to
 * allow the callback to return fully to the ldc framework and it to clear
 * the active flag), or by the timer expiring. In either case we again attempt
 * the unregister. We repeat this until we can successfully unregister the
 * callback.
 *
 * The reason we use a cv_timedwait rather than a simple cv_wait is to catch
 * the case where the callback has finished but the ldc framework has not yet
 * cleared the active flag. In this case we would never get a cv_signal.
 */
static void
vsw_drain_ldcs(vsw_port_t *port)
{
	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
	vsw_ldc_t	*ldcp;
	vsw_t		*vswp = port->p_vswp;

	D1(vswp, "%s: enter", __func__);

	READ_ENTER(&ldcl->lockrw);

	ldcp = ldcl->head;

	for (; ldcp != NULL; ldcp = ldcp->ldc_next) {
		/*
		 * If we can unregister the channel callback then we
		 * know that there is no callback either running or
		 * scheduled to run for this channel so move on to next
		 * channel in the list.
		 */
		mutex_enter(&ldcp->drain_cv_lock);

		/* prompt active callbacks to quit */
		ldcp->drain_state = VSW_LDC_DRAINING;

		if ((ldc_unreg_callback(ldcp->ldc_handle)) == 0) {
			D2(vswp, "%s: unreg callback for chan %ld", __func__,
			    ldcp->ldc_id);
			mutex_exit(&ldcp->drain_cv_lock);
			continue;
		} else {
			/*
			 * If we end up here we know that either 1) a callback
			 * is currently executing, 2) is about to start (i.e.
			 * the ldc framework has set the active flag but
			 * has not actually invoked the callback yet, or 3)
			 * has finished and has returned to the ldc framework
			 * but the ldc framework has not yet cleared the
			 * active bit.
			 *
			 * Wait for it to finish.
			 */
			while (ldc_unreg_callback(ldcp->ldc_handle)
			    == EWOULDBLOCK)
				(void) cv_timedwait(&ldcp->drain_cv,
				    &ldcp->drain_cv_lock, lbolt + hz);

			mutex_exit(&ldcp->drain_cv_lock);
			D2(vswp, "%s: unreg callback for chan %ld after "
			    "timeout", __func__, ldcp->ldc_id);
		}
	}
	RW_EXIT(&ldcl->lockrw);

	D1(vswp, "%s: exit", __func__);
}

/*
 * Wait until all tasks which reference this port have completed.
 *
 * Prior to this function being invoked each channel under this port
 * should have been quiesced via ldc_set_cb_mode(DISABLE).
 */
static void
vsw_drain_port_taskq(vsw_port_t *port)
{
	vsw_t		*vswp = port->p_vswp;

	D1(vswp, "%s: enter", __func__);

	/*
	 * Mark the port as in the process of being detached, and
	 * dispatch a marker task to the queue so we know when all
	 * relevant tasks have completed.
	 */
	mutex_enter(&port->state_lock);
	port->state = VSW_PORT_DETACHING;

	if ((vswp->taskq_p == NULL) ||
	    (ddi_taskq_dispatch(vswp->taskq_p, vsw_marker_task,
	    port, DDI_NOSLEEP) != DDI_SUCCESS)) {
		cmn_err(CE_NOTE, "!vsw%d: unable to dispatch marker task",
		    vswp->instance);
		mutex_exit(&port->state_lock);
		return;
	}

	/*
	 * Wait for the marker task to finish.
	 */
	while (port->state != VSW_PORT_DETACHABLE)
		cv_wait(&port->state_cv, &port->state_lock);

	mutex_exit(&port->state_lock);

	D1(vswp, "%s: exit", __func__);
}
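
/*
 * The marker task below completes the drain protocol above: assuming
 * vswp->taskq_p executes tasks in dispatch order (it is created with a
 * single thread in the driver attach path), once vsw_marker_task() has
 * run, every task referencing this port that was queued before the port
 * entered VSW_PORT_DETACHING has completed.
 */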
static void
vsw_marker_task(void *arg)
{
	vsw_port_t	*port = arg;
	vsw_t		*vswp = port->p_vswp;

	D1(vswp, "%s: enter", __func__);

	mutex_enter(&port->state_lock);

	/*
	 * No further tasks should be dispatched which reference
	 * this port so ok to mark it as safe to detach.
	 */
	port->state = VSW_PORT_DETACHABLE;

	cv_signal(&port->state_cv);

	mutex_exit(&port->state_lock);

	D1(vswp, "%s: exit", __func__);
}

vsw_port_t *
vsw_lookup_port(vsw_t *vswp, int p_instance)
{
	vsw_port_list_t *plist = &vswp->plist;
	vsw_port_t	*port;

	for (port = plist->head; port != NULL; port = port->p_next) {
		if (port->p_instance == p_instance) {
			D2(vswp, "vsw_lookup_port: found p_instance\n");
			return (port);
		}
	}

	return (NULL);
}

void
vsw_vlan_unaware_port_reset(vsw_port_t *portp)
{
	vsw_ldc_list_t	*ldclp;
	vsw_ldc_t	*ldcp;

	ldclp = &portp->p_ldclist;

	READ_ENTER(&ldclp->lockrw);

	/*
	 * NOTE: for now, we will assume we have a single channel.
	 */
	if (ldclp->head == NULL) {
		RW_EXIT(&ldclp->lockrw);
		return;
	}
	ldcp = ldclp->head;

	mutex_enter(&ldcp->ldc_cblock);

	/*
	 * If the peer is vlan_unaware(ver < 1.3), reset channel and terminate
	 * the connection. See comments in vsw_set_vnet_proto_ops().
	 */
	if (ldcp->hphase == VSW_MILESTONE4 && VSW_VER_LT(ldcp, 1, 3) &&
	    portp->nvids != 0) {
		vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
	}

	mutex_exit(&ldcp->ldc_cblock);

	RW_EXIT(&ldclp->lockrw);
}

void
vsw_hio_port_reset(vsw_port_t *portp, boolean_t immediate)
{
	vsw_ldc_list_t	*ldclp;
	vsw_ldc_t	*ldcp;

	ldclp = &portp->p_ldclist;

	READ_ENTER(&ldclp->lockrw);

	/*
	 * NOTE: for now, we will assume we have a single channel.
	 */
	if (ldclp->head == NULL) {
		RW_EXIT(&ldclp->lockrw);
		return;
	}
	ldcp = ldclp->head;

	mutex_enter(&ldcp->ldc_cblock);

	/*
	 * If the peer is HybridIO capable (ver >= 1.3), reset the channel
	 * to trigger re-negotiation, which in turn triggers HybridIO
	 * setup/cleanup.
	 */
	if ((ldcp->hphase == VSW_MILESTONE4) &&
	    (portp->p_hio_capable == B_TRUE)) {
		if (immediate == B_TRUE) {
			(void) ldc_down(ldcp->ldc_handle);
		} else {
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
		}
	}

	mutex_exit(&ldcp->ldc_cblock);

	RW_EXIT(&ldclp->lockrw);
}
void
vsw_port_reset(vsw_port_t *portp)
{
	vsw_ldc_list_t	*ldclp;
	vsw_ldc_t	*ldcp;

	ldclp = &portp->p_ldclist;

	READ_ENTER(&ldclp->lockrw);

	/*
	 * NOTE: for now, we will assume we have a single channel.
	 */
	if (ldclp->head == NULL) {
		RW_EXIT(&ldclp->lockrw);
		return;
	}
	ldcp = ldclp->head;

	mutex_enter(&ldcp->ldc_cblock);

	/*
	 * reset channel and terminate the connection.
	 */
	vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);

	mutex_exit(&ldcp->ldc_cblock);

	RW_EXIT(&ldclp->lockrw);
}

void
vsw_reset_ports(vsw_t *vswp)
{
	vsw_port_list_t	*plist = &vswp->plist;
	vsw_port_t	*portp;

	READ_ENTER(&plist->lockrw);
	for (portp = plist->head; portp != NULL; portp = portp->p_next) {
		if ((portp->p_hio_capable) && (portp->p_hio_enabled)) {
			vsw_hio_stop_port(portp);
		}
		vsw_port_reset(portp);
	}
	RW_EXIT(&plist->lockrw);
}


/*
 * Search for and remove the specified port from the port
 * list. Returns 0 if able to locate and remove port, otherwise
 * returns 1.
 */
static int
vsw_plist_del_node(vsw_t *vswp, vsw_port_t *port)
{
	vsw_port_list_t	*plist = &vswp->plist;
	vsw_port_t	*curr_p, *prev_p;

	if (plist->head == NULL)
		return (1);

	curr_p = prev_p = plist->head;

	while (curr_p != NULL) {
		if (curr_p == port) {
			if (prev_p == curr_p) {
				plist->head = curr_p->p_next;
			} else {
				prev_p->p_next = curr_p->p_next;
			}
			plist->num_ports--;
			break;
		} else {
			prev_p = curr_p;
			curr_p = curr_p->p_next;
		}
	}
	return (0);
}
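
/*
 * Note: vsw_plist_del_node() returns 1 only when the list is empty; a
 * port that is simply not on a non-empty list still yields 0. Callers
 * first locate the port with vsw_lookup_port() while holding the list's
 * writer lock, so in practice the port is always present.
 */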
/*
 * Interrupt handler for ldc messages.
 */
static uint_t
vsw_ldc_cb(uint64_t event, caddr_t arg)
{
	vsw_ldc_t	*ldcp = (vsw_ldc_t *)arg;
	vsw_t		*vswp = ldcp->ldc_vswp;

	D1(vswp, "%s: enter: ldcid (%lld)\n", __func__, ldcp->ldc_id);

	mutex_enter(&ldcp->ldc_cblock);
	ldcp->ldc_stats.callbacks++;

	mutex_enter(&ldcp->status_lock);
	if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) {
		mutex_exit(&ldcp->status_lock);
		mutex_exit(&ldcp->ldc_cblock);
		return (LDC_SUCCESS);
	}
	mutex_exit(&ldcp->status_lock);

	if (event & LDC_EVT_UP) {
		/*
		 * Channel has come up.
		 */
		D2(vswp, "%s: id(%ld) event(%llx) UP: status(%ld)",
		    __func__, ldcp->ldc_id, event, ldcp->ldc_status);

		vsw_process_conn_evt(ldcp, VSW_CONN_UP);

		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
	}

	if (event & LDC_EVT_READ) {
		/*
		 * Data available for reading.
		 */
		D2(vswp, "%s: id(%ld) event(%llx) data READ",
		    __func__, ldcp->ldc_id, event);

		if (ldcp->rx_thread != NULL) {
			/*
			 * If the receive thread is enabled, then
			 * wakeup the receive thread to process the
			 * LDC messages.
			 */
			mutex_exit(&ldcp->ldc_cblock);
			mutex_enter(&ldcp->rx_thr_lock);
			if (!(ldcp->rx_thr_flags & VSW_WTHR_DATARCVD)) {
				ldcp->rx_thr_flags |= VSW_WTHR_DATARCVD;
				cv_signal(&ldcp->rx_thr_cv);
			}
			mutex_exit(&ldcp->rx_thr_lock);
			mutex_enter(&ldcp->ldc_cblock);
		} else {
			vsw_process_pkt(ldcp);
		}

		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);

		goto vsw_cb_exit;
	}

	if (event & (LDC_EVT_DOWN | LDC_EVT_RESET)) {
		D2(vswp, "%s: id(%ld) event (%lx) DOWN/RESET: status(%ld)",
		    __func__, ldcp->ldc_id, event, ldcp->ldc_status);

		vsw_process_conn_evt(ldcp, VSW_CONN_RESET);
	}

	/*
	 * Catch either LDC_EVT_WRITE which we don't support or any
	 * unknown event.
	 */
	if (event &
	    ~(LDC_EVT_UP | LDC_EVT_RESET | LDC_EVT_DOWN | LDC_EVT_READ)) {
		DERR(vswp, "%s: id(%ld) Unexpected event=(%llx) status(%ld)",
		    __func__, ldcp->ldc_id, event, ldcp->ldc_status);
	}

vsw_cb_exit:
	mutex_exit(&ldcp->ldc_cblock);

	/*
	 * Let the drain function know we are finishing if it
	 * is waiting.
	 */
	mutex_enter(&ldcp->drain_cv_lock);
	if (ldcp->drain_state == VSW_LDC_DRAINING)
		cv_signal(&ldcp->drain_cv);
	mutex_exit(&ldcp->drain_cv_lock);

	return (LDC_SUCCESS);
}
/*
 * Reinitialise data structures associated with the channel.
 */
static void
vsw_ldc_reinit(vsw_ldc_t *ldcp)
{
	vsw_t		*vswp = ldcp->ldc_vswp;
	vsw_port_t	*port;
	vsw_ldc_list_t	*ldcl;

	D1(vswp, "%s: enter", __func__);

	/* free receive mblk pools for the channel */
	vio_destroy_multipools(&ldcp->vmp, &vswp->rxh);

	port = ldcp->ldc_port;
	ldcl = &port->p_ldclist;

	READ_ENTER(&ldcl->lockrw);

	D2(vswp, "%s: in 0x%llx : out 0x%llx", __func__,
	    ldcp->lane_in.lstate, ldcp->lane_out.lstate);

	vsw_free_lane_resources(ldcp, INBOUND);
	vsw_free_lane_resources(ldcp, OUTBOUND);
	RW_EXIT(&ldcl->lockrw);

	ldcp->lane_in.lstate = 0;
	ldcp->lane_out.lstate = 0;

	/* Remove the fdb entry for this port/mac address */
	vsw_fdbe_del(vswp, &(port->p_macaddr));

	/* remove the port from vlans it has been assigned to */
	vsw_vlan_remove_ids(port, VSW_VNETPORT);

	/*
	 * Remove parent port from any multicast groups
	 * it may have registered with. Client must resend
	 * multicast add command after handshake completes.
	 */
	vsw_del_mcst_port(port);

	ldcp->peer_session = 0;
	ldcp->session_status = 0;
	ldcp->hcnt = 0;
	ldcp->hphase = VSW_MILESTONE0;

	vsw_reset_vnet_proto_ops(ldcp);

	D1(vswp, "%s: exit", __func__);
}

/*
 * Process a connection event.
 *
 * Note - care must be taken to ensure that this function is
 * not called with the dlistrw lock held.
 */
static void
vsw_process_conn_evt(vsw_ldc_t *ldcp, uint16_t evt)
{
	vsw_t		*vswp = ldcp->ldc_vswp;
	vsw_conn_evt_t	*conn = NULL;

	D1(vswp, "%s: enter", __func__);

	/*
	 * Check if either a reset or restart event is pending
	 * or in progress. If so just return.
	 *
	 * A VSW_CONN_RESET event originates either with a LDC_RESET_EVT
	 * being received by the callback handler, or a ECONNRESET error
	 * code being returned from a ldc_read() or ldc_write() call.
	 *
	 * A VSW_CONN_RESTART event occurs when some error checking code
	 * decides that there is a problem with data from the channel,
	 * and that the handshake should be restarted.
	 */
	if (((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART)) &&
	    (ldstub((uint8_t *)&ldcp->reset_active)))
		return;

	/*
	 * If it is an LDC_UP event we first check the recorded
	 * state of the channel. If this is UP then we know that
	 * the channel moving to the UP state has already been dealt
	 * with and don't need to dispatch a new task.
	 *
	 * The reason for this check is that when we do a ldc_up(),
	 * depending on the state of the peer, we may or may not get
	 * a LDC_UP event. As we can't depend on getting a LDC_UP evt
	 * every time we do ldc_up() we explicitly check the channel
	 * status to see has it come up (ldc_up() is asynch and will
	 * complete at some undefined time), and take the appropriate
	 * action.
	 *
	 * The flip side of this is that we may get a LDC_UP event
	 * when we have already seen that the channel is up and have
	 * dealt with that.
	 */
	mutex_enter(&ldcp->status_lock);
	if (evt == VSW_CONN_UP) {
		if ((ldcp->ldc_status == LDC_UP) ||
		    (ldcp->reset_active != 0)) {
			mutex_exit(&ldcp->status_lock);
			return;
		}
	}
	mutex_exit(&ldcp->status_lock);

	/*
	 * The transaction group id allows us to identify and discard
	 * any tasks which are still pending on the taskq and refer
	 * to the handshake session we are about to restart or reset.
	 * These stale messages no longer have any real meaning.
	 */
	(void) atomic_inc_32(&ldcp->hss_id);

	ASSERT(vswp->taskq_p != NULL);

	if ((conn = kmem_zalloc(sizeof (vsw_conn_evt_t), KM_NOSLEEP)) == NULL) {
		cmn_err(CE_WARN, "!vsw%d: unable to allocate memory for"
		    " connection event", vswp->instance);
		goto err_exit;
	}

	conn->evt = evt;
	conn->ldcp = ldcp;

	if (ddi_taskq_dispatch(vswp->taskq_p, vsw_conn_task, conn,
	    DDI_NOSLEEP) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "!vsw%d: Can't dispatch connection task",
		    vswp->instance);

		kmem_free(conn, sizeof (vsw_conn_evt_t));
		goto err_exit;
	}

	D1(vswp, "%s: exit", __func__);
	return;

err_exit:
	/*
	 * We have most likely failed due to a memory shortage. Clear the
	 * flag so that future requests will at least be attempted and will
	 * hopefully succeed.
	 */
	if ((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART))
		ldcp->reset_active = 0;
}
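
/*
 * The ldstub() in vsw_process_conn_evt() above acts as an atomic
 * test-and-set on reset_active: only the first RESET/RESTART event
 * dispatches vsw_conn_task(), and the flag is cleared once the task
 * completes (or immediately, if the dispatch itself fails).
 */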
/*
 * Deal with events relating to a connection. Invoked from a taskq.
 */
static void
vsw_conn_task(void *arg)
{
	vsw_conn_evt_t	*conn = (vsw_conn_evt_t *)arg;
	vsw_ldc_t	*ldcp = NULL;
	vsw_port_t	*portp;
	vsw_t		*vswp = NULL;
	uint16_t	evt;
	ldc_status_t	curr_status;

	ldcp = conn->ldcp;
	evt = conn->evt;
	vswp = ldcp->ldc_vswp;
	portp = ldcp->ldc_port;

	D1(vswp, "%s: enter", __func__);

	/* can safely free now have copied out data */
	kmem_free(conn, sizeof (vsw_conn_evt_t));

	mutex_enter(&ldcp->status_lock);
	if (ldc_status(ldcp->ldc_handle, &curr_status) != 0) {
		cmn_err(CE_WARN, "!vsw%d: Unable to read status of "
		    "channel %ld", vswp->instance, ldcp->ldc_id);
		mutex_exit(&ldcp->status_lock);
		return;
	}

	/*
	 * If we wish to restart the handshake on this channel, then if
	 * the channel is UP we bring it DOWN to flush the underlying
	 * ldc queue.
	 */
	if ((evt == VSW_CONN_RESTART) && (curr_status == LDC_UP))
		(void) ldc_down(ldcp->ldc_handle);

	if ((portp->p_hio_capable) && (portp->p_hio_enabled)) {
		vsw_hio_stop(vswp, ldcp);
	}

	/*
	 * re-init all the associated data structures.
	 */
	vsw_ldc_reinit(ldcp);

	/*
	 * Bring the channel back up (note it does no harm to
	 * do this even if the channel is already UP, Just
	 * becomes effectively a no-op).
	 */
	(void) ldc_up(ldcp->ldc_handle);

	/*
	 * Check if channel is now UP. This will only happen if
	 * peer has also done a ldc_up().
	 */
	if (ldc_status(ldcp->ldc_handle, &curr_status) != 0) {
		cmn_err(CE_WARN, "!vsw%d: Unable to read status of "
		    "channel %ld", vswp->instance, ldcp->ldc_id);
		mutex_exit(&ldcp->status_lock);
		return;
	}

	ldcp->ldc_status = curr_status;

	/* channel UP so restart handshake by sending version info */
	if (curr_status == LDC_UP) {
		if (ldcp->hcnt++ > vsw_num_handshakes) {
			cmn_err(CE_WARN, "!vsw%d: exceeded number of permitted"
			    " handshake attempts (%d) on channel %ld",
			    vswp->instance, ldcp->hcnt, ldcp->ldc_id);
			mutex_exit(&ldcp->status_lock);
			return;
		}

		if (vsw_obp_ver_proto_workaround == B_FALSE &&
		    (ddi_taskq_dispatch(vswp->taskq_p, vsw_send_ver, ldcp,
		    DDI_NOSLEEP) != DDI_SUCCESS)) {
			cmn_err(CE_WARN, "!vsw%d: Can't dispatch version task",
			    vswp->instance);

			/*
			 * Don't count as valid restart attempt if couldn't
			 * send version msg.
			 */
			if (ldcp->hcnt > 0)
				ldcp->hcnt--;
		}
	}

	/*
	 * Mark that the process is complete by clearing the flag.
	 *
	 * Note it is possible that the taskq dispatch above may have failed,
	 * most likely due to memory shortage. We still clear the flag so
	 * future attempts will at least be attempted and will hopefully
	 * succeed.
	 */
	if ((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART))
		ldcp->reset_active = 0;

	mutex_exit(&ldcp->status_lock);

	D1(vswp, "%s: exit", __func__);
}
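
/*
 * Note: vsw_conn_task() holds status_lock from the initial status check
 * through to clearing reset_active, so connection events on a channel
 * are processed one at a time and vsw_ldc_cb() sees a consistent
 * ldc_status.
 */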
/*
 * Returns 0 if it is legal for the event signified by flag to have
 * occurred at the time it did. Otherwise returns 1.
 */
int
vsw_check_flag(vsw_ldc_t *ldcp, int dir, uint64_t flag)
{
	vsw_t		*vswp = ldcp->ldc_vswp;
	uint64_t	state;
	uint64_t	phase;

	if (dir == INBOUND)
		state = ldcp->lane_in.lstate;
	else
		state = ldcp->lane_out.lstate;

	phase = ldcp->hphase;

	switch (flag) {
	case VSW_VER_INFO_RECV:
		if (phase > VSW_MILESTONE0) {
			DERR(vswp, "vsw_check_flag (%d): VER_INFO_RECV"
			    " when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		}
		break;

	case VSW_VER_ACK_RECV:
	case VSW_VER_NACK_RECV:
		if (!(state & VSW_VER_INFO_SENT)) {
			DERR(vswp, "vsw_check_flag (%d): spurious VER_ACK or "
			    "VER_NACK when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		} else
			state &= ~VSW_VER_INFO_SENT;
		break;

	case VSW_ATTR_INFO_RECV:
		if ((phase < VSW_MILESTONE1) || (phase >= VSW_MILESTONE2)) {
			DERR(vswp, "vsw_check_flag (%d): ATTR_INFO_RECV"
			    " when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		}
		break;

	case VSW_ATTR_ACK_RECV:
	case VSW_ATTR_NACK_RECV:
		if (!(state & VSW_ATTR_INFO_SENT)) {
			DERR(vswp, "vsw_check_flag (%d): spurious ATTR_ACK"
			    " or ATTR_NACK when in state %d\n",
			    ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		} else
			state &= ~VSW_ATTR_INFO_SENT;
		break;

	case VSW_DRING_INFO_RECV:
		if (phase < VSW_MILESTONE1) {
			DERR(vswp, "vsw_check_flag (%d): DRING_INFO_RECV"
			    " when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		}
		break;

	case VSW_DRING_ACK_RECV:
	case VSW_DRING_NACK_RECV:
		if (!(state & VSW_DRING_INFO_SENT)) {
			DERR(vswp, "vsw_check_flag (%d): spurious DRING_ACK "
			    " or DRING_NACK when in state %d\n",
			    ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		} else
			state &= ~VSW_DRING_INFO_SENT;
		break;

	case VSW_RDX_INFO_RECV:
		if (phase < VSW_MILESTONE3) {
			DERR(vswp, "vsw_check_flag (%d): RDX_INFO_RECV"
			    " when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		}
		break;

	case VSW_RDX_ACK_RECV:
	case VSW_RDX_NACK_RECV:
		if (!(state & VSW_RDX_INFO_SENT)) {
			DERR(vswp, "vsw_check_flag (%d): spurious RDX_ACK or "
			    "RDX_NACK when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		} else
			state &= ~VSW_RDX_INFO_SENT;
		break;

	case VSW_MCST_INFO_RECV:
		if (phase < VSW_MILESTONE3) {
			DERR(vswp, "vsw_check_flag (%d): VSW_MCST_INFO_RECV"
			    " when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		}
		break;

	default:
		DERR(vswp, "vsw_check_flag (%lld): unknown flag (%llx)",
		    ldcp->ldc_id, flag);
		return (1);
	}

	if (dir == INBOUND)
		ldcp->lane_in.lstate = state;
	else
		ldcp->lane_out.lstate = state;

	D1(vswp, "vsw_check_flag (chan %lld): exit", ldcp->ldc_id);

	return (0);
}
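
/*
 * Handshake milestones used below (summary derived from the code):
 *
 *	VSW_MILESTONE0	version negotiation (VER_INFO/VER_ACK)
 *	VSW_MILESTONE1	attribute exchange (ATTR_INFO/ATTR_ACK)
 *	VSW_MILESTONE2	dring registration, if the peer uses descriptor
 *			rings (DRING_INFO/DRING_ACK)
 *	VSW_MILESTONE3	RDX exchanged in both directions
 *	VSW_MILESTONE4	handshake complete; outbound lane marked
 *			VSW_LANE_ACTIVE and data may flow
 */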
1947		 */
1948		if (ldcp->lane_in.lstate & VSW_ATTR_ACK_SENT) {
1949
1950			ldcp->hphase = VSW_MILESTONE2;
1951
1952			/*
1953			 * If the peer device has said it wishes to
1954			 * use descriptor rings then we send it our ring
1955			 * info; otherwise we just set up a private ring
1956			 * which uses an internal buffer.
1957			 */
1958			if ((VSW_VER_GTEQ(ldcp, 1, 2) &&
1959			    (ldcp->lane_in.xfer_mode & VIO_DRING_MODE_V1_2)) ||
1960			    (VSW_VER_LT(ldcp, 1, 2) &&
1961			    (ldcp->lane_in.xfer_mode ==
1962			    VIO_DRING_MODE_V1_0))) {
1963				vsw_send_dring_info(ldcp);
1964			}
1965		}
1966		break;
1967
1968	case VSW_MILESTONE2:
1969		/*
1970		 * If peer has indicated in its attribute message that
1971		 * it wishes to use descriptor rings then the only way
1972		 * to pass this milestone is for us to have received
1973		 * valid dring info.
1974		 *
1975		 * If peer is not using descriptor rings then just fall
1976		 * through.
1977		 */
1978		if ((VSW_VER_GTEQ(ldcp, 1, 2) &&
1979		    (ldcp->lane_in.xfer_mode & VIO_DRING_MODE_V1_2)) ||
1980		    (VSW_VER_LT(ldcp, 1, 2) &&
1981		    (ldcp->lane_in.xfer_mode ==
1982		    VIO_DRING_MODE_V1_0))) {
1983			if (!(ldcp->lane_in.lstate & VSW_DRING_ACK_SENT))
1984				break;
1985		}
1986
1987		D2(vswp, "%s: (chan %lld) leaving milestone 2",
1988		    __func__, ldcp->ldc_id);
1989
1990		ldcp->hphase = VSW_MILESTONE3;
1991		vsw_send_rdx(ldcp);
1992		break;
1993
1994	case VSW_MILESTONE3:
1995		/*
1996		 * Pass this milestone when all parameters have been
1997		 * successfully exchanged and RDX sent in both directions.
1998		 *
1999		 * Mark outbound lane as available to transmit data.
2000		 */
2001		if ((ldcp->lane_out.lstate & VSW_RDX_ACK_SENT) &&
2002		    (ldcp->lane_in.lstate & VSW_RDX_ACK_RECV)) {
2003
2004			D2(vswp, "%s: (chan %lld) leaving milestone 3",
2005			    __func__, ldcp->ldc_id);
2006			D2(vswp, "%s: ** handshake complete (0x%llx : "
2007			    "0x%llx) **", __func__, ldcp->lane_in.lstate,
2008			    ldcp->lane_out.lstate);
2009			ldcp->lane_out.lstate |= VSW_LANE_ACTIVE;
2010			ldcp->hphase = VSW_MILESTONE4;
2011			ldcp->hcnt = 0;
2012			DISPLAY_STATE();
2013			/* Start HIO if enabled and capable */
2014			if ((portp->p_hio_enabled) && (portp->p_hio_capable)) {
2015				D2(vswp, "%s: start HybridIO setup", __func__);
2016				vsw_hio_start(vswp, ldcp);
2017			}
2018		} else {
2019			D2(vswp, "%s: still in milestone 3 (0x%llx : 0x%llx)",
2020			    __func__, ldcp->lane_in.lstate,
2021			    ldcp->lane_out.lstate);
2022		}
2023		break;
2024
2025	case VSW_MILESTONE4:
2026		D2(vswp, "%s: (chan %lld) in milestone 4", __func__,
2027		    ldcp->ldc_id);
2028		break;
2029
2030	default:
2031		DERR(vswp, "%s: (chan %lld) Unknown Phase %x", __func__,
2032		    ldcp->ldc_id, ldcp->hphase);
2033	}
2034
2035	D1(vswp, "%s (chan %lld): exit (phase %ld)", __func__, ldcp->ldc_id,
2036	    ldcp->hphase);
2037 }
2038
2039 /*
2040  * Check if major version is supported.
2041  *
2042  * Returns 0 if it finds a supported major number, and if necessary
2043  * adjusts the minor field.
2044  *
2045  * Returns 1 if it can't match the major number exactly. Sets major/minor
2046  * to the next lowest supported values, or to zero if no others are possible.
2047  */
2048 static int
2049 vsw_supported_version(vio_ver_msg_t *vp)
2050 {
2051	int i;
2052
2053	D1(NULL, "vsw_supported_version: enter");
2054
2055	for (i = 0; i < VSW_NUM_VER; i++) {
2056		if (vsw_versions[i].ver_major == vp->ver_major) {
2057			/*
2058			 * Matching or lower major version found. Update
2059			 * minor number if necessary.
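			 *
			 * Worked example for the function as a whole
			 * (assuming, purely for illustration, that
			 * vsw_versions[] contains just { 1, 4 }): a peer
			 * proposing 1.5 matches major 1 and has its minor
			 * clamped to 4 (return 0); a peer proposing 2.0 is
			 * rewritten to 1.4 (return 1) so that it can resend;
			 * a peer proposing 0.9 matches nothing lower and is
			 * zeroed out (return 1).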
2060 */ 2061 if (vp->ver_minor > vsw_versions[i].ver_minor) { 2062 D2(NULL, "%s: adjusting minor value from %d " 2063 "to %d", __func__, vp->ver_minor, 2064 vsw_versions[i].ver_minor); 2065 vp->ver_minor = vsw_versions[i].ver_minor; 2066 } 2067 2068 return (0); 2069 } 2070 2071 /* 2072 * If the message contains a higher major version number, set 2073 * the message's major/minor versions to the current values 2074 * and return false, so this message will get resent with 2075 * these values. 2076 */ 2077 if (vsw_versions[i].ver_major < vp->ver_major) { 2078 D2(NULL, "%s: adjusting major and minor " 2079 "values to %d, %d\n", 2080 __func__, vsw_versions[i].ver_major, 2081 vsw_versions[i].ver_minor); 2082 vp->ver_major = vsw_versions[i].ver_major; 2083 vp->ver_minor = vsw_versions[i].ver_minor; 2084 return (1); 2085 } 2086 } 2087 2088 /* No match was possible, zero out fields */ 2089 vp->ver_major = 0; 2090 vp->ver_minor = 0; 2091 2092 D1(NULL, "vsw_supported_version: exit"); 2093 2094 return (1); 2095 } 2096 2097 /* 2098 * Set vnet-protocol-version dependent functions based on version. 2099 */ 2100 static void 2101 vsw_set_vnet_proto_ops(vsw_ldc_t *ldcp) 2102 { 2103 vsw_t *vswp = ldcp->ldc_vswp; 2104 lane_t *lp = &ldcp->lane_out; 2105 2106 if (VSW_VER_GTEQ(ldcp, 1, 4)) { 2107 /* 2108 * If the version negotiated with peer is >= 1.4(Jumbo Frame 2109 * Support), set the mtu in our attributes to max_frame_size. 2110 */ 2111 lp->mtu = vswp->max_frame_size; 2112 } else if (VSW_VER_EQ(ldcp, 1, 3)) { 2113 /* 2114 * If the version negotiated with peer is == 1.3 (Vlan Tag 2115 * Support) set the attr.mtu to ETHERMAX + VLAN_TAGSZ. 2116 */ 2117 lp->mtu = ETHERMAX + VLAN_TAGSZ; 2118 } else { 2119 vsw_port_t *portp = ldcp->ldc_port; 2120 /* 2121 * Pre-1.3 peers expect max frame size of ETHERMAX. 2122 * We can negotiate that size with those peers provided only 2123 * pvid is defined for our peer and there are no vids. Then we 2124 * can send/recv only untagged frames of max size ETHERMAX. 2125 * Note that pvid of the peer can be different, as vsw has to 2126 * serve the vnet in that vlan even if itself is not assigned 2127 * to that vlan. 2128 */ 2129 if (portp->nvids == 0) { 2130 lp->mtu = ETHERMAX; 2131 } 2132 } 2133 2134 if (VSW_VER_GTEQ(ldcp, 1, 2)) { 2135 /* Versions >= 1.2 */ 2136 2137 if (VSW_PRI_ETH_DEFINED(vswp)) { 2138 /* 2139 * enable priority routines and pkt mode only if 2140 * at least one pri-eth-type is specified in MD. 2141 */ 2142 ldcp->tx = vsw_ldctx_pri; 2143 ldcp->rx_pktdata = vsw_process_pkt_data; 2144 2145 /* set xfer mode for vsw_send_attr() */ 2146 lp->xfer_mode = VIO_PKT_MODE | VIO_DRING_MODE_V1_2; 2147 } else { 2148 /* no priority eth types defined in MD */ 2149 2150 ldcp->tx = vsw_ldctx; 2151 ldcp->rx_pktdata = vsw_process_pkt_data_nop; 2152 2153 /* set xfer mode for vsw_send_attr() */ 2154 lp->xfer_mode = VIO_DRING_MODE_V1_2; 2155 } 2156 2157 } else { 2158 /* Versions prior to 1.2 */ 2159 2160 vsw_reset_vnet_proto_ops(ldcp); 2161 } 2162 } 2163 2164 /* 2165 * Reset vnet-protocol-version dependent functions to v1.0. 2166 */ 2167 static void 2168 vsw_reset_vnet_proto_ops(vsw_ldc_t *ldcp) 2169 { 2170 lane_t *lp = &ldcp->lane_out; 2171 2172 ldcp->tx = vsw_ldctx; 2173 ldcp->rx_pktdata = vsw_process_pkt_data_nop; 2174 2175 /* set xfer mode for vsw_send_attr() */ 2176 lp->xfer_mode = VIO_DRING_MODE_V1_0; 2177 } 2178 2179 /* 2180 * Main routine for processing messages received over LDC. 
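 *
 * This runs in LDC callback context with ldc_cblock held (asserted below):
 * the channel is drained with ldc_read() and each message is switched on
 * its tag, with control messages bounced to the taskq via
 * vsw_dispatch_ctrl_task() and data messages processed inline by
 * vsw_process_data_pkt().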
2181  */
2182 static void
2183 vsw_process_pkt(void *arg)
2184 {
2185	vsw_ldc_t	*ldcp = (vsw_ldc_t *)arg;
2186	vsw_t		*vswp = ldcp->ldc_vswp;
2187	size_t		msglen;
2188	vio_msg_tag_t	*tagp;
2189	uint64_t	*ldcmsg;
2190	int		rv = 0;
2191
2192
2193	D1(vswp, "%s enter: ldcid (%lld)\n", __func__, ldcp->ldc_id);
2194
2195	ASSERT(MUTEX_HELD(&ldcp->ldc_cblock));
2196
2197	ldcmsg = ldcp->ldcmsg;
2198	/*
2199	 * If the channel is up, read messages until the channel is empty.
2200	 */
2201	do {
2202		msglen = ldcp->msglen;
2203		rv = ldc_read(ldcp->ldc_handle, (caddr_t)ldcmsg, &msglen);
2204
2205		if (rv != 0) {
2206			DERR(vswp, "%s :ldc_read err id(%lld) rv(%d) len(%d)\n",
2207			    __func__, ldcp->ldc_id, rv, msglen);
2208		}
2209
2210		/* channel has been reset */
2211		if (rv == ECONNRESET) {
2212			vsw_process_conn_evt(ldcp, VSW_CONN_RESET);
2213			break;
2214		}
2215
2216		if (msglen == 0) {
2217			D2(vswp, "%s: ldc_read id(%lld) NODATA", __func__,
2218			    ldcp->ldc_id);
2219			break;
2220		}
2221
2222		D2(vswp, "%s: ldc_read id(%lld): msglen(%d)", __func__,
2223		    ldcp->ldc_id, msglen);
2224
2225		/*
2226		 * Figure out what sort of packet we have gotten by
2227		 * examining the msg tag, and then switch it appropriately.
2228		 */
2229		tagp = (vio_msg_tag_t *)ldcmsg;
2230
2231		switch (tagp->vio_msgtype) {
2232		case VIO_TYPE_CTRL:
2233			vsw_dispatch_ctrl_task(ldcp, ldcmsg, tagp);
2234			break;
2235		case VIO_TYPE_DATA:
2236			vsw_process_data_pkt(ldcp, ldcmsg, tagp, msglen);
2237			break;
2238		case VIO_TYPE_ERR:
2239			vsw_process_err_pkt(ldcp, ldcmsg, tagp);
2240			break;
2241		default:
2242			DERR(vswp, "%s: Unknown tag(%lx) id(%lx)\n",
2243			    __func__, tagp->vio_msgtype, ldcp->ldc_id);
2244			break;
2245		}
2246	} while (msglen);
2247
2248	D1(vswp, "%s exit: ldcid (%lld)\n", __func__, ldcp->ldc_id);
2249 }
2250
2251 /*
2252  * Dispatch a task to process a VIO control message.
2253  */
2254 static void
2255 vsw_dispatch_ctrl_task(vsw_ldc_t *ldcp, void *cpkt, vio_msg_tag_t *tagp)
2256 {
2257	vsw_ctrl_task_t	*ctaskp = NULL;
2258	vsw_port_t	*port = ldcp->ldc_port;
2259	vsw_t		*vswp = port->p_vswp;
2260
2261	D1(vswp, "%s: enter", __func__);
2262
2263	/*
2264	 * We need to handle RDX ACK messages in-band as once they
2265	 * are exchanged it is possible that we will get an
2266	 * immediate (legitimate) data packet.
2267	 */
2268	if ((tagp->vio_subtype_env == VIO_RDX) &&
2269	    (tagp->vio_subtype == VIO_SUBTYPE_ACK)) {
2270
2271		if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_ACK_RECV))
2272			return;
2273
2274		ldcp->lane_in.lstate |= VSW_RDX_ACK_RECV;
2275		D2(vswp, "%s (%ld) handling RDX_ACK in place "
2276		    "(ostate 0x%llx : hphase %d)", __func__,
2277		    ldcp->ldc_id, ldcp->lane_in.lstate, ldcp->hphase);
2278		vsw_next_milestone(ldcp);
2279		return;
2280	}
2281
2282	ctaskp = kmem_alloc(sizeof (vsw_ctrl_task_t), KM_NOSLEEP);
2283
2284	if (ctaskp == NULL) {
2285		DERR(vswp, "%s: unable to alloc space for ctrl msg", __func__);
2286		vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
2287		return;
2288	}
2289
2290	ctaskp->ldcp = ldcp;
2291	bcopy((def_msg_t *)cpkt, &ctaskp->pktp, sizeof (def_msg_t));
2292	ctaskp->hss_id = ldcp->hss_id;
2293
2294	/*
2295	 * Dispatch task to processing taskq if port is not in
2296	 * the process of being detached.
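	 *
	 * If the dispatch fails (the KM_NOSLEEP allocation above, or the
	 * taskq dispatch below), the message is dropped and the connection
	 * restarted rather than blocking in callback context; if the port
	 * is detaching, the message is simply dropped.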
2297	 */
2298	mutex_enter(&port->state_lock);
2299	if (port->state == VSW_PORT_INIT) {
2300		if ((vswp->taskq_p == NULL) ||
2301		    (ddi_taskq_dispatch(vswp->taskq_p, vsw_process_ctrl_pkt,
2302		    ctaskp, DDI_NOSLEEP) != DDI_SUCCESS)) {
2303			mutex_exit(&port->state_lock);
2304			DERR(vswp, "%s: unable to dispatch task to taskq",
2305			    __func__);
2306			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
2307			kmem_free(ctaskp, sizeof (vsw_ctrl_task_t));
2308			return;
2309		}
2310	} else {
2311		kmem_free(ctaskp, sizeof (vsw_ctrl_task_t));
2312		DWARN(vswp, "%s: port %d detaching, not dispatching "
2313		    "task", __func__, port->p_instance);
2314	}
2315
2316	mutex_exit(&port->state_lock);
2317
2318	D2(vswp, "%s: dispatched task to taskq for chan %d", __func__,
2319	    ldcp->ldc_id);
2320	D1(vswp, "%s: exit", __func__);
2321 }
2322
2323 /*
2324  * Process a VIO ctrl message. Invoked from taskq.
2325  */
2326 static void
2327 vsw_process_ctrl_pkt(void *arg)
2328 {
2329	vsw_ctrl_task_t	*ctaskp = (vsw_ctrl_task_t *)arg;
2330	vsw_ldc_t	*ldcp = ctaskp->ldcp;
2331	vsw_t		*vswp = ldcp->ldc_vswp;
2332	vio_msg_tag_t	tag;
2333	uint16_t	env;
2334
2335	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
2336
2337	bcopy(&ctaskp->pktp, &tag, sizeof (vio_msg_tag_t));
2338	env = tag.vio_subtype_env;
2339
2340	/* stale pkt check */
2341	if (ctaskp->hss_id < ldcp->hss_id) {
2342		DWARN(vswp, "%s: discarding stale packet belonging to earlier"
2343		    " (%ld) handshake session", __func__, ctaskp->hss_id);
2344		kmem_free(ctaskp, sizeof (vsw_ctrl_task_t));
2345		return;
2346	}
2347
2348	/* session id check */
2349	if (ldcp->session_status & VSW_PEER_SESSION) {
2350		if (ldcp->peer_session != tag.vio_sid) {
2351			DERR(vswp, "%s (chan %d): invalid session id (%llx)",
2352			    __func__, ldcp->ldc_id, tag.vio_sid);
2353			kmem_free(ctaskp, sizeof (vsw_ctrl_task_t));
2354			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
2355			return;
2356		}
2357	}
2358
2359	/*
2360	 * Switch on vio_subtype envelope, then let lower routines
2361	 * decide if it's an INFO, ACK or NACK packet.
2362	 */
2363	switch (env) {
2364	case VIO_VER_INFO:
2365		vsw_process_ctrl_ver_pkt(ldcp, &ctaskp->pktp);
2366		break;
2367	case VIO_DRING_REG:
2368		vsw_process_ctrl_dring_reg_pkt(ldcp, &ctaskp->pktp);
2369		break;
2370	case VIO_DRING_UNREG:
2371		vsw_process_ctrl_dring_unreg_pkt(ldcp, &ctaskp->pktp);
2372		break;
2373	case VIO_ATTR_INFO:
2374		vsw_process_ctrl_attr_pkt(ldcp, &ctaskp->pktp);
2375		break;
2376	case VNET_MCAST_INFO:
2377		vsw_process_ctrl_mcst_pkt(ldcp, &ctaskp->pktp);
2378		break;
2379	case VIO_RDX:
2380		vsw_process_ctrl_rdx_pkt(ldcp, &ctaskp->pktp);
2381		break;
2382	case VIO_DDS_INFO:
2383		vsw_process_dds_msg(vswp, ldcp, &ctaskp->pktp);
2384		break;
2385	default:
2386		DERR(vswp, "%s: unknown vio_subtype_env (%x)\n", __func__, env);
2387	}
2388
2389	kmem_free(ctaskp, sizeof (vsw_ctrl_task_t));
2390	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
2391 }
2392
2393 /*
2394  * Version negotiation. We can end up here either because our peer
2395  * has responded to a handshake message we have sent it, or because our
2396  * peer has initiated a handshake with us. If it's the former then it can
2397  * only be an ACK or NACK; if it's the latter it can only be an INFO.
2398  *
2399  * If it's an ACK we move to the next stage of the handshake, namely
2400  * attribute exchange. If it's a NACK we see if we can specify another
2401  * version; if we can't, we stop.
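 *
 * A complete successful exchange therefore looks like this (each peer
 * drives the sequence independently for its own transmit lane):
 *
 *	VER_INFO(major, minor)	->
 *				<- VER_ACK
 *	ATTR_INFO		->
 *				<- ATTR_ACK
 *	DRING_REG		->
 *				<- DRING_ACK(dring_ident)
 *	RDX			->
 *				<- RDX_ACK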
2402 * 2403 * If it is an INFO we reset all params associated with communication 2404 * in that direction over this channel (remember connection is 2405 * essentially 2 independent simplex channels). 2406 */ 2407 void 2408 vsw_process_ctrl_ver_pkt(vsw_ldc_t *ldcp, void *pkt) 2409 { 2410 vio_ver_msg_t *ver_pkt; 2411 vsw_t *vswp = ldcp->ldc_vswp; 2412 2413 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 2414 2415 /* 2416 * We know this is a ctrl/version packet so 2417 * cast it into the correct structure. 2418 */ 2419 ver_pkt = (vio_ver_msg_t *)pkt; 2420 2421 switch (ver_pkt->tag.vio_subtype) { 2422 case VIO_SUBTYPE_INFO: 2423 D2(vswp, "vsw_process_ctrl_ver_pkt: VIO_SUBTYPE_INFO\n"); 2424 2425 /* 2426 * Record the session id, which we will use from now 2427 * until we see another VER_INFO msg. Even then the 2428 * session id in most cases will be unchanged, execpt 2429 * if channel was reset. 2430 */ 2431 if ((ldcp->session_status & VSW_PEER_SESSION) && 2432 (ldcp->peer_session != ver_pkt->tag.vio_sid)) { 2433 DERR(vswp, "%s: updating session id for chan %lld " 2434 "from %llx to %llx", __func__, ldcp->ldc_id, 2435 ldcp->peer_session, ver_pkt->tag.vio_sid); 2436 } 2437 2438 ldcp->peer_session = ver_pkt->tag.vio_sid; 2439 ldcp->session_status |= VSW_PEER_SESSION; 2440 2441 /* Legal message at this time ? */ 2442 if (vsw_check_flag(ldcp, INBOUND, VSW_VER_INFO_RECV)) 2443 return; 2444 2445 /* 2446 * First check the device class. Currently only expect 2447 * to be talking to a network device. In the future may 2448 * also talk to another switch. 2449 */ 2450 if (ver_pkt->dev_class != VDEV_NETWORK) { 2451 DERR(vswp, "%s: illegal device class %d", __func__, 2452 ver_pkt->dev_class); 2453 2454 ver_pkt->tag.vio_sid = ldcp->local_session; 2455 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2456 2457 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 2458 2459 (void) vsw_send_msg(ldcp, (void *)ver_pkt, 2460 sizeof (vio_ver_msg_t), B_TRUE); 2461 2462 ldcp->lane_in.lstate |= VSW_VER_NACK_SENT; 2463 vsw_next_milestone(ldcp); 2464 return; 2465 } else { 2466 ldcp->dev_class = ver_pkt->dev_class; 2467 } 2468 2469 /* 2470 * Now check the version. 2471 */ 2472 if (vsw_supported_version(ver_pkt) == 0) { 2473 /* 2474 * Support this major version and possibly 2475 * adjusted minor version. 2476 */ 2477 2478 D2(vswp, "%s: accepted ver %d:%d", __func__, 2479 ver_pkt->ver_major, ver_pkt->ver_minor); 2480 2481 /* Store accepted values */ 2482 ldcp->lane_in.ver_major = ver_pkt->ver_major; 2483 ldcp->lane_in.ver_minor = ver_pkt->ver_minor; 2484 2485 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 2486 2487 ldcp->lane_in.lstate |= VSW_VER_ACK_SENT; 2488 2489 if (vsw_obp_ver_proto_workaround == B_TRUE) { 2490 /* 2491 * Send a version info message 2492 * using the accepted version that 2493 * we are about to ack. Also note that 2494 * we send our ver info before we ack. 2495 * Otherwise, as soon as receiving the 2496 * ack, obp sends attr info msg, which 2497 * breaks vsw_check_flag() invoked 2498 * from vsw_process_ctrl_attr_pkt(); 2499 * as we also need VSW_VER_ACK_RECV to 2500 * be set in lane_out.lstate, before 2501 * we can receive attr info. 2502 */ 2503 vsw_send_ver(ldcp); 2504 } 2505 } else { 2506 /* 2507 * NACK back with the next lower major/minor 2508 * pairing we support (if don't suuport any more 2509 * versions then they will be set to zero. 
2510 */ 2511 2512 D2(vswp, "%s: replying with ver %d:%d", __func__, 2513 ver_pkt->ver_major, ver_pkt->ver_minor); 2514 2515 /* Store updated values */ 2516 ldcp->lane_in.ver_major = ver_pkt->ver_major; 2517 ldcp->lane_in.ver_minor = ver_pkt->ver_minor; 2518 2519 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2520 2521 ldcp->lane_in.lstate |= VSW_VER_NACK_SENT; 2522 } 2523 2524 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 2525 ver_pkt->tag.vio_sid = ldcp->local_session; 2526 (void) vsw_send_msg(ldcp, (void *)ver_pkt, 2527 sizeof (vio_ver_msg_t), B_TRUE); 2528 2529 vsw_next_milestone(ldcp); 2530 break; 2531 2532 case VIO_SUBTYPE_ACK: 2533 D2(vswp, "%s: VIO_SUBTYPE_ACK\n", __func__); 2534 2535 if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_ACK_RECV)) 2536 return; 2537 2538 /* Store updated values */ 2539 ldcp->lane_out.ver_major = ver_pkt->ver_major; 2540 ldcp->lane_out.ver_minor = ver_pkt->ver_minor; 2541 2542 ldcp->lane_out.lstate |= VSW_VER_ACK_RECV; 2543 vsw_next_milestone(ldcp); 2544 2545 break; 2546 2547 case VIO_SUBTYPE_NACK: 2548 D2(vswp, "%s: VIO_SUBTYPE_NACK\n", __func__); 2549 2550 if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_NACK_RECV)) 2551 return; 2552 2553 /* 2554 * If our peer sent us a NACK with the ver fields set to 2555 * zero then there is nothing more we can do. Otherwise see 2556 * if we support either the version suggested, or a lesser 2557 * one. 2558 */ 2559 if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) { 2560 DERR(vswp, "%s: peer unable to negotiate any " 2561 "further.", __func__); 2562 ldcp->lane_out.lstate |= VSW_VER_NACK_RECV; 2563 vsw_next_milestone(ldcp); 2564 return; 2565 } 2566 2567 /* 2568 * Check to see if we support this major version or 2569 * a lower one. If we don't then maj/min will be set 2570 * to zero. 2571 */ 2572 (void) vsw_supported_version(ver_pkt); 2573 if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) { 2574 /* Nothing more we can do */ 2575 DERR(vswp, "%s: version negotiation failed.\n", 2576 __func__); 2577 ldcp->lane_out.lstate |= VSW_VER_NACK_RECV; 2578 vsw_next_milestone(ldcp); 2579 } else { 2580 /* found a supported major version */ 2581 ldcp->lane_out.ver_major = ver_pkt->ver_major; 2582 ldcp->lane_out.ver_minor = ver_pkt->ver_minor; 2583 2584 D2(vswp, "%s: resending with updated values (%x, %x)", 2585 __func__, ver_pkt->ver_major, ver_pkt->ver_minor); 2586 2587 ldcp->lane_out.lstate |= VSW_VER_INFO_SENT; 2588 ver_pkt->tag.vio_sid = ldcp->local_session; 2589 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO; 2590 2591 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 2592 2593 (void) vsw_send_msg(ldcp, (void *)ver_pkt, 2594 sizeof (vio_ver_msg_t), B_TRUE); 2595 2596 vsw_next_milestone(ldcp); 2597 2598 } 2599 break; 2600 2601 default: 2602 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 2603 ver_pkt->tag.vio_subtype); 2604 } 2605 2606 D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id); 2607 } 2608 2609 /* 2610 * Process an attribute packet. We can end up here either because our peer 2611 * has ACK/NACK'ed back to an earlier ATTR msg we had sent it, or our 2612 * peer has sent us an attribute INFO message 2613 * 2614 * If its an ACK we then move to the next stage of the handshake which 2615 * is to send our descriptor ring info to our peer. If its a NACK then 2616 * there is nothing more we can (currently) do. 2617 * 2618 * If we get a valid/acceptable INFO packet (and we have already negotiated 2619 * a version) we ACK back and set channel state to ATTR_RECV, otherwise we 2620 * NACK back and reset channel state to INACTIV. 
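 *
 * The fields of interest in the peer's vnet_attr_msg_t, in brief (the
 * INFO case below stores them in lane_in):
 *
 *	mtu		maximum frame size (negotiated down from v1.4 on)
 *	addr/addr_type	the peer's unicast MAC address
 *	xfer_mode	VIO_DRING_MODE_* or VIO_DESC_MODE
 *	ack_freq	how often the peer wants data ACKs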
2621 * 2622 * FUTURE: in time we will probably negotiate over attributes, but for 2623 * the moment unacceptable attributes are regarded as a fatal error. 2624 * 2625 */ 2626 void 2627 vsw_process_ctrl_attr_pkt(vsw_ldc_t *ldcp, void *pkt) 2628 { 2629 vnet_attr_msg_t *attr_pkt; 2630 vsw_t *vswp = ldcp->ldc_vswp; 2631 vsw_port_t *port = ldcp->ldc_port; 2632 uint64_t macaddr = 0; 2633 lane_t *lane_out = &ldcp->lane_out; 2634 lane_t *lane_in = &ldcp->lane_in; 2635 uint32_t mtu; 2636 boolean_t ack = B_TRUE; 2637 int i; 2638 2639 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 2640 2641 /* 2642 * We know this is a ctrl/attr packet so 2643 * cast it into the correct structure. 2644 */ 2645 attr_pkt = (vnet_attr_msg_t *)pkt; 2646 2647 switch (attr_pkt->tag.vio_subtype) { 2648 case VIO_SUBTYPE_INFO: 2649 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 2650 2651 if (vsw_check_flag(ldcp, INBOUND, VSW_ATTR_INFO_RECV)) 2652 return; 2653 2654 /* 2655 * If the attributes are unacceptable then we NACK back. 2656 */ 2657 if (vsw_check_attr(attr_pkt, ldcp)) { 2658 ack = B_FALSE; 2659 2660 DERR(vswp, "%s (chan %d): invalid attributes", 2661 __func__, ldcp->ldc_id); 2662 2663 } else { 2664 2665 if (VSW_VER_GTEQ(ldcp, 1, 4)) { 2666 /* 2667 * Versions >= 1.4: 2668 * The mtu is negotiated down to the 2669 * minimum of our mtu and peer's mtu. 2670 */ 2671 mtu = MIN(attr_pkt->mtu, vswp->max_frame_size); 2672 2673 /* 2674 * If we have received an ack for the attr info 2675 * that we sent, then check if the mtu computed 2676 * above matches the mtu that the peer had ack'd 2677 * (saved in local hparams). If they don't 2678 * match, we fail the handshake. 2679 */ 2680 if (lane_out->lstate & VSW_ATTR_ACK_RECV) { 2681 if (mtu != lane_out->mtu) { 2682 /* send NACK */ 2683 ack = B_FALSE; 2684 } 2685 } else { 2686 /* 2687 * Save the mtu computed above in our 2688 * attr parameters, so it gets sent in 2689 * the attr info from us to the peer. 2690 */ 2691 lane_out->mtu = mtu; 2692 } 2693 } 2694 2695 } 2696 2697 if (ack == B_FALSE) { 2698 2699 vsw_free_lane_resources(ldcp, INBOUND); 2700 2701 attr_pkt->tag.vio_sid = ldcp->local_session; 2702 attr_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2703 2704 DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt); 2705 ldcp->lane_in.lstate |= VSW_ATTR_NACK_SENT; 2706 (void) vsw_send_msg(ldcp, (void *)attr_pkt, 2707 sizeof (vnet_attr_msg_t), B_TRUE); 2708 2709 vsw_next_milestone(ldcp); 2710 return; 2711 } 2712 2713 /* 2714 * Otherwise store attributes for this lane and update 2715 * lane state. 
2716		 */
2717		lane_in->mtu = attr_pkt->mtu;
2718		lane_in->addr = attr_pkt->addr;
2719		lane_in->addr_type = attr_pkt->addr_type;
2720		lane_in->xfer_mode = attr_pkt->xfer_mode;
2721		lane_in->ack_freq = attr_pkt->ack_freq;
2722
2723		if (VSW_VER_GTEQ(ldcp, 1, 4)) {
2724			/* save the MIN mtu in the msg to be replied */
2725			attr_pkt->mtu = mtu;
2726		}
2727
2728		macaddr = lane_in->addr;
2729		for (i = ETHERADDRL - 1; i >= 0; i--) {
2730			port->p_macaddr.ether_addr_octet[i] = macaddr & 0xFF;
2731			macaddr >>= 8;
2732		}
2733
2734		/* create the fdb entry for this port/mac address */
2735		vsw_fdbe_add(vswp, port);
2736
2737		/* add the port to the specified vlans */
2738		vsw_vlan_add_ids(port, VSW_VNETPORT);
2739
2740		/* set up device-specific xmit routines */
2741		mutex_enter(&port->tx_lock);
2742		if ((VSW_VER_GTEQ(ldcp, 1, 2) &&
2743		    (lane_in->xfer_mode & VIO_DRING_MODE_V1_2)) ||
2744		    (VSW_VER_LT(ldcp, 1, 2) &&
2745		    (lane_in->xfer_mode == VIO_DRING_MODE_V1_0))) {
2746			D2(vswp, "%s: mode = VIO_DRING_MODE", __func__);
2747			port->transmit = vsw_dringsend;
2748		} else if (lane_in->xfer_mode == VIO_DESC_MODE) {
2749			D2(vswp, "%s: mode = VIO_DESC_MODE", __func__);
2750			vsw_create_privring(ldcp);
2751			port->transmit = vsw_descrsend;
2752			lane_out->xfer_mode = VIO_DESC_MODE;
2753		}
2754
2755		/*
2756		 * HybridIO is supported only by vnet, not by OBP.
2757		 * So, set hio_capable to true only when in DRING mode.
2758		 */
2759		if (VSW_VER_GTEQ(ldcp, 1, 3) &&
2760		    (lane_in->xfer_mode != VIO_DESC_MODE)) {
2761			(void) atomic_swap_32(&port->p_hio_capable, B_TRUE);
2762		} else {
2763			(void) atomic_swap_32(&port->p_hio_capable, B_FALSE);
2764		}
2765
2766		mutex_exit(&port->tx_lock);
2767
2768		attr_pkt->tag.vio_sid = ldcp->local_session;
2769		attr_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
2770
2771		DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt);
2772
2773		lane_in->lstate |= VSW_ATTR_ACK_SENT;
2774
2775		(void) vsw_send_msg(ldcp, (void *)attr_pkt,
2776		    sizeof (vnet_attr_msg_t), B_TRUE);
2777
2778		vsw_next_milestone(ldcp);
2779		break;
2780
2781	case VIO_SUBTYPE_ACK:
2782		D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__);
2783
2784		if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_ACK_RECV))
2785			return;
2786
2787		if (VSW_VER_GTEQ(ldcp, 1, 4)) {
2788			/*
2789			 * Versions >= 1.4:
2790			 * The ack msg sent by the peer contains the minimum of
2791			 * our mtu (that we had sent in our attr info) and the
2792			 * peer's mtu.
2793			 *
2794			 * If we have sent an ack for the attr info msg from
2795			 * the peer, check if the mtu that was computed then
2796			 * (saved in lane_out params) matches the mtu that the
2797			 * peer has ack'd. If they don't match, we fail the
2798			 * handshake.
2799			 */
2800			if (lane_in->lstate & VSW_ATTR_ACK_SENT) {
2801				if (lane_out->mtu != attr_pkt->mtu) {
2802					return;
2803				}
2804			} else {
2805				/*
2806				 * If the mtu ack'd by the peer is > our mtu
2807				 * fail the handshake. Otherwise, save the mtu,
2808				 * so we can validate it when we receive attr
2809				 * info from our peer.
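				 *
				 * Concretely: if we advertised 1600 and the
				 * peer ACKs 1500, we accept and lower
				 * lane_out->mtu to 1500; an ACK above 1600
				 * fails the handshake.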
2810 */ 2811 if (attr_pkt->mtu > lane_out->mtu) { 2812 return; 2813 } 2814 if (attr_pkt->mtu <= lane_out->mtu) { 2815 lane_out->mtu = attr_pkt->mtu; 2816 } 2817 } 2818 } 2819 2820 lane_out->lstate |= VSW_ATTR_ACK_RECV; 2821 vsw_next_milestone(ldcp); 2822 break; 2823 2824 case VIO_SUBTYPE_NACK: 2825 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 2826 2827 if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_NACK_RECV)) 2828 return; 2829 2830 lane_out->lstate |= VSW_ATTR_NACK_RECV; 2831 vsw_next_milestone(ldcp); 2832 break; 2833 2834 default: 2835 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 2836 attr_pkt->tag.vio_subtype); 2837 } 2838 2839 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 2840 } 2841 2842 /* 2843 * Process a dring info packet. We can end up here either because our peer 2844 * has ACK/NACK'ed back to an earlier DRING msg we had sent it, or our 2845 * peer has sent us a dring INFO message. 2846 * 2847 * If we get a valid/acceptable INFO packet (and we have already negotiated 2848 * a version) we ACK back and update the lane state, otherwise we NACK back. 2849 * 2850 * FUTURE: nothing to stop client from sending us info on multiple dring's 2851 * but for the moment we will just use the first one we are given. 2852 * 2853 */ 2854 void 2855 vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *ldcp, void *pkt) 2856 { 2857 vio_dring_reg_msg_t *dring_pkt; 2858 vsw_t *vswp = ldcp->ldc_vswp; 2859 ldc_mem_info_t minfo; 2860 dring_info_t *dp, *dbp; 2861 int dring_found = 0; 2862 2863 /* 2864 * We know this is a ctrl/dring packet so 2865 * cast it into the correct structure. 2866 */ 2867 dring_pkt = (vio_dring_reg_msg_t *)pkt; 2868 2869 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 2870 2871 switch (dring_pkt->tag.vio_subtype) { 2872 case VIO_SUBTYPE_INFO: 2873 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 2874 2875 if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV)) 2876 return; 2877 2878 /* 2879 * If the dring params are unacceptable then we NACK back. 2880 */ 2881 if (vsw_check_dring_info(dring_pkt)) { 2882 2883 DERR(vswp, "%s (%lld): invalid dring info", 2884 __func__, ldcp->ldc_id); 2885 2886 vsw_free_lane_resources(ldcp, INBOUND); 2887 2888 dring_pkt->tag.vio_sid = ldcp->local_session; 2889 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2890 2891 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 2892 2893 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 2894 2895 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 2896 sizeof (vio_dring_reg_msg_t), B_TRUE); 2897 2898 vsw_next_milestone(ldcp); 2899 return; 2900 } 2901 2902 /* 2903 * Otherwise, attempt to map in the dring using the 2904 * cookie. If that succeeds we send back a unique dring 2905 * identifier that the sending side will use in future 2906 * to refer to this descriptor ring. 2907 */ 2908 dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 2909 2910 dp->num_descriptors = dring_pkt->num_descriptors; 2911 dp->descriptor_size = dring_pkt->descriptor_size; 2912 dp->options = dring_pkt->options; 2913 dp->ncookies = dring_pkt->ncookies; 2914 2915 /* 2916 * Note: should only get one cookie. Enforced in 2917 * the ldc layer. 
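		 *
		 * Once the mapping below succeeds, descriptor `i' of the
		 * imported ring is addressed simply as
		 * ((vnet_public_desc_t *)dp->pub_addr) + i, which is how
		 * the dring data path later walks it.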
2918 */ 2919 bcopy(&dring_pkt->cookie[0], &dp->cookie[0], 2920 sizeof (ldc_mem_cookie_t)); 2921 2922 D2(vswp, "%s: num_desc %ld : desc_size %ld", __func__, 2923 dp->num_descriptors, dp->descriptor_size); 2924 D2(vswp, "%s: options 0x%lx: ncookies %ld", __func__, 2925 dp->options, dp->ncookies); 2926 2927 if ((ldc_mem_dring_map(ldcp->ldc_handle, &dp->cookie[0], 2928 dp->ncookies, dp->num_descriptors, dp->descriptor_size, 2929 LDC_DIRECT_MAP, &(dp->handle))) != 0) { 2930 2931 DERR(vswp, "%s: dring_map failed\n", __func__); 2932 2933 kmem_free(dp, sizeof (dring_info_t)); 2934 vsw_free_lane_resources(ldcp, INBOUND); 2935 2936 dring_pkt->tag.vio_sid = ldcp->local_session; 2937 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2938 2939 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 2940 2941 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 2942 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 2943 sizeof (vio_dring_reg_msg_t), B_TRUE); 2944 2945 vsw_next_milestone(ldcp); 2946 return; 2947 } 2948 2949 if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) { 2950 2951 DERR(vswp, "%s: dring_addr failed\n", __func__); 2952 2953 kmem_free(dp, sizeof (dring_info_t)); 2954 vsw_free_lane_resources(ldcp, INBOUND); 2955 2956 dring_pkt->tag.vio_sid = ldcp->local_session; 2957 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2958 2959 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 2960 2961 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 2962 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 2963 sizeof (vio_dring_reg_msg_t), B_TRUE); 2964 2965 vsw_next_milestone(ldcp); 2966 return; 2967 } else { 2968 /* store the address of the pub part of ring */ 2969 dp->pub_addr = minfo.vaddr; 2970 2971 /* cache the dring mtype */ 2972 dp->dring_mtype = minfo.mtype; 2973 } 2974 2975 /* no private section as we are importing */ 2976 dp->priv_addr = NULL; 2977 2978 /* 2979 * Using simple mono increasing int for ident at 2980 * the moment. 2981 */ 2982 dp->ident = ldcp->next_ident; 2983 ldcp->next_ident++; 2984 2985 dp->end_idx = 0; 2986 dp->next = NULL; 2987 2988 /* 2989 * Link it onto the end of the list of drings 2990 * for this lane. 2991 */ 2992 if (ldcp->lane_in.dringp == NULL) { 2993 D2(vswp, "%s: adding first INBOUND dring", __func__); 2994 ldcp->lane_in.dringp = dp; 2995 } else { 2996 dbp = ldcp->lane_in.dringp; 2997 2998 while (dbp->next != NULL) 2999 dbp = dbp->next; 3000 3001 dbp->next = dp; 3002 } 3003 3004 /* acknowledge it */ 3005 dring_pkt->tag.vio_sid = ldcp->local_session; 3006 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3007 dring_pkt->dring_ident = dp->ident; 3008 3009 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 3010 sizeof (vio_dring_reg_msg_t), B_TRUE); 3011 3012 ldcp->lane_in.lstate |= VSW_DRING_ACK_SENT; 3013 vsw_next_milestone(ldcp); 3014 break; 3015 3016 case VIO_SUBTYPE_ACK: 3017 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3018 3019 if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_ACK_RECV)) 3020 return; 3021 3022 /* 3023 * Peer is acknowledging our dring info and will have 3024 * sent us a dring identifier which we will use to 3025 * refer to this ring w.r.t. our peer. 3026 */ 3027 dp = ldcp->lane_out.dringp; 3028 if (dp != NULL) { 3029 /* 3030 * Find the ring this ident should be associated 3031 * with. 
3032 */ 3033 if (vsw_dring_match(dp, dring_pkt)) { 3034 dring_found = 1; 3035 3036 } else while (dp != NULL) { 3037 if (vsw_dring_match(dp, dring_pkt)) { 3038 dring_found = 1; 3039 break; 3040 } 3041 dp = dp->next; 3042 } 3043 3044 if (dring_found == 0) { 3045 DERR(NULL, "%s: unrecognised ring cookie", 3046 __func__); 3047 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3048 return; 3049 } 3050 3051 } else { 3052 DERR(vswp, "%s: DRING ACK received but no drings " 3053 "allocated", __func__); 3054 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3055 return; 3056 } 3057 3058 /* store ident */ 3059 dp->ident = dring_pkt->dring_ident; 3060 ldcp->lane_out.lstate |= VSW_DRING_ACK_RECV; 3061 vsw_next_milestone(ldcp); 3062 break; 3063 3064 case VIO_SUBTYPE_NACK: 3065 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3066 3067 if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_NACK_RECV)) 3068 return; 3069 3070 ldcp->lane_out.lstate |= VSW_DRING_NACK_RECV; 3071 vsw_next_milestone(ldcp); 3072 break; 3073 3074 default: 3075 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 3076 dring_pkt->tag.vio_subtype); 3077 } 3078 3079 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 3080 } 3081 3082 /* 3083 * Process a request from peer to unregister a dring. 3084 * 3085 * For the moment we just restart the handshake if our 3086 * peer endpoint attempts to unregister a dring. 3087 */ 3088 void 3089 vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *ldcp, void *pkt) 3090 { 3091 vsw_t *vswp = ldcp->ldc_vswp; 3092 vio_dring_unreg_msg_t *dring_pkt; 3093 3094 /* 3095 * We know this is a ctrl/dring packet so 3096 * cast it into the correct structure. 3097 */ 3098 dring_pkt = (vio_dring_unreg_msg_t *)pkt; 3099 3100 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3101 3102 switch (dring_pkt->tag.vio_subtype) { 3103 case VIO_SUBTYPE_INFO: 3104 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3105 3106 DWARN(vswp, "%s: restarting handshake..", __func__); 3107 break; 3108 3109 case VIO_SUBTYPE_ACK: 3110 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3111 3112 DWARN(vswp, "%s: restarting handshake..", __func__); 3113 break; 3114 3115 case VIO_SUBTYPE_NACK: 3116 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3117 3118 DWARN(vswp, "%s: restarting handshake..", __func__); 3119 break; 3120 3121 default: 3122 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 3123 dring_pkt->tag.vio_subtype); 3124 } 3125 3126 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3127 3128 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3129 } 3130 3131 #define SND_MCST_NACK(ldcp, pkt) \ 3132 pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \ 3133 pkt->tag.vio_sid = ldcp->local_session; \ 3134 (void) vsw_send_msg(ldcp, (void *)pkt, \ 3135 sizeof (vnet_mcast_msg_t), B_TRUE); 3136 3137 /* 3138 * Process a multicast request from a vnet. 3139 * 3140 * Vnet's specify a multicast address that they are interested in. This 3141 * address is used as a key into the hash table which forms the multicast 3142 * forwarding database (mFDB). 3143 * 3144 * The table keys are the multicast addresses, while the table entries 3145 * are pointers to lists of ports which wish to receive packets for the 3146 * specified multicast address. 3147 * 3148 * When a multicast packet is being switched we use the address as a key 3149 * into the hash table, and then walk the appropriate port list forwarding 3150 * the pkt to each port in turn. 
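 *
 * For example, when a vnet registers interest in an address such as
 * 01:00:5e:00:00:01 via a VNET_MCAST_INFO message, vsw_add_rem_mcst()
 * adds this port to the list hashed under that address; a subsequent
 * multicast frame to that address is then replicated to every port on
 * the resulting list.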
3151 * 3152 * If a vnet is no longer interested in a particular multicast grouping 3153 * we simply find the correct location in the hash table and then delete 3154 * the relevant port from the port list. 3155 * 3156 * To deal with the case whereby a port is being deleted without first 3157 * removing itself from the lists in the hash table, we maintain a list 3158 * of multicast addresses the port has registered an interest in, within 3159 * the port structure itself. We then simply walk that list of addresses 3160 * using them as keys into the hash table and remove the port from the 3161 * appropriate lists. 3162 */ 3163 static void 3164 vsw_process_ctrl_mcst_pkt(vsw_ldc_t *ldcp, void *pkt) 3165 { 3166 vnet_mcast_msg_t *mcst_pkt; 3167 vsw_port_t *port = ldcp->ldc_port; 3168 vsw_t *vswp = ldcp->ldc_vswp; 3169 int i; 3170 3171 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3172 3173 /* 3174 * We know this is a ctrl/mcast packet so 3175 * cast it into the correct structure. 3176 */ 3177 mcst_pkt = (vnet_mcast_msg_t *)pkt; 3178 3179 switch (mcst_pkt->tag.vio_subtype) { 3180 case VIO_SUBTYPE_INFO: 3181 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3182 3183 /* 3184 * Check if in correct state to receive a multicast 3185 * message (i.e. handshake complete). If not reset 3186 * the handshake. 3187 */ 3188 if (vsw_check_flag(ldcp, INBOUND, VSW_MCST_INFO_RECV)) 3189 return; 3190 3191 /* 3192 * Before attempting to add or remove address check 3193 * that they are valid multicast addresses. 3194 * If not, then NACK back. 3195 */ 3196 for (i = 0; i < mcst_pkt->count; i++) { 3197 if ((mcst_pkt->mca[i].ether_addr_octet[0] & 01) != 1) { 3198 DERR(vswp, "%s: invalid multicast address", 3199 __func__); 3200 SND_MCST_NACK(ldcp, mcst_pkt); 3201 return; 3202 } 3203 } 3204 3205 /* 3206 * Now add/remove the addresses. If this fails we 3207 * NACK back. 3208 */ 3209 if (vsw_add_rem_mcst(mcst_pkt, port) != 0) { 3210 SND_MCST_NACK(ldcp, mcst_pkt); 3211 return; 3212 } 3213 3214 mcst_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3215 mcst_pkt->tag.vio_sid = ldcp->local_session; 3216 3217 DUMP_TAG_PTR((vio_msg_tag_t *)mcst_pkt); 3218 3219 (void) vsw_send_msg(ldcp, (void *)mcst_pkt, 3220 sizeof (vnet_mcast_msg_t), B_TRUE); 3221 break; 3222 3223 case VIO_SUBTYPE_ACK: 3224 DWARN(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3225 3226 /* 3227 * We shouldn't ever get a multicast ACK message as 3228 * at the moment we never request multicast addresses 3229 * to be set on some other device. This may change in 3230 * the future if we have cascading switches. 3231 */ 3232 if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_ACK_RECV)) 3233 return; 3234 3235 /* Do nothing */ 3236 break; 3237 3238 case VIO_SUBTYPE_NACK: 3239 DWARN(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3240 3241 /* 3242 * We shouldn't get a multicast NACK packet for the 3243 * same reasons as we shouldn't get a ACK packet. 3244 */ 3245 if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_NACK_RECV)) 3246 return; 3247 3248 /* Do nothing */ 3249 break; 3250 3251 default: 3252 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 3253 mcst_pkt->tag.vio_subtype); 3254 } 3255 3256 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3257 } 3258 3259 static void 3260 vsw_process_ctrl_rdx_pkt(vsw_ldc_t *ldcp, void *pkt) 3261 { 3262 vio_rdx_msg_t *rdx_pkt; 3263 vsw_t *vswp = ldcp->ldc_vswp; 3264 3265 /* 3266 * We know this is a ctrl/rdx packet so 3267 * cast it into the correct structure. 
3268 */ 3269 rdx_pkt = (vio_rdx_msg_t *)pkt; 3270 3271 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 3272 3273 switch (rdx_pkt->tag.vio_subtype) { 3274 case VIO_SUBTYPE_INFO: 3275 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3276 3277 if (vsw_check_flag(ldcp, OUTBOUND, VSW_RDX_INFO_RECV)) 3278 return; 3279 3280 rdx_pkt->tag.vio_sid = ldcp->local_session; 3281 rdx_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3282 3283 DUMP_TAG_PTR((vio_msg_tag_t *)rdx_pkt); 3284 3285 ldcp->lane_out.lstate |= VSW_RDX_ACK_SENT; 3286 3287 (void) vsw_send_msg(ldcp, (void *)rdx_pkt, 3288 sizeof (vio_rdx_msg_t), B_TRUE); 3289 3290 vsw_next_milestone(ldcp); 3291 break; 3292 3293 case VIO_SUBTYPE_ACK: 3294 /* 3295 * Should be handled in-band by callback handler. 3296 */ 3297 DERR(vswp, "%s: Unexpected VIO_SUBTYPE_ACK", __func__); 3298 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3299 break; 3300 3301 case VIO_SUBTYPE_NACK: 3302 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3303 3304 if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_NACK_RECV)) 3305 return; 3306 3307 ldcp->lane_in.lstate |= VSW_RDX_NACK_RECV; 3308 vsw_next_milestone(ldcp); 3309 break; 3310 3311 default: 3312 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 3313 rdx_pkt->tag.vio_subtype); 3314 } 3315 3316 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3317 } 3318 3319 static void 3320 vsw_process_data_pkt(vsw_ldc_t *ldcp, void *dpkt, vio_msg_tag_t *tagp, 3321 uint32_t msglen) 3322 { 3323 uint16_t env = tagp->vio_subtype_env; 3324 vsw_t *vswp = ldcp->ldc_vswp; 3325 3326 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3327 3328 /* session id check */ 3329 if (ldcp->session_status & VSW_PEER_SESSION) { 3330 if (ldcp->peer_session != tagp->vio_sid) { 3331 DERR(vswp, "%s (chan %d): invalid session id (%llx)", 3332 __func__, ldcp->ldc_id, tagp->vio_sid); 3333 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3334 return; 3335 } 3336 } 3337 3338 /* 3339 * It is an error for us to be getting data packets 3340 * before the handshake has completed. 3341 */ 3342 if (ldcp->hphase != VSW_MILESTONE4) { 3343 DERR(vswp, "%s: got data packet before handshake complete " 3344 "hphase %d (%x: %x)", __func__, ldcp->hphase, 3345 ldcp->lane_in.lstate, ldcp->lane_out.lstate); 3346 DUMP_FLAGS(ldcp->lane_in.lstate); 3347 DUMP_FLAGS(ldcp->lane_out.lstate); 3348 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3349 return; 3350 } 3351 3352 /* 3353 * To reduce the locking contention, release the 3354 * ldc_cblock here and re-acquire it once we are done 3355 * receiving packets. 3356 */ 3357 mutex_exit(&ldcp->ldc_cblock); 3358 mutex_enter(&ldcp->ldc_rxlock); 3359 3360 /* 3361 * Switch on vio_subtype envelope, then let lower routines 3362 * decide if its an INFO, ACK or NACK packet. 
3363 */ 3364 if (env == VIO_DRING_DATA) { 3365 vsw_process_data_dring_pkt(ldcp, dpkt); 3366 } else if (env == VIO_PKT_DATA) { 3367 ldcp->rx_pktdata(ldcp, dpkt, msglen); 3368 } else if (env == VIO_DESC_DATA) { 3369 vsw_process_data_ibnd_pkt(ldcp, dpkt); 3370 } else { 3371 DERR(vswp, "%s: unknown vio_subtype_env (%x)\n", __func__, env); 3372 } 3373 3374 mutex_exit(&ldcp->ldc_rxlock); 3375 mutex_enter(&ldcp->ldc_cblock); 3376 3377 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3378 } 3379 3380 #define SND_DRING_NACK(ldcp, pkt) \ 3381 pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \ 3382 pkt->tag.vio_sid = ldcp->local_session; \ 3383 (void) vsw_send_msg(ldcp, (void *)pkt, \ 3384 sizeof (vio_dring_msg_t), B_TRUE); 3385 3386 static void 3387 vsw_process_data_dring_pkt(vsw_ldc_t *ldcp, void *dpkt) 3388 { 3389 vio_dring_msg_t *dring_pkt; 3390 vnet_public_desc_t desc, *pub_addr = NULL; 3391 vsw_private_desc_t *priv_addr = NULL; 3392 dring_info_t *dp = NULL; 3393 vsw_t *vswp = ldcp->ldc_vswp; 3394 mblk_t *mp = NULL; 3395 mblk_t *bp = NULL; 3396 mblk_t *bpt = NULL; 3397 size_t nbytes = 0; 3398 uint64_t chain = 0; 3399 uint64_t len; 3400 uint32_t pos, start; 3401 uint32_t range_start, range_end; 3402 int32_t end, num, cnt = 0; 3403 int i, rv, rng_rv = 0, msg_rv = 0; 3404 boolean_t prev_desc_ack = B_FALSE; 3405 int read_attempts = 0; 3406 struct ether_header *ehp; 3407 lane_t *lp = &ldcp->lane_out; 3408 3409 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3410 3411 /* 3412 * We know this is a data/dring packet so 3413 * cast it into the correct structure. 3414 */ 3415 dring_pkt = (vio_dring_msg_t *)dpkt; 3416 3417 /* 3418 * Switch on the vio_subtype. If its INFO then we need to 3419 * process the data. If its an ACK we need to make sure 3420 * it makes sense (i.e did we send an earlier data/info), 3421 * and if its a NACK then we maybe attempt a retry. 3422 */ 3423 switch (dring_pkt->tag.vio_subtype) { 3424 case VIO_SUBTYPE_INFO: 3425 D2(vswp, "%s(%lld): VIO_SUBTYPE_INFO", __func__, ldcp->ldc_id); 3426 3427 READ_ENTER(&ldcp->lane_in.dlistrw); 3428 if ((dp = vsw_ident2dring(&ldcp->lane_in, 3429 dring_pkt->dring_ident)) == NULL) { 3430 RW_EXIT(&ldcp->lane_in.dlistrw); 3431 3432 DERR(vswp, "%s(%lld): unable to find dring from " 3433 "ident 0x%llx", __func__, ldcp->ldc_id, 3434 dring_pkt->dring_ident); 3435 3436 SND_DRING_NACK(ldcp, dring_pkt); 3437 return; 3438 } 3439 3440 start = pos = dring_pkt->start_idx; 3441 end = dring_pkt->end_idx; 3442 len = dp->num_descriptors; 3443 3444 range_start = range_end = pos; 3445 3446 D2(vswp, "%s(%lld): start index %ld : end %ld\n", 3447 __func__, ldcp->ldc_id, start, end); 3448 3449 if (end == -1) { 3450 num = -1; 3451 } else if (end >= 0) { 3452 num = end >= pos ? 
end - pos + 1 : (len - pos + 1) + end;
3453
3454			/* basic sanity check */
3455			if (end > len) {
3456				RW_EXIT(&ldcp->lane_in.dlistrw);
3457				DERR(vswp, "%s(%lld): endpoint %lld outside "
3458				    "ring length %lld", __func__,
3459				    ldcp->ldc_id, end, len);
3460
3461				SND_DRING_NACK(ldcp, dring_pkt);
3462				return;
3463			}
3464		} else {
3465			RW_EXIT(&ldcp->lane_in.dlistrw);
3466			DERR(vswp, "%s(%lld): invalid endpoint %lld",
3467			    __func__, ldcp->ldc_id, end);
3468			SND_DRING_NACK(ldcp, dring_pkt);
3469			return;
3470		}
3471
3472		while (cnt != num) {
3473 vsw_recheck_desc:
3474			pub_addr = (vnet_public_desc_t *)dp->pub_addr + pos;
3475
3476			if ((rng_rv = vnet_dring_entry_copy(pub_addr,
3477			    &desc, dp->dring_mtype, dp->handle,
3478			    pos, pos)) != 0) {
3479				DERR(vswp, "%s(%lld): unable to copy "
3480				    "descriptor at pos %d: err %d",
3481				    __func__, ldcp->ldc_id, pos, rng_rv);
3482				ldcp->ldc_stats.ierrors++;
3483				break;
3484			}
3485
3486			/*
3487			 * When given a bounded range of descriptors
3488			 * to process, it's an error to hit a descriptor
3489			 * which is not ready. In the non-bounded case
3490			 * (end_idx == -1) this simply indicates we have
3491			 * reached the end of the current active range.
3492			 */
3493			if (desc.hdr.dstate != VIO_DESC_READY) {
3494				/* unbound - no error */
3495				if (end == -1) {
3496					if (read_attempts == vsw_read_attempts)
3497						break;
3498
3499					delay(drv_usectohz(vsw_desc_delay));
3500					read_attempts++;
3501					goto vsw_recheck_desc;
3502				}
3503
3504				/* bounded - error - so NACK back */
3505				RW_EXIT(&ldcp->lane_in.dlistrw);
3506				DERR(vswp, "%s(%lld): descriptor not READY "
3507				    "(%d)", __func__, ldcp->ldc_id,
3508				    desc.hdr.dstate);
3509				SND_DRING_NACK(ldcp, dring_pkt);
3510				return;
3511			}
3512
3513			DTRACE_PROBE1(read_attempts, int, read_attempts);
3514
3515			range_end = pos;
3516
3517			/*
3518			 * If we ACK'd the previous descriptor then now
3519			 * record the new range start position for later
3520			 * ACK's.
3521			 */
3522			if (prev_desc_ack) {
3523				range_start = pos;
3524
3525				D2(vswp, "%s(%lld): updating range start to be "
3526				    "%d", __func__, ldcp->ldc_id, range_start);
3527
3528				prev_desc_ack = B_FALSE;
3529			}
3530
3531			D2(vswp, "%s(%lld): processing desc %lld at pos"
3532			    " 0x%llx : dstate 0x%lx : datalen 0x%lx",
3533			    __func__, ldcp->ldc_id, pos, &desc,
3534			    desc.hdr.dstate, desc.nbytes);
3535
3536			if ((desc.nbytes < ETHERMIN) ||
3537			    (desc.nbytes > lp->mtu)) {
3538				/* invalid size; drop the packet */
3539				ldcp->ldc_stats.ierrors++;
3540				goto vsw_process_desc_done;
3541			}
3542
3543			/*
3544			 * Ensure that we ask ldc for an aligned
3545			 * number of bytes. Data is padded to align on an 8
3546			 * byte boundary; desc.nbytes is the actual data
3547			 * length, i.e. minus that padding.
3548			 */
3549			nbytes = (desc.nbytes + VNET_IPALIGN + 7) & ~7;
3550			if (nbytes > ldcp->max_rxpool_size) {
3551				mp = allocb(desc.nbytes + VNET_IPALIGN + 8,
3552				    BPRI_MED);
3553			} else {
3554				mp = vio_multipool_allocb(&ldcp->vmp, nbytes);
3555				if (mp == NULL) {
3556					ldcp->ldc_stats.rx_vio_allocb_fail++;
3557					/*
3558					 * No free receive buffers available,
3559					 * so fall back onto allocb(9F). Make
3560					 * sure that we get a data buffer which
3561					 * is a multiple of 8 as this is
3562					 * required by ldc_mem_copy.
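					 *
					 * The arithmetic above rounds
					 * desc.nbytes + VNET_IPALIGN up to
					 * the next multiple of 8; e.g.
					 * assuming a VNET_IPALIGN of 6, a
					 * 1514 byte frame is read as
					 * (1514 + 6 + 7) & ~7 = 1520 bytes,
					 * and b_rptr/b_wptr are then adjusted
					 * so the mblk presents exactly
					 * desc.nbytes of frame data.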
3563					 */
3564					DTRACE_PROBE(allocb);
3565					mp = allocb(desc.nbytes +
3566					    VNET_IPALIGN + 8, BPRI_MED);
3567				}
3568			}
3569			if (mp == NULL) {
3570				DERR(vswp, "%s(%ld): allocb failed",
3571				    __func__, ldcp->ldc_id);
3572				rng_rv = vnet_dring_entry_set_dstate(pub_addr,
3573				    dp->dring_mtype, dp->handle, pos, pos,
3574				    VIO_DESC_DONE);
3575				ldcp->ldc_stats.ierrors++;
3576				ldcp->ldc_stats.rx_allocb_fail++;
3577				break;
3578			}
3579
3580			rv = ldc_mem_copy(ldcp->ldc_handle,
3581			    (caddr_t)mp->b_rptr, 0, &nbytes,
3582			    desc.memcookie, desc.ncookies, LDC_COPY_IN);
3583			if (rv != 0) {
3584				DERR(vswp, "%s(%d): unable to copy in data "
3585				    "from %d cookies in desc %d (rv %d)",
3586				    __func__, ldcp->ldc_id, desc.ncookies,
3587				    pos, rv);
3588				freemsg(mp);
3589
3590				rng_rv = vnet_dring_entry_set_dstate(pub_addr,
3591				    dp->dring_mtype, dp->handle, pos, pos,
3592				    VIO_DESC_DONE);
3593				ldcp->ldc_stats.ierrors++;
3594				break;
3595			} else {
3596				D2(vswp, "%s(%d): copied in %ld bytes"
3597				    " using %d cookies", __func__,
3598				    ldcp->ldc_id, nbytes, desc.ncookies);
3599			}
3600
3601			/* adjust the read pointer to skip over the padding */
3602			mp->b_rptr += VNET_IPALIGN;
3603
3604			/* point to the actual end of data */
3605			mp->b_wptr = mp->b_rptr + desc.nbytes;
3606
3607			/* update statistics */
3608			ehp = (struct ether_header *)mp->b_rptr;
3609			if (IS_BROADCAST(ehp))
3610				ldcp->ldc_stats.brdcstrcv++;
3611			else if (IS_MULTICAST(ehp))
3612				ldcp->ldc_stats.multircv++;
3613
3614			ldcp->ldc_stats.ipackets++;
3615			ldcp->ldc_stats.rbytes += desc.nbytes;
3616
3617			/*
3618			 * IPALIGN space can be used for VLAN_TAG
3619			 */
3620			(void) vsw_vlan_frame_pretag(ldcp->ldc_port,
3621			    VSW_VNETPORT, mp);
3622
3623			/* build a chain of received packets */
3624			if (bp == NULL) {
3625				/* first pkt */
3626				bp = mp;
3627				bp->b_next = bp->b_prev = NULL;
3628				bpt = bp;
3629				chain = 1;
3630			} else {
3631				mp->b_next = mp->b_prev = NULL;
3632				bpt->b_next = mp;
3633				bpt = mp;
3634				chain++;
3635			}
3636
3637 vsw_process_desc_done:
3638			/* mark we are finished with this descriptor */
3639			if ((rng_rv = vnet_dring_entry_set_dstate(pub_addr,
3640			    dp->dring_mtype, dp->handle, pos, pos,
3641			    VIO_DESC_DONE)) != 0) {
3642				DERR(vswp, "%s(%lld): unable to update "
3643				    "dstate at pos %d: err %d",
3644				    __func__, ldcp->ldc_id, pos, rng_rv);
3645				ldcp->ldc_stats.ierrors++;
3646				break;
3647			}
3648
3649			/*
3650			 * Send an ACK back to peer if requested.
3651			 */
3652			if (desc.hdr.ack) {
3653				dring_pkt->start_idx = range_start;
3654				dring_pkt->end_idx = range_end;
3655
3656				DERR(vswp, "%s(%lld): processed %d %d, ACK"
3657				    " requested", __func__, ldcp->ldc_id,
3658				    dring_pkt->start_idx, dring_pkt->end_idx);
3659
3660				dring_pkt->dring_process_state = VIO_DP_ACTIVE;
3661				dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
3662				dring_pkt->tag.vio_sid = ldcp->local_session;
3663
3664				msg_rv = vsw_send_msg(ldcp, (void *)dring_pkt,
3665				    sizeof (vio_dring_msg_t), B_FALSE);
3666
3667				/*
3668				 * Check if ACK was successfully sent. If not
3669				 * we break and deal with that below.
3670				 */
3671				if (msg_rv != 0)
3672					break;
3673
3674				prev_desc_ack = B_TRUE;
3675				range_start = pos;
3676			}
3677
3678			/* next descriptor */
3679			pos = (pos + 1) % len;
3680			cnt++;
3681
3682			/*
3683			 * Break out of the loop here and stop processing to
3684			 * allow some other network device (or disk) to
3685			 * get access to the cpu.
3686			 */
3687			if (chain > vsw_chain_len) {
3688				D3(vswp, "%s(%lld): switching chain of %d "
3689				    "msgs", __func__, ldcp->ldc_id, chain);
3690				break;
3691			}
3692		}
3693		RW_EXIT(&ldcp->lane_in.dlistrw);
3694
3695		/* send the chain of packets to be switched */
3696		if (bp != NULL) {
3697			DTRACE_PROBE1(vsw_rcv_msgs, int, chain);
3698			D3(vswp, "%s(%lld): switching chain of %d msgs",
3699			    __func__, ldcp->ldc_id, chain);
3700			vswp->vsw_switch_frame(vswp, bp, VSW_VNETPORT,
3701			    ldcp->ldc_port, NULL);
3702		}
3703
3704		/*
3705		 * If we encountered an error when attempting to
3706		 * access an imported dring, initiate a connection reset.
3707		 */
3708		if (rng_rv != 0) {
3709			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
3710			break;
3711		}
3712
3713		/*
3714		 * If, when we attempted to send the ACK, we found that the
3715		 * channel had been reset, then handle that now. We deal with
3716		 * it here as we cannot reset the channel while holding the
3717		 * dlistrw lock, and we don't want to acquire/release it
3718		 * continuously in the above loop, as a channel reset should
3719		 * be a rare event.
3720		 */
3721		if (msg_rv == ECONNRESET) {
3722			vsw_process_conn_evt(ldcp, VSW_CONN_RESET);
3723			break;
3724		}
3725
3726		DTRACE_PROBE1(msg_cnt, int, cnt);
3727
3728		/*
3729		 * We are now finished, so ACK back with the state
3730		 * set to STOPPING so our peer knows we are finished.
3731		 */
3732		dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
3733		dring_pkt->tag.vio_sid = ldcp->local_session;
3734
3735		dring_pkt->dring_process_state = VIO_DP_STOPPED;
3736
3737		DTRACE_PROBE(stop_process_sent);
3738
3739		/*
3740		 * We have not processed any more descriptors beyond
3741		 * the last one we ACK'd.
3742		 */
3743		if (prev_desc_ack)
3744			range_start = range_end;
3745
3746		dring_pkt->start_idx = range_start;
3747		dring_pkt->end_idx = range_end;
3748
3749		D2(vswp, "%s(%lld) processed : %d : %d, now stopping",
3750		    __func__, ldcp->ldc_id, dring_pkt->start_idx,
3751		    dring_pkt->end_idx);
3752
3753		(void) vsw_send_msg(ldcp, (void *)dring_pkt,
3754		    sizeof (vio_dring_msg_t), B_TRUE);
3755		break;
3756
3757	case VIO_SUBTYPE_ACK:
3758		D2(vswp, "%s(%lld): VIO_SUBTYPE_ACK", __func__, ldcp->ldc_id);
3759		/*
3760		 * Verify that the relevant descriptors are all
3761		 * marked as DONE.
3762		 */
3763		READ_ENTER(&ldcp->lane_out.dlistrw);
3764		if ((dp = vsw_ident2dring(&ldcp->lane_out,
3765		    dring_pkt->dring_ident)) == NULL) {
3766			RW_EXIT(&ldcp->lane_out.dlistrw);
3767			DERR(vswp, "%s: unknown ident in ACK", __func__);
3768			return;
3769		}
3770
3771		start = end = 0;
3772		start = dring_pkt->start_idx;
3773		end = dring_pkt->end_idx;
3774		len = dp->num_descriptors;
3775
3776
3777		mutex_enter(&dp->dlock);
3778		dp->last_ack_recv = end;
3779		ldcp->ldc_stats.dring_data_acks++;
3780		mutex_exit(&dp->dlock);
3781
3782		(void) vsw_reclaim_dring(dp, start);
3783
3784		/*
3785		 * If our peer is stopping processing descriptors then
3786		 * we check to make sure it has processed all the descriptors
3787		 * we have updated. If not then we send it a new message
3788		 * to prompt it to restart.
3789		 */
3790		if (dring_pkt->dring_process_state == VIO_DP_STOPPED) {
3791			DTRACE_PROBE(stop_process_recv);
3792			D2(vswp, "%s(%lld): got stopping msg : %d : %d",
3793			    __func__, ldcp->ldc_id, dring_pkt->start_idx,
3794			    dring_pkt->end_idx);
3795
3796			/*
3797			 * Check the next descriptor in the public section of
3798			 * the ring. If it's marked as READY then we need to
3799			 * prompt our peer to start processing the ring again.
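			 *
			 * That is, if the peer stopped after processing up to
			 * index `end' and descriptor (end + 1) % len is
			 * already READY again, we send a fresh DRING_DATA
			 * INFO with start_idx set to (end + 1) % len and
			 * end_idx set to -1 (unbounded); otherwise we record
			 * the need in dp->restart_reqd so the prompt can be
			 * issued later.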
3800 */ 3801 i = (end + 1) % len; 3802 pub_addr = (vnet_public_desc_t *)dp->pub_addr + i; 3803 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i; 3804 3805 /* 3806 * Hold the restart lock across all of this to 3807 * make sure that its not possible for us to 3808 * decide that a msg needs to be sent in the future 3809 * but the sending code having already checked is 3810 * about to exit. 3811 */ 3812 mutex_enter(&dp->restart_lock); 3813 ldcp->ldc_stats.dring_stopped_acks++; 3814 mutex_enter(&priv_addr->dstate_lock); 3815 if (pub_addr->hdr.dstate == VIO_DESC_READY) { 3816 3817 mutex_exit(&priv_addr->dstate_lock); 3818 3819 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO; 3820 dring_pkt->tag.vio_sid = ldcp->local_session; 3821 3822 dring_pkt->start_idx = (end + 1) % len; 3823 dring_pkt->end_idx = -1; 3824 3825 D2(vswp, "%s(%lld) : sending restart msg:" 3826 " %d : %d", __func__, ldcp->ldc_id, 3827 dring_pkt->start_idx, dring_pkt->end_idx); 3828 3829 msg_rv = vsw_send_msg(ldcp, (void *)dring_pkt, 3830 sizeof (vio_dring_msg_t), B_FALSE); 3831 ldcp->ldc_stats.dring_data_msgs++; 3832 3833 } else { 3834 mutex_exit(&priv_addr->dstate_lock); 3835 dp->restart_reqd = B_TRUE; 3836 } 3837 mutex_exit(&dp->restart_lock); 3838 } 3839 RW_EXIT(&ldcp->lane_out.dlistrw); 3840 3841 /* only do channel reset after dropping dlistrw lock */ 3842 if (msg_rv == ECONNRESET) 3843 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 3844 3845 break; 3846 3847 case VIO_SUBTYPE_NACK: 3848 DWARN(vswp, "%s(%lld): VIO_SUBTYPE_NACK", 3849 __func__, ldcp->ldc_id); 3850 /* 3851 * Something is badly wrong if we are getting NACK's 3852 * for our data pkts. So reset the channel. 3853 */ 3854 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3855 3856 break; 3857 3858 default: 3859 DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__, 3860 ldcp->ldc_id, dring_pkt->tag.vio_subtype); 3861 } 3862 3863 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 3864 } 3865 3866 /* 3867 * dummy pkt data handler function for vnet protocol version 1.0 3868 */ 3869 static void 3870 vsw_process_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen) 3871 { 3872 _NOTE(ARGUNUSED(arg1, arg2, msglen)) 3873 } 3874 3875 /* 3876 * This function handles raw pkt data messages received over the channel. 3877 * Currently, only priority-eth-type frames are received through this mechanism. 3878 * In this case, the frame(data) is present within the message itself which 3879 * is copied into an mblk before switching it. 
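 *
 * The usable payload length is msglen - VIO_PKT_DATA_HDRSIZE, i.e.
 * whatever follows the vio_raw_data_msg_t header, and it must fall within
 * [ETHERMIN, lane_out.mtu] to be accepted (see the size check below).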
3880 */ 3881 static void 3882 vsw_process_pkt_data(void *arg1, void *arg2, uint32_t msglen) 3883 { 3884 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg1; 3885 vio_raw_data_msg_t *dpkt = (vio_raw_data_msg_t *)arg2; 3886 uint32_t size; 3887 mblk_t *mp; 3888 vsw_t *vswp = ldcp->ldc_vswp; 3889 vgen_stats_t *statsp = &ldcp->ldc_stats; 3890 lane_t *lp = &ldcp->lane_out; 3891 3892 size = msglen - VIO_PKT_DATA_HDRSIZE; 3893 if (size < ETHERMIN || size > lp->mtu) { 3894 (void) atomic_inc_32(&statsp->rx_pri_fail); 3895 DWARN(vswp, "%s(%lld) invalid size(%d)\n", __func__, 3896 ldcp->ldc_id, size); 3897 return; 3898 } 3899 3900 mp = vio_multipool_allocb(&ldcp->vmp, size + VLAN_TAGSZ); 3901 if (mp == NULL) { 3902 mp = allocb(size + VLAN_TAGSZ, BPRI_MED); 3903 if (mp == NULL) { 3904 (void) atomic_inc_32(&statsp->rx_pri_fail); 3905 DWARN(vswp, "%s(%lld) allocb failure, " 3906 "unable to process priority frame\n", __func__, 3907 ldcp->ldc_id); 3908 return; 3909 } 3910 } 3911 3912 /* skip over the extra space for vlan tag */ 3913 mp->b_rptr += VLAN_TAGSZ; 3914 3915 /* copy the frame from the payload of raw data msg into the mblk */ 3916 bcopy(dpkt->data, mp->b_rptr, size); 3917 mp->b_wptr = mp->b_rptr + size; 3918 3919 /* update stats */ 3920 (void) atomic_inc_64(&statsp->rx_pri_packets); 3921 (void) atomic_add_64(&statsp->rx_pri_bytes, size); 3922 3923 /* 3924 * VLAN_TAGSZ of extra space has been pre-alloc'd if tag is needed. 3925 */ 3926 (void) vsw_vlan_frame_pretag(ldcp->ldc_port, VSW_VNETPORT, mp); 3927 3928 /* switch the frame to destination */ 3929 vswp->vsw_switch_frame(vswp, mp, VSW_VNETPORT, ldcp->ldc_port, NULL); 3930 } 3931 3932 /* 3933 * Process an in-band descriptor message (most likely from 3934 * OBP). 3935 */ 3936 static void 3937 vsw_process_data_ibnd_pkt(vsw_ldc_t *ldcp, void *pkt) 3938 { 3939 vnet_ibnd_desc_t *ibnd_desc; 3940 dring_info_t *dp = NULL; 3941 vsw_private_desc_t *priv_addr = NULL; 3942 vsw_t *vswp = ldcp->ldc_vswp; 3943 mblk_t *mp = NULL; 3944 size_t nbytes = 0; 3945 size_t off = 0; 3946 uint64_t idx = 0; 3947 uint32_t num = 1, len, datalen = 0; 3948 uint64_t ncookies = 0; 3949 int i, rv; 3950 int j = 0; 3951 3952 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3953 3954 ibnd_desc = (vnet_ibnd_desc_t *)pkt; 3955 3956 switch (ibnd_desc->hdr.tag.vio_subtype) { 3957 case VIO_SUBTYPE_INFO: 3958 D1(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3959 3960 if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV)) 3961 return; 3962 3963 /* 3964 * Data is padded to align on an 8-byte boundary; 3965 * nbytes is the actual data length, i.e. minus that 3966 * padding. 3967 */ 3968 datalen = ibnd_desc->nbytes; 3969 3970 D2(vswp, "%s(%lld): processing inband desc : " 3971 ": datalen 0x%lx", __func__, ldcp->ldc_id, datalen); 3972 3973 ncookies = ibnd_desc->ncookies; 3974 3975 /* 3976 * allocb(9F) returns an aligned data block. We 3977 * need to ensure that we ask ldc for an aligned 3978 * number of bytes also.
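 *
 * A worked example with illustrative values: datalen = 1514
 * gives nbytes & 0x7 == 2, so off = 8 - 2 = 6 and nbytes is
 * rounded up to 1520 for the ldc_mem_copy() below, while b_wptr
 * is later set from the unpadded datalen, so the padding never
 * reaches the stack.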
3979 */ 3980 nbytes = datalen; 3981 if (nbytes & 0x7) { 3982 off = 8 - (nbytes & 0x7); 3983 nbytes += off; 3984 } 3985 3986 /* alloc extra space for VLAN_TAG */ 3987 mp = allocb(datalen + 8, BPRI_MED); 3988 if (mp == NULL) { 3989 DERR(vswp, "%s(%lld): allocb failed", 3990 __func__, ldcp->ldc_id); 3991 ldcp->ldc_stats.rx_allocb_fail++; 3992 return; 3993 } 3994 3995 /* skip over the extra space for VLAN_TAG */ 3996 mp->b_rptr += 8; 3997 3998 rv = ldc_mem_copy(ldcp->ldc_handle, (caddr_t)mp->b_rptr, 3999 0, &nbytes, ibnd_desc->memcookie, (uint64_t)ncookies, 4000 LDC_COPY_IN); 4001 4002 if (rv != 0) { 4003 DERR(vswp, "%s(%d): unable to copy in data from " 4004 "%d cookie(s)", __func__, ldcp->ldc_id, ncookies); 4005 freemsg(mp); 4006 ldcp->ldc_stats.ierrors++; 4007 return; 4008 } 4009 4010 D2(vswp, "%s(%d): copied in %ld bytes using %d cookies", 4011 __func__, ldcp->ldc_id, nbytes, ncookies); 4012 4013 /* point to the actual end of data */ 4014 mp->b_wptr = mp->b_rptr + datalen; 4015 ldcp->ldc_stats.ipackets++; 4016 ldcp->ldc_stats.rbytes += datalen; 4017 4018 /* 4019 * We ACK back every in-band descriptor message we process 4020 */ 4021 ibnd_desc->hdr.tag.vio_subtype = VIO_SUBTYPE_ACK; 4022 ibnd_desc->hdr.tag.vio_sid = ldcp->local_session; 4023 (void) vsw_send_msg(ldcp, (void *)ibnd_desc, 4024 sizeof (vnet_ibnd_desc_t), B_TRUE); 4025 4026 /* 4027 * there is extra space alloc'd for VLAN_TAG 4028 */ 4029 (void) vsw_vlan_frame_pretag(ldcp->ldc_port, VSW_VNETPORT, mp); 4030 4031 /* send the packet to be switched */ 4032 vswp->vsw_switch_frame(vswp, mp, VSW_VNETPORT, 4033 ldcp->ldc_port, NULL); 4034 4035 break; 4036 4037 case VIO_SUBTYPE_ACK: 4038 D1(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 4039 4040 /* Verify the ACK is valid */ 4041 idx = ibnd_desc->hdr.desc_handle; 4042 4043 if (idx >= vsw_ntxds) { 4044 cmn_err(CE_WARN, "!vsw%d: corrupted ACK received " 4045 "(idx %ld)", vswp->instance, idx); 4046 return; 4047 } 4048 4049 if ((dp = ldcp->lane_out.dringp) == NULL) { 4050 DERR(vswp, "%s: no dring found", __func__); 4051 return; 4052 } 4053 4054 len = dp->num_descriptors; 4055 /* 4056 * If the descriptor we are being ACK'ed for is not the 4057 * one we expected, then pkts were lost somewhere, either 4058 * in a msg we tried to send, or in a previous ACK msg from 4059 * our peer. In either case we now reclaim the descriptors 4060 * in the range from the last ACK we received up to the 4061 * current ACK. 4062 */ 4063 if (idx != dp->last_ack_recv) { 4064 DWARN(vswp, "%s: dropped pkts detected, (%ld, %ld)", 4065 __func__, dp->last_ack_recv, idx); 4066 num = idx >= dp->last_ack_recv ? 4067 idx - dp->last_ack_recv + 1: 4068 (len - dp->last_ack_recv + 1) + idx; 4069 } 4070 4071 /* 4072 * When we sent the in-band message to our peer we 4073 * marked the copy in our private ring as READY. We now 4074 * check that the descriptor we are being ACK'ed for is in 4075 * fact READY, i.e. it is one we have shared with our peer. 4076 * 4077 * If it's not, we flag an error but still reset the descr 4078 * back to FREE.
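 *
 * A worked example of the reclaim range computed above, with
 * illustrative values: len = 32, last_ack_recv = 30, and an ACK
 * arrives for idx = 1.  Since idx < last_ack_recv,
 * num = (len - last_ack_recv + 1) + idx = (32 - 30 + 1) + 1 = 4,
 * so the loop below walks descriptors 30, 31, 0 and 1.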
4079 */ 4080 for (i = dp->last_ack_recv; j < num; i = (i + 1) % len, j++) { 4081 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i; 4082 mutex_enter(&priv_addr->dstate_lock); 4083 if (priv_addr->dstate != VIO_DESC_READY) { 4084 DERR(vswp, "%s: (%ld) desc at index %ld not " 4085 "READY (0x%lx)", __func__, 4086 ldcp->ldc_id, idx, priv_addr->dstate); 4087 DERR(vswp, "%s: bound %d: ncookies %ld : " 4088 "datalen %ld", __func__, 4089 priv_addr->bound, priv_addr->ncookies, 4090 priv_addr->datalen); 4091 } 4092 D2(vswp, "%s: (%lld) freeing descp at %lld", __func__, 4093 ldcp->ldc_id, idx); 4094 /* release resources associated with sent msg */ 4095 priv_addr->datalen = 0; 4096 priv_addr->dstate = VIO_DESC_FREE; 4097 mutex_exit(&priv_addr->dstate_lock); 4098 } 4099 /* update to next expected value */ 4100 dp->last_ack_recv = (idx + 1) % dp->num_descriptors; 4101 4102 break; 4103 4104 case VIO_SUBTYPE_NACK: 4105 DERR(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 4106 4107 /* 4108 * We should only get a NACK if our peer doesn't like 4109 * something about a message we have sent it. If this 4110 * happens we just release the resources associated with 4111 * the message. (We are relying on higher layers to decide 4112 * whether or not to resend.) 4113 */ 4114 4115 /* limit check */ 4116 idx = ibnd_desc->hdr.desc_handle; 4117 4118 if (idx >= vsw_ntxds) { 4119 DERR(vswp, "%s: corrupted NACK received (idx %lld)", 4120 __func__, idx); 4121 return; 4122 } 4123 4124 if ((dp = ldcp->lane_out.dringp) == NULL) { 4125 DERR(vswp, "%s: no dring found", __func__); 4126 return; 4127 } 4128 4129 priv_addr = (vsw_private_desc_t *)dp->priv_addr; 4130 4131 /* move to correct location in ring */ 4132 priv_addr += idx; 4133 4134 /* release resources associated with sent msg */ 4135 mutex_enter(&priv_addr->dstate_lock); 4136 priv_addr->datalen = 0; 4137 priv_addr->dstate = VIO_DESC_FREE; 4138 mutex_exit(&priv_addr->dstate_lock); 4139 4140 break; 4141 4142 default: 4143 DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__, 4144 ldcp->ldc_id, ibnd_desc->hdr.tag.vio_subtype); 4145 } 4146 4147 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 4148 } 4149 4150 static void 4151 vsw_process_err_pkt(vsw_ldc_t *ldcp, void *epkt, vio_msg_tag_t *tagp) 4152 { 4153 _NOTE(ARGUNUSED(epkt)) 4154 4155 vsw_t *vswp = ldcp->ldc_vswp; 4156 uint16_t env = tagp->vio_subtype_env; 4157 4158 D1(vswp, "%s (%lld): enter\n", __func__, ldcp->ldc_id); 4159 4160 /* 4161 * Error vio_subtypes have yet to be defined. So for 4162 * the moment we can't do anything. 4163 */ 4164 D2(vswp, "%s: (%x) vio_subtype env", __func__, env); 4165 4166 D1(vswp, "%s (%lld): exit\n", __func__, ldcp->ldc_id); 4167 } 4168 4169 /* transmit the packet over the given port */ 4170 int 4171 vsw_portsend(vsw_port_t *port, mblk_t *mp) 4172 { 4173 vsw_ldc_list_t *ldcl = &port->p_ldclist; 4174 vsw_ldc_t *ldcp; 4175 mblk_t *mpt; 4176 int count; 4177 int status = 0; 4178 4179 READ_ENTER(&ldcl->lockrw); 4180 /* 4181 * Note: for now, we have a single channel. 4182 */ 4183 ldcp = ldcl->head; 4184 if (ldcp == NULL) { 4185 DERR(port->p_vswp, "vsw_portsend: no ldc: dropping packet\n"); 4186 freemsgchain(mp); 4187 RW_EXIT(&ldcl->lockrw); 4188 return (1); 4189 } 4190 4191 count = vsw_vlan_frame_untag(port, VSW_VNETPORT, &mp, &mpt); 4192 4193 if (count != 0) { 4194 status = ldcp->tx(ldcp, mp, mpt, count); 4195 } 4196 4197 RW_EXIT(&ldcl->lockrw); 4198 return (status); 4199 } 4200 4201 /* 4202 * Break up frames into two separate chains: normal and 4203 * priority, based on the frame type.
The number of 4204 * priority frames is also counted and returned. 4205 * 4206 * Params: 4207 * vswp: pointer to the instance of vsw 4208 * np: head of packet chain to be broken 4209 * npt: tail of packet chain to be broken 4210 * 4211 * Returns: 4212 * np: head of normal data packets 4213 * npt: tail of normal data packets 4214 * hp: head of high priority packets 4215 * hpt: tail of high priority packets 4216 */ 4217 static uint32_t 4218 vsw_get_pri_packets(vsw_t *vswp, mblk_t **np, mblk_t **npt, 4219 mblk_t **hp, mblk_t **hpt) 4220 { 4221 mblk_t *tmp = NULL; 4222 mblk_t *smp = NULL; 4223 mblk_t *hmp = NULL; /* high prio pkts head */ 4224 mblk_t *hmpt = NULL; /* high prio pkts tail */ 4225 mblk_t *nmp = NULL; /* normal pkts head */ 4226 mblk_t *nmpt = NULL; /* normal pkts tail */ 4227 uint32_t count = 0; 4228 int i; 4229 struct ether_header *ehp; 4230 uint32_t num_types; 4231 uint16_t *types; 4232 4233 tmp = *np; 4234 while (tmp != NULL) { 4235 4236 smp = tmp; 4237 tmp = tmp->b_next; 4238 smp->b_next = NULL; 4239 smp->b_prev = NULL; 4240 4241 ehp = (struct ether_header *)smp->b_rptr; 4242 num_types = vswp->pri_num_types; 4243 types = vswp->pri_types; 4244 for (i = 0; i < num_types; i++) { 4245 if (ehp->ether_type == types[i]) { 4246 /* high priority frame */ 4247 4248 if (hmp != NULL) { 4249 hmpt->b_next = smp; 4250 hmpt = smp; 4251 } else { 4252 hmp = hmpt = smp; 4253 } 4254 count++; 4255 break; 4256 } 4257 } 4258 if (i == num_types) { 4259 /* normal data frame */ 4260 4261 if (nmp != NULL) { 4262 nmpt->b_next = smp; 4263 nmpt = smp; 4264 } else { 4265 nmp = nmpt = smp; 4266 } 4267 } 4268 } 4269 4270 *hp = hmp; 4271 *hpt = hmpt; 4272 *np = nmp; 4273 *npt = nmpt; 4274 4275 return (count); 4276 } 4277 4278 /* 4279 * Wrapper function to transmit normal and/or priority frames over the channel. 4280 */ 4281 static int 4282 vsw_ldctx_pri(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count) 4283 { 4284 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 4285 mblk_t *tmp; 4286 mblk_t *smp; 4287 mblk_t *hmp; /* high prio pkts head */ 4288 mblk_t *hmpt; /* high prio pkts tail */ 4289 mblk_t *nmp; /* normal pkts head */ 4290 mblk_t *nmpt; /* normal pkts tail */ 4291 uint32_t n = 0; 4292 vsw_t *vswp = ldcp->ldc_vswp; 4293 4294 ASSERT(VSW_PRI_ETH_DEFINED(vswp)); 4295 ASSERT(count != 0); 4296 4297 nmp = mp; 4298 nmpt = mpt; 4299 4300 /* gather any priority frames from the chain of packets */ 4301 n = vsw_get_pri_packets(vswp, &nmp, &nmpt, &hmp, &hmpt); 4302 4303 /* transmit priority frames */ 4304 tmp = hmp; 4305 while (tmp != NULL) { 4306 smp = tmp; 4307 tmp = tmp->b_next; 4308 smp->b_next = NULL; 4309 vsw_ldcsend_pkt(ldcp, smp); 4310 } 4311 4312 count -= n; 4313 4314 if (count == 0) { 4315 /* no normal data frames to process */ 4316 return (0); 4317 } 4318 4319 return (vsw_ldctx(ldcp, nmp, nmpt, count)); 4320 } 4321 4322 /* 4323 * Wrapper function to transmit normal frames over the channel. 4324 */ 4325 static int 4326 vsw_ldctx(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count) 4327 { 4328 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 4329 mblk_t *tmp = NULL; 4330 4331 ASSERT(count != 0); 4332 /* 4333 * If the TX thread is enabled, then queue the 4334 * ordinary frames and signal the tx thread. 4335 */ 4336 if (ldcp->tx_thread != NULL) { 4337 4338 mutex_enter(&ldcp->tx_thr_lock); 4339 4340 if ((ldcp->tx_cnt + count) >= vsw_max_tx_qcount) { 4341 /* 4342 * If we reached queue limit, 4343 * do not queue new packets, 4344 * drop them. 
4345 */ 4346 ldcp->ldc_stats.tx_qfull += count; 4347 mutex_exit(&ldcp->tx_thr_lock); 4348 freemsgchain(mp); 4349 goto exit; 4350 } 4351 if (ldcp->tx_mhead == NULL) { 4352 ldcp->tx_mhead = mp; 4353 ldcp->tx_mtail = mpt; 4354 cv_signal(&ldcp->tx_thr_cv); 4355 } else { 4356 ldcp->tx_mtail->b_next = mp; 4357 ldcp->tx_mtail = mpt; 4358 } 4359 ldcp->tx_cnt += count; 4360 mutex_exit(&ldcp->tx_thr_lock); 4361 } else { 4362 while (mp != NULL) { 4363 tmp = mp->b_next; 4364 mp->b_next = mp->b_prev = NULL; 4365 (void) vsw_ldcsend(ldcp, mp, 1); 4366 mp = tmp; 4367 } 4368 } 4369 4370 exit: 4371 return (0); 4372 } 4373 4374 /* 4375 * This function transmits the frame in the payload of a raw data 4376 * (VIO_PKT_DATA) message. Thus, it provides an Out-Of-Band path to 4377 * send special frames with high priorities, without going through 4378 * the normal data path, which uses the descriptor ring mechanism. 4379 */ 4380 static void 4381 vsw_ldcsend_pkt(vsw_ldc_t *ldcp, mblk_t *mp) 4382 { 4383 vio_raw_data_msg_t *pkt; 4384 mblk_t *bp; 4385 mblk_t *nmp = NULL; 4386 caddr_t dst; 4387 uint32_t mblksz; 4388 uint32_t size; 4389 uint32_t nbytes; 4390 int rv; 4391 vsw_t *vswp = ldcp->ldc_vswp; 4392 vgen_stats_t *statsp = &ldcp->ldc_stats; 4393 4394 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 4395 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 4396 (void) atomic_inc_32(&statsp->tx_pri_fail); 4397 DWARN(vswp, "%s(%lld) status(%d) lstate(0x%llx), dropping " 4398 "packet\n", __func__, ldcp->ldc_id, ldcp->ldc_status, 4399 ldcp->lane_out.lstate); 4400 goto send_pkt_exit; 4401 } 4402 4403 size = msgsize(mp); 4404 4405 /* frame size bigger than available payload len of raw data msg? */ 4406 if (size > (size_t)(ldcp->msglen - VIO_PKT_DATA_HDRSIZE)) { 4407 (void) atomic_inc_32(&statsp->tx_pri_fail); 4408 DWARN(vswp, "%s(%lld) invalid size(%d)\n", __func__, 4409 ldcp->ldc_id, size); 4410 goto send_pkt_exit; 4411 } 4412 4413 if (size < ETHERMIN) 4414 size = ETHERMIN; 4415 4416 /* alloc space for a raw data message */ 4417 nmp = vio_allocb(vswp->pri_tx_vmp); 4418 if (nmp == NULL) { 4419 (void) atomic_inc_32(&statsp->tx_pri_fail); 4420 DWARN(vswp, "vio_allocb failed\n"); 4421 goto send_pkt_exit; 4422 } 4423 pkt = (vio_raw_data_msg_t *)nmp->b_rptr; 4424 4425 /* copy frame into the payload of raw data message */ 4426 dst = (caddr_t)pkt->data; 4427 for (bp = mp; bp != NULL; bp = bp->b_cont) { 4428 mblksz = MBLKL(bp); 4429 bcopy(bp->b_rptr, dst, mblksz); 4430 dst += mblksz; 4431 } 4432 4433 /* setup the raw data msg */ 4434 pkt->tag.vio_msgtype = VIO_TYPE_DATA; 4435 pkt->tag.vio_subtype = VIO_SUBTYPE_INFO; 4436 pkt->tag.vio_subtype_env = VIO_PKT_DATA; 4437 pkt->tag.vio_sid = ldcp->local_session; 4438 nbytes = VIO_PKT_DATA_HDRSIZE + size; 4439 4440 /* send the msg over ldc */ 4441 rv = vsw_send_msg(ldcp, (void *)pkt, nbytes, B_TRUE); 4442 if (rv != 0) { 4443 (void) atomic_inc_32(&statsp->tx_pri_fail); 4444 DWARN(vswp, "%s(%lld) Error sending priority frame\n", __func__, 4445 ldcp->ldc_id); 4446 goto send_pkt_exit; 4447 } 4448 4449 /* update stats */ 4450 (void) atomic_inc_64(&statsp->tx_pri_packets); 4451 (void) atomic_add_64(&statsp->tx_pri_bytes, size); 4452 4453 send_pkt_exit: 4454 if (nmp != NULL) 4455 freemsg(nmp); 4456 freemsg(mp); 4457 } 4458 4459 /* 4460 * Transmit the packet over the given LDC channel. 4461 * 4462 * The 'retries' argument indicates how many times a packet 4463 * is retried before it is dropped.
Note, the retry is done 4464 * only for a resource related failure; for all other failures 4465 * the packet is dropped immediately. 4466 */ 4467 static int 4468 vsw_ldcsend(vsw_ldc_t *ldcp, mblk_t *mp, uint32_t retries) 4469 { 4470 int i; 4471 int rc; 4472 int status = 0; 4473 vsw_port_t *port = ldcp->ldc_port; 4474 dring_info_t *dp = NULL; 4475 4476 4477 for (i = 0; i < retries; ) { 4478 /* 4479 * Send the message out using the appropriate 4480 * transmit function, which will free the mblock when it 4481 * is finished with it. 4482 */ 4483 mutex_enter(&port->tx_lock); 4484 if (port->transmit != NULL) { 4485 status = (*port->transmit)(ldcp, mp); 4486 } 4487 if (status == LDC_TX_SUCCESS) { 4488 mutex_exit(&port->tx_lock); 4489 break; 4490 } 4491 i++; /* increment the counter here */ 4492 4493 /* If it's the last retry, then update the oerrors stat */ 4494 if ((i == retries) && (status == LDC_TX_NORESOURCES)) { 4495 ldcp->ldc_stats.oerrors++; 4496 } 4497 mutex_exit(&port->tx_lock); 4498 4499 if (status != LDC_TX_NORESOURCES) { 4500 /* 4501 * No retrying required for errors unrelated 4502 * to resources. 4503 */ 4504 break; 4505 } 4506 READ_ENTER(&ldcp->lane_out.dlistrw); 4507 if (((dp = ldcp->lane_out.dringp) != NULL) && 4508 ((VSW_VER_GTEQ(ldcp, 1, 2) && 4509 (ldcp->lane_out.xfer_mode & VIO_DRING_MODE_V1_2)) || 4510 ((VSW_VER_LT(ldcp, 1, 2) && 4511 (ldcp->lane_out.xfer_mode == VIO_DRING_MODE_V1_0))))) { 4512 rc = vsw_reclaim_dring(dp, dp->end_idx); 4513 } else { 4514 /* 4515 * If there is no dring or the xfer_mode is 4516 * set to DESC_MODE (i.e., OBP), then simply break here. 4517 */ 4518 RW_EXIT(&ldcp->lane_out.dlistrw); 4519 break; 4520 } 4521 RW_EXIT(&ldcp->lane_out.dlistrw); 4522 4523 /* 4524 * Delay only if none were reclaimed 4525 * and it's not the last retry. 4526 */ 4527 if ((rc == 0) && (i < retries)) { 4528 delay(drv_usectohz(vsw_ldc_tx_delay)); 4529 } 4530 } 4531 freemsg(mp); 4532 return (status); 4533 } 4534 4535 /* 4536 * Send packet out via descriptor ring to a logical device. 4537 */ 4538 static int 4539 vsw_dringsend(vsw_ldc_t *ldcp, mblk_t *mp) 4540 { 4541 vio_dring_msg_t dring_pkt; 4542 dring_info_t *dp = NULL; 4543 vsw_private_desc_t *priv_desc = NULL; 4544 vnet_public_desc_t *pub = NULL; 4545 vsw_t *vswp = ldcp->ldc_vswp; 4546 mblk_t *bp; 4547 size_t n, size; 4548 caddr_t bufp; 4549 int idx; 4550 int status = LDC_TX_SUCCESS; 4551 struct ether_header *ehp = (struct ether_header *)mp->b_rptr; 4552 lane_t *lp = &ldcp->lane_out; 4553 4554 D1(vswp, "%s(%lld): enter\n", __func__, ldcp->ldc_id); 4555 4556 /* TODO: make test a macro */ 4557 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 4558 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 4559 DWARN(vswp, "%s(%lld) status(%d) lstate(0x%llx), dropping " 4560 "packet\n", __func__, ldcp->ldc_id, ldcp->ldc_status, 4561 ldcp->lane_out.lstate); 4562 ldcp->ldc_stats.oerrors++; 4563 return (LDC_TX_FAILURE); 4564 } 4565 4566 /* 4567 * Note - using first ring only; this may change 4568 * in the future.
4569 */ 4570 READ_ENTER(&ldcp->lane_out.dlistrw); 4571 if ((dp = ldcp->lane_out.dringp) == NULL) { 4572 RW_EXIT(&ldcp->lane_out.dlistrw); 4573 DERR(vswp, "%s(%lld): no dring for outbound lane on" 4574 " channel %d", __func__, ldcp->ldc_id, ldcp->ldc_id); 4575 ldcp->ldc_stats.oerrors++; 4576 return (LDC_TX_FAILURE); 4577 } 4578 4579 size = msgsize(mp); 4580 if (size > (size_t)lp->mtu) { 4581 RW_EXIT(&ldcp->lane_out.dlistrw); 4582 DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__, 4583 ldcp->ldc_id, size); 4584 ldcp->ldc_stats.oerrors++; 4585 return (LDC_TX_FAILURE); 4586 } 4587 4588 /* 4589 * Find a free descriptor 4590 * 4591 * Note: for the moment we are assuming that we will only 4592 * have one dring going from the switch to each of its 4593 * peers. This may change in the future. 4594 */ 4595 if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) { 4596 D2(vswp, "%s(%lld): no descriptor available for ring " 4597 "at 0x%llx", __func__, ldcp->ldc_id, dp); 4598 4599 /* nothing more we can do */ 4600 status = LDC_TX_NORESOURCES; 4601 ldcp->ldc_stats.tx_no_desc++; 4602 goto vsw_dringsend_free_exit; 4603 } else { 4604 D2(vswp, "%s(%lld): free private descriptor found at pos %ld " 4605 "addr 0x%llx\n", __func__, ldcp->ldc_id, idx, priv_desc); 4606 } 4607 4608 /* copy data into the descriptor */ 4609 bufp = priv_desc->datap; 4610 bufp += VNET_IPALIGN; 4611 for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) { 4612 n = MBLKL(bp); 4613 bcopy(bp->b_rptr, bufp, n); 4614 bufp += n; 4615 } 4616 4617 priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size; 4618 4619 pub = priv_desc->descp; 4620 pub->nbytes = priv_desc->datalen; 4621 4622 /* update statistics */ 4623 if (IS_BROADCAST(ehp)) 4624 ldcp->ldc_stats.brdcstxmt++; 4625 else if (IS_MULTICAST(ehp)) 4626 ldcp->ldc_stats.multixmt++; 4627 ldcp->ldc_stats.opackets++; 4628 ldcp->ldc_stats.obytes += priv_desc->datalen; 4629 4630 mutex_enter(&priv_desc->dstate_lock); 4631 pub->hdr.dstate = VIO_DESC_READY; 4632 mutex_exit(&priv_desc->dstate_lock); 4633 4634 /* 4635 * Determine whether or not we need to send a message to our 4636 * peer prompting them to read our newly updated descriptor(s). 4637 */ 4638 mutex_enter(&dp->restart_lock); 4639 if (dp->restart_reqd) { 4640 dp->restart_reqd = B_FALSE; 4641 ldcp->ldc_stats.dring_data_msgs++; 4642 mutex_exit(&dp->restart_lock); 4643 4644 /* 4645 * Send a vio_dring_msg to peer to prompt them to read 4646 * the updated descriptor ring. 4647 */ 4648 dring_pkt.tag.vio_msgtype = VIO_TYPE_DATA; 4649 dring_pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 4650 dring_pkt.tag.vio_subtype_env = VIO_DRING_DATA; 4651 dring_pkt.tag.vio_sid = ldcp->local_session; 4652 4653 /* Note - for now using first ring */ 4654 dring_pkt.dring_ident = dp->ident; 4655 4656 /* 4657 * If last_ack_recv is -1 then we know we've not 4658 * received any ACKs yet, so this must be the first 4659 * msg sent; set the start to the beginning of the ring.
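 *
 * E.g. (illustrative values): with 32 descriptors and
 * last_ack_recv = 5, the peer is told to start at
 * (5 + 1) % 32 = 6.  end_idx is set to -1 below, which in
 * effect tells the peer to keep processing until it reaches a
 * descriptor that is no longer READY.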
4660 */ 4661 mutex_enter(&dp->dlock); 4662 if (dp->last_ack_recv == -1) { 4663 dring_pkt.start_idx = 0; 4664 } else { 4665 dring_pkt.start_idx = 4666 (dp->last_ack_recv + 1) % dp->num_descriptors; 4667 } 4668 dring_pkt.end_idx = -1; 4669 mutex_exit(&dp->dlock); 4670 4671 D3(vswp, "%s(%lld): dring 0x%llx : ident 0x%llx\n", __func__, 4672 ldcp->ldc_id, dp, dring_pkt.dring_ident); 4673 D3(vswp, "%s(%lld): start %lld : end %lld :\n", 4674 __func__, ldcp->ldc_id, dring_pkt.start_idx, 4675 dring_pkt.end_idx); 4676 4677 RW_EXIT(&ldcp->lane_out.dlistrw); 4678 4679 (void) vsw_send_msg(ldcp, (void *)&dring_pkt, 4680 sizeof (vio_dring_msg_t), B_TRUE); 4681 4682 return (status); 4683 4684 } else { 4685 mutex_exit(&dp->restart_lock); 4686 D2(vswp, "%s(%lld): updating descp %d", __func__, 4687 ldcp->ldc_id, idx); 4688 } 4689 4690 vsw_dringsend_free_exit: 4691 4692 RW_EXIT(&ldcp->lane_out.dlistrw); 4693 4694 D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id); 4695 return (status); 4696 } 4697 4698 /* 4699 * Send an in-band descriptor message over ldc. 4700 */ 4701 static int 4702 vsw_descrsend(vsw_ldc_t *ldcp, mblk_t *mp) 4703 { 4704 vsw_t *vswp = ldcp->ldc_vswp; 4705 vnet_ibnd_desc_t ibnd_msg; 4706 vsw_private_desc_t *priv_desc = NULL; 4707 dring_info_t *dp = NULL; 4708 size_t n, size = 0; 4709 caddr_t bufp; 4710 mblk_t *bp; 4711 int idx, i; 4712 int status = LDC_TX_SUCCESS; 4713 static int warn_msg = 1; 4714 lane_t *lp = &ldcp->lane_out; 4715 4716 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 4717 4718 ASSERT(mp != NULL); 4719 4720 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 4721 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 4722 DERR(vswp, "%s(%lld) status(%d) state (0x%llx), dropping pkt", 4723 __func__, ldcp->ldc_id, ldcp->ldc_status, 4724 ldcp->lane_out.lstate); 4725 ldcp->ldc_stats.oerrors++; 4726 return (LDC_TX_FAILURE); 4727 } 4728 4729 /* 4730 * We only expect a single dring to exist, which we use 4731 * as an internal buffer rather than a transfer channel.
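 *
 * In this in-band (descriptor) mode the peer pulls the data
 * itself; conceptually, all we hand it is the ldc memory
 * cookies describing our private buffer, i.e. (as done further
 * below)
 *
 *	ibnd_msg.memcookie[i] = priv_desc->memcookie[i];
 *	ibnd_msg.ncookies = priv_desc->ncookies;
 *	ibnd_msg.nbytes = size;
 *
 * and the receiving end performs the ldc_mem_copy(), just as
 * vsw_process_data_ibnd_pkt() above does for the inbound
 * direction.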
4732 */ 4733 READ_ENTER(&ldcp->lane_out.dlistrw); 4734 if ((dp = ldcp->lane_out.dringp) == NULL) { 4735 DERR(vswp, "%s(%lld): no dring for outbound lane", 4736 __func__, ldcp->ldc_id); 4737 DERR(vswp, "%s(%lld) status(%d) state (0x%llx)", __func__, 4738 ldcp->ldc_id, ldcp->ldc_status, ldcp->lane_out.lstate); 4739 RW_EXIT(&ldcp->lane_out.dlistrw); 4740 ldcp->ldc_stats.oerrors++; 4741 return (LDC_TX_FAILURE); 4742 } 4743 4744 size = msgsize(mp); 4745 if (size > (size_t)lp->mtu) { 4746 RW_EXIT(&ldcp->lane_out.dlistrw); 4747 DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__, 4748 ldcp->ldc_id, size); 4749 ldcp->ldc_stats.oerrors++; 4750 return (LDC_TX_FAILURE); 4751 } 4752 4753 /* 4754 * Find a free descriptor in our buffer ring 4755 */ 4756 if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) { 4757 RW_EXIT(&ldcp->lane_out.dlistrw); 4758 if (warn_msg) { 4759 DERR(vswp, "%s(%lld): no descriptor available for ring " 4760 "at 0x%llx", __func__, ldcp->ldc_id, dp); 4761 warn_msg = 0; 4762 } 4763 4764 /* nothing more we can do */ 4765 status = LDC_TX_NORESOURCES; 4766 goto vsw_descrsend_free_exit; 4767 } else { 4768 D2(vswp, "%s(%lld): free private descriptor found at pos " 4769 "%ld addr 0x%x\n", __func__, ldcp->ldc_id, idx, priv_desc); 4770 warn_msg = 1; 4771 } 4772 4773 /* copy data into the descriptor */ 4774 bufp = priv_desc->datap; 4775 for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) { 4776 n = MBLKL(bp); 4777 bcopy(bp->b_rptr, bufp, n); 4778 bufp += n; 4779 } 4780 4781 priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size; 4782 4783 /* create and send the in-band descp msg */ 4784 ibnd_msg.hdr.tag.vio_msgtype = VIO_TYPE_DATA; 4785 ibnd_msg.hdr.tag.vio_subtype = VIO_SUBTYPE_INFO; 4786 ibnd_msg.hdr.tag.vio_subtype_env = VIO_DESC_DATA; 4787 ibnd_msg.hdr.tag.vio_sid = ldcp->local_session; 4788 4789 /* 4790 * Copy the mem cookies describing the data from the 4791 * private region of the descriptor ring into the inband 4792 * descriptor. 
4793 */ 4794 for (i = 0; i < priv_desc->ncookies; i++) { 4795 bcopy(&priv_desc->memcookie[i], &ibnd_msg.memcookie[i], 4796 sizeof (ldc_mem_cookie_t)); 4797 } 4798 4799 ibnd_msg.hdr.desc_handle = idx; 4800 ibnd_msg.ncookies = priv_desc->ncookies; 4801 ibnd_msg.nbytes = size; 4802 4803 ldcp->ldc_stats.opackets++; 4804 ldcp->ldc_stats.obytes += size; 4805 4806 RW_EXIT(&ldcp->lane_out.dlistrw); 4807 4808 (void) vsw_send_msg(ldcp, (void *)&ibnd_msg, 4809 sizeof (vnet_ibnd_desc_t), B_TRUE); 4810 4811 vsw_descrsend_free_exit: 4812 4813 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 4814 return (status); 4815 } 4816 4817 static void 4818 vsw_send_ver(void *arg) 4819 { 4820 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 4821 vsw_t *vswp = ldcp->ldc_vswp; 4822 lane_t *lp = &ldcp->lane_out; 4823 vio_ver_msg_t ver_msg; 4824 4825 D1(vswp, "%s enter", __func__); 4826 4827 ver_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 4828 ver_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 4829 ver_msg.tag.vio_subtype_env = VIO_VER_INFO; 4830 ver_msg.tag.vio_sid = ldcp->local_session; 4831 4832 if (vsw_obp_ver_proto_workaround == B_FALSE) { 4833 ver_msg.ver_major = vsw_versions[0].ver_major; 4834 ver_msg.ver_minor = vsw_versions[0].ver_minor; 4835 } else { 4836 /* use the major/minor that we've ack'd */ 4837 lane_t *lpi = &ldcp->lane_in; 4838 ver_msg.ver_major = lpi->ver_major; 4839 ver_msg.ver_minor = lpi->ver_minor; 4840 } 4841 ver_msg.dev_class = VDEV_NETWORK_SWITCH; 4842 4843 lp->lstate |= VSW_VER_INFO_SENT; 4844 lp->ver_major = ver_msg.ver_major; 4845 lp->ver_minor = ver_msg.ver_minor; 4846 4847 DUMP_TAG(ver_msg.tag); 4848 4849 (void) vsw_send_msg(ldcp, &ver_msg, sizeof (vio_ver_msg_t), B_TRUE); 4850 4851 D1(vswp, "%s (%d): exit", __func__, ldcp->ldc_id); 4852 } 4853 4854 static void 4855 vsw_send_attr(vsw_ldc_t *ldcp) 4856 { 4857 vsw_t *vswp = ldcp->ldc_vswp; 4858 lane_t *lp = &ldcp->lane_out; 4859 vnet_attr_msg_t attr_msg; 4860 4861 D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id); 4862 4863 /* 4864 * Subtype is set to INFO by default 4865 */ 4866 attr_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 4867 attr_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 4868 attr_msg.tag.vio_subtype_env = VIO_ATTR_INFO; 4869 attr_msg.tag.vio_sid = ldcp->local_session; 4870 4871 /* payload copied from default settings for lane */ 4872 attr_msg.mtu = lp->mtu; 4873 attr_msg.addr_type = lp->addr_type; 4874 attr_msg.xfer_mode = lp->xfer_mode; 4875 attr_msg.ack_freq = lp->ack_freq; 4876 4877 READ_ENTER(&vswp->if_lockrw); 4878 attr_msg.addr = vnet_macaddr_strtoul((vswp->if_addr).ether_addr_octet); 4879 RW_EXIT(&vswp->if_lockrw); 4880 4881 ldcp->lane_out.lstate |= VSW_ATTR_INFO_SENT; 4882 4883 DUMP_TAG(attr_msg.tag); 4884 4885 (void) vsw_send_msg(ldcp, &attr_msg, sizeof (vnet_attr_msg_t), B_TRUE); 4886 4887 D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id); 4888 } 4889 4890 /* 4891 * Create dring info msg (which also results in the creation of 4892 * a dring). 4893 */ 4894 static vio_dring_reg_msg_t * 4895 vsw_create_dring_info_pkt(vsw_ldc_t *ldcp) 4896 { 4897 vio_dring_reg_msg_t *mp; 4898 dring_info_t *dp; 4899 vsw_t *vswp = ldcp->ldc_vswp; 4900 int rv; 4901 4902 D1(vswp, "vsw_create_dring_info_pkt enter\n"); 4903 4904 /* 4905 * If we can't create a dring, there is obviously no point 4906 * in sending a message.
4907 */ 4908 if ((dp = vsw_create_dring(ldcp)) == NULL) 4909 return (NULL); 4910 4911 /* Allocate pools of receive mblks */ 4912 rv = vsw_init_multipools(ldcp, vswp); 4913 if (rv) { 4914 /* 4915 * We do not return failure if receive mblk pools can't be 4916 * allocated; instead, allocb(9F) will be used to dynamically 4917 * allocate buffers during receive. 4918 */ 4919 DWARN(vswp, "%s: unable to create free mblk pools for" 4920 " channel %ld (rv %d)", __func__, ldcp->ldc_id, rv); 4921 } 4922 4923 mp = kmem_zalloc(sizeof (vio_dring_reg_msg_t), KM_SLEEP); 4924 4925 mp->tag.vio_msgtype = VIO_TYPE_CTRL; 4926 mp->tag.vio_subtype = VIO_SUBTYPE_INFO; 4927 mp->tag.vio_subtype_env = VIO_DRING_REG; 4928 mp->tag.vio_sid = ldcp->local_session; 4929 4930 /* payload */ 4931 mp->num_descriptors = dp->num_descriptors; 4932 mp->descriptor_size = dp->descriptor_size; 4933 mp->options = dp->options; 4934 mp->ncookies = dp->ncookies; 4935 bcopy(&dp->cookie[0], &mp->cookie[0], sizeof (ldc_mem_cookie_t)); 4936 4937 mp->dring_ident = 0; 4938 4939 D1(vswp, "vsw_create_dring_info_pkt exit\n"); 4940 4941 return (mp); 4942 } 4943 4944 static void 4945 vsw_send_dring_info(vsw_ldc_t *ldcp) 4946 { 4947 vio_dring_reg_msg_t *dring_msg; 4948 vsw_t *vswp = ldcp->ldc_vswp; 4949 4950 D1(vswp, "%s: (%ld) enter", __func__, ldcp->ldc_id); 4951 4952 dring_msg = vsw_create_dring_info_pkt(ldcp); 4953 if (dring_msg == NULL) { 4954 cmn_err(CE_WARN, "!vsw%d: %s: error creating msg", 4955 vswp->instance, __func__); 4956 return; 4957 } 4958 4959 ldcp->lane_out.lstate |= VSW_DRING_INFO_SENT; 4960 4961 DUMP_TAG_PTR((vio_msg_tag_t *)dring_msg); 4962 4963 (void) vsw_send_msg(ldcp, dring_msg, 4964 sizeof (vio_dring_reg_msg_t), B_TRUE); 4965 4966 kmem_free(dring_msg, sizeof (vio_dring_reg_msg_t)); 4967 4968 D1(vswp, "%s: (%ld) exit", __func__, ldcp->ldc_id); 4969 } 4970 4971 static void 4972 vsw_send_rdx(vsw_ldc_t *ldcp) 4973 { 4974 vsw_t *vswp = ldcp->ldc_vswp; 4975 vio_rdx_msg_t rdx_msg; 4976 4977 D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id); 4978 4979 rdx_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 4980 rdx_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 4981 rdx_msg.tag.vio_subtype_env = VIO_RDX; 4982 rdx_msg.tag.vio_sid = ldcp->local_session; 4983 4984 ldcp->lane_in.lstate |= VSW_RDX_INFO_SENT; 4985 4986 DUMP_TAG(rdx_msg.tag); 4987 4988 (void) vsw_send_msg(ldcp, &rdx_msg, sizeof (vio_rdx_msg_t), B_TRUE); 4989 4990 D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id); 4991 } 4992 4993 /* 4994 * Generic routine to send a message out over the ldc channel. 4995 * 4996 * It is possible that when we attempt to write over the ldc channel 4997 * that we get notified that it has been reset. Depending on the value 4998 * of the handle_reset flag we either handle that event here or simply 4999 * notify the caller that the channel was reset.
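 *
 * Typical usage; both patterns are taken from elsewhere in this
 * file:
 *
 *	(void) vsw_send_msg(ldcp, &msg, sizeof (msg), B_TRUE);
 *
 * when the caller holds no locks that conflict with a channel
 * reset, versus
 *
 *	rv = vsw_send_msg(ldcp, &msg, sizeof (msg), B_FALSE);
 *	RW_EXIT(&ldcp->lane_out.dlistrw);
 *	if (rv == ECONNRESET)
 *		vsw_process_conn_evt(ldcp, VSW_CONN_RESET);
 *
 * when dlistrw is held across the send and the reset must be
 * deferred until the lock has been dropped (see the restart
 * message handling in the dring ACK path above).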
5000 */ 5001 int 5002 vsw_send_msg(vsw_ldc_t *ldcp, void *msgp, int size, boolean_t handle_reset) 5003 { 5004 int rv, wretries = vsw_wretries; /* local copy of the tunable */ 5005 size_t msglen = size; 5006 vio_msg_tag_t *tag = (vio_msg_tag_t *)msgp; 5007 vsw_t *vswp = ldcp->ldc_vswp; 5008 vio_dring_msg_t *dmsg; 5009 vio_raw_data_msg_t *rmsg; 5010 vnet_ibnd_desc_t *imsg; 5011 boolean_t data_msg = B_FALSE; 5012 5013 D1(vswp, "vsw_send_msg (%lld) enter : sending %d bytes", 5014 ldcp->ldc_id, size); 5015 5016 D2(vswp, "send_msg: type 0x%llx", tag->vio_msgtype); 5017 D2(vswp, "send_msg: stype 0x%llx", tag->vio_subtype); 5018 D2(vswp, "send_msg: senv 0x%llx", tag->vio_subtype_env); 5019 5020 mutex_enter(&ldcp->ldc_txlock); 5021 5022 if (tag->vio_subtype == VIO_SUBTYPE_INFO) { 5023 if (tag->vio_subtype_env == VIO_DRING_DATA) { 5024 dmsg = (vio_dring_msg_t *)tag; 5025 dmsg->seq_num = ldcp->lane_out.seq_num; 5026 data_msg = B_TRUE; 5027 } else if (tag->vio_subtype_env == VIO_PKT_DATA) { 5028 rmsg = (vio_raw_data_msg_t *)tag; 5029 rmsg->seq_num = ldcp->lane_out.seq_num; 5030 data_msg = B_TRUE; 5031 } else if (tag->vio_subtype_env == VIO_DESC_DATA) { 5032 imsg = (vnet_ibnd_desc_t *)tag; 5033 imsg->hdr.seq_num = ldcp->lane_out.seq_num; 5034 data_msg = B_TRUE; 5035 } 5036 } 5037 5038 do { 5039 msglen = size; 5040 rv = ldc_write(ldcp->ldc_handle, (caddr_t)msgp, &msglen); 5041 } while (rv == EWOULDBLOCK && --wretries > 0); 5042 5043 if (rv == 0 && data_msg == B_TRUE) { 5044 ldcp->lane_out.seq_num++; 5045 } 5046 5047 if ((rv != 0) || (msglen != size)) { 5048 DERR(vswp, "vsw_send_msg:ldc_write failed: chan(%lld) rv(%d) " 5049 "size (%d) msglen(%d)\n", ldcp->ldc_id, rv, size, msglen); 5050 ldcp->ldc_stats.oerrors++; 5051 } 5052 5053 mutex_exit(&ldcp->ldc_txlock); 5054 5055 /* 5056 * If the channel has been reset we either handle it here or 5057 * simply report back that it has been reset and let the caller 5058 * decide what to do. 5059 */ 5060 if (rv == ECONNRESET) { 5061 DWARN(vswp, "%s (%lld) channel reset", __func__, ldcp->ldc_id); 5062 5063 /* 5064 * N.B - must never be holding the dlistrw lock when 5065 * we do a reset of the channel. 5066 */ 5067 if (handle_reset) { 5068 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 5069 } 5070 } 5071 5072 return (rv); 5073 } 5074 5075 /* 5076 * Remove the specified address from the list of addresses maintained 5077 * in this port node.
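 *
 * A sketch of the expected calling pattern (hypothetical
 * caller): the node is unlinked from the list but not freed
 * here, so ownership passes to the caller, e.g.
 *
 *	mcst_addr_t *mcst_p;
 *
 *	mcst_p = vsw_del_addr(VSW_VNETPORT, port, addr);
 *	if (mcst_p != NULL)
 *		kmem_free(mcst_p, sizeof (mcst_addr_t));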
5078 */ 5079 mcst_addr_t * 5080 vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr) 5081 { 5082 vsw_t *vswp = NULL; 5083 vsw_port_t *port = NULL; 5084 mcst_addr_t *prev_p = NULL; 5085 mcst_addr_t *curr_p = NULL; 5086 5087 D1(NULL, "%s: enter : devtype %d : addr 0x%llx", 5088 __func__, devtype, addr); 5089 5090 if (devtype == VSW_VNETPORT) { 5091 port = (vsw_port_t *)arg; 5092 mutex_enter(&port->mca_lock); 5093 prev_p = curr_p = port->mcap; 5094 } else { 5095 vswp = (vsw_t *)arg; 5096 mutex_enter(&vswp->mca_lock); 5097 prev_p = curr_p = vswp->mcap; 5098 } 5099 5100 while (curr_p != NULL) { 5101 if (curr_p->addr == addr) { 5102 D2(NULL, "%s: address found", __func__); 5103 /* match found */ 5104 if (prev_p == curr_p) { 5105 /* list head */ 5106 if (devtype == VSW_VNETPORT) 5107 port->mcap = curr_p->nextp; 5108 else 5109 vswp->mcap = curr_p->nextp; 5110 } else { 5111 prev_p->nextp = curr_p->nextp; 5112 } 5113 break; 5114 } else { 5115 prev_p = curr_p; 5116 curr_p = curr_p->nextp; 5117 } 5118 } 5119 5120 if (devtype == VSW_VNETPORT) 5121 mutex_exit(&port->mca_lock); 5122 else 5123 mutex_exit(&vswp->mca_lock); 5124 5125 D1(NULL, "%s: exit", __func__); 5126 5127 return (curr_p); 5128 } 5129 5130 /* 5131 * Creates a descriptor ring (dring) and links it into the 5132 * list of outbound drings for this channel. 5133 * 5134 * Returns NULL if creation failed. 5135 */ 5136 static dring_info_t * 5137 vsw_create_dring(vsw_ldc_t *ldcp) 5138 { 5139 vsw_private_desc_t *priv_addr = NULL; 5140 vsw_t *vswp = ldcp->ldc_vswp; 5141 ldc_mem_info_t minfo; 5142 dring_info_t *dp, *tp; 5143 int i; 5144 5145 dp = (dring_info_t *)kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 5146 5147 mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL); 5148 5149 /* create public section of ring */ 5150 if ((ldc_mem_dring_create(vsw_ntxds, 5151 VSW_PUB_SIZE, &dp->handle)) != 0) { 5152 5153 DERR(vswp, "vsw_create_dring(%lld): ldc dring create " 5154 "failed", ldcp->ldc_id); 5155 goto create_fail_exit; 5156 } 5157 5158 ASSERT(dp->handle != NULL); 5159 5160 /* 5161 * Get the base address of the public section of the ring. 5162 */ 5163 if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) { 5164 DERR(vswp, "vsw_create_dring(%lld): dring info failed\n", 5165 ldcp->ldc_id); 5166 goto dring_fail_exit; 5167 } else { 5168 ASSERT(minfo.vaddr != 0); 5169 dp->pub_addr = minfo.vaddr; 5170 } 5171 5172 dp->num_descriptors = vsw_ntxds; 5173 dp->descriptor_size = VSW_PUB_SIZE; 5174 dp->options = VIO_TX_DRING; 5175 dp->ncookies = 1; /* guaranteed by ldc */ 5176 5177 /* 5178 * create private portion of ring 5179 */ 5180 dp->priv_addr = (vsw_private_desc_t *)kmem_zalloc( 5181 (sizeof (vsw_private_desc_t) * vsw_ntxds), KM_SLEEP); 5182 5183 if (vsw_setup_ring(ldcp, dp)) { 5184 DERR(vswp, "%s: unable to setup ring", __func__); 5185 goto dring_fail_exit; 5186 } 5187 5188 /* haven't used any descriptors yet */ 5189 dp->end_idx = 0; 5190 dp->last_ack_recv = -1; 5191 5192 /* bind dring to the channel */ 5193 if ((ldc_mem_dring_bind(ldcp->ldc_handle, dp->handle, 5194 LDC_DIRECT_MAP | LDC_SHADOW_MAP, LDC_MEM_RW, 5195 &dp->cookie[0], &dp->ncookies)) != 0) { 5196 DERR(vswp, "vsw_create_dring: unable to bind to channel " 5197 "%lld", ldcp->ldc_id); 5198 goto dring_fail_exit; 5199 } 5200 5201 mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL); 5202 dp->restart_reqd = B_TRUE; 5203 5204 /* 5205 * Only ever create rings for the outgoing lane. Link it onto 5206 * the end of the list.
5207 */ 5208 WRITE_ENTER(&ldcp->lane_out.dlistrw); 5209 if (ldcp->lane_out.dringp == NULL) { 5210 D2(vswp, "vsw_create_dring: adding first outbound ring"); 5211 ldcp->lane_out.dringp = dp; 5212 } else { 5213 tp = ldcp->lane_out.dringp; 5214 while (tp->next != NULL) 5215 tp = tp->next; 5216 5217 tp->next = dp; 5218 } 5219 RW_EXIT(&ldcp->lane_out.dlistrw); 5220 5221 return (dp); 5222 5223 dring_fail_exit: 5224 (void) ldc_mem_dring_destroy(dp->handle); 5225 5226 create_fail_exit: 5227 if (dp->priv_addr != NULL) { 5228 priv_addr = dp->priv_addr; 5229 for (i = 0; i < vsw_ntxds; i++) { 5230 if (priv_addr->memhandle != NULL) 5231 (void) ldc_mem_free_handle( 5232 priv_addr->memhandle); 5233 priv_addr++; 5234 } 5235 kmem_free(dp->priv_addr, 5236 (sizeof (vsw_private_desc_t) * vsw_ntxds)); 5237 } 5238 mutex_destroy(&dp->dlock); 5239 5240 kmem_free(dp, sizeof (dring_info_t)); 5241 return (NULL); 5242 } 5243 5244 /* 5245 * Create a ring consisting of just a private portion and link 5246 * it into the list of rings for the outbound lane. 5247 * 5248 * This type of ring is used primarily for temporary data 5249 * storage (i.e. as data buffers). 5250 */ 5251 void 5252 vsw_create_privring(vsw_ldc_t *ldcp) 5253 { 5254 dring_info_t *dp, *tp; 5255 vsw_t *vswp = ldcp->ldc_vswp; 5256 5257 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 5258 5259 dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 5260 5261 mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL); 5262 5263 /* no public section */ 5264 dp->pub_addr = NULL; 5265 5266 dp->priv_addr = kmem_zalloc( 5267 (sizeof (vsw_private_desc_t) * vsw_ntxds), KM_SLEEP); 5268 5269 dp->num_descriptors = vsw_ntxds; 5270 5271 if (vsw_setup_ring(ldcp, dp)) { 5272 DERR(vswp, "%s: setup of ring failed", __func__); 5273 kmem_free(dp->priv_addr, 5274 (sizeof (vsw_private_desc_t) * vsw_ntxds)); 5275 mutex_destroy(&dp->dlock); 5276 kmem_free(dp, sizeof (dring_info_t)); 5277 return; 5278 } 5279 5280 /* haven't used any descriptors yet */ 5281 dp->end_idx = 0; 5282 5283 mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL); 5284 dp->restart_reqd = B_TRUE; 5285 5286 /* 5287 * Only ever create rings for the outgoing lane. Link it onto 5288 * the end of the list. 5289 */ 5290 WRITE_ENTER(&ldcp->lane_out.dlistrw); 5291 if (ldcp->lane_out.dringp == NULL) { 5292 D2(vswp, "%s: adding first outbound privring", __func__); 5293 ldcp->lane_out.dringp = dp; 5294 } else { 5295 tp = ldcp->lane_out.dringp; 5296 while (tp->next != NULL) 5297 tp = tp->next; 5298 5299 tp->next = dp; 5300 } 5301 RW_EXIT(&ldcp->lane_out.dlistrw); 5302 5303 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 5304 } 5305 5306 /* 5307 * Set up the descriptors in the dring. Returns 0 on success, 1 on 5308 * failure. 5309 */ 5310 int 5311 vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp) 5312 { 5313 vnet_public_desc_t *pub_addr = NULL; 5314 vsw_private_desc_t *priv_addr = NULL; 5315 vsw_t *vswp = ldcp->ldc_vswp; 5316 uint64_t *tmpp; 5317 uint64_t offset = 0; 5318 uint32_t ncookies = 0; 5319 static char *name = "vsw_setup_ring"; 5320 int i, j, nc, rv; 5321 size_t data_sz; 5322 void *data_addr; 5323 5324 priv_addr = dp->priv_addr; 5325 pub_addr = dp->pub_addr; 5326 5327 /* public section may be null but private should never be */ 5328 ASSERT(priv_addr != NULL); 5329 5330 /* 5331 * Allocate the region of memory which will be used to hold 5332 * the data the descriptors will refer to.
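 *
 * To give a concrete feel for the sizing scheme described
 * below (illustrative values only): a standard ~1500 byte MTU
 * makes data_sz a little over 1.5K, which rounds up to 2K;
 * those 2K buffers tile the 8K-aligned area exactly, so each
 * bind needs only a single cookie.  A 9K jumbo MTU rounds up
 * to 10K, and a 10K buffer starting on a 2K boundary can
 * straddle at most one 8K boundary, i.e. at most 2 cookies.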
5333 */ 5334 data_sz = vswp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN; 5335 5336 /* 5337 * In order to ensure that the number of ldc cookies per descriptor is 5338 * limited to be within the default MAX_COOKIES (2), we take the steps 5339 * outlined below: 5340 * 5341 * Align the entire data buffer area to 8K and carve out per descriptor 5342 * data buffers starting from this 8K aligned base address. 5343 * 5344 * We round up the mtu specified to be a multiple of 2K or 4K. 5345 * For sizes up to 12K we round up the size to the next 2K. 5346 * For sizes > 12K we round up to the next 4K (otherwise sizes such as 5347 * 14K could end up needing 3 cookies, with the buffer spread across 5348 * 3 8K pages: 8K+6K, 2K+8K+2K, 6K+8K, ...). 5349 */ 5350 if (data_sz <= VNET_12K) { 5351 data_sz = VNET_ROUNDUP_2K(data_sz); 5352 } else { 5353 data_sz = VNET_ROUNDUP_4K(data_sz); 5354 } 5355 5356 dp->desc_data_sz = data_sz; 5357 5358 /* allocate extra 8K bytes for alignment */ 5359 dp->data_sz = (vsw_ntxds * data_sz) + VNET_8K; 5360 data_addr = kmem_alloc(dp->data_sz, KM_SLEEP); 5361 dp->data_addr = data_addr; 5362 5363 D2(vswp, "%s: allocated %lld bytes at 0x%llx\n", name, 5364 dp->data_sz, dp->data_addr); 5365 5366 /* align the starting address of the data area to 8K */ 5367 data_addr = (void *)VNET_ROUNDUP_8K((uintptr_t)data_addr); 5368 5369 tmpp = (uint64_t *)data_addr; 5370 offset = dp->desc_data_sz/sizeof (tmpp); 5371 5372 /* 5373 * Initialise some of the private and public (if they exist) 5374 * descriptor fields. 5375 */ 5376 for (i = 0; i < vsw_ntxds; i++) { 5377 mutex_init(&priv_addr->dstate_lock, NULL, MUTEX_DRIVER, NULL); 5378 5379 if ((ldc_mem_alloc_handle(ldcp->ldc_handle, 5380 &priv_addr->memhandle)) != 0) { 5381 DERR(vswp, "%s: alloc mem handle failed", name); 5382 goto setup_ring_cleanup; 5383 } 5384 5385 priv_addr->datap = (void *)tmpp; 5386 5387 rv = ldc_mem_bind_handle(priv_addr->memhandle, 5388 (caddr_t)priv_addr->datap, dp->desc_data_sz, 5389 LDC_SHADOW_MAP, LDC_MEM_R|LDC_MEM_W, 5390 &(priv_addr->memcookie[0]), &ncookies); 5391 if (rv != 0) { 5392 DERR(vswp, "%s(%lld): ldc_mem_bind_handle failed " 5393 "(rv %d)", name, ldcp->ldc_id, rv); 5394 goto setup_ring_cleanup; 5395 } 5396 priv_addr->bound = 1; 5397 5398 D2(vswp, "%s: %d: memcookie 0 : addr 0x%llx : size 0x%llx", 5399 name, i, priv_addr->memcookie[0].addr, 5400 priv_addr->memcookie[0].size); 5401 5402 if (ncookies >= (uint32_t)(VSW_MAX_COOKIES + 1)) { 5403 DERR(vswp, "%s(%lld) ldc_mem_bind_handle returned " 5404 "invalid num of cookies (%d) for size 0x%llx", 5405 name, ldcp->ldc_id, ncookies, VSW_RING_EL_DATA_SZ); 5406 5407 goto setup_ring_cleanup; 5408 } else { 5409 for (j = 1; j < ncookies; j++) { 5410 rv = ldc_mem_nextcookie(priv_addr->memhandle, 5411 &(priv_addr->memcookie[j])); 5412 if (rv != 0) { 5413 DERR(vswp, "%s: ldc_mem_nextcookie " 5414 "failed rv (%d)", name, rv); 5415 goto setup_ring_cleanup; 5416 } 5417 D3(vswp, "%s: memcookie %d : addr 0x%llx : " 5418 "size 0x%llx", name, j, 5419 priv_addr->memcookie[j].addr, 5420 priv_addr->memcookie[j].size); 5421 } 5422 5423 } 5424 priv_addr->ncookies = ncookies; 5425 priv_addr->dstate = VIO_DESC_FREE; 5426 5427 if (pub_addr != NULL) { 5428 5429 /* link pub and private sides */ 5430 priv_addr->descp = pub_addr; 5431 5432 pub_addr->ncookies = priv_addr->ncookies; 5433 5434 for (nc = 0; nc < pub_addr->ncookies; nc++) { 5435 bcopy(&priv_addr->memcookie[nc], 5436 &pub_addr->memcookie[nc], 5437 sizeof (ldc_mem_cookie_t)); 5438 } 5439 5440 pub_addr->hdr.dstate = VIO_DESC_FREE; 
5441 pub_addr++; 5442 } 5443 5444 /* 5445 * move to next element in the dring and the next 5446 * position in the data buffer. 5447 */ 5448 priv_addr++; 5449 tmpp += offset; 5450 } 5451 5452 return (0); 5453 5454 setup_ring_cleanup: 5455 priv_addr = dp->priv_addr; 5456 5457 for (j = 0; j < i; j++) { 5458 (void) ldc_mem_unbind_handle(priv_addr->memhandle); 5459 (void) ldc_mem_free_handle(priv_addr->memhandle); 5460 5461 mutex_destroy(&priv_addr->dstate_lock); 5462 5463 priv_addr++; 5464 } 5465 kmem_free(dp->data_addr, dp->data_sz); 5466 5467 return (1); 5468 } 5469 5470 /* 5471 * Searches the private section of a ring for a free descriptor, 5472 * starting at the location of the last free descriptor found 5473 * previously. 5474 * 5475 * Returns 0 if a free descriptor is available, and updates the state 5476 * of the private descriptor to VIO_DESC_READY; otherwise returns 1. 5477 * 5478 * FUTURE: might need to return contiguous range of descriptors 5479 * as dring info msg assumes all will be contiguous. 5480 */ 5481 static int 5482 vsw_dring_find_free_desc(dring_info_t *dringp, 5483 vsw_private_desc_t **priv_p, int *idx) 5484 { 5485 vsw_private_desc_t *addr = NULL; 5486 int num = vsw_ntxds; 5487 int ret = 1; 5488 5489 D1(NULL, "%s enter\n", __func__); 5490 5491 ASSERT(dringp->priv_addr != NULL); 5492 5493 D2(NULL, "%s: searching ring, dringp 0x%llx : start pos %lld", 5494 __func__, dringp, dringp->end_idx); 5495 5496 addr = (vsw_private_desc_t *)dringp->priv_addr + dringp->end_idx; 5497 5498 mutex_enter(&addr->dstate_lock); 5499 if (addr->dstate == VIO_DESC_FREE) { 5500 addr->dstate = VIO_DESC_READY; 5501 *priv_p = addr; 5502 *idx = dringp->end_idx; 5503 dringp->end_idx = (dringp->end_idx + 1) % num; 5504 ret = 0; 5505 5506 } 5507 mutex_exit(&addr->dstate_lock); 5508 5509 /* ring full */ 5510 if (ret == 1) { 5511 D2(NULL, "%s: no desp free: started at %d", __func__, 5512 dringp->end_idx); 5513 } 5514 5515 D1(NULL, "%s: exit\n", __func__); 5516 5517 return (ret); 5518 } 5519 5520 /* 5521 * Map from a dring identifier to the ring itself. Returns a 5522 * pointer to the ring or NULL if no match is found. 5523 * 5524 * Should be called with dlistrw rwlock held as reader. 5525 */ 5526 static dring_info_t * 5527 vsw_ident2dring(lane_t *lane, uint64_t ident) 5528 { 5529 dring_info_t *dp = NULL; 5530 5531 if ((dp = lane->dringp) == NULL) { 5532 return (NULL); 5533 } else { 5534 if (dp->ident == ident) 5535 return (dp); 5536 5537 while (dp != NULL) { 5538 if (dp->ident == ident) 5539 break; 5540 dp = dp->next; 5541 } 5542 } 5543 5544 return (dp); 5545 } 5546 5547 /* 5548 * Set the default lane attributes. These are copied into 5549 * the attr msg we send to our peer. If they are not acceptable 5550 * then (currently) the handshake ends. 5551 */ 5552 static void 5553 vsw_set_lane_attr(vsw_t *vswp, lane_t *lp) 5554 { 5555 bzero(lp, sizeof (lane_t)); 5556 5557 READ_ENTER(&vswp->if_lockrw); 5558 ether_copy(&(vswp->if_addr), &(lp->addr)); 5559 RW_EXIT(&vswp->if_lockrw); 5560 5561 lp->mtu = vswp->max_frame_size; 5562 lp->addr_type = ADDR_TYPE_MAC; 5563 lp->xfer_mode = VIO_DRING_MODE_V1_0; 5564 lp->ack_freq = 0; /* for shared mode */ 5565 lp->seq_num = VNET_ISS; 5566 } 5567 5568 /* 5569 * Verify that the attributes are acceptable. 5570 * 5571 * FUTURE: If some attributes are not acceptable, change them to 5572 * our desired values.
5573 */ 5574 static int 5575 vsw_check_attr(vnet_attr_msg_t *pkt, vsw_ldc_t *ldcp) 5576 { 5577 int ret = 0; 5578 struct ether_addr ea; 5579 vsw_port_t *port = ldcp->ldc_port; 5580 lane_t *lp = &ldcp->lane_out; 5581 5582 D1(NULL, "vsw_check_attr enter\n"); 5583 5584 if ((pkt->xfer_mode != VIO_DESC_MODE) && 5585 (pkt->xfer_mode != lp->xfer_mode)) { 5586 D2(NULL, "vsw_check_attr: unknown mode %x\n", pkt->xfer_mode); 5587 ret = 1; 5588 } 5589 5590 /* Only support MAC addresses at the moment. */ 5591 if ((pkt->addr_type != ADDR_TYPE_MAC) || (pkt->addr == 0)) { 5592 D2(NULL, "vsw_check_attr: invalid addr_type %x, " 5593 "or address 0x%llx\n", pkt->addr_type, pkt->addr); 5594 ret = 1; 5595 } 5596 5597 /* 5598 * MAC address supplied by device should match that stored 5599 * in the vsw-port OBP node. Need to decide what to do if they 5600 * don't match; for the moment just warn but don't fail. 5601 */ 5602 vnet_macaddr_ultostr(pkt->addr, ea.ether_addr_octet); 5603 if (ether_cmp(&ea, &port->p_macaddr) != 0) { 5604 DERR(NULL, "vsw_check_attr: device supplied address " 5605 "0x%llx doesn't match node address 0x%llx\n", 5606 pkt->addr, port->p_macaddr); 5607 } 5608 5609 /* 5610 * Ack freq only makes sense in pkt mode; in shared 5611 * mode the ring descriptors say whether or not to 5612 * send back an ACK. 5613 */ 5614 if ((VSW_VER_GTEQ(ldcp, 1, 2) && 5615 (pkt->xfer_mode & VIO_DRING_MODE_V1_2)) || 5616 (VSW_VER_LT(ldcp, 1, 2) && 5617 (pkt->xfer_mode == VIO_DRING_MODE_V1_0))) { 5618 if (pkt->ack_freq > 0) { 5619 D2(NULL, "vsw_check_attr: non zero ack freq " 5620 " in SHM mode\n"); 5621 ret = 1; 5622 } 5623 } 5624 5625 if (VSW_VER_LT(ldcp, 1, 4)) { 5626 /* versions < 1.4, mtu must match */ 5627 if (pkt->mtu != lp->mtu) { 5628 D2(NULL, "vsw_check_attr: invalid MTU (0x%llx)\n", 5629 pkt->mtu); 5630 ret = 1; 5631 } 5632 } else { 5633 /* Ver >= 1.4, validate mtu of the peer is at least ETHERMAX */ 5634 if (pkt->mtu < ETHERMAX) { 5635 ret = 1; 5636 } 5637 } 5638 5639 D1(NULL, "vsw_check_attr exit\n"); 5640 5641 return (ret); 5642 } 5643 5644 /* 5645 * Returns 1 if there is a problem, 0 otherwise. 5646 */ 5647 static int 5648 vsw_check_dring_info(vio_dring_reg_msg_t *pkt) 5649 { 5650 5651 5652 int ret = 0; 5653 5654 D1(NULL, "vsw_check_dring_info enter\n"); 5655 5656 if ((pkt->num_descriptors == 0) || 5657 (pkt->descriptor_size == 0) || 5658 (pkt->ncookies != 1)) { 5659 DERR(NULL, "vsw_check_dring_info: invalid dring msg"); 5660 ret = 1; 5661 } 5662 5663 D1(NULL, "vsw_check_dring_info exit\n"); 5664 5665 return (ret); 5666 } 5667 5668 /* 5669 * Returns 1 if two memory cookies match. Otherwise returns 0. 5670 */ 5671 static int 5672 vsw_mem_cookie_match(ldc_mem_cookie_t *m1, ldc_mem_cookie_t *m2) 5673 { 5674 if ((m1->addr != m2->addr) || 5675 (m1->size != m2->size)) { 5676 return (0); 5677 } else { 5678 return (1); 5679 } 5680 } 5681 5682 /* 5683 * Returns 1 if the ring described in the reg message matches that 5684 * described by the dring_info structure. Otherwise returns 0. 5685 */ 5686 static int 5687 vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg) 5688 { 5689 if ((msg->descriptor_size != dp->descriptor_size) || 5690 (msg->num_descriptors != dp->num_descriptors) || 5691 (msg->ncookies != dp->ncookies) || 5692 !(vsw_mem_cookie_match(&msg->cookie[0], &dp->cookie[0]))) { 5693 return (0); 5694 } else { 5695 return (1); 5696 } 5697 5698 } 5699 5700 /* 5701 * Reset and free all the resources associated with 5702 * the channel.
5703 */ 5704 static void 5705 vsw_free_lane_resources(vsw_ldc_t *ldcp, uint64_t dir) 5706 { 5707 dring_info_t *dp, *dpp; 5708 lane_t *lp = NULL; 5709 5710 ASSERT(ldcp != NULL); 5711 5712 D1(ldcp->ldc_vswp, "%s (%lld): enter", __func__, ldcp->ldc_id); 5713 5714 if (dir == INBOUND) { 5715 D2(ldcp->ldc_vswp, "%s: freeing INBOUND lane" 5716 " of channel %lld", __func__, ldcp->ldc_id); 5717 lp = &ldcp->lane_in; 5718 } else { 5719 D2(ldcp->ldc_vswp, "%s: freeing OUTBOUND lane" 5720 " of channel %lld", __func__, ldcp->ldc_id); 5721 lp = &ldcp->lane_out; 5722 } 5723 5724 lp->lstate = VSW_LANE_INACTIV; 5725 lp->seq_num = VNET_ISS; 5726 5727 if (lp->dringp) { 5728 if (dir == INBOUND) { 5729 WRITE_ENTER(&lp->dlistrw); 5730 dp = lp->dringp; 5731 while (dp != NULL) { 5732 dpp = dp->next; 5733 if (dp->handle != NULL) 5734 (void) ldc_mem_dring_unmap(dp->handle); 5735 kmem_free(dp, sizeof (dring_info_t)); 5736 dp = dpp; 5737 } 5738 RW_EXIT(&lp->dlistrw); 5739 } else { 5740 /* 5741 * unbind, destroy exported dring, free dring struct 5742 */ 5743 WRITE_ENTER(&lp->dlistrw); 5744 dp = lp->dringp; 5745 vsw_free_ring(dp); 5746 RW_EXIT(&lp->dlistrw); 5747 } 5748 lp->dringp = NULL; 5749 } 5750 5751 D1(ldcp->ldc_vswp, "%s (%lld): exit", __func__, ldcp->ldc_id); 5752 } 5753 5754 /* 5755 * Free ring and all associated resources. 5756 * 5757 * Should be called with dlistrw rwlock held as writer. 5758 */ 5759 static void 5760 vsw_free_ring(dring_info_t *dp) 5761 { 5762 vsw_private_desc_t *paddr = NULL; 5763 dring_info_t *dpp; 5764 int i; 5765 5766 while (dp != NULL) { 5767 mutex_enter(&dp->dlock); 5768 dpp = dp->next; 5769 if (dp->priv_addr != NULL) { 5770 /* 5771 * First unbind and free the memory handles 5772 * stored in each descriptor within the ring. 5773 */ 5774 for (i = 0; i < vsw_ntxds; i++) { 5775 paddr = (vsw_private_desc_t *) 5776 dp->priv_addr + i; 5777 if (paddr->memhandle != NULL) { 5778 if (paddr->bound == 1) { 5779 if (ldc_mem_unbind_handle( 5780 paddr->memhandle) != 0) { 5781 DERR(NULL, "error " 5782 "unbinding handle for " 5783 "ring 0x%llx at pos %d", 5784 dp, i); 5785 continue; 5786 } 5787 paddr->bound = 0; 5788 } 5789 5790 if (ldc_mem_free_handle( 5791 paddr->memhandle) != 0) { 5792 DERR(NULL, "error freeing " 5793 "handle for ring 0x%llx " 5794 "at pos %d", dp, i); 5795 continue; 5796 } 5797 paddr->memhandle = NULL; 5798 } 5799 mutex_destroy(&paddr->dstate_lock); 5800 } 5801 kmem_free(dp->priv_addr, 5802 (sizeof (vsw_private_desc_t) * vsw_ntxds)); 5803 } 5804 5805 /* 5806 * Now unbind and destroy the ring itself. 5807 */ 5808 if (dp->handle != NULL) { 5809 (void) ldc_mem_dring_unbind(dp->handle); 5810 (void) ldc_mem_dring_destroy(dp->handle); 5811 } 5812 5813 if (dp->data_addr != NULL) { 5814 kmem_free(dp->data_addr, dp->data_sz); 5815 } 5816 5817 mutex_exit(&dp->dlock); 5818 mutex_destroy(&dp->dlock); 5819 mutex_destroy(&dp->restart_lock); 5820 kmem_free(dp, sizeof (dring_info_t)); 5821 5822 dp = dpp; 5823 } 5824 } 5825 5826 /* 5827 * vsw_ldc_rx_worker -- A per LDC worker thread to receive data. 5828 * This thread is woken up by the LDC interrupt handler to process 5829 * LDC packets and receive data. 
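 *
 * The waking side (the LDC callback, elsewhere in this file)
 * follows the usual pattern for this handshake; conceptually:
 *
 *	mutex_enter(&ldcp->rx_thr_lock);
 *	ldcp->rx_thr_flags |= VSW_WTHR_DATARCVD;
 *	cv_signal(&ldcp->rx_thr_cv);
 *	mutex_exit(&ldcp->rx_thr_lock);
 *
 * which pairs with the VSW_WTHR_DATARCVD test in the wait loop
 * below.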
5830 */ 5831 static void 5832 vsw_ldc_rx_worker(void *arg) 5833 { 5834 callb_cpr_t cprinfo; 5835 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 5836 vsw_t *vswp = ldcp->ldc_vswp; 5837 5838 D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id); 5839 CALLB_CPR_INIT(&cprinfo, &ldcp->rx_thr_lock, callb_generic_cpr, 5840 "vsw_rx_thread"); 5841 mutex_enter(&ldcp->rx_thr_lock); 5842 while (!(ldcp->rx_thr_flags & VSW_WTHR_STOP)) { 5843 5844 CALLB_CPR_SAFE_BEGIN(&cprinfo); 5845 /* 5846 * Wait until the data is received or a stop 5847 * request is received. 5848 */ 5849 while (!(ldcp->rx_thr_flags & 5850 (VSW_WTHR_DATARCVD | VSW_WTHR_STOP))) { 5851 cv_wait(&ldcp->rx_thr_cv, &ldcp->rx_thr_lock); 5852 } 5853 CALLB_CPR_SAFE_END(&cprinfo, &ldcp->rx_thr_lock) 5854 5855 /* 5856 * First process the stop request. 5857 */ 5858 if (ldcp->rx_thr_flags & VSW_WTHR_STOP) { 5859 D2(vswp, "%s(%lld):Rx thread stopped\n", 5860 __func__, ldcp->ldc_id); 5861 break; 5862 } 5863 ldcp->rx_thr_flags &= ~VSW_WTHR_DATARCVD; 5864 mutex_exit(&ldcp->rx_thr_lock); 5865 D1(vswp, "%s(%lld):calling vsw_process_pkt\n", 5866 __func__, ldcp->ldc_id); 5867 mutex_enter(&ldcp->ldc_cblock); 5868 vsw_process_pkt(ldcp); 5869 mutex_exit(&ldcp->ldc_cblock); 5870 mutex_enter(&ldcp->rx_thr_lock); 5871 } 5872 5873 /* 5874 * Update the run status and wakeup the thread that 5875 * has sent the stop request. 5876 */ 5877 ldcp->rx_thr_flags &= ~VSW_WTHR_STOP; 5878 ldcp->rx_thread = NULL; 5879 CALLB_CPR_EXIT(&cprinfo); 5880 D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id); 5881 thread_exit(); 5882 } 5883 5884 /* vsw_stop_rx_thread -- Co-ordinate with the receive thread to stop it */ 5885 static void 5886 vsw_stop_rx_thread(vsw_ldc_t *ldcp) 5887 { 5888 kt_did_t tid = 0; 5889 vsw_t *vswp = ldcp->ldc_vswp; 5890 5891 D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id); 5892 /* 5893 * Send a stop request by setting the stop flag and 5894 * wait until the receive thread stops. 5895 */ 5896 mutex_enter(&ldcp->rx_thr_lock); 5897 if (ldcp->rx_thread != NULL) { 5898 tid = ldcp->rx_thread->t_did; 5899 ldcp->rx_thr_flags |= VSW_WTHR_STOP; 5900 cv_signal(&ldcp->rx_thr_cv); 5901 } 5902 mutex_exit(&ldcp->rx_thr_lock); 5903 5904 if (tid != 0) { 5905 thread_join(tid); 5906 } 5907 D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id); 5908 } 5909 5910 /* 5911 * vsw_ldc_tx_worker -- A per LDC worker thread to transmit data. 5912 * This thread is woken up by vsw_portsend() to transmit 5913 * packets. 5914 */ 5915 static void 5916 vsw_ldc_tx_worker(void *arg) 5917 { 5918 callb_cpr_t cprinfo; 5919 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 5920 vsw_t *vswp = ldcp->ldc_vswp; 5921 mblk_t *mp; 5922 mblk_t *tmp; 5923 5924 D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id); 5925 CALLB_CPR_INIT(&cprinfo, &ldcp->tx_thr_lock, callb_generic_cpr, 5926 "vnet_tx_thread"); 5927 mutex_enter(&ldcp->tx_thr_lock); 5928 while (!(ldcp->tx_thr_flags & VSW_WTHR_STOP)) { 5929 5930 CALLB_CPR_SAFE_BEGIN(&cprinfo); 5931 /* 5932 * Wait until there is data to transmit or a stop 5933 * request is received. 5934 */ 5935 while (!(ldcp->tx_thr_flags & VSW_WTHR_STOP) && 5936 (ldcp->tx_mhead == NULL)) { 5937 cv_wait(&ldcp->tx_thr_cv, &ldcp->tx_thr_lock); 5938 } 5939 CALLB_CPR_SAFE_END(&cprinfo, &ldcp->tx_thr_lock) 5940 5941 /* 5942 * First process the stop request.

/*
 * vsw_ldc_tx_worker -- A per LDC worker thread to transmit data.
 * This thread is woken up by vsw_portsend() to transmit
 * packets.
 */
static void
vsw_ldc_tx_worker(void *arg)
{
	callb_cpr_t	cprinfo;
	vsw_ldc_t	*ldcp = (vsw_ldc_t *)arg;
	vsw_t		*vswp = ldcp->ldc_vswp;
	mblk_t		*mp;
	mblk_t		*tmp;

	D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id);
	CALLB_CPR_INIT(&cprinfo, &ldcp->tx_thr_lock, callb_generic_cpr,
	    "vnet_tx_thread");
	mutex_enter(&ldcp->tx_thr_lock);
	while (!(ldcp->tx_thr_flags & VSW_WTHR_STOP)) {

		CALLB_CPR_SAFE_BEGIN(&cprinfo);
		/*
		 * Wait until there is data to transmit or a stop
		 * request is received.
		 */
		while (!(ldcp->tx_thr_flags & VSW_WTHR_STOP) &&
		    (ldcp->tx_mhead == NULL)) {
			cv_wait(&ldcp->tx_thr_cv, &ldcp->tx_thr_lock);
		}
		CALLB_CPR_SAFE_END(&cprinfo, &ldcp->tx_thr_lock)

		/*
		 * First process the stop request.
		 */
		if (ldcp->tx_thr_flags & VSW_WTHR_STOP) {
			D2(vswp, "%s(%lld):tx thread stopped\n",
			    __func__, ldcp->ldc_id);
			break;
		}
		mp = ldcp->tx_mhead;
		ldcp->tx_mhead = ldcp->tx_mtail = NULL;
		ldcp->tx_cnt = 0;
		mutex_exit(&ldcp->tx_thr_lock);
		D2(vswp, "%s(%lld):calling vsw_ldcsend\n",
		    __func__, ldcp->ldc_id);
		while (mp != NULL) {
			tmp = mp->b_next;
			mp->b_next = mp->b_prev = NULL;
			(void) vsw_ldcsend(ldcp, mp, vsw_ldc_tx_retries);
			mp = tmp;
		}
		mutex_enter(&ldcp->tx_thr_lock);
	}

	/*
	 * Update the run status and wakeup the thread that
	 * has sent the stop request.
	 */
	ldcp->tx_thr_flags &= ~VSW_WTHR_STOP;
	ldcp->tx_thread = NULL;
	CALLB_CPR_EXIT(&cprinfo);
	D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id);
	thread_exit();
}

/* vsw_stop_tx_thread -- Co-ordinate with transmit thread to stop it */
static void
vsw_stop_tx_thread(vsw_ldc_t *ldcp)
{
	kt_did_t	tid = 0;
	vsw_t		*vswp = ldcp->ldc_vswp;

	D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id);
	/*
	 * Send a stop request by setting the stop flag and
	 * wait until the transmit thread stops.
	 */
	mutex_enter(&ldcp->tx_thr_lock);
	if (ldcp->tx_thread != NULL) {
		tid = ldcp->tx_thread->t_did;
		ldcp->tx_thr_flags |= VSW_WTHR_STOP;
		cv_signal(&ldcp->tx_thr_cv);
	}
	mutex_exit(&ldcp->tx_thr_lock);

	if (tid != 0) {
		thread_join(tid);
	}

	D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id);
}

/* vsw_reclaim_dring -- reclaim descriptors the peer has marked DONE */
static int
vsw_reclaim_dring(dring_info_t *dp, int start)
{
	int			i, j, len;
	vsw_private_desc_t	*priv_addr;
	vnet_public_desc_t	*pub_addr;

	pub_addr = (vnet_public_desc_t *)dp->pub_addr;
	priv_addr = (vsw_private_desc_t *)dp->priv_addr;
	len = dp->num_descriptors;

	D2(NULL, "%s: start index %d\n", __func__, start);

	j = 0;
	for (i = start; j < len; i = (i + 1) % len, j++) {
		pub_addr = (vnet_public_desc_t *)dp->pub_addr + i;
		priv_addr = (vsw_private_desc_t *)dp->priv_addr + i;

		mutex_enter(&priv_addr->dstate_lock);
		if (pub_addr->hdr.dstate != VIO_DESC_DONE) {
			mutex_exit(&priv_addr->dstate_lock);
			break;
		}
		pub_addr->hdr.dstate = VIO_DESC_FREE;
		priv_addr->dstate = VIO_DESC_FREE;
		/* clear all the fields */
		priv_addr->datalen = 0;
		pub_addr->hdr.ack = 0;
		mutex_exit(&priv_addr->dstate_lock);

		D3(NULL, "claiming descp:%d pub state:0x%llx priv state 0x%llx",
		    i, pub_addr->hdr.dstate, priv_addr->dstate);
	}
	return (j);
}
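
/*
 * Worked example for vsw_reclaim_dring() (hypothetical values): with
 * len = 8 and start = 6, the loop visits descriptors 6, 7, 0, 1, ...
 * wrapping modulo len, frees each descriptor whose public state is
 * VIO_DESC_DONE, and stops at the first descriptor still in any other
 * state. The return value j is the number of descriptors reclaimed.
 */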

/*
 * Debugging routines
 */
static void
display_state(void)
{
	vsw_t		*vswp;
	vsw_port_list_t	*plist;
	vsw_port_t	*port;
	vsw_ldc_list_t	*ldcl;
	vsw_ldc_t	*ldcp;
	extern vsw_t	*vsw_head;

	cmn_err(CE_NOTE, "***** system state *****");

	for (vswp = vsw_head; vswp; vswp = vswp->next) {
		plist = &vswp->plist;
		READ_ENTER(&plist->lockrw);
		cmn_err(CE_CONT, "vsw instance %d has %d ports attached\n",
		    vswp->instance, plist->num_ports);

		for (port = plist->head; port != NULL; port = port->p_next) {
			ldcl = &port->p_ldclist;
			cmn_err(CE_CONT, "port %d : %d ldcs attached\n",
			    port->p_instance, port->num_ldcs);
			READ_ENTER(&ldcl->lockrw);
			ldcp = ldcl->head;
			for (; ldcp != NULL; ldcp = ldcp->ldc_next) {
				cmn_err(CE_CONT, "chan %lu : dev %d : "
				    "status %d : phase %u\n",
				    ldcp->ldc_id, ldcp->dev_class,
				    ldcp->ldc_status, ldcp->hphase);
				cmn_err(CE_CONT, "chan %lu : lsession %lu : "
				    "psession %lu\n", ldcp->ldc_id,
				    ldcp->local_session, ldcp->peer_session);

				cmn_err(CE_CONT, "Inbound lane:\n");
				display_lane(&ldcp->lane_in);
				cmn_err(CE_CONT, "Outbound lane:\n");
				display_lane(&ldcp->lane_out);
			}
			RW_EXIT(&ldcl->lockrw);
		}
		RW_EXIT(&plist->lockrw);
	}
	cmn_err(CE_NOTE, "***** system state *****");
}

static void
display_lane(lane_t *lp)
{
	dring_info_t	*drp;

	cmn_err(CE_CONT, "ver 0x%x:0x%x : state %lx : mtu 0x%lx\n",
	    lp->ver_major, lp->ver_minor, lp->lstate, lp->mtu);
	cmn_err(CE_CONT, "addr_type %d : addr 0x%lx : xmode %d\n",
	    lp->addr_type, lp->addr, lp->xfer_mode);
	cmn_err(CE_CONT, "dringp 0x%lx\n", (uint64_t)lp->dringp);

	cmn_err(CE_CONT, "Dring info:\n");
	for (drp = lp->dringp; drp != NULL; drp = drp->next) {
		cmn_err(CE_CONT, "\tnum_desc %u : dsize %u\n",
		    drp->num_descriptors, drp->descriptor_size);
		cmn_err(CE_CONT, "\thandle 0x%lx\n", drp->handle);
		cmn_err(CE_CONT, "\tpub_addr 0x%lx : priv_addr 0x%lx\n",
		    (uint64_t)drp->pub_addr, (uint64_t)drp->priv_addr);
		cmn_err(CE_CONT, "\tident 0x%lx : end_idx %lu\n",
		    drp->ident, drp->end_idx);
		display_ring(drp);
	}
}

static void
display_ring(dring_info_t *dringp)
{
	uint64_t		i;
	uint64_t		priv_count = 0;
	uint64_t		pub_count = 0;
	vnet_public_desc_t	*pub_addr = NULL;
	vsw_private_desc_t	*priv_addr = NULL;

	for (i = 0; i < vsw_ntxds; i++) {
		if (dringp->pub_addr != NULL) {
			pub_addr = (vnet_public_desc_t *)dringp->pub_addr + i;

			if (pub_addr->hdr.dstate == VIO_DESC_FREE)
				pub_count++;
		}

		if (dringp->priv_addr != NULL) {
			priv_addr = (vsw_private_desc_t *)dringp->priv_addr + i;

			if (priv_addr->dstate == VIO_DESC_FREE)
				priv_count++;
		}
	}
	cmn_err(CE_CONT, "\t%lu elements: %lu priv free: %lu pub free\n",
	    i, priv_count, pub_count);
}
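
/*
 * dump_flags -- log the symbolic names of the handshake-state flags
 * set in 'state'. Hypothetical example:
 * dump_flags(VSW_VER_INFO_SENT | VSW_VER_ACK_RECV) logs the raw state
 * value followed by one "DUMP_FLAGS <name>" line per set flag.
 */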
static void
dump_flags(uint64_t state)
{
	int	i;

	typedef struct flag_name {
		int	flag_val;
		char	*flag_name;
	} flag_name_t;

	flag_name_t	flags[] = {
		{ VSW_VER_INFO_SENT, "VSW_VER_INFO_SENT" },
		{ VSW_VER_INFO_RECV, "VSW_VER_INFO_RECV" },
		{ VSW_VER_ACK_RECV, "VSW_VER_ACK_RECV" },
		{ VSW_VER_ACK_SENT, "VSW_VER_ACK_SENT" },
		{ VSW_VER_NACK_RECV, "VSW_VER_NACK_RECV" },
		{ VSW_VER_NACK_SENT, "VSW_VER_NACK_SENT" },
		{ VSW_ATTR_INFO_SENT, "VSW_ATTR_INFO_SENT" },
		{ VSW_ATTR_INFO_RECV, "VSW_ATTR_INFO_RECV" },
		{ VSW_ATTR_ACK_SENT, "VSW_ATTR_ACK_SENT" },
		{ VSW_ATTR_ACK_RECV, "VSW_ATTR_ACK_RECV" },
		{ VSW_ATTR_NACK_SENT, "VSW_ATTR_NACK_SENT" },
		{ VSW_ATTR_NACK_RECV, "VSW_ATTR_NACK_RECV" },
		{ VSW_DRING_INFO_SENT, "VSW_DRING_INFO_SENT" },
		{ VSW_DRING_INFO_RECV, "VSW_DRING_INFO_RECV" },
		{ VSW_DRING_ACK_SENT, "VSW_DRING_ACK_SENT" },
		{ VSW_DRING_ACK_RECV, "VSW_DRING_ACK_RECV" },
		{ VSW_DRING_NACK_SENT, "VSW_DRING_NACK_SENT" },
		{ VSW_DRING_NACK_RECV, "VSW_DRING_NACK_RECV" },
		{ VSW_RDX_INFO_SENT, "VSW_RDX_INFO_SENT" },
		{ VSW_RDX_INFO_RECV, "VSW_RDX_INFO_RECV" },
		{ VSW_RDX_ACK_SENT, "VSW_RDX_ACK_SENT" },
		{ VSW_RDX_ACK_RECV, "VSW_RDX_ACK_RECV" },
		{ VSW_RDX_NACK_SENT, "VSW_RDX_NACK_SENT" },
		{ VSW_RDX_NACK_RECV, "VSW_RDX_NACK_RECV" },
		{ VSW_MCST_INFO_SENT, "VSW_MCST_INFO_SENT" },
		{ VSW_MCST_INFO_RECV, "VSW_MCST_INFO_RECV" },
		{ VSW_MCST_ACK_SENT, "VSW_MCST_ACK_SENT" },
		{ VSW_MCST_ACK_RECV, "VSW_MCST_ACK_RECV" },
		{ VSW_MCST_NACK_SENT, "VSW_MCST_NACK_SENT" },
		{ VSW_MCST_NACK_RECV, "VSW_MCST_NACK_RECV" },
		{ VSW_LANE_ACTIVE, "VSW_LANE_ACTIVE" }
	};

	DERR(NULL, "DUMP_FLAGS: %llx\n", state);
	for (i = 0; i < sizeof (flags)/sizeof (flag_name_t); i++) {
		if (state & flags[i].flag_val)
			DERR(NULL, "DUMP_FLAGS %s", flags[i].flag_name);
	}
}