1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/errno.h> 29 #include <sys/debug.h> 30 #include <sys/time.h> 31 #include <sys/sysmacros.h> 32 #include <sys/systm.h> 33 #include <sys/user.h> 34 #include <sys/stropts.h> 35 #include <sys/stream.h> 36 #include <sys/strlog.h> 37 #include <sys/strsubr.h> 38 #include <sys/cmn_err.h> 39 #include <sys/cpu.h> 40 #include <sys/kmem.h> 41 #include <sys/conf.h> 42 #include <sys/ddi.h> 43 #include <sys/sunddi.h> 44 #include <sys/ksynch.h> 45 #include <sys/stat.h> 46 #include <sys/kstat.h> 47 #include <sys/vtrace.h> 48 #include <sys/strsun.h> 49 #include <sys/dlpi.h> 50 #include <sys/ethernet.h> 51 #include <net/if.h> 52 #include <sys/varargs.h> 53 #include <sys/machsystm.h> 54 #include <sys/modctl.h> 55 #include <sys/modhash.h> 56 #include <sys/mac.h> 57 #include <sys/mac_ether.h> 58 #include <sys/taskq.h> 59 #include <sys/note.h> 60 #include <sys/mach_descrip.h> 61 #include <sys/mdeg.h> 62 #include <sys/ldc.h> 63 #include <sys/vsw_fdb.h> 64 #include <sys/vsw.h> 65 #include <sys/vio_mailbox.h> 66 #include <sys/vnet_mailbox.h> 67 #include <sys/vnet_common.h> 68 #include <sys/vio_util.h> 69 #include <sys/sdt.h> 70 #include <sys/atomic.h> 71 #include <sys/callb.h> 72 #include <sys/vlan.h> 73 74 /* Port add/deletion/etc routines */ 75 static int vsw_port_delete(vsw_port_t *port); 76 static int vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id); 77 static int vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id); 78 static int vsw_init_ldcs(vsw_port_t *port); 79 static int vsw_uninit_ldcs(vsw_port_t *port); 80 static int vsw_ldc_init(vsw_ldc_t *ldcp); 81 static int vsw_ldc_uninit(vsw_ldc_t *ldcp); 82 static int vsw_drain_ldcs(vsw_port_t *port); 83 static int vsw_drain_port_taskq(vsw_port_t *port); 84 static void vsw_marker_task(void *); 85 static int vsw_plist_del_node(vsw_t *, vsw_port_t *port); 86 int vsw_detach_ports(vsw_t *vswp); 87 int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node); 88 mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr); 89 int vsw_port_detach(vsw_t *vswp, int p_instance); 90 int vsw_portsend(vsw_port_t *port, mblk_t *mp); 91 int vsw_port_attach(vsw_port_t *portp); 92 vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance); 93 void vsw_vlan_unaware_port_reset(vsw_port_t *portp); 94 int vsw_send_msg(vsw_ldc_t *, void *, int, boolean_t); 95 void vsw_hio_port_reset(vsw_port_t *portp, boolean_t immediate); 96 void vsw_reset_ports(vsw_t *vswp); 97 void vsw_port_reset(vsw_port_t *portp); 98 99 /* Interrupt routines */ 100 static uint_t vsw_ldc_cb(uint64_t cb, caddr_t arg); 
101 102 /* Handshake routines */ 103 static void vsw_ldc_reinit(vsw_ldc_t *); 104 static void vsw_process_conn_evt(vsw_ldc_t *, uint16_t); 105 static void vsw_conn_task(void *); 106 static int vsw_check_flag(vsw_ldc_t *, int, uint64_t); 107 static void vsw_next_milestone(vsw_ldc_t *); 108 static int vsw_supported_version(vio_ver_msg_t *); 109 static void vsw_set_vnet_proto_ops(vsw_ldc_t *ldcp); 110 static void vsw_reset_vnet_proto_ops(vsw_ldc_t *ldcp); 111 112 /* Data processing routines */ 113 static void vsw_process_pkt(void *); 114 static void vsw_dispatch_ctrl_task(vsw_ldc_t *, void *, vio_msg_tag_t *); 115 static void vsw_process_ctrl_pkt(void *); 116 static void vsw_process_ctrl_ver_pkt(vsw_ldc_t *, void *); 117 static void vsw_process_ctrl_attr_pkt(vsw_ldc_t *, void *); 118 static void vsw_process_ctrl_mcst_pkt(vsw_ldc_t *, void *); 119 static void vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *, void *); 120 static void vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *, void *); 121 static void vsw_process_ctrl_rdx_pkt(vsw_ldc_t *, void *); 122 static void vsw_process_data_pkt(vsw_ldc_t *, void *, vio_msg_tag_t *, 123 uint32_t); 124 static void vsw_process_data_dring_pkt(vsw_ldc_t *, void *); 125 static void vsw_process_pkt_data_nop(void *, void *, uint32_t); 126 static void vsw_process_pkt_data(void *, void *, uint32_t); 127 static void vsw_process_data_ibnd_pkt(vsw_ldc_t *, void *); 128 static void vsw_process_err_pkt(vsw_ldc_t *, void *, vio_msg_tag_t *); 129 130 /* Switching/data transmit routines */ 131 static int vsw_dringsend(vsw_ldc_t *, mblk_t *); 132 static int vsw_descrsend(vsw_ldc_t *, mblk_t *); 133 static void vsw_ldcsend_pkt(vsw_ldc_t *ldcp, mblk_t *mp); 134 static int vsw_ldcsend(vsw_ldc_t *ldcp, mblk_t *mp, uint32_t retries); 135 static int vsw_ldctx_pri(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count); 136 static int vsw_ldctx(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count); 137 138 /* Packet creation routines */ 139 static void vsw_send_ver(void *); 140 static void vsw_send_attr(vsw_ldc_t *); 141 static vio_dring_reg_msg_t *vsw_create_dring_info_pkt(vsw_ldc_t *); 142 static void vsw_send_dring_info(vsw_ldc_t *); 143 static void vsw_send_rdx(vsw_ldc_t *); 144 145 /* Dring routines */ 146 static dring_info_t *vsw_create_dring(vsw_ldc_t *); 147 static void vsw_create_privring(vsw_ldc_t *); 148 static int vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp); 149 static int vsw_dring_find_free_desc(dring_info_t *, vsw_private_desc_t **, 150 int *); 151 static dring_info_t *vsw_ident2dring(lane_t *, uint64_t); 152 static int vsw_reclaim_dring(dring_info_t *dp, int start); 153 154 static void vsw_set_lane_attr(vsw_t *, lane_t *); 155 static int vsw_check_attr(vnet_attr_msg_t *, vsw_ldc_t *); 156 static int vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg); 157 static int vsw_mem_cookie_match(ldc_mem_cookie_t *, ldc_mem_cookie_t *); 158 static int vsw_check_dring_info(vio_dring_reg_msg_t *); 159 160 /* Rcv/Tx thread routines */ 161 static void vsw_stop_tx_thread(vsw_ldc_t *ldcp); 162 static void vsw_ldc_tx_worker(void *arg); 163 static void vsw_stop_rx_thread(vsw_ldc_t *ldcp); 164 static void vsw_ldc_rx_worker(void *arg); 165 166 /* Misc support routines */ 167 static void vsw_free_lane_resources(vsw_ldc_t *, uint64_t); 168 static void vsw_free_ring(dring_info_t *); 169 static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr); 170 static int vsw_get_same_dest_list(struct ether_header *ehp, 171 mblk_t **rhead, mblk_t **rtail, mblk_t **mpp); 172 static mblk_t 
*vsw_dupmsgchain(mblk_t *mp); 173 174 /* Debugging routines */ 175 static void dump_flags(uint64_t); 176 static void display_state(void); 177 static void display_lane(lane_t *); 178 static void display_ring(dring_info_t *); 179 180 /* 181 * Functions imported from other files. 182 */ 183 extern int vsw_set_hw(vsw_t *, vsw_port_t *, int); 184 extern void vsw_unset_hw(vsw_t *, vsw_port_t *, int); 185 extern int vsw_add_rem_mcst(vnet_mcast_msg_t *mcst_pkt, vsw_port_t *port); 186 extern void vsw_del_mcst_port(vsw_port_t *port); 187 extern int vsw_add_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg); 188 extern int vsw_del_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg); 189 extern void vsw_fdbe_add(vsw_t *vswp, void *port); 190 extern void vsw_fdbe_del(vsw_t *vswp, struct ether_addr *eaddr); 191 extern void vsw_create_vlans(void *arg, int type); 192 extern void vsw_destroy_vlans(void *arg, int type); 193 extern void vsw_vlan_add_ids(void *arg, int type); 194 extern void vsw_vlan_remove_ids(void *arg, int type); 195 extern boolean_t vsw_frame_lookup_vid(void *arg, int caller, 196 struct ether_header *ehp, uint16_t *vidp); 197 extern mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp); 198 extern uint32_t vsw_vlan_frame_untag(void *arg, int type, mblk_t **np, 199 mblk_t **npt); 200 extern boolean_t vsw_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid); 201 extern void vsw_hio_start(vsw_t *vswp, vsw_ldc_t *ldcp); 202 extern void vsw_hio_stop(vsw_t *vswp, vsw_ldc_t *ldcp); 203 extern void vsw_process_dds_msg(vsw_t *vswp, vsw_ldc_t *ldcp, void *msg); 204 extern void vsw_hio_stop_port(vsw_port_t *portp); 205 extern void vsw_publish_macaddr(vsw_t *vswp, vsw_port_t *portp); 206 extern int vsw_mac_client_init(vsw_t *vswp, vsw_port_t *port, int type); 207 extern void vsw_mac_client_cleanup(vsw_t *vswp, vsw_port_t *port, int type); 208 209 210 #define VSW_NUM_VMPOOLS 3 /* number of vio mblk pools */ 211 212 /* 213 * Tunables used in this file. 
214 */ 215 extern int vsw_num_handshakes; 216 extern int vsw_wretries; 217 extern int vsw_desc_delay; 218 extern int vsw_read_attempts; 219 extern int vsw_ldc_tx_delay; 220 extern int vsw_ldc_tx_retries; 221 extern boolean_t vsw_ldc_rxthr_enabled; 222 extern boolean_t vsw_ldc_txthr_enabled; 223 extern uint32_t vsw_ntxds; 224 extern uint32_t vsw_max_tx_qcount; 225 extern uint32_t vsw_chain_len; 226 extern uint32_t vsw_mblk_size1; 227 extern uint32_t vsw_mblk_size2; 228 extern uint32_t vsw_mblk_size3; 229 extern uint32_t vsw_mblk_size4; 230 extern uint32_t vsw_num_mblks1; 231 extern uint32_t vsw_num_mblks2; 232 extern uint32_t vsw_num_mblks3; 233 extern uint32_t vsw_num_mblks4; 234 extern boolean_t vsw_obp_ver_proto_workaround; 235 extern uint32_t vsw_publish_macaddr_count; 236 extern boolean_t vsw_jumbo_rxpools; 237 238 #define LDC_ENTER_LOCK(ldcp) \ 239 mutex_enter(&((ldcp)->ldc_cblock));\ 240 mutex_enter(&((ldcp)->ldc_rxlock));\ 241 mutex_enter(&((ldcp)->ldc_txlock)); 242 #define LDC_EXIT_LOCK(ldcp) \ 243 mutex_exit(&((ldcp)->ldc_txlock));\ 244 mutex_exit(&((ldcp)->ldc_rxlock));\ 245 mutex_exit(&((ldcp)->ldc_cblock)); 246 247 #define VSW_VER_EQ(ldcp, major, minor) \ 248 ((ldcp)->lane_out.ver_major == (major) && \ 249 (ldcp)->lane_out.ver_minor == (minor)) 250 251 #define VSW_VER_LT(ldcp, major, minor) \ 252 (((ldcp)->lane_out.ver_major < (major)) || \ 253 ((ldcp)->lane_out.ver_major == (major) && \ 254 (ldcp)->lane_out.ver_minor < (minor))) 255 256 #define VSW_VER_GTEQ(ldcp, major, minor) \ 257 (((ldcp)->lane_out.ver_major > (major)) || \ 258 ((ldcp)->lane_out.ver_major == (major) && \ 259 (ldcp)->lane_out.ver_minor >= (minor))) 260 261 /* supported versions */ 262 static ver_sup_t vsw_versions[] = { {1, 4} }; 263 264 /* 265 * For the moment the state dump routines have their own 266 * private flag. 267 */ 268 #define DUMP_STATE 0 269 270 #if DUMP_STATE 271 272 #define DUMP_TAG(tag) \ 273 { \ 274 D1(NULL, "DUMP_TAG: type 0x%llx", (tag).vio_msgtype); \ 275 D1(NULL, "DUMP_TAG: stype 0x%llx", (tag).vio_subtype); \ 276 D1(NULL, "DUMP_TAG: senv 0x%llx", (tag).vio_subtype_env); \ 277 } 278 279 #define DUMP_TAG_PTR(tag) \ 280 { \ 281 D1(NULL, "DUMP_TAG: type 0x%llx", (tag)->vio_msgtype); \ 282 D1(NULL, "DUMP_TAG: stype 0x%llx", (tag)->vio_subtype); \ 283 D1(NULL, "DUMP_TAG: senv 0x%llx", (tag)->vio_subtype_env); \ 284 } 285 286 #define DUMP_FLAGS(flags) dump_flags(flags); 287 #define DISPLAY_STATE() display_state() 288 289 #else 290 291 #define DUMP_TAG(tag) 292 #define DUMP_TAG_PTR(tag) 293 #define DUMP_FLAGS(state) 294 #define DISPLAY_STATE() 295 296 #endif /* DUMP_STATE */ 297 298 /* 299 * Attach the specified port. 300 * 301 * Returns 0 on success, 1 on failure. 302 */ 303 int 304 vsw_port_attach(vsw_port_t *port) 305 { 306 vsw_t *vswp = port->p_vswp; 307 vsw_port_list_t *plist = &vswp->plist; 308 vsw_port_t *p, **pp; 309 int i; 310 int nids = port->num_ldcs; 311 uint64_t *ldcids; 312 int rv; 313 314 D1(vswp, "%s: enter : port %d", __func__, port->p_instance); 315 316 /* port already exists? 
 */
	READ_ENTER(&plist->lockrw);
	for (p = plist->head; p != NULL; p = p->p_next) {
		if (p->p_instance == port->p_instance) {
			DWARN(vswp, "%s: port instance %d already attached",
			    __func__, p->p_instance);
			RW_EXIT(&plist->lockrw);
			return (1);
		}
	}
	RW_EXIT(&plist->lockrw);

	rw_init(&port->p_ldclist.lockrw, NULL, RW_DRIVER, NULL);

	mutex_init(&port->tx_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&port->mca_lock, NULL, MUTEX_DRIVER, NULL);
	rw_init(&port->maccl_rwlock, NULL, RW_DRIVER, NULL);

	mutex_init(&port->state_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&port->state_cv, NULL, CV_DRIVER, NULL);
	port->state = VSW_PORT_INIT;

	D2(vswp, "%s: %d nids", __func__, nids);
	ldcids = port->ldc_ids;
	for (i = 0; i < nids; i++) {
		D2(vswp, "%s: ldcid (%llx)", __func__, (uint64_t)ldcids[i]);
		if (vsw_ldc_attach(port, (uint64_t)ldcids[i]) != 0) {
			DERR(vswp, "%s: ldc_attach failed", __func__);
			goto exit_error;
		}
	}

	if (vswp->switching_setup_done == B_TRUE) {
		/*
		 * If the underlying network device has been set up,
		 * then open a mac client and program the mac address
		 * for this port.
		 */
		rv = vsw_mac_client_init(vswp, port, VSW_VNETPORT);
		if (rv != 0) {
			goto exit_error;
		}
	}

	/* create the fdb entry for this port/mac address */
	vsw_fdbe_add(vswp, port);

	vsw_create_vlans(port, VSW_VNETPORT);

	WRITE_ENTER(&plist->lockrw);

	/* link it into the list of ports for this vsw instance */
	pp = (vsw_port_t **)(&plist->head);
	port->p_next = *pp;
	*pp = port;
	plist->num_ports++;

	RW_EXIT(&plist->lockrw);

	/*
	 * Initialise the port and any ldc's under it.
	 */
	(void) vsw_init_ldcs(port);

	/* announce macaddr of vnet to the physical switch */
	if (vsw_publish_macaddr_count != 0) {	/* enabled */
		vsw_publish_macaddr(vswp, port);
	}

	D1(vswp, "%s: exit", __func__);
	return (0);

exit_error:
	rw_destroy(&port->p_ldclist.lockrw);

	cv_destroy(&port->state_cv);
	mutex_destroy(&port->state_lock);

	rw_destroy(&port->maccl_rwlock);
	mutex_destroy(&port->tx_lock);
	mutex_destroy(&port->mca_lock);
	kmem_free(port, sizeof (vsw_port_t));
	return (1);
}

/*
 * Detach the specified port.
 *
 * Returns 0 on success, 1 on failure.
 */
int
vsw_port_detach(vsw_t *vswp, int p_instance)
{
	vsw_port_t	*port = NULL;
	vsw_port_list_t	*plist = &vswp->plist;

	D1(vswp, "%s: enter: port id %d", __func__, p_instance);

	WRITE_ENTER(&plist->lockrw);

	if ((port = vsw_lookup_port(vswp, p_instance)) == NULL) {
		RW_EXIT(&plist->lockrw);
		return (1);
	}

	if (vsw_plist_del_node(vswp, port)) {
		RW_EXIT(&plist->lockrw);
		return (1);
	}

	/* cleanup any HybridIO for this port */
	vsw_hio_stop_port(port);

	/*
	 * No longer need to hold writer lock on port list now
	 * that we have unlinked the target port from the list.
	 */
	RW_EXIT(&plist->lockrw);

	/* Cleanup and close the mac client */
	vsw_mac_client_cleanup(vswp, port, VSW_VNETPORT);

	/* Remove the fdb entry for this port/mac address */
	vsw_fdbe_del(vswp, &(port->p_macaddr));
	vsw_destroy_vlans(port, VSW_VNETPORT);

	/* Remove any multicast addresses..
*/ 443 vsw_del_mcst_port(port); 444 445 if (vsw_port_delete(port)) { 446 return (1); 447 } 448 449 D1(vswp, "%s: exit: p_instance(%d)", __func__, p_instance); 450 return (0); 451 } 452 453 /* 454 * Detach all active ports. 455 * 456 * Returns 0 on success, 1 on failure. 457 */ 458 int 459 vsw_detach_ports(vsw_t *vswp) 460 { 461 vsw_port_list_t *plist = &vswp->plist; 462 vsw_port_t *port = NULL; 463 464 D1(vswp, "%s: enter", __func__); 465 466 WRITE_ENTER(&plist->lockrw); 467 468 while ((port = plist->head) != NULL) { 469 if (vsw_plist_del_node(vswp, port)) { 470 DERR(vswp, "%s: Error deleting port %d" 471 " from port list", __func__, port->p_instance); 472 RW_EXIT(&plist->lockrw); 473 return (1); 474 } 475 476 /* Cleanup and close the mac client */ 477 vsw_mac_client_cleanup(vswp, port, VSW_VNETPORT); 478 479 /* Remove the fdb entry for this port/mac address */ 480 vsw_fdbe_del(vswp, &(port->p_macaddr)); 481 vsw_destroy_vlans(port, VSW_VNETPORT); 482 483 /* Remove any multicast addresses.. */ 484 vsw_del_mcst_port(port); 485 486 /* 487 * No longer need to hold the lock on the port list 488 * now that we have unlinked the target port from the 489 * list. 490 */ 491 RW_EXIT(&plist->lockrw); 492 if (vsw_port_delete(port)) { 493 DERR(vswp, "%s: Error deleting port %d", 494 __func__, port->p_instance); 495 return (1); 496 } 497 WRITE_ENTER(&plist->lockrw); 498 } 499 RW_EXIT(&plist->lockrw); 500 501 D1(vswp, "%s: exit", __func__); 502 503 return (0); 504 } 505 506 /* 507 * Delete the specified port. 508 * 509 * Returns 0 on success, 1 on failure. 510 */ 511 static int 512 vsw_port_delete(vsw_port_t *port) 513 { 514 vsw_ldc_list_t *ldcl; 515 vsw_t *vswp = port->p_vswp; 516 int num_ldcs; 517 518 D1(vswp, "%s: enter : port id %d", __func__, port->p_instance); 519 520 (void) vsw_uninit_ldcs(port); 521 522 /* 523 * Wait for any pending ctrl msg tasks which reference this 524 * port to finish. 525 */ 526 if (vsw_drain_port_taskq(port)) 527 return (1); 528 529 /* 530 * Wait for any active callbacks to finish 531 */ 532 if (vsw_drain_ldcs(port)) 533 return (1); 534 535 ldcl = &port->p_ldclist; 536 num_ldcs = port->num_ldcs; 537 WRITE_ENTER(&ldcl->lockrw); 538 while (num_ldcs > 0) { 539 if (vsw_ldc_detach(port, ldcl->head->ldc_id) != 0) { 540 cmn_err(CE_WARN, "!vsw%d: unable to detach ldc %ld", 541 vswp->instance, ldcl->head->ldc_id); 542 RW_EXIT(&ldcl->lockrw); 543 port->num_ldcs = num_ldcs; 544 return (1); 545 } 546 num_ldcs--; 547 } 548 RW_EXIT(&ldcl->lockrw); 549 550 rw_destroy(&port->p_ldclist.lockrw); 551 552 rw_destroy(&port->maccl_rwlock); 553 mutex_destroy(&port->mca_lock); 554 mutex_destroy(&port->tx_lock); 555 556 cv_destroy(&port->state_cv); 557 mutex_destroy(&port->state_lock); 558 559 if (port->num_ldcs != 0) { 560 kmem_free(port->ldc_ids, port->num_ldcs * sizeof (uint64_t)); 561 port->num_ldcs = 0; 562 } 563 564 if (port->nvids != 0) { 565 kmem_free(port->vids, sizeof (vsw_vlanid_t) * port->nvids); 566 } 567 568 kmem_free(port, sizeof (vsw_port_t)); 569 570 D1(vswp, "%s: exit", __func__); 571 572 return (0); 573 } 574 575 static int 576 vsw_init_multipools(vsw_ldc_t *ldcp, vsw_t *vswp) 577 { 578 size_t data_sz; 579 int rv; 580 uint32_t sz1 = 0; 581 uint32_t sz2 = 0; 582 uint32_t sz3 = 0; 583 uint32_t sz4 = 0; 584 585 /* 586 * We round up the mtu specified to be a multiple of 2K to limit the 587 * number of rx buffer pools created for a given mtu. 
588 */ 589 data_sz = vswp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN; 590 data_sz = VNET_ROUNDUP_2K(data_sz); 591 592 /* 593 * If pool sizes are specified, use them. Note that the presence of 594 * the first tunable will be used as a hint. 595 */ 596 if (vsw_mblk_size1 != 0) { 597 sz1 = vsw_mblk_size1; 598 sz2 = vsw_mblk_size2; 599 sz3 = vsw_mblk_size3; 600 sz4 = vsw_mblk_size4; 601 602 if (sz4 == 0) { /* need 3 pools */ 603 604 ldcp->max_rxpool_size = sz3; 605 rv = vio_init_multipools(&ldcp->vmp, 606 VSW_NUM_VMPOOLS, sz1, sz2, sz3, 607 vsw_num_mblks1, vsw_num_mblks2, vsw_num_mblks3); 608 609 } else { 610 611 ldcp->max_rxpool_size = sz4; 612 rv = vio_init_multipools(&ldcp->vmp, 613 VSW_NUM_VMPOOLS + 1, sz1, sz2, sz3, sz4, 614 vsw_num_mblks1, vsw_num_mblks2, vsw_num_mblks3, 615 vsw_num_mblks4); 616 617 } 618 619 return (rv); 620 } 621 622 /* 623 * Pool sizes are not specified. We select the pool sizes based on the 624 * mtu if vnet_jumbo_rxpools is enabled. 625 */ 626 if (vsw_jumbo_rxpools == B_FALSE || data_sz == VNET_2K) { 627 /* 628 * Receive buffer pool allocation based on mtu is disabled. 629 * Use the default mechanism of standard size pool allocation. 630 */ 631 sz1 = VSW_MBLK_SZ_128; 632 sz2 = VSW_MBLK_SZ_256; 633 sz3 = VSW_MBLK_SZ_2048; 634 ldcp->max_rxpool_size = sz3; 635 636 rv = vio_init_multipools(&ldcp->vmp, VSW_NUM_VMPOOLS, 637 sz1, sz2, sz3, 638 vsw_num_mblks1, vsw_num_mblks2, vsw_num_mblks3); 639 640 return (rv); 641 } 642 643 switch (data_sz) { 644 645 case VNET_4K: 646 647 sz1 = VSW_MBLK_SZ_128; 648 sz2 = VSW_MBLK_SZ_256; 649 sz3 = VSW_MBLK_SZ_2048; 650 sz4 = sz3 << 1; /* 4K */ 651 ldcp->max_rxpool_size = sz4; 652 653 rv = vio_init_multipools(&ldcp->vmp, VSW_NUM_VMPOOLS + 1, 654 sz1, sz2, sz3, sz4, 655 vsw_num_mblks1, vsw_num_mblks2, vsw_num_mblks3, 656 vsw_num_mblks4); 657 break; 658 659 default: /* data_sz: 4K+ to 16K */ 660 661 sz1 = VSW_MBLK_SZ_256; 662 sz2 = VSW_MBLK_SZ_2048; 663 sz3 = data_sz >> 1; /* Jumbo-size/2 */ 664 sz4 = data_sz; /* Jumbo-size */ 665 ldcp->max_rxpool_size = sz4; 666 667 rv = vio_init_multipools(&ldcp->vmp, VSW_NUM_VMPOOLS + 1, 668 sz1, sz2, sz3, sz4, 669 vsw_num_mblks1, vsw_num_mblks2, vsw_num_mblks3, 670 vsw_num_mblks4); 671 break; 672 } 673 674 return (rv); 675 676 } 677 678 /* 679 * Attach a logical domain channel (ldc) under a specified port. 680 * 681 * Returns 0 on success, 1 on failure. 
682 */ 683 static int 684 vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id) 685 { 686 vsw_t *vswp = port->p_vswp; 687 vsw_ldc_list_t *ldcl = &port->p_ldclist; 688 vsw_ldc_t *ldcp = NULL; 689 ldc_attr_t attr; 690 ldc_status_t istatus; 691 int status = DDI_FAILURE; 692 char kname[MAXNAMELEN]; 693 enum { PROG_init = 0x0, 694 PROG_callback = 0x1, PROG_rx_thread = 0x2, 695 PROG_tx_thread = 0x4} 696 progress; 697 698 progress = PROG_init; 699 700 D1(vswp, "%s: enter", __func__); 701 702 ldcp = kmem_zalloc(sizeof (vsw_ldc_t), KM_NOSLEEP); 703 if (ldcp == NULL) { 704 DERR(vswp, "%s: kmem_zalloc failed", __func__); 705 return (1); 706 } 707 ldcp->ldc_id = ldc_id; 708 709 mutex_init(&ldcp->ldc_txlock, NULL, MUTEX_DRIVER, NULL); 710 mutex_init(&ldcp->ldc_rxlock, NULL, MUTEX_DRIVER, NULL); 711 mutex_init(&ldcp->ldc_cblock, NULL, MUTEX_DRIVER, NULL); 712 mutex_init(&ldcp->drain_cv_lock, NULL, MUTEX_DRIVER, NULL); 713 cv_init(&ldcp->drain_cv, NULL, CV_DRIVER, NULL); 714 rw_init(&ldcp->lane_in.dlistrw, NULL, RW_DRIVER, NULL); 715 rw_init(&ldcp->lane_out.dlistrw, NULL, RW_DRIVER, NULL); 716 717 /* required for handshake with peer */ 718 ldcp->local_session = (uint64_t)ddi_get_lbolt(); 719 ldcp->peer_session = 0; 720 ldcp->session_status = 0; 721 ldcp->hss_id = 1; /* Initial handshake session id */ 722 723 (void) atomic_swap_32(&port->p_hio_capable, B_FALSE); 724 725 /* only set for outbound lane, inbound set by peer */ 726 vsw_set_lane_attr(vswp, &ldcp->lane_out); 727 728 attr.devclass = LDC_DEV_NT_SVC; 729 attr.instance = ddi_get_instance(vswp->dip); 730 attr.mode = LDC_MODE_UNRELIABLE; 731 attr.mtu = VSW_LDC_MTU; 732 status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle); 733 if (status != 0) { 734 DERR(vswp, "%s(%lld): ldc_init failed, rv (%d)", 735 __func__, ldc_id, status); 736 goto ldc_attach_fail; 737 } 738 739 if (vsw_ldc_rxthr_enabled) { 740 ldcp->rx_thr_flags = 0; 741 742 mutex_init(&ldcp->rx_thr_lock, NULL, MUTEX_DRIVER, NULL); 743 cv_init(&ldcp->rx_thr_cv, NULL, CV_DRIVER, NULL); 744 ldcp->rx_thread = thread_create(NULL, 2 * DEFAULTSTKSZ, 745 vsw_ldc_rx_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri); 746 747 progress |= PROG_rx_thread; 748 if (ldcp->rx_thread == NULL) { 749 DWARN(vswp, "%s(%lld): Failed to create worker thread", 750 __func__, ldc_id); 751 goto ldc_attach_fail; 752 } 753 } 754 755 if (vsw_ldc_txthr_enabled) { 756 ldcp->tx_thr_flags = 0; 757 ldcp->tx_mhead = ldcp->tx_mtail = NULL; 758 759 mutex_init(&ldcp->tx_thr_lock, NULL, MUTEX_DRIVER, NULL); 760 cv_init(&ldcp->tx_thr_cv, NULL, CV_DRIVER, NULL); 761 ldcp->tx_thread = thread_create(NULL, 2 * DEFAULTSTKSZ, 762 vsw_ldc_tx_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri); 763 764 progress |= PROG_tx_thread; 765 if (ldcp->tx_thread == NULL) { 766 DWARN(vswp, "%s(%lld): Failed to create worker thread", 767 __func__, ldc_id); 768 goto ldc_attach_fail; 769 } 770 } 771 772 status = ldc_reg_callback(ldcp->ldc_handle, vsw_ldc_cb, (caddr_t)ldcp); 773 if (status != 0) { 774 DERR(vswp, "%s(%lld): ldc_reg_callback failed, rv (%d)", 775 __func__, ldc_id, status); 776 (void) ldc_fini(ldcp->ldc_handle); 777 goto ldc_attach_fail; 778 } 779 /* 780 * allocate a message for ldc_read()s, big enough to hold ctrl and 781 * data msgs, including raw data msgs used to recv priority frames. 
 */
	ldcp->msglen = VIO_PKT_DATA_HDRSIZE + vswp->max_frame_size;
	ldcp->ldcmsg = kmem_alloc(ldcp->msglen, KM_SLEEP);

	progress |= PROG_callback;

	mutex_init(&ldcp->status_lock, NULL, MUTEX_DRIVER, NULL);

	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
		DERR(vswp, "%s: ldc_status failed", __func__);
		mutex_destroy(&ldcp->status_lock);
		goto ldc_attach_fail;
	}

	ldcp->ldc_status = istatus;
	ldcp->ldc_port = port;
	ldcp->ldc_vswp = vswp;

	vsw_reset_vnet_proto_ops(ldcp);

	(void) sprintf(kname, "%sldc0x%lx", DRV_NAME, ldcp->ldc_id);
	ldcp->ksp = vgen_setup_kstats(DRV_NAME, vswp->instance,
	    kname, &ldcp->ldc_stats);
	if (ldcp->ksp == NULL) {
		DERR(vswp, "%s: kstats setup failed", __func__);
		goto ldc_attach_fail;
	}

	/* link it into the list of channels for this port */
	WRITE_ENTER(&ldcl->lockrw);
	ldcp->ldc_next = ldcl->head;
	ldcl->head = ldcp;
	RW_EXIT(&ldcl->lockrw);

	D1(vswp, "%s: exit", __func__);
	return (0);

ldc_attach_fail:

	if (progress & PROG_callback) {
		(void) ldc_unreg_callback(ldcp->ldc_handle);
		kmem_free(ldcp->ldcmsg, ldcp->msglen);
	}

	if (progress & PROG_rx_thread) {
		if (ldcp->rx_thread != NULL) {
			vsw_stop_rx_thread(ldcp);
		}
		mutex_destroy(&ldcp->rx_thr_lock);
		cv_destroy(&ldcp->rx_thr_cv);
	}

	if (progress & PROG_tx_thread) {
		if (ldcp->tx_thread != NULL) {
			vsw_stop_tx_thread(ldcp);
		}
		mutex_destroy(&ldcp->tx_thr_lock);
		cv_destroy(&ldcp->tx_thr_cv);
	}
	if (ldcp->ksp != NULL) {
		vgen_destroy_kstats(ldcp->ksp);
	}
	mutex_destroy(&ldcp->ldc_txlock);
	mutex_destroy(&ldcp->ldc_rxlock);
	mutex_destroy(&ldcp->ldc_cblock);
	mutex_destroy(&ldcp->drain_cv_lock);

	cv_destroy(&ldcp->drain_cv);

	rw_destroy(&ldcp->lane_in.dlistrw);
	rw_destroy(&ldcp->lane_out.dlistrw);

	kmem_free(ldcp, sizeof (vsw_ldc_t));

	return (1);
}

/*
 * Detach a logical domain channel (ldc) belonging to a
 * particular port.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id)
{
	vsw_t		*vswp = port->p_vswp;
	vsw_ldc_t	*ldcp, *prev_ldcp;
	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
	int		rv;

	prev_ldcp = ldcl->head;
	for (; (ldcp = prev_ldcp) != NULL; prev_ldcp = ldcp->ldc_next) {
		if (ldcp->ldc_id == ldc_id) {
			break;
		}
	}

	/* specified ldc id not found */
	if (ldcp == NULL) {
		DERR(vswp, "%s: ldcp = NULL", __func__);
		return (1);
	}

	D2(vswp, "%s: detaching channel %lld", __func__, ldcp->ldc_id);

	/* Stop the receive thread */
	if (ldcp->rx_thread != NULL) {
		vsw_stop_rx_thread(ldcp);
		mutex_destroy(&ldcp->rx_thr_lock);
		cv_destroy(&ldcp->rx_thr_cv);
	}
	kmem_free(ldcp->ldcmsg, ldcp->msglen);

	/* Stop the tx thread */
	if (ldcp->tx_thread != NULL) {
		vsw_stop_tx_thread(ldcp);
		mutex_destroy(&ldcp->tx_thr_lock);
		cv_destroy(&ldcp->tx_thr_cv);
		if (ldcp->tx_mhead != NULL) {
			freemsgchain(ldcp->tx_mhead);
			ldcp->tx_mhead = ldcp->tx_mtail = NULL;
			ldcp->tx_cnt = 0;
		}
	}

	/* Destroy kstats */
	vgen_destroy_kstats(ldcp->ksp);

	/*
	 * Before we can close the channel we must release any mapped
	 * resources (e.g. drings).
914 */ 915 vsw_free_lane_resources(ldcp, INBOUND); 916 vsw_free_lane_resources(ldcp, OUTBOUND); 917 918 /* 919 * If the close fails we are in serious trouble, as won't 920 * be able to delete the parent port. 921 */ 922 if ((rv = ldc_close(ldcp->ldc_handle)) != 0) { 923 DERR(vswp, "%s: error %d closing channel %lld", 924 __func__, rv, ldcp->ldc_id); 925 return (1); 926 } 927 928 (void) ldc_fini(ldcp->ldc_handle); 929 930 ldcp->ldc_status = LDC_INIT; 931 ldcp->ldc_handle = NULL; 932 ldcp->ldc_vswp = NULL; 933 934 935 /* 936 * Most likely some mblks are still in use and 937 * have not been returned to the pool. These mblks are 938 * added to the pool that is maintained in the device instance. 939 * Another attempt will be made to destroy the pool 940 * when the device detaches. 941 */ 942 vio_destroy_multipools(&ldcp->vmp, &vswp->rxh); 943 944 /* unlink it from the list */ 945 prev_ldcp = ldcp->ldc_next; 946 947 mutex_destroy(&ldcp->ldc_txlock); 948 mutex_destroy(&ldcp->ldc_rxlock); 949 mutex_destroy(&ldcp->ldc_cblock); 950 cv_destroy(&ldcp->drain_cv); 951 mutex_destroy(&ldcp->drain_cv_lock); 952 mutex_destroy(&ldcp->status_lock); 953 rw_destroy(&ldcp->lane_in.dlistrw); 954 rw_destroy(&ldcp->lane_out.dlistrw); 955 956 kmem_free(ldcp, sizeof (vsw_ldc_t)); 957 958 return (0); 959 } 960 961 /* 962 * Open and attempt to bring up the channel. Note that channel 963 * can only be brought up if peer has also opened channel. 964 * 965 * Returns 0 if can open and bring up channel, otherwise 966 * returns 1. 967 */ 968 static int 969 vsw_ldc_init(vsw_ldc_t *ldcp) 970 { 971 vsw_t *vswp = ldcp->ldc_vswp; 972 ldc_status_t istatus = 0; 973 int rv; 974 975 D1(vswp, "%s: enter", __func__); 976 977 LDC_ENTER_LOCK(ldcp); 978 979 /* don't start at 0 in case clients don't like that */ 980 ldcp->next_ident = 1; 981 982 rv = ldc_open(ldcp->ldc_handle); 983 if (rv != 0) { 984 DERR(vswp, "%s: ldc_open failed: id(%lld) rv(%d)", 985 __func__, ldcp->ldc_id, rv); 986 LDC_EXIT_LOCK(ldcp); 987 return (1); 988 } 989 990 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) { 991 DERR(vswp, "%s: unable to get status", __func__); 992 LDC_EXIT_LOCK(ldcp); 993 return (1); 994 995 } else if (istatus != LDC_OPEN && istatus != LDC_READY) { 996 DERR(vswp, "%s: id (%lld) status(%d) is not OPEN/READY", 997 __func__, ldcp->ldc_id, istatus); 998 LDC_EXIT_LOCK(ldcp); 999 return (1); 1000 } 1001 1002 mutex_enter(&ldcp->status_lock); 1003 ldcp->ldc_status = istatus; 1004 mutex_exit(&ldcp->status_lock); 1005 1006 rv = ldc_up(ldcp->ldc_handle); 1007 if (rv != 0) { 1008 /* 1009 * Not a fatal error for ldc_up() to fail, as peer 1010 * end point may simply not be ready yet. 1011 */ 1012 D2(vswp, "%s: ldc_up err id(%lld) rv(%d)", __func__, 1013 ldcp->ldc_id, rv); 1014 LDC_EXIT_LOCK(ldcp); 1015 return (1); 1016 } 1017 1018 /* 1019 * ldc_up() call is non-blocking so need to explicitly 1020 * check channel status to see if in fact the channel 1021 * is UP. 
 */
	mutex_enter(&ldcp->status_lock);
	if (ldc_status(ldcp->ldc_handle, &ldcp->ldc_status) != 0) {
		DERR(vswp, "%s: unable to get status", __func__);
		mutex_exit(&ldcp->status_lock);
		LDC_EXIT_LOCK(ldcp);
		return (1);

	}

	if (ldcp->ldc_status == LDC_UP) {
		D2(vswp, "%s: channel %ld now UP (%ld)", __func__,
		    ldcp->ldc_id, istatus);
		mutex_exit(&ldcp->status_lock);
		LDC_EXIT_LOCK(ldcp);

		vsw_process_conn_evt(ldcp, VSW_CONN_UP);
		return (0);
	}

	mutex_exit(&ldcp->status_lock);
	LDC_EXIT_LOCK(ldcp);

	D1(vswp, "%s: exit", __func__);
	return (0);
}

/* disable callbacks on the channel */
static int
vsw_ldc_uninit(vsw_ldc_t *ldcp)
{
	vsw_t	*vswp = ldcp->ldc_vswp;
	int	rv;

	D1(vswp, "vsw_ldc_uninit: enter: id(%lx)\n", ldcp->ldc_id);

	LDC_ENTER_LOCK(ldcp);

	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
	if (rv != 0) {
		DERR(vswp, "vsw_ldc_uninit(%lld): error disabling "
		    "interrupts (rv = %d)\n", ldcp->ldc_id, rv);
		LDC_EXIT_LOCK(ldcp);
		return (1);
	}

	mutex_enter(&ldcp->status_lock);
	ldcp->ldc_status = LDC_INIT;
	mutex_exit(&ldcp->status_lock);

	LDC_EXIT_LOCK(ldcp);

	D1(vswp, "vsw_ldc_uninit: exit: id(%lx)", ldcp->ldc_id);

	return (0);
}

static int
vsw_init_ldcs(vsw_port_t *port)
{
	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
	vsw_ldc_t	*ldcp;

	READ_ENTER(&ldcl->lockrw);
	ldcp = ldcl->head;
	for (; ldcp != NULL; ldcp = ldcp->ldc_next) {
		(void) vsw_ldc_init(ldcp);
	}
	RW_EXIT(&ldcl->lockrw);

	return (0);
}

static int
vsw_uninit_ldcs(vsw_port_t *port)
{
	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
	vsw_ldc_t	*ldcp;

	D1(NULL, "vsw_uninit_ldcs: enter\n");

	READ_ENTER(&ldcl->lockrw);
	ldcp = ldcl->head;
	for (; ldcp != NULL; ldcp = ldcp->ldc_next) {
		(void) vsw_ldc_uninit(ldcp);
	}
	RW_EXIT(&ldcl->lockrw);

	D1(NULL, "vsw_uninit_ldcs: exit\n");

	return (0);
}

/*
 * Wait until the callback(s) associated with the ldcs under the specified
 * port have completed.
 *
 * Prior to this function being invoked each channel under this port
 * should have been quiesced via ldc_set_cb_mode(DISABLE).
 *
 * A short explanation of what we are doing below.
 *
 * The simplest approach would be to have a reference counter in
 * the ldc structure which is incremented/decremented by the callbacks as
 * they use the channel. The drain function could then simply disable any
 * further callbacks and do a cv_wait for the ref to hit zero. Unfortunately
 * there is a tiny window here - before the callback is able to get the lock
 * on the channel it is interrupted and this function gets to execute. It
 * sees that the ref count is zero and believes it's free to delete the
 * associated data structures.
 *
 * We get around this by taking advantage of the fact that before the ldc
 * framework invokes a callback it sets a flag to indicate that there is a
 * callback active (or about to become active). If we attempt to
 * unregister a callback while this active flag is set then the unregister
 * will fail with EWOULDBLOCK.
 *
 * If the unregister fails we do a cv_timedwait. We will either be signaled
 * by the callback as it is exiting (note we have to wait a short period to
 * allow the callback to return fully to the ldc framework and it to clear
 * the active flag), or by the timer expiring. In either case we again attempt
 * the unregister. We repeat this until we can successfully unregister the
 * callback.
 *
 * The reason we use a cv_timedwait rather than a simple cv_wait is to catch
 * the case where the callback has finished but the ldc framework has not yet
 * cleared the active flag. In this case we would never get a cv_signal.
 */
static int
vsw_drain_ldcs(vsw_port_t *port)
{
	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
	vsw_ldc_t	*ldcp;
	vsw_t		*vswp = port->p_vswp;

	D1(vswp, "%s: enter", __func__);

	READ_ENTER(&ldcl->lockrw);

	ldcp = ldcl->head;

	for (; ldcp != NULL; ldcp = ldcp->ldc_next) {
		/*
		 * If we can unregister the channel callback then we
		 * know that there is no callback either running or
		 * scheduled to run for this channel so move on to next
		 * channel in the list.
		 */
		mutex_enter(&ldcp->drain_cv_lock);

		/* prompt active callbacks to quit */
		ldcp->drain_state = VSW_LDC_DRAINING;

		if ((ldc_unreg_callback(ldcp->ldc_handle)) == 0) {
			D2(vswp, "%s: unreg callback for chan %ld", __func__,
			    ldcp->ldc_id);
			mutex_exit(&ldcp->drain_cv_lock);
			continue;
		} else {
			/*
			 * If we end up here we know that either 1) a callback
			 * is currently executing, 2) is about to start (i.e.
			 * the ldc framework has set the active flag but
			 * has not actually invoked the callback yet, or 3)
			 * has finished and has returned to the ldc framework
			 * but the ldc framework has not yet cleared the
			 * active bit.
			 *
			 * Wait for it to finish.
			 */
			while (ldc_unreg_callback(ldcp->ldc_handle)
			    == EWOULDBLOCK)
				(void) cv_timedwait(&ldcp->drain_cv,
				    &ldcp->drain_cv_lock, lbolt + hz);

			mutex_exit(&ldcp->drain_cv_lock);
			D2(vswp, "%s: unreg callback for chan %ld after "
			    "timeout", __func__, ldcp->ldc_id);
		}
	}
	RW_EXIT(&ldcl->lockrw);

	D1(vswp, "%s: exit", __func__);
	return (0);
}

/*
 * Wait until all tasks which reference this port have completed.
 *
 * Prior to this function being invoked each channel under this port
 * should have been quiesced via ldc_set_cb_mode(DISABLE).
 */
static int
vsw_drain_port_taskq(vsw_port_t *port)
{
	vsw_t		*vswp = port->p_vswp;

	D1(vswp, "%s: enter", __func__);

	/*
	 * Mark the port as in the process of being detached, and
	 * dispatch a marker task to the queue so we know when all
	 * relevant tasks have completed.
	 */
	mutex_enter(&port->state_lock);
	port->state = VSW_PORT_DETACHING;

	if ((vswp->taskq_p == NULL) ||
	    (ddi_taskq_dispatch(vswp->taskq_p, vsw_marker_task,
	    port, DDI_NOSLEEP) != DDI_SUCCESS)) {
		DERR(vswp, "%s: unable to dispatch marker task",
		    __func__);
		mutex_exit(&port->state_lock);
		return (1);
	}

	/*
	 * Wait for the marker task to finish.
1240 */ 1241 while (port->state != VSW_PORT_DETACHABLE) 1242 cv_wait(&port->state_cv, &port->state_lock); 1243 1244 mutex_exit(&port->state_lock); 1245 1246 D1(vswp, "%s: exit", __func__); 1247 1248 return (0); 1249 } 1250 1251 static void 1252 vsw_marker_task(void *arg) 1253 { 1254 vsw_port_t *port = arg; 1255 vsw_t *vswp = port->p_vswp; 1256 1257 D1(vswp, "%s: enter", __func__); 1258 1259 mutex_enter(&port->state_lock); 1260 1261 /* 1262 * No further tasks should be dispatched which reference 1263 * this port so ok to mark it as safe to detach. 1264 */ 1265 port->state = VSW_PORT_DETACHABLE; 1266 1267 cv_signal(&port->state_cv); 1268 1269 mutex_exit(&port->state_lock); 1270 1271 D1(vswp, "%s: exit", __func__); 1272 } 1273 1274 vsw_port_t * 1275 vsw_lookup_port(vsw_t *vswp, int p_instance) 1276 { 1277 vsw_port_list_t *plist = &vswp->plist; 1278 vsw_port_t *port; 1279 1280 for (port = plist->head; port != NULL; port = port->p_next) { 1281 if (port->p_instance == p_instance) { 1282 D2(vswp, "vsw_lookup_port: found p_instance\n"); 1283 return (port); 1284 } 1285 } 1286 1287 return (NULL); 1288 } 1289 1290 void 1291 vsw_vlan_unaware_port_reset(vsw_port_t *portp) 1292 { 1293 vsw_ldc_list_t *ldclp; 1294 vsw_ldc_t *ldcp; 1295 1296 ldclp = &portp->p_ldclist; 1297 1298 READ_ENTER(&ldclp->lockrw); 1299 1300 /* 1301 * NOTE: for now, we will assume we have a single channel. 1302 */ 1303 if (ldclp->head == NULL) { 1304 RW_EXIT(&ldclp->lockrw); 1305 return; 1306 } 1307 ldcp = ldclp->head; 1308 1309 mutex_enter(&ldcp->ldc_cblock); 1310 1311 /* 1312 * If the peer is vlan_unaware(ver < 1.3), reset channel and terminate 1313 * the connection. See comments in vsw_set_vnet_proto_ops(). 1314 */ 1315 if (ldcp->hphase == VSW_MILESTONE4 && VSW_VER_LT(ldcp, 1, 3) && 1316 portp->nvids != 0) { 1317 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1318 } 1319 1320 mutex_exit(&ldcp->ldc_cblock); 1321 1322 RW_EXIT(&ldclp->lockrw); 1323 } 1324 1325 void 1326 vsw_hio_port_reset(vsw_port_t *portp, boolean_t immediate) 1327 { 1328 vsw_ldc_list_t *ldclp; 1329 vsw_ldc_t *ldcp; 1330 1331 ldclp = &portp->p_ldclist; 1332 1333 READ_ENTER(&ldclp->lockrw); 1334 1335 /* 1336 * NOTE: for now, we will assume we have a single channel. 1337 */ 1338 if (ldclp->head == NULL) { 1339 RW_EXIT(&ldclp->lockrw); 1340 return; 1341 } 1342 ldcp = ldclp->head; 1343 1344 mutex_enter(&ldcp->ldc_cblock); 1345 1346 /* 1347 * If the peer is HybridIO capable (ver >= 1.3), reset channel 1348 * to trigger re-negotiation, which inturn trigger HybridIO 1349 * setup/cleanup. 1350 */ 1351 if ((ldcp->hphase == VSW_MILESTONE4) && 1352 (portp->p_hio_capable == B_TRUE)) { 1353 if (immediate == B_TRUE) { 1354 (void) ldc_down(ldcp->ldc_handle); 1355 } else { 1356 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1357 } 1358 } 1359 1360 mutex_exit(&ldcp->ldc_cblock); 1361 1362 RW_EXIT(&ldclp->lockrw); 1363 } 1364 1365 void 1366 vsw_port_reset(vsw_port_t *portp) 1367 { 1368 vsw_ldc_list_t *ldclp; 1369 vsw_ldc_t *ldcp; 1370 1371 ldclp = &portp->p_ldclist; 1372 1373 READ_ENTER(&ldclp->lockrw); 1374 1375 /* 1376 * NOTE: for now, we will assume we have a single channel. 1377 */ 1378 if (ldclp->head == NULL) { 1379 RW_EXIT(&ldclp->lockrw); 1380 return; 1381 } 1382 ldcp = ldclp->head; 1383 1384 mutex_enter(&ldcp->ldc_cblock); 1385 1386 /* 1387 * reset channel and terminate the connection. 
1388 */ 1389 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1390 1391 mutex_exit(&ldcp->ldc_cblock); 1392 1393 RW_EXIT(&ldclp->lockrw); 1394 } 1395 1396 void 1397 vsw_reset_ports(vsw_t *vswp) 1398 { 1399 vsw_port_list_t *plist = &vswp->plist; 1400 vsw_port_t *portp; 1401 1402 READ_ENTER(&plist->lockrw); 1403 for (portp = plist->head; portp != NULL; portp = portp->p_next) { 1404 if ((portp->p_hio_capable) && (portp->p_hio_enabled)) { 1405 vsw_hio_stop_port(portp); 1406 } 1407 vsw_port_reset(portp); 1408 } 1409 RW_EXIT(&plist->lockrw); 1410 } 1411 1412 1413 /* 1414 * Search for and remove the specified port from the port 1415 * list. Returns 0 if able to locate and remove port, otherwise 1416 * returns 1. 1417 */ 1418 static int 1419 vsw_plist_del_node(vsw_t *vswp, vsw_port_t *port) 1420 { 1421 vsw_port_list_t *plist = &vswp->plist; 1422 vsw_port_t *curr_p, *prev_p; 1423 1424 if (plist->head == NULL) 1425 return (1); 1426 1427 curr_p = prev_p = plist->head; 1428 1429 while (curr_p != NULL) { 1430 if (curr_p == port) { 1431 if (prev_p == curr_p) { 1432 plist->head = curr_p->p_next; 1433 } else { 1434 prev_p->p_next = curr_p->p_next; 1435 } 1436 plist->num_ports--; 1437 break; 1438 } else { 1439 prev_p = curr_p; 1440 curr_p = curr_p->p_next; 1441 } 1442 } 1443 return (0); 1444 } 1445 1446 /* 1447 * Interrupt handler for ldc messages. 1448 */ 1449 static uint_t 1450 vsw_ldc_cb(uint64_t event, caddr_t arg) 1451 { 1452 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 1453 vsw_t *vswp = ldcp->ldc_vswp; 1454 1455 D1(vswp, "%s: enter: ldcid (%lld)\n", __func__, ldcp->ldc_id); 1456 1457 mutex_enter(&ldcp->ldc_cblock); 1458 ldcp->ldc_stats.callbacks++; 1459 1460 mutex_enter(&ldcp->status_lock); 1461 if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) { 1462 mutex_exit(&ldcp->status_lock); 1463 mutex_exit(&ldcp->ldc_cblock); 1464 return (LDC_SUCCESS); 1465 } 1466 mutex_exit(&ldcp->status_lock); 1467 1468 if (event & LDC_EVT_UP) { 1469 /* 1470 * Channel has come up. 1471 */ 1472 D2(vswp, "%s: id(%ld) event(%llx) UP: status(%ld)", 1473 __func__, ldcp->ldc_id, event, ldcp->ldc_status); 1474 1475 vsw_process_conn_evt(ldcp, VSW_CONN_UP); 1476 1477 ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0); 1478 } 1479 1480 if (event & LDC_EVT_READ) { 1481 /* 1482 * Data available for reading. 1483 */ 1484 D2(vswp, "%s: id(ld) event(%llx) data READ", 1485 __func__, ldcp->ldc_id, event); 1486 1487 if (ldcp->rx_thread != NULL) { 1488 /* 1489 * If the receive thread is enabled, then 1490 * wakeup the receive thread to process the 1491 * LDC messages. 1492 */ 1493 mutex_exit(&ldcp->ldc_cblock); 1494 mutex_enter(&ldcp->rx_thr_lock); 1495 if (!(ldcp->rx_thr_flags & VSW_WTHR_DATARCVD)) { 1496 ldcp->rx_thr_flags |= VSW_WTHR_DATARCVD; 1497 cv_signal(&ldcp->rx_thr_cv); 1498 } 1499 mutex_exit(&ldcp->rx_thr_lock); 1500 mutex_enter(&ldcp->ldc_cblock); 1501 } else { 1502 vsw_process_pkt(ldcp); 1503 } 1504 1505 ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0); 1506 1507 goto vsw_cb_exit; 1508 } 1509 1510 if (event & (LDC_EVT_DOWN | LDC_EVT_RESET)) { 1511 D2(vswp, "%s: id(%ld) event (%lx) DOWN/RESET: status(%ld)", 1512 __func__, ldcp->ldc_id, event, ldcp->ldc_status); 1513 1514 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 1515 } 1516 1517 /* 1518 * Catch either LDC_EVT_WRITE which we don't support or any 1519 * unknown event. 
1520 */ 1521 if (event & 1522 ~(LDC_EVT_UP | LDC_EVT_RESET | LDC_EVT_DOWN | LDC_EVT_READ)) { 1523 DERR(vswp, "%s: id(%ld) Unexpected event=(%llx) status(%ld)", 1524 __func__, ldcp->ldc_id, event, ldcp->ldc_status); 1525 } 1526 1527 vsw_cb_exit: 1528 mutex_exit(&ldcp->ldc_cblock); 1529 1530 /* 1531 * Let the drain function know we are finishing if it 1532 * is waiting. 1533 */ 1534 mutex_enter(&ldcp->drain_cv_lock); 1535 if (ldcp->drain_state == VSW_LDC_DRAINING) 1536 cv_signal(&ldcp->drain_cv); 1537 mutex_exit(&ldcp->drain_cv_lock); 1538 1539 return (LDC_SUCCESS); 1540 } 1541 1542 /* 1543 * Reinitialise data structures associated with the channel. 1544 */ 1545 static void 1546 vsw_ldc_reinit(vsw_ldc_t *ldcp) 1547 { 1548 vsw_t *vswp = ldcp->ldc_vswp; 1549 vsw_port_t *port; 1550 vsw_ldc_list_t *ldcl; 1551 1552 D1(vswp, "%s: enter", __func__); 1553 1554 /* free receive mblk pools for the channel */ 1555 vio_destroy_multipools(&ldcp->vmp, &vswp->rxh); 1556 1557 port = ldcp->ldc_port; 1558 ldcl = &port->p_ldclist; 1559 1560 READ_ENTER(&ldcl->lockrw); 1561 1562 D2(vswp, "%s: in 0x%llx : out 0x%llx", __func__, 1563 ldcp->lane_in.lstate, ldcp->lane_out.lstate); 1564 1565 vsw_free_lane_resources(ldcp, INBOUND); 1566 vsw_free_lane_resources(ldcp, OUTBOUND); 1567 RW_EXIT(&ldcl->lockrw); 1568 1569 ldcp->lane_in.lstate = 0; 1570 ldcp->lane_out.lstate = 0; 1571 1572 /* Remove the fdb entry for this port/mac address */ 1573 vsw_fdbe_del(vswp, &(port->p_macaddr)); 1574 1575 /* remove the port from vlans it has been assigned to */ 1576 vsw_vlan_remove_ids(port, VSW_VNETPORT); 1577 1578 /* 1579 * Remove parent port from any multicast groups 1580 * it may have registered with. Client must resend 1581 * multicast add command after handshake completes. 1582 */ 1583 vsw_del_mcst_port(port); 1584 1585 ldcp->peer_session = 0; 1586 ldcp->session_status = 0; 1587 ldcp->hcnt = 0; 1588 ldcp->hphase = VSW_MILESTONE0; 1589 1590 vsw_reset_vnet_proto_ops(ldcp); 1591 1592 D1(vswp, "%s: exit", __func__); 1593 } 1594 1595 /* 1596 * Process a connection event. 1597 * 1598 * Note - care must be taken to ensure that this function is 1599 * not called with the dlistrw lock held. 1600 */ 1601 static void 1602 vsw_process_conn_evt(vsw_ldc_t *ldcp, uint16_t evt) 1603 { 1604 vsw_t *vswp = ldcp->ldc_vswp; 1605 vsw_conn_evt_t *conn = NULL; 1606 1607 D1(vswp, "%s: enter", __func__); 1608 1609 /* 1610 * Check if either a reset or restart event is pending 1611 * or in progress. If so just return. 1612 * 1613 * A VSW_CONN_RESET event originates either with a LDC_RESET_EVT 1614 * being received by the callback handler, or a ECONNRESET error 1615 * code being returned from a ldc_read() or ldc_write() call. 1616 * 1617 * A VSW_CONN_RESTART event occurs when some error checking code 1618 * decides that there is a problem with data from the channel, 1619 * and that the handshake should be restarted. 1620 */ 1621 if (((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART)) && 1622 (ldstub((uint8_t *)&ldcp->reset_active))) 1623 return; 1624 1625 /* 1626 * If it is an LDC_UP event we first check the recorded 1627 * state of the channel. If this is UP then we know that 1628 * the channel moving to the UP state has already been dealt 1629 * with and don't need to dispatch a new task. 1630 * 1631 * The reason for this check is that when we do a ldc_up(), 1632 * depending on the state of the peer, we may or may not get 1633 * a LDC_UP event. 
 * As we can't depend on getting a LDC_UP evt
 * every time we do ldc_up() we explicitly check the channel
 * status to see has it come up (ldc_up() is asynch and will
 * complete at some undefined time), and take the appropriate
 * action.
 *
 * The flip side of this is that we may get a LDC_UP event
 * when we have already seen that the channel is up and have
 * dealt with that.
 */
	mutex_enter(&ldcp->status_lock);
	if (evt == VSW_CONN_UP) {
		if ((ldcp->ldc_status == LDC_UP) || (ldcp->reset_active != 0)) {
			mutex_exit(&ldcp->status_lock);
			return;
		}
	}
	mutex_exit(&ldcp->status_lock);

	/*
	 * The transaction group id allows us to identify and discard
	 * any tasks which are still pending on the taskq and refer
	 * to the handshake session we are about to restart or reset.
	 * These stale messages no longer have any real meaning.
	 */
	(void) atomic_inc_32(&ldcp->hss_id);

	ASSERT(vswp->taskq_p != NULL);

	if ((conn = kmem_zalloc(sizeof (vsw_conn_evt_t), KM_NOSLEEP)) == NULL) {
		cmn_err(CE_WARN, "!vsw%d: unable to allocate memory for"
		    " connection event", vswp->instance);
		goto err_exit;
	}

	conn->evt = evt;
	conn->ldcp = ldcp;

	if (ddi_taskq_dispatch(vswp->taskq_p, vsw_conn_task, conn,
	    DDI_NOSLEEP) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "!vsw%d: Can't dispatch connection task",
		    vswp->instance);

		kmem_free(conn, sizeof (vsw_conn_evt_t));
		goto err_exit;
	}

	D1(vswp, "%s: exit", __func__);
	return;

err_exit:
	/*
	 * Have most likely failed due to memory shortage. Clear the flag so
	 * that future requests will at least be attempted and will hopefully
	 * succeed.
	 */
	if ((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART))
		ldcp->reset_active = 0;
}

/*
 * Deal with events relating to a connection. Invoked from a taskq.
 */
static void
vsw_conn_task(void *arg)
{
	vsw_conn_evt_t	*conn = (vsw_conn_evt_t *)arg;
	vsw_ldc_t	*ldcp = NULL;
	vsw_port_t	*portp;
	vsw_t		*vswp = NULL;
	uint16_t	evt;
	ldc_status_t	curr_status;

	ldcp = conn->ldcp;
	evt = conn->evt;
	vswp = ldcp->ldc_vswp;
	portp = ldcp->ldc_port;

	D1(vswp, "%s: enter", __func__);

	/* can safely free now have copied out data */
	kmem_free(conn, sizeof (vsw_conn_evt_t));

	mutex_enter(&ldcp->status_lock);
	if (ldc_status(ldcp->ldc_handle, &curr_status) != 0) {
		cmn_err(CE_WARN, "!vsw%d: Unable to read status of "
		    "channel %ld", vswp->instance, ldcp->ldc_id);
		mutex_exit(&ldcp->status_lock);
		return;
	}

	/*
	 * If we wish to restart the handshake on this channel, then if
	 * the channel is UP we bring it DOWN to flush the underlying
	 * ldc queue.
	 */
	if ((evt == VSW_CONN_RESTART) && (curr_status == LDC_UP))
		(void) ldc_down(ldcp->ldc_handle);

	if ((portp->p_hio_capable) && (portp->p_hio_enabled)) {
		vsw_hio_stop(vswp, ldcp);
	}

	/*
	 * re-init all the associated data structures.
	 */
	vsw_ldc_reinit(ldcp);

	/*
	 * Bring the channel back up (note it does no harm to
	 * do this even if the channel is already UP, just
	 * becomes effectively a no-op).
	 */
	(void) ldc_up(ldcp->ldc_handle);

	/*
	 * Check if channel is now UP. This will only happen if
	 * peer has also done a ldc_up().
	 */
	if (ldc_status(ldcp->ldc_handle, &curr_status) != 0) {
		cmn_err(CE_WARN, "!vsw%d: Unable to read status of "
		    "channel %ld", vswp->instance, ldcp->ldc_id);
		mutex_exit(&ldcp->status_lock);
		return;
	}

	ldcp->ldc_status = curr_status;

	/* channel UP so restart handshake by sending version info */
	if (curr_status == LDC_UP) {
		if (ldcp->hcnt++ > vsw_num_handshakes) {
			cmn_err(CE_WARN, "!vsw%d: exceeded number of permitted"
			    " handshake attempts (%d) on channel %ld",
			    vswp->instance, ldcp->hcnt, ldcp->ldc_id);
			mutex_exit(&ldcp->status_lock);
			return;
		}

		if (vsw_obp_ver_proto_workaround == B_FALSE &&
		    (ddi_taskq_dispatch(vswp->taskq_p, vsw_send_ver, ldcp,
		    DDI_NOSLEEP) != DDI_SUCCESS)) {
			cmn_err(CE_WARN, "!vsw%d: Can't dispatch version task",
			    vswp->instance);

			/*
			 * Don't count as valid restart attempt if couldn't
			 * send version msg.
			 */
			if (ldcp->hcnt > 0)
				ldcp->hcnt--;
		}
	}

	/*
	 * Mark that the process is complete by clearing the flag.
	 *
	 * Note it is possible that the taskq dispatch above may have failed,
	 * most likely due to memory shortage. We still clear the flag so
	 * future attempts will at least be attempted and will hopefully
	 * succeed.
	 */
	if ((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART))
		ldcp->reset_active = 0;

	mutex_exit(&ldcp->status_lock);

	D1(vswp, "%s: exit", __func__);
}

/*
 * returns 0 if legal for event signified by flag to have
 * occurred at the time it did. Otherwise returns 1.
 */
int
vsw_check_flag(vsw_ldc_t *ldcp, int dir, uint64_t flag)
{
	vsw_t		*vswp = ldcp->ldc_vswp;
	uint64_t	state;
	uint64_t	phase;

	if (dir == INBOUND)
		state = ldcp->lane_in.lstate;
	else
		state = ldcp->lane_out.lstate;

	phase = ldcp->hphase;

	switch (flag) {
	case VSW_VER_INFO_RECV:
		if (phase > VSW_MILESTONE0) {
			DERR(vswp, "vsw_check_flag (%d): VER_INFO_RECV"
			    " when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		}
		break;

	case VSW_VER_ACK_RECV:
	case VSW_VER_NACK_RECV:
		if (!(state & VSW_VER_INFO_SENT)) {
			DERR(vswp, "vsw_check_flag (%d): spurious VER_ACK or "
			    "VER_NACK when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		} else
			state &= ~VSW_VER_INFO_SENT;
		break;

	case VSW_ATTR_INFO_RECV:
		if ((phase < VSW_MILESTONE1) || (phase >= VSW_MILESTONE2)) {
			DERR(vswp, "vsw_check_flag (%d): ATTR_INFO_RECV"
			    " when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		}
		break;

	case VSW_ATTR_ACK_RECV:
	case VSW_ATTR_NACK_RECV:
		if (!(state & VSW_ATTR_INFO_SENT)) {
			DERR(vswp, "vsw_check_flag (%d): spurious ATTR_ACK"
			    " or ATTR_NACK when in state %d\n",
			    ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return (1);
		} else
			state &= ~VSW_ATTR_INFO_SENT;
		break;

	case VSW_DRING_INFO_RECV:
		if (phase < VSW_MILESTONE1) {
			DERR(vswp, "vsw_check_flag (%d): DRING_INFO_RECV"
			    " when in state %d\n", ldcp->ldc_id, phase);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
return (1); 1868 } 1869 break; 1870 1871 case VSW_DRING_ACK_RECV: 1872 case VSW_DRING_NACK_RECV: 1873 if (!(state & VSW_DRING_INFO_SENT)) { 1874 DERR(vswp, "vsw_check_flag (%d): spurious DRING_ACK " 1875 " or DRING_NACK when in state %d\n", 1876 ldcp->ldc_id, phase); 1877 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1878 return (1); 1879 } else 1880 state &= ~VSW_DRING_INFO_SENT; 1881 break; 1882 1883 case VSW_RDX_INFO_RECV: 1884 if (phase < VSW_MILESTONE3) { 1885 DERR(vswp, "vsw_check_flag (%d): RDX_INFO_RECV" 1886 " when in state %d\n", ldcp->ldc_id, phase); 1887 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1888 return (1); 1889 } 1890 break; 1891 1892 case VSW_RDX_ACK_RECV: 1893 case VSW_RDX_NACK_RECV: 1894 if (!(state & VSW_RDX_INFO_SENT)) { 1895 DERR(vswp, "vsw_check_flag (%d): spurious RDX_ACK or " 1896 "RDX_NACK when in state %d\n", ldcp->ldc_id, phase); 1897 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1898 return (1); 1899 } else 1900 state &= ~VSW_RDX_INFO_SENT; 1901 break; 1902 1903 case VSW_MCST_INFO_RECV: 1904 if (phase < VSW_MILESTONE3) { 1905 DERR(vswp, "vsw_check_flag (%d): VSW_MCST_INFO_RECV" 1906 " when in state %d\n", ldcp->ldc_id, phase); 1907 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1908 return (1); 1909 } 1910 break; 1911 1912 default: 1913 DERR(vswp, "vsw_check_flag (%lld): unknown flag (%llx)", 1914 ldcp->ldc_id, flag); 1915 return (1); 1916 } 1917 1918 if (dir == INBOUND) 1919 ldcp->lane_in.lstate = state; 1920 else 1921 ldcp->lane_out.lstate = state; 1922 1923 D1(vswp, "vsw_check_flag (chan %lld): exit", ldcp->ldc_id); 1924 1925 return (0); 1926 } 1927 1928 void 1929 vsw_next_milestone(vsw_ldc_t *ldcp) 1930 { 1931 vsw_t *vswp = ldcp->ldc_vswp; 1932 vsw_port_t *portp = ldcp->ldc_port; 1933 1934 D1(vswp, "%s (chan %lld): enter (phase %ld)", __func__, 1935 ldcp->ldc_id, ldcp->hphase); 1936 1937 DUMP_FLAGS(ldcp->lane_in.lstate); 1938 DUMP_FLAGS(ldcp->lane_out.lstate); 1939 1940 switch (ldcp->hphase) { 1941 1942 case VSW_MILESTONE0: 1943 /* 1944 * If we haven't started to handshake with our peer, 1945 * start to do so now. 1946 */ 1947 if (ldcp->lane_out.lstate == 0) { 1948 D2(vswp, "%s: (chan %lld) starting handshake " 1949 "with peer", __func__, ldcp->ldc_id); 1950 vsw_process_conn_evt(ldcp, VSW_CONN_UP); 1951 } 1952 1953 /* 1954 * Only way to pass this milestone is to have successfully 1955 * negotiated version info. 1956 */ 1957 if ((ldcp->lane_in.lstate & VSW_VER_ACK_SENT) && 1958 (ldcp->lane_out.lstate & VSW_VER_ACK_RECV)) { 1959 1960 D2(vswp, "%s: (chan %lld) leaving milestone 0", 1961 __func__, ldcp->ldc_id); 1962 1963 vsw_set_vnet_proto_ops(ldcp); 1964 1965 /* 1966 * Next milestone is passed when attribute 1967 * information has been successfully exchanged. 1968 */ 1969 ldcp->hphase = VSW_MILESTONE1; 1970 vsw_send_attr(ldcp); 1971 1972 } 1973 break; 1974 1975 case VSW_MILESTONE1: 1976 /* 1977 * Only way to pass this milestone is to have successfully 1978 * negotiated attribute information. 
1979 */ 1980 if (ldcp->lane_in.lstate & VSW_ATTR_ACK_SENT) { 1981 1982 ldcp->hphase = VSW_MILESTONE2; 1983 1984 /* 1985 * If the peer device has said it wishes to 1986 * use descriptor rings then we send it our ring 1987 * info, otherwise we just set up a private ring 1988 * which we use an internal buffer 1989 */ 1990 if ((VSW_VER_GTEQ(ldcp, 1, 2) && 1991 (ldcp->lane_in.xfer_mode & VIO_DRING_MODE_V1_2)) || 1992 (VSW_VER_LT(ldcp, 1, 2) && 1993 (ldcp->lane_in.xfer_mode == 1994 VIO_DRING_MODE_V1_0))) { 1995 vsw_send_dring_info(ldcp); 1996 } 1997 } 1998 break; 1999 2000 case VSW_MILESTONE2: 2001 /* 2002 * If peer has indicated in its attribute message that 2003 * it wishes to use descriptor rings then the only way 2004 * to pass this milestone is for us to have received 2005 * valid dring info. 2006 * 2007 * If peer is not using descriptor rings then just fall 2008 * through. 2009 */ 2010 if ((VSW_VER_GTEQ(ldcp, 1, 2) && 2011 (ldcp->lane_in.xfer_mode & VIO_DRING_MODE_V1_2)) || 2012 (VSW_VER_LT(ldcp, 1, 2) && 2013 (ldcp->lane_in.xfer_mode == 2014 VIO_DRING_MODE_V1_0))) { 2015 if (!(ldcp->lane_in.lstate & VSW_DRING_ACK_SENT)) 2016 break; 2017 } 2018 2019 D2(vswp, "%s: (chan %lld) leaving milestone 2", 2020 __func__, ldcp->ldc_id); 2021 2022 ldcp->hphase = VSW_MILESTONE3; 2023 vsw_send_rdx(ldcp); 2024 break; 2025 2026 case VSW_MILESTONE3: 2027 /* 2028 * Pass this milestone when all paramaters have been 2029 * successfully exchanged and RDX sent in both directions. 2030 * 2031 * Mark outbound lane as available to transmit data. 2032 */ 2033 if ((ldcp->lane_out.lstate & VSW_RDX_ACK_SENT) && 2034 (ldcp->lane_in.lstate & VSW_RDX_ACK_RECV)) { 2035 2036 D2(vswp, "%s: (chan %lld) leaving milestone 3", 2037 __func__, ldcp->ldc_id); 2038 D2(vswp, "%s: ** handshake complete (0x%llx : " 2039 "0x%llx) **", __func__, ldcp->lane_in.lstate, 2040 ldcp->lane_out.lstate); 2041 ldcp->lane_out.lstate |= VSW_LANE_ACTIVE; 2042 ldcp->hphase = VSW_MILESTONE4; 2043 ldcp->hcnt = 0; 2044 DISPLAY_STATE(); 2045 /* Start HIO if enabled and capable */ 2046 if ((portp->p_hio_enabled) && (portp->p_hio_capable)) { 2047 D2(vswp, "%s: start HybridIO setup", __func__); 2048 vsw_hio_start(vswp, ldcp); 2049 } 2050 } else { 2051 D2(vswp, "%s: still in milestone 3 (0x%llx : 0x%llx)", 2052 __func__, ldcp->lane_in.lstate, 2053 ldcp->lane_out.lstate); 2054 } 2055 break; 2056 2057 case VSW_MILESTONE4: 2058 D2(vswp, "%s: (chan %lld) in milestone 4", __func__, 2059 ldcp->ldc_id); 2060 break; 2061 2062 default: 2063 DERR(vswp, "%s: (chan %lld) Unknown Phase %x", __func__, 2064 ldcp->ldc_id, ldcp->hphase); 2065 } 2066 2067 D1(vswp, "%s (chan %lld): exit (phase %ld)", __func__, ldcp->ldc_id, 2068 ldcp->hphase); 2069 } 2070 2071 /* 2072 * Check if major version is supported. 2073 * 2074 * Returns 0 if finds supported major number, and if necessary 2075 * adjusts the minor field. 2076 * 2077 * Returns 1 if can't match major number exactly. Sets mjor/minor 2078 * to next lowest support values, or to zero if no other values possible. 2079 */ 2080 static int 2081 vsw_supported_version(vio_ver_msg_t *vp) 2082 { 2083 int i; 2084 2085 D1(NULL, "vsw_supported_version: enter"); 2086 2087 for (i = 0; i < VSW_NUM_VER; i++) { 2088 if (vsw_versions[i].ver_major == vp->ver_major) { 2089 /* 2090 * Matching or lower major version found. Update 2091 * minor number if necessary. 
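 *
 * For example, assuming vsw_versions[] is ordered highest first with
 * {1, 4} as its top entry (hypothetical values): a peer offer of 1.9
 * is clamped here to 1.4 and 0 is returned, while an offer of 2.0
 * never matches and instead hits the higher-major check further down,
 * where it is rewritten to 1.4 and 1 is returned so the caller can
 * NACK with that suggestion.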
2092 */ 2093 if (vp->ver_minor > vsw_versions[i].ver_minor) { 2094 D2(NULL, "%s: adjusting minor value from %d " 2095 "to %d", __func__, vp->ver_minor, 2096 vsw_versions[i].ver_minor); 2097 vp->ver_minor = vsw_versions[i].ver_minor; 2098 } 2099 2100 return (0); 2101 } 2102 2103 /* 2104 * If the message contains a higher major version number, set 2105 * the message's major/minor versions to the current values 2106 * and return false, so this message will get resent with 2107 * these values. 2108 */ 2109 if (vsw_versions[i].ver_major < vp->ver_major) { 2110 D2(NULL, "%s: adjusting major and minor " 2111 "values to %d, %d\n", 2112 __func__, vsw_versions[i].ver_major, 2113 vsw_versions[i].ver_minor); 2114 vp->ver_major = vsw_versions[i].ver_major; 2115 vp->ver_minor = vsw_versions[i].ver_minor; 2116 return (1); 2117 } 2118 } 2119 2120 /* No match was possible, zero out fields */ 2121 vp->ver_major = 0; 2122 vp->ver_minor = 0; 2123 2124 D1(NULL, "vsw_supported_version: exit"); 2125 2126 return (1); 2127 } 2128 2129 /* 2130 * Set vnet-protocol-version dependent functions based on version. 2131 */ 2132 static void 2133 vsw_set_vnet_proto_ops(vsw_ldc_t *ldcp) 2134 { 2135 vsw_t *vswp = ldcp->ldc_vswp; 2136 lane_t *lp = &ldcp->lane_out; 2137 2138 if (VSW_VER_GTEQ(ldcp, 1, 4)) { 2139 /* 2140 * If the version negotiated with peer is >= 1.4(Jumbo Frame 2141 * Support), set the mtu in our attributes to max_frame_size. 2142 */ 2143 lp->mtu = vswp->max_frame_size; 2144 } else if (VSW_VER_EQ(ldcp, 1, 3)) { 2145 /* 2146 * If the version negotiated with peer is == 1.3 (Vlan Tag 2147 * Support) set the attr.mtu to ETHERMAX + VLAN_TAGSZ. 2148 */ 2149 lp->mtu = ETHERMAX + VLAN_TAGSZ; 2150 } else { 2151 vsw_port_t *portp = ldcp->ldc_port; 2152 /* 2153 * Pre-1.3 peers expect max frame size of ETHERMAX. 2154 * We can negotiate that size with those peers provided only 2155 * pvid is defined for our peer and there are no vids. Then we 2156 * can send/recv only untagged frames of max size ETHERMAX. 2157 * Note that pvid of the peer can be different, as vsw has to 2158 * serve the vnet in that vlan even if itself is not assigned 2159 * to that vlan. 2160 */ 2161 if (portp->nvids == 0) { 2162 lp->mtu = ETHERMAX; 2163 } 2164 } 2165 2166 if (VSW_VER_GTEQ(ldcp, 1, 2)) { 2167 /* Versions >= 1.2 */ 2168 2169 if (VSW_PRI_ETH_DEFINED(vswp)) { 2170 /* 2171 * enable priority routines and pkt mode only if 2172 * at least one pri-eth-type is specified in MD. 2173 */ 2174 ldcp->tx = vsw_ldctx_pri; 2175 ldcp->rx_pktdata = vsw_process_pkt_data; 2176 2177 /* set xfer mode for vsw_send_attr() */ 2178 lp->xfer_mode = VIO_PKT_MODE | VIO_DRING_MODE_V1_2; 2179 } else { 2180 /* no priority eth types defined in MD */ 2181 2182 ldcp->tx = vsw_ldctx; 2183 ldcp->rx_pktdata = vsw_process_pkt_data_nop; 2184 2185 /* set xfer mode for vsw_send_attr() */ 2186 lp->xfer_mode = VIO_DRING_MODE_V1_2; 2187 } 2188 2189 } else { 2190 /* Versions prior to 1.2 */ 2191 2192 vsw_reset_vnet_proto_ops(ldcp); 2193 } 2194 } 2195 2196 /* 2197 * Reset vnet-protocol-version dependent functions to v1.0. 2198 */ 2199 static void 2200 vsw_reset_vnet_proto_ops(vsw_ldc_t *ldcp) 2201 { 2202 lane_t *lp = &ldcp->lane_out; 2203 2204 ldcp->tx = vsw_ldctx; 2205 ldcp->rx_pktdata = vsw_process_pkt_data_nop; 2206 2207 /* set xfer mode for vsw_send_attr() */ 2208 lp->xfer_mode = VIO_DRING_MODE_V1_0; 2209 } 2210 2211 /* 2212 * Main routine for processing messages received over LDC. 
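 *
 * A rough sketch of the receive loop below:
 *
 *	do {
 *		ldc_read(handle, buf, &msglen);
 *		if (rv == ECONNRESET)	-> connection reset event, stop
 *		if (msglen == 0)	-> channel drained, stop
 *		switch (tag->vio_msgtype) {
 *		case VIO_TYPE_CTRL:	-> dispatch to taskq
 *		case VIO_TYPE_DATA:	-> process in line
 *		case VIO_TYPE_ERR:	-> error processing
 *		}
 *	} while (msglen);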
2213 */ 2214 static void 2215 vsw_process_pkt(void *arg) 2216 { 2217 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 2218 vsw_t *vswp = ldcp->ldc_vswp; 2219 size_t msglen; 2220 vio_msg_tag_t *tagp; 2221 uint64_t *ldcmsg; 2222 int rv = 0; 2223 2224 2225 D1(vswp, "%s enter: ldcid (%lld)\n", __func__, ldcp->ldc_id); 2226 2227 ASSERT(MUTEX_HELD(&ldcp->ldc_cblock)); 2228 2229 ldcmsg = ldcp->ldcmsg; 2230 /* 2231 * If channel is up read messages until channel is empty. 2232 */ 2233 do { 2234 msglen = ldcp->msglen; 2235 rv = ldc_read(ldcp->ldc_handle, (caddr_t)ldcmsg, &msglen); 2236 2237 if (rv != 0) { 2238 DERR(vswp, "%s :ldc_read err id(%lld) rv(%d) len(%d)\n", 2239 __func__, ldcp->ldc_id, rv, msglen); 2240 } 2241 2242 /* channel has been reset */ 2243 if (rv == ECONNRESET) { 2244 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 2245 break; 2246 } 2247 2248 if (msglen == 0) { 2249 D2(vswp, "%s: ldc_read id(%lld) NODATA", __func__, 2250 ldcp->ldc_id); 2251 break; 2252 } 2253 2254 D2(vswp, "%s: ldc_read id(%lld): msglen(%d)", __func__, 2255 ldcp->ldc_id, msglen); 2256 2257 /* 2258 * Figure out what sort of packet we have gotten by 2259 * examining the msg tag, and then switch it appropriately. 2260 */ 2261 tagp = (vio_msg_tag_t *)ldcmsg; 2262 2263 switch (tagp->vio_msgtype) { 2264 case VIO_TYPE_CTRL: 2265 vsw_dispatch_ctrl_task(ldcp, ldcmsg, tagp); 2266 break; 2267 case VIO_TYPE_DATA: 2268 vsw_process_data_pkt(ldcp, ldcmsg, tagp, msglen); 2269 break; 2270 case VIO_TYPE_ERR: 2271 vsw_process_err_pkt(ldcp, ldcmsg, tagp); 2272 break; 2273 default: 2274 DERR(vswp, "%s: Unknown tag(%lx) ", __func__, 2275 "id(%lx)\n", tagp->vio_msgtype, ldcp->ldc_id); 2276 break; 2277 } 2278 } while (msglen); 2279 2280 D1(vswp, "%s exit: ldcid (%lld)\n", __func__, ldcp->ldc_id); 2281 } 2282 2283 /* 2284 * Dispatch a task to process a VIO control message. 2285 */ 2286 static void 2287 vsw_dispatch_ctrl_task(vsw_ldc_t *ldcp, void *cpkt, vio_msg_tag_t *tagp) 2288 { 2289 vsw_ctrl_task_t *ctaskp = NULL; 2290 vsw_port_t *port = ldcp->ldc_port; 2291 vsw_t *vswp = port->p_vswp; 2292 2293 D1(vswp, "%s: enter", __func__); 2294 2295 /* 2296 * We need to handle RDX ACK messages in-band as once they 2297 * are exchanged it is possible that we will get an 2298 * immediate (legitimate) data packet. 2299 */ 2300 if ((tagp->vio_subtype_env == VIO_RDX) && 2301 (tagp->vio_subtype == VIO_SUBTYPE_ACK)) { 2302 2303 if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_ACK_RECV)) 2304 return; 2305 2306 ldcp->lane_in.lstate |= VSW_RDX_ACK_RECV; 2307 D2(vswp, "%s (%ld) handling RDX_ACK in place " 2308 "(ostate 0x%llx : hphase %d)", __func__, 2309 ldcp->ldc_id, ldcp->lane_in.lstate, ldcp->hphase); 2310 vsw_next_milestone(ldcp); 2311 return; 2312 } 2313 2314 ctaskp = kmem_alloc(sizeof (vsw_ctrl_task_t), KM_NOSLEEP); 2315 2316 if (ctaskp == NULL) { 2317 DERR(vswp, "%s: unable to alloc space for ctrl msg", __func__); 2318 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2319 return; 2320 } 2321 2322 ctaskp->ldcp = ldcp; 2323 bcopy((def_msg_t *)cpkt, &ctaskp->pktp, sizeof (def_msg_t)); 2324 ctaskp->hss_id = ldcp->hss_id; 2325 2326 /* 2327 * Dispatch task to processing taskq if port is not in 2328 * the process of being detached. 
2329 */ 2330 mutex_enter(&port->state_lock); 2331 if (port->state == VSW_PORT_INIT) { 2332 if ((vswp->taskq_p == NULL) || 2333 (ddi_taskq_dispatch(vswp->taskq_p, vsw_process_ctrl_pkt, 2334 ctaskp, DDI_NOSLEEP) != DDI_SUCCESS)) { 2335 mutex_exit(&port->state_lock); 2336 DERR(vswp, "%s: unable to dispatch task to taskq", 2337 __func__); 2338 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2339 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2340 return; 2341 } 2342 } else { 2343 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2344 DWARN(vswp, "%s: port %d detaching, not dispatching " 2345 "task", __func__, port->p_instance); 2346 } 2347 2348 mutex_exit(&port->state_lock); 2349 2350 D2(vswp, "%s: dispatched task to taskq for chan %d", __func__, 2351 ldcp->ldc_id); 2352 D1(vswp, "%s: exit", __func__); 2353 } 2354 2355 /* 2356 * Process a VIO ctrl message. Invoked from taskq. 2357 */ 2358 static void 2359 vsw_process_ctrl_pkt(void *arg) 2360 { 2361 vsw_ctrl_task_t *ctaskp = (vsw_ctrl_task_t *)arg; 2362 vsw_ldc_t *ldcp = ctaskp->ldcp; 2363 vsw_t *vswp = ldcp->ldc_vswp; 2364 vio_msg_tag_t tag; 2365 uint16_t env; 2366 2367 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 2368 2369 bcopy(&ctaskp->pktp, &tag, sizeof (vio_msg_tag_t)); 2370 env = tag.vio_subtype_env; 2371 2372 /* stale pkt check */ 2373 if (ctaskp->hss_id < ldcp->hss_id) { 2374 DWARN(vswp, "%s: discarding stale packet belonging to earlier" 2375 " (%ld) handshake session", __func__, ctaskp->hss_id); 2376 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2377 return; 2378 } 2379 2380 /* session id check */ 2381 if (ldcp->session_status & VSW_PEER_SESSION) { 2382 if (ldcp->peer_session != tag.vio_sid) { 2383 DERR(vswp, "%s (chan %d): invalid session id (%llx)", 2384 __func__, ldcp->ldc_id, tag.vio_sid); 2385 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2386 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2387 return; 2388 } 2389 } 2390 2391 /* 2392 * Switch on vio_subtype envelope, then let lower routines 2393 * decide if its an INFO, ACK or NACK packet. 2394 */ 2395 switch (env) { 2396 case VIO_VER_INFO: 2397 vsw_process_ctrl_ver_pkt(ldcp, &ctaskp->pktp); 2398 break; 2399 case VIO_DRING_REG: 2400 vsw_process_ctrl_dring_reg_pkt(ldcp, &ctaskp->pktp); 2401 break; 2402 case VIO_DRING_UNREG: 2403 vsw_process_ctrl_dring_unreg_pkt(ldcp, &ctaskp->pktp); 2404 break; 2405 case VIO_ATTR_INFO: 2406 vsw_process_ctrl_attr_pkt(ldcp, &ctaskp->pktp); 2407 break; 2408 case VNET_MCAST_INFO: 2409 vsw_process_ctrl_mcst_pkt(ldcp, &ctaskp->pktp); 2410 break; 2411 case VIO_RDX: 2412 vsw_process_ctrl_rdx_pkt(ldcp, &ctaskp->pktp); 2413 break; 2414 case VIO_DDS_INFO: 2415 vsw_process_dds_msg(vswp, ldcp, &ctaskp->pktp); 2416 break; 2417 default: 2418 DERR(vswp, "%s: unknown vio_subtype_env (%x)\n", __func__, env); 2419 } 2420 2421 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2422 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 2423 } 2424 2425 /* 2426 * Version negotiation. We can end up here either because our peer 2427 * has responded to a handshake message we have sent it, or our peer 2428 * has initiated a handshake with us. If its the former then can only 2429 * be ACK or NACK, if its the later can only be INFO. 2430 * 2431 * If its an ACK we move to the next stage of the handshake, namely 2432 * attribute exchange. If its a NACK we see if we can specify another 2433 * version, if we can't we stop. 
2434 * 2435 * If it is an INFO we reset all params associated with communication 2436 * in that direction over this channel (remember connection is 2437 * essentially 2 independent simplex channels). 2438 */ 2439 void 2440 vsw_process_ctrl_ver_pkt(vsw_ldc_t *ldcp, void *pkt) 2441 { 2442 vio_ver_msg_t *ver_pkt; 2443 vsw_t *vswp = ldcp->ldc_vswp; 2444 2445 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 2446 2447 /* 2448 * We know this is a ctrl/version packet so 2449 * cast it into the correct structure. 2450 */ 2451 ver_pkt = (vio_ver_msg_t *)pkt; 2452 2453 switch (ver_pkt->tag.vio_subtype) { 2454 case VIO_SUBTYPE_INFO: 2455 D2(vswp, "vsw_process_ctrl_ver_pkt: VIO_SUBTYPE_INFO\n"); 2456 2457 /* 2458 * Record the session id, which we will use from now 2459 * until we see another VER_INFO msg. Even then the 2460 * session id in most cases will be unchanged, execpt 2461 * if channel was reset. 2462 */ 2463 if ((ldcp->session_status & VSW_PEER_SESSION) && 2464 (ldcp->peer_session != ver_pkt->tag.vio_sid)) { 2465 DERR(vswp, "%s: updating session id for chan %lld " 2466 "from %llx to %llx", __func__, ldcp->ldc_id, 2467 ldcp->peer_session, ver_pkt->tag.vio_sid); 2468 } 2469 2470 ldcp->peer_session = ver_pkt->tag.vio_sid; 2471 ldcp->session_status |= VSW_PEER_SESSION; 2472 2473 /* Legal message at this time ? */ 2474 if (vsw_check_flag(ldcp, INBOUND, VSW_VER_INFO_RECV)) 2475 return; 2476 2477 /* 2478 * First check the device class. Currently only expect 2479 * to be talking to a network device. In the future may 2480 * also talk to another switch. 2481 */ 2482 if (ver_pkt->dev_class != VDEV_NETWORK) { 2483 DERR(vswp, "%s: illegal device class %d", __func__, 2484 ver_pkt->dev_class); 2485 2486 ver_pkt->tag.vio_sid = ldcp->local_session; 2487 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2488 2489 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 2490 2491 (void) vsw_send_msg(ldcp, (void *)ver_pkt, 2492 sizeof (vio_ver_msg_t), B_TRUE); 2493 2494 ldcp->lane_in.lstate |= VSW_VER_NACK_SENT; 2495 vsw_next_milestone(ldcp); 2496 return; 2497 } else { 2498 ldcp->dev_class = ver_pkt->dev_class; 2499 } 2500 2501 /* 2502 * Now check the version. 2503 */ 2504 if (vsw_supported_version(ver_pkt) == 0) { 2505 /* 2506 * Support this major version and possibly 2507 * adjusted minor version. 2508 */ 2509 2510 D2(vswp, "%s: accepted ver %d:%d", __func__, 2511 ver_pkt->ver_major, ver_pkt->ver_minor); 2512 2513 /* Store accepted values */ 2514 ldcp->lane_in.ver_major = ver_pkt->ver_major; 2515 ldcp->lane_in.ver_minor = ver_pkt->ver_minor; 2516 2517 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 2518 2519 ldcp->lane_in.lstate |= VSW_VER_ACK_SENT; 2520 2521 if (vsw_obp_ver_proto_workaround == B_TRUE) { 2522 /* 2523 * Send a version info message 2524 * using the accepted version that 2525 * we are about to ack. Also note that 2526 * we send our ver info before we ack. 2527 * Otherwise, as soon as receiving the 2528 * ack, obp sends attr info msg, which 2529 * breaks vsw_check_flag() invoked 2530 * from vsw_process_ctrl_attr_pkt(); 2531 * as we also need VSW_VER_ACK_RECV to 2532 * be set in lane_out.lstate, before 2533 * we can receive attr info. 2534 */ 2535 vsw_send_ver(ldcp); 2536 } 2537 } else { 2538 /* 2539 * NACK back with the next lower major/minor 2540 * pairing we support (if don't suuport any more 2541 * versions then they will be set to zero. 
2542 */ 2543 2544 D2(vswp, "%s: replying with ver %d:%d", __func__, 2545 ver_pkt->ver_major, ver_pkt->ver_minor); 2546 2547 /* Store updated values */ 2548 ldcp->lane_in.ver_major = ver_pkt->ver_major; 2549 ldcp->lane_in.ver_minor = ver_pkt->ver_minor; 2550 2551 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2552 2553 ldcp->lane_in.lstate |= VSW_VER_NACK_SENT; 2554 } 2555 2556 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 2557 ver_pkt->tag.vio_sid = ldcp->local_session; 2558 (void) vsw_send_msg(ldcp, (void *)ver_pkt, 2559 sizeof (vio_ver_msg_t), B_TRUE); 2560 2561 vsw_next_milestone(ldcp); 2562 break; 2563 2564 case VIO_SUBTYPE_ACK: 2565 D2(vswp, "%s: VIO_SUBTYPE_ACK\n", __func__); 2566 2567 if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_ACK_RECV)) 2568 return; 2569 2570 /* Store updated values */ 2571 ldcp->lane_out.ver_major = ver_pkt->ver_major; 2572 ldcp->lane_out.ver_minor = ver_pkt->ver_minor; 2573 2574 ldcp->lane_out.lstate |= VSW_VER_ACK_RECV; 2575 vsw_next_milestone(ldcp); 2576 2577 break; 2578 2579 case VIO_SUBTYPE_NACK: 2580 D2(vswp, "%s: VIO_SUBTYPE_NACK\n", __func__); 2581 2582 if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_NACK_RECV)) 2583 return; 2584 2585 /* 2586 * If our peer sent us a NACK with the ver fields set to 2587 * zero then there is nothing more we can do. Otherwise see 2588 * if we support either the version suggested, or a lesser 2589 * one. 2590 */ 2591 if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) { 2592 DERR(vswp, "%s: peer unable to negotiate any " 2593 "further.", __func__); 2594 ldcp->lane_out.lstate |= VSW_VER_NACK_RECV; 2595 vsw_next_milestone(ldcp); 2596 return; 2597 } 2598 2599 /* 2600 * Check to see if we support this major version or 2601 * a lower one. If we don't then maj/min will be set 2602 * to zero. 2603 */ 2604 (void) vsw_supported_version(ver_pkt); 2605 if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) { 2606 /* Nothing more we can do */ 2607 DERR(vswp, "%s: version negotiation failed.\n", 2608 __func__); 2609 ldcp->lane_out.lstate |= VSW_VER_NACK_RECV; 2610 vsw_next_milestone(ldcp); 2611 } else { 2612 /* found a supported major version */ 2613 ldcp->lane_out.ver_major = ver_pkt->ver_major; 2614 ldcp->lane_out.ver_minor = ver_pkt->ver_minor; 2615 2616 D2(vswp, "%s: resending with updated values (%x, %x)", 2617 __func__, ver_pkt->ver_major, ver_pkt->ver_minor); 2618 2619 ldcp->lane_out.lstate |= VSW_VER_INFO_SENT; 2620 ver_pkt->tag.vio_sid = ldcp->local_session; 2621 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO; 2622 2623 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 2624 2625 (void) vsw_send_msg(ldcp, (void *)ver_pkt, 2626 sizeof (vio_ver_msg_t), B_TRUE); 2627 2628 vsw_next_milestone(ldcp); 2629 2630 } 2631 break; 2632 2633 default: 2634 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 2635 ver_pkt->tag.vio_subtype); 2636 } 2637 2638 D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id); 2639 } 2640 2641 /* 2642 * Process an attribute packet. We can end up here either because our peer 2643 * has ACK/NACK'ed back to an earlier ATTR msg we had sent it, or our 2644 * peer has sent us an attribute INFO message 2645 * 2646 * If its an ACK we then move to the next stage of the handshake which 2647 * is to send our descriptor ring info to our peer. If its a NACK then 2648 * there is nothing more we can (currently) do. 2649 * 2650 * If we get a valid/acceptable INFO packet (and we have already negotiated 2651 * a version) we ACK back and set channel state to ATTR_RECV, otherwise we 2652 * NACK back and reset channel state to INACTIV. 
2653 * 2654 * FUTURE: in time we will probably negotiate over attributes, but for 2655 * the moment unacceptable attributes are regarded as a fatal error. 2656 * 2657 */ 2658 void 2659 vsw_process_ctrl_attr_pkt(vsw_ldc_t *ldcp, void *pkt) 2660 { 2661 vnet_attr_msg_t *attr_pkt; 2662 vsw_t *vswp = ldcp->ldc_vswp; 2663 vsw_port_t *port = ldcp->ldc_port; 2664 uint64_t macaddr = 0; 2665 lane_t *lane_out = &ldcp->lane_out; 2666 lane_t *lane_in = &ldcp->lane_in; 2667 uint32_t mtu; 2668 boolean_t ack = B_TRUE; 2669 int i; 2670 2671 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 2672 2673 /* 2674 * We know this is a ctrl/attr packet so 2675 * cast it into the correct structure. 2676 */ 2677 attr_pkt = (vnet_attr_msg_t *)pkt; 2678 2679 switch (attr_pkt->tag.vio_subtype) { 2680 case VIO_SUBTYPE_INFO: 2681 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 2682 2683 if (vsw_check_flag(ldcp, INBOUND, VSW_ATTR_INFO_RECV)) 2684 return; 2685 2686 /* 2687 * If the attributes are unacceptable then we NACK back. 2688 */ 2689 if (vsw_check_attr(attr_pkt, ldcp)) { 2690 ack = B_FALSE; 2691 2692 DERR(vswp, "%s (chan %d): invalid attributes", 2693 __func__, ldcp->ldc_id); 2694 2695 } else { 2696 2697 if (VSW_VER_GTEQ(ldcp, 1, 4)) { 2698 /* 2699 * Versions >= 1.4: 2700 * The mtu is negotiated down to the 2701 * minimum of our mtu and peer's mtu. 2702 */ 2703 mtu = MIN(attr_pkt->mtu, vswp->max_frame_size); 2704 2705 /* 2706 * If we have received an ack for the attr info 2707 * that we sent, then check if the mtu computed 2708 * above matches the mtu that the peer had ack'd 2709 * (saved in local hparams). If they don't 2710 * match, we fail the handshake. 2711 */ 2712 if (lane_out->lstate & VSW_ATTR_ACK_RECV) { 2713 if (mtu != lane_out->mtu) { 2714 /* send NACK */ 2715 ack = B_FALSE; 2716 } 2717 } else { 2718 /* 2719 * Save the mtu computed above in our 2720 * attr parameters, so it gets sent in 2721 * the attr info from us to the peer. 2722 */ 2723 lane_out->mtu = mtu; 2724 } 2725 } 2726 2727 } 2728 2729 if (ack == B_FALSE) { 2730 2731 vsw_free_lane_resources(ldcp, INBOUND); 2732 2733 attr_pkt->tag.vio_sid = ldcp->local_session; 2734 attr_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2735 2736 DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt); 2737 ldcp->lane_in.lstate |= VSW_ATTR_NACK_SENT; 2738 (void) vsw_send_msg(ldcp, (void *)attr_pkt, 2739 sizeof (vnet_attr_msg_t), B_TRUE); 2740 2741 vsw_next_milestone(ldcp); 2742 return; 2743 } 2744 2745 /* 2746 * Otherwise store attributes for this lane and update 2747 * lane state. 
2748 */ 2749 lane_in->mtu = attr_pkt->mtu; 2750 lane_in->addr = attr_pkt->addr; 2751 lane_in->addr_type = attr_pkt->addr_type; 2752 lane_in->xfer_mode = attr_pkt->xfer_mode; 2753 lane_in->ack_freq = attr_pkt->ack_freq; 2754 2755 if (VSW_VER_GTEQ(ldcp, 1, 4)) { 2756 /* save the MIN mtu in the msg to be replied */ 2757 attr_pkt->mtu = mtu; 2758 } 2759 2760 macaddr = lane_in->addr; 2761 for (i = ETHERADDRL - 1; i >= 0; i--) { 2762 port->p_macaddr.ether_addr_octet[i] = macaddr & 0xFF; 2763 macaddr >>= 8; 2764 } 2765 2766 /* create the fdb entry for this port/mac address */ 2767 vsw_fdbe_add(vswp, port); 2768 2769 /* add the port to the specified vlans */ 2770 vsw_vlan_add_ids(port, VSW_VNETPORT); 2771 2772 /* setup device specifc xmit routines */ 2773 mutex_enter(&port->tx_lock); 2774 if ((VSW_VER_GTEQ(ldcp, 1, 2) && 2775 (lane_in->xfer_mode & VIO_DRING_MODE_V1_2)) || 2776 (VSW_VER_LT(ldcp, 1, 2) && 2777 (lane_in->xfer_mode == VIO_DRING_MODE_V1_0))) { 2778 D2(vswp, "%s: mode = VIO_DRING_MODE", __func__); 2779 port->transmit = vsw_dringsend; 2780 } else if (lane_in->xfer_mode == VIO_DESC_MODE) { 2781 D2(vswp, "%s: mode = VIO_DESC_MODE", __func__); 2782 vsw_create_privring(ldcp); 2783 port->transmit = vsw_descrsend; 2784 lane_out->xfer_mode = VIO_DESC_MODE; 2785 } 2786 2787 /* 2788 * HybridIO is supported only vnet, not by OBP. 2789 * So, set hio_capable to true only when in DRING mode. 2790 */ 2791 if (VSW_VER_GTEQ(ldcp, 1, 3) && 2792 (lane_in->xfer_mode != VIO_DESC_MODE)) { 2793 (void) atomic_swap_32(&port->p_hio_capable, B_TRUE); 2794 } else { 2795 (void) atomic_swap_32(&port->p_hio_capable, B_FALSE); 2796 } 2797 2798 mutex_exit(&port->tx_lock); 2799 2800 attr_pkt->tag.vio_sid = ldcp->local_session; 2801 attr_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 2802 2803 DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt); 2804 2805 lane_in->lstate |= VSW_ATTR_ACK_SENT; 2806 2807 (void) vsw_send_msg(ldcp, (void *)attr_pkt, 2808 sizeof (vnet_attr_msg_t), B_TRUE); 2809 2810 vsw_next_milestone(ldcp); 2811 break; 2812 2813 case VIO_SUBTYPE_ACK: 2814 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 2815 2816 if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_ACK_RECV)) 2817 return; 2818 2819 if (VSW_VER_GTEQ(ldcp, 1, 4)) { 2820 /* 2821 * Versions >= 1.4: 2822 * The ack msg sent by the peer contains the minimum of 2823 * our mtu (that we had sent in our attr info) and the 2824 * peer's mtu. 2825 * 2826 * If we have sent an ack for the attr info msg from 2827 * the peer, check if the mtu that was computed then 2828 * (saved in lane_out params) matches the mtu that the 2829 * peer has ack'd. If they don't match, we fail the 2830 * handshake. 2831 */ 2832 if (lane_in->lstate & VSW_ATTR_ACK_SENT) { 2833 if (lane_out->mtu != attr_pkt->mtu) { 2834 return; 2835 } 2836 } else { 2837 /* 2838 * If the mtu ack'd by the peer is > our mtu 2839 * fail handshake. Otherwise, save the mtu, so 2840 * we can validate it when we receive attr info 2841 * from our peer. 
2842 */ 2843 if (attr_pkt->mtu > lane_out->mtu) { 2844 return; 2845 } 2846 if (attr_pkt->mtu <= lane_out->mtu) { 2847 lane_out->mtu = attr_pkt->mtu; 2848 } 2849 } 2850 } 2851 2852 lane_out->lstate |= VSW_ATTR_ACK_RECV; 2853 vsw_next_milestone(ldcp); 2854 break; 2855 2856 case VIO_SUBTYPE_NACK: 2857 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 2858 2859 if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_NACK_RECV)) 2860 return; 2861 2862 lane_out->lstate |= VSW_ATTR_NACK_RECV; 2863 vsw_next_milestone(ldcp); 2864 break; 2865 2866 default: 2867 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 2868 attr_pkt->tag.vio_subtype); 2869 } 2870 2871 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 2872 } 2873 2874 /* 2875 * Process a dring info packet. We can end up here either because our peer 2876 * has ACK/NACK'ed back to an earlier DRING msg we had sent it, or our 2877 * peer has sent us a dring INFO message. 2878 * 2879 * If we get a valid/acceptable INFO packet (and we have already negotiated 2880 * a version) we ACK back and update the lane state, otherwise we NACK back. 2881 * 2882 * FUTURE: nothing to stop client from sending us info on multiple dring's 2883 * but for the moment we will just use the first one we are given. 2884 * 2885 */ 2886 void 2887 vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *ldcp, void *pkt) 2888 { 2889 vio_dring_reg_msg_t *dring_pkt; 2890 vsw_t *vswp = ldcp->ldc_vswp; 2891 ldc_mem_info_t minfo; 2892 dring_info_t *dp, *dbp; 2893 int dring_found = 0; 2894 2895 /* 2896 * We know this is a ctrl/dring packet so 2897 * cast it into the correct structure. 2898 */ 2899 dring_pkt = (vio_dring_reg_msg_t *)pkt; 2900 2901 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 2902 2903 switch (dring_pkt->tag.vio_subtype) { 2904 case VIO_SUBTYPE_INFO: 2905 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 2906 2907 if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV)) 2908 return; 2909 2910 /* 2911 * If the dring params are unacceptable then we NACK back. 2912 */ 2913 if (vsw_check_dring_info(dring_pkt)) { 2914 2915 DERR(vswp, "%s (%lld): invalid dring info", 2916 __func__, ldcp->ldc_id); 2917 2918 vsw_free_lane_resources(ldcp, INBOUND); 2919 2920 dring_pkt->tag.vio_sid = ldcp->local_session; 2921 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2922 2923 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 2924 2925 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 2926 2927 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 2928 sizeof (vio_dring_reg_msg_t), B_TRUE); 2929 2930 vsw_next_milestone(ldcp); 2931 return; 2932 } 2933 2934 /* 2935 * Otherwise, attempt to map in the dring using the 2936 * cookie. If that succeeds we send back a unique dring 2937 * identifier that the sending side will use in future 2938 * to refer to this descriptor ring. 2939 */ 2940 dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 2941 2942 dp->num_descriptors = dring_pkt->num_descriptors; 2943 dp->descriptor_size = dring_pkt->descriptor_size; 2944 dp->options = dring_pkt->options; 2945 dp->ncookies = dring_pkt->ncookies; 2946 2947 /* 2948 * Note: should only get one cookie. Enforced in 2949 * the ldc layer. 
2950 */ 2951 bcopy(&dring_pkt->cookie[0], &dp->cookie[0], 2952 sizeof (ldc_mem_cookie_t)); 2953 2954 D2(vswp, "%s: num_desc %ld : desc_size %ld", __func__, 2955 dp->num_descriptors, dp->descriptor_size); 2956 D2(vswp, "%s: options 0x%lx: ncookies %ld", __func__, 2957 dp->options, dp->ncookies); 2958 2959 if ((ldc_mem_dring_map(ldcp->ldc_handle, &dp->cookie[0], 2960 dp->ncookies, dp->num_descriptors, dp->descriptor_size, 2961 LDC_DIRECT_MAP, &(dp->handle))) != 0) { 2962 2963 DERR(vswp, "%s: dring_map failed\n", __func__); 2964 2965 kmem_free(dp, sizeof (dring_info_t)); 2966 vsw_free_lane_resources(ldcp, INBOUND); 2967 2968 dring_pkt->tag.vio_sid = ldcp->local_session; 2969 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2970 2971 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 2972 2973 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 2974 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 2975 sizeof (vio_dring_reg_msg_t), B_TRUE); 2976 2977 vsw_next_milestone(ldcp); 2978 return; 2979 } 2980 2981 if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) { 2982 2983 DERR(vswp, "%s: dring_addr failed\n", __func__); 2984 2985 kmem_free(dp, sizeof (dring_info_t)); 2986 vsw_free_lane_resources(ldcp, INBOUND); 2987 2988 dring_pkt->tag.vio_sid = ldcp->local_session; 2989 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2990 2991 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 2992 2993 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 2994 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 2995 sizeof (vio_dring_reg_msg_t), B_TRUE); 2996 2997 vsw_next_milestone(ldcp); 2998 return; 2999 } else { 3000 /* store the address of the pub part of ring */ 3001 dp->pub_addr = minfo.vaddr; 3002 3003 /* cache the dring mtype */ 3004 dp->dring_mtype = minfo.mtype; 3005 } 3006 3007 /* no private section as we are importing */ 3008 dp->priv_addr = NULL; 3009 3010 /* 3011 * Using simple mono increasing int for ident at 3012 * the moment. 3013 */ 3014 dp->ident = ldcp->next_ident; 3015 ldcp->next_ident++; 3016 3017 dp->end_idx = 0; 3018 dp->next = NULL; 3019 3020 /* 3021 * Link it onto the end of the list of drings 3022 * for this lane. 3023 */ 3024 if (ldcp->lane_in.dringp == NULL) { 3025 D2(vswp, "%s: adding first INBOUND dring", __func__); 3026 ldcp->lane_in.dringp = dp; 3027 } else { 3028 dbp = ldcp->lane_in.dringp; 3029 3030 while (dbp->next != NULL) 3031 dbp = dbp->next; 3032 3033 dbp->next = dp; 3034 } 3035 3036 /* acknowledge it */ 3037 dring_pkt->tag.vio_sid = ldcp->local_session; 3038 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3039 dring_pkt->dring_ident = dp->ident; 3040 3041 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 3042 sizeof (vio_dring_reg_msg_t), B_TRUE); 3043 3044 ldcp->lane_in.lstate |= VSW_DRING_ACK_SENT; 3045 vsw_next_milestone(ldcp); 3046 break; 3047 3048 case VIO_SUBTYPE_ACK: 3049 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3050 3051 if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_ACK_RECV)) 3052 return; 3053 3054 /* 3055 * Peer is acknowledging our dring info and will have 3056 * sent us a dring identifier which we will use to 3057 * refer to this ring w.r.t. our peer. 3058 */ 3059 dp = ldcp->lane_out.dringp; 3060 if (dp != NULL) { 3061 /* 3062 * Find the ring this ident should be associated 3063 * with. 
3064 */ 3065 if (vsw_dring_match(dp, dring_pkt)) { 3066 dring_found = 1; 3067 3068 } else while (dp != NULL) { 3069 if (vsw_dring_match(dp, dring_pkt)) { 3070 dring_found = 1; 3071 break; 3072 } 3073 dp = dp->next; 3074 } 3075 3076 if (dring_found == 0) { 3077 DERR(NULL, "%s: unrecognised ring cookie", 3078 __func__); 3079 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3080 return; 3081 } 3082 3083 } else { 3084 DERR(vswp, "%s: DRING ACK received but no drings " 3085 "allocated", __func__); 3086 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3087 return; 3088 } 3089 3090 /* store ident */ 3091 dp->ident = dring_pkt->dring_ident; 3092 ldcp->lane_out.lstate |= VSW_DRING_ACK_RECV; 3093 vsw_next_milestone(ldcp); 3094 break; 3095 3096 case VIO_SUBTYPE_NACK: 3097 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3098 3099 if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_NACK_RECV)) 3100 return; 3101 3102 ldcp->lane_out.lstate |= VSW_DRING_NACK_RECV; 3103 vsw_next_milestone(ldcp); 3104 break; 3105 3106 default: 3107 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 3108 dring_pkt->tag.vio_subtype); 3109 } 3110 3111 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 3112 } 3113 3114 /* 3115 * Process a request from peer to unregister a dring. 3116 * 3117 * For the moment we just restart the handshake if our 3118 * peer endpoint attempts to unregister a dring. 3119 */ 3120 void 3121 vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *ldcp, void *pkt) 3122 { 3123 vsw_t *vswp = ldcp->ldc_vswp; 3124 vio_dring_unreg_msg_t *dring_pkt; 3125 3126 /* 3127 * We know this is a ctrl/dring packet so 3128 * cast it into the correct structure. 3129 */ 3130 dring_pkt = (vio_dring_unreg_msg_t *)pkt; 3131 3132 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3133 3134 switch (dring_pkt->tag.vio_subtype) { 3135 case VIO_SUBTYPE_INFO: 3136 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3137 3138 DWARN(vswp, "%s: restarting handshake..", __func__); 3139 break; 3140 3141 case VIO_SUBTYPE_ACK: 3142 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3143 3144 DWARN(vswp, "%s: restarting handshake..", __func__); 3145 break; 3146 3147 case VIO_SUBTYPE_NACK: 3148 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3149 3150 DWARN(vswp, "%s: restarting handshake..", __func__); 3151 break; 3152 3153 default: 3154 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 3155 dring_pkt->tag.vio_subtype); 3156 } 3157 3158 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3159 3160 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3161 } 3162 3163 #define SND_MCST_NACK(ldcp, pkt) \ 3164 pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \ 3165 pkt->tag.vio_sid = ldcp->local_session; \ 3166 (void) vsw_send_msg(ldcp, (void *)pkt, \ 3167 sizeof (vnet_mcast_msg_t), B_TRUE); 3168 3169 /* 3170 * Process a multicast request from a vnet. 3171 * 3172 * Vnet's specify a multicast address that they are interested in. This 3173 * address is used as a key into the hash table which forms the multicast 3174 * forwarding database (mFDB). 3175 * 3176 * The table keys are the multicast addresses, while the table entries 3177 * are pointers to lists of ports which wish to receive packets for the 3178 * specified multicast address. 3179 * 3180 * When a multicast packet is being switched we use the address as a key 3181 * into the hash table, and then walk the appropriate port list forwarding 3182 * the pkt to each port in turn. 
3183 * 3184 * If a vnet is no longer interested in a particular multicast grouping 3185 * we simply find the correct location in the hash table and then delete 3186 * the relevant port from the port list. 3187 * 3188 * To deal with the case whereby a port is being deleted without first 3189 * removing itself from the lists in the hash table, we maintain a list 3190 * of multicast addresses the port has registered an interest in, within 3191 * the port structure itself. We then simply walk that list of addresses 3192 * using them as keys into the hash table and remove the port from the 3193 * appropriate lists. 3194 */ 3195 static void 3196 vsw_process_ctrl_mcst_pkt(vsw_ldc_t *ldcp, void *pkt) 3197 { 3198 vnet_mcast_msg_t *mcst_pkt; 3199 vsw_port_t *port = ldcp->ldc_port; 3200 vsw_t *vswp = ldcp->ldc_vswp; 3201 int i; 3202 3203 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3204 3205 /* 3206 * We know this is a ctrl/mcast packet so 3207 * cast it into the correct structure. 3208 */ 3209 mcst_pkt = (vnet_mcast_msg_t *)pkt; 3210 3211 switch (mcst_pkt->tag.vio_subtype) { 3212 case VIO_SUBTYPE_INFO: 3213 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3214 3215 /* 3216 * Check if in correct state to receive a multicast 3217 * message (i.e. handshake complete). If not reset 3218 * the handshake. 3219 */ 3220 if (vsw_check_flag(ldcp, INBOUND, VSW_MCST_INFO_RECV)) 3221 return; 3222 3223 /* 3224 * Before attempting to add or remove address check 3225 * that they are valid multicast addresses. 3226 * If not, then NACK back. 3227 */ 3228 for (i = 0; i < mcst_pkt->count; i++) { 3229 if ((mcst_pkt->mca[i].ether_addr_octet[0] & 01) != 1) { 3230 DERR(vswp, "%s: invalid multicast address", 3231 __func__); 3232 SND_MCST_NACK(ldcp, mcst_pkt); 3233 return; 3234 } 3235 } 3236 3237 /* 3238 * Now add/remove the addresses. If this fails we 3239 * NACK back. 3240 */ 3241 if (vsw_add_rem_mcst(mcst_pkt, port) != 0) { 3242 SND_MCST_NACK(ldcp, mcst_pkt); 3243 return; 3244 } 3245 3246 mcst_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3247 mcst_pkt->tag.vio_sid = ldcp->local_session; 3248 3249 DUMP_TAG_PTR((vio_msg_tag_t *)mcst_pkt); 3250 3251 (void) vsw_send_msg(ldcp, (void *)mcst_pkt, 3252 sizeof (vnet_mcast_msg_t), B_TRUE); 3253 break; 3254 3255 case VIO_SUBTYPE_ACK: 3256 DWARN(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3257 3258 /* 3259 * We shouldn't ever get a multicast ACK message as 3260 * at the moment we never request multicast addresses 3261 * to be set on some other device. This may change in 3262 * the future if we have cascading switches. 3263 */ 3264 if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_ACK_RECV)) 3265 return; 3266 3267 /* Do nothing */ 3268 break; 3269 3270 case VIO_SUBTYPE_NACK: 3271 DWARN(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3272 3273 /* 3274 * We shouldn't get a multicast NACK packet for the 3275 * same reasons as we shouldn't get a ACK packet. 3276 */ 3277 if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_NACK_RECV)) 3278 return; 3279 3280 /* Do nothing */ 3281 break; 3282 3283 default: 3284 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 3285 mcst_pkt->tag.vio_subtype); 3286 } 3287 3288 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3289 } 3290 3291 static void 3292 vsw_process_ctrl_rdx_pkt(vsw_ldc_t *ldcp, void *pkt) 3293 { 3294 vio_rdx_msg_t *rdx_pkt; 3295 vsw_t *vswp = ldcp->ldc_vswp; 3296 3297 /* 3298 * We know this is a ctrl/rdx packet so 3299 * cast it into the correct structure. 
3300 */ 3301 rdx_pkt = (vio_rdx_msg_t *)pkt; 3302 3303 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 3304 3305 switch (rdx_pkt->tag.vio_subtype) { 3306 case VIO_SUBTYPE_INFO: 3307 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3308 3309 if (vsw_check_flag(ldcp, OUTBOUND, VSW_RDX_INFO_RECV)) 3310 return; 3311 3312 rdx_pkt->tag.vio_sid = ldcp->local_session; 3313 rdx_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3314 3315 DUMP_TAG_PTR((vio_msg_tag_t *)rdx_pkt); 3316 3317 ldcp->lane_out.lstate |= VSW_RDX_ACK_SENT; 3318 3319 (void) vsw_send_msg(ldcp, (void *)rdx_pkt, 3320 sizeof (vio_rdx_msg_t), B_TRUE); 3321 3322 vsw_next_milestone(ldcp); 3323 break; 3324 3325 case VIO_SUBTYPE_ACK: 3326 /* 3327 * Should be handled in-band by callback handler. 3328 */ 3329 DERR(vswp, "%s: Unexpected VIO_SUBTYPE_ACK", __func__); 3330 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3331 break; 3332 3333 case VIO_SUBTYPE_NACK: 3334 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3335 3336 if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_NACK_RECV)) 3337 return; 3338 3339 ldcp->lane_in.lstate |= VSW_RDX_NACK_RECV; 3340 vsw_next_milestone(ldcp); 3341 break; 3342 3343 default: 3344 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 3345 rdx_pkt->tag.vio_subtype); 3346 } 3347 3348 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3349 } 3350 3351 static void 3352 vsw_process_data_pkt(vsw_ldc_t *ldcp, void *dpkt, vio_msg_tag_t *tagp, 3353 uint32_t msglen) 3354 { 3355 uint16_t env = tagp->vio_subtype_env; 3356 vsw_t *vswp = ldcp->ldc_vswp; 3357 3358 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3359 3360 /* session id check */ 3361 if (ldcp->session_status & VSW_PEER_SESSION) { 3362 if (ldcp->peer_session != tagp->vio_sid) { 3363 DERR(vswp, "%s (chan %d): invalid session id (%llx)", 3364 __func__, ldcp->ldc_id, tagp->vio_sid); 3365 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3366 return; 3367 } 3368 } 3369 3370 /* 3371 * It is an error for us to be getting data packets 3372 * before the handshake has completed. 3373 */ 3374 if (ldcp->hphase != VSW_MILESTONE4) { 3375 DERR(vswp, "%s: got data packet before handshake complete " 3376 "hphase %d (%x: %x)", __func__, ldcp->hphase, 3377 ldcp->lane_in.lstate, ldcp->lane_out.lstate); 3378 DUMP_FLAGS(ldcp->lane_in.lstate); 3379 DUMP_FLAGS(ldcp->lane_out.lstate); 3380 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3381 return; 3382 } 3383 3384 /* 3385 * To reduce the locking contention, release the 3386 * ldc_cblock here and re-acquire it once we are done 3387 * receiving packets. 3388 */ 3389 mutex_exit(&ldcp->ldc_cblock); 3390 mutex_enter(&ldcp->ldc_rxlock); 3391 3392 /* 3393 * Switch on vio_subtype envelope, then let lower routines 3394 * decide if its an INFO, ACK or NACK packet. 
3395 */ 3396 if (env == VIO_DRING_DATA) { 3397 vsw_process_data_dring_pkt(ldcp, dpkt); 3398 } else if (env == VIO_PKT_DATA) { 3399 ldcp->rx_pktdata(ldcp, dpkt, msglen); 3400 } else if (env == VIO_DESC_DATA) { 3401 vsw_process_data_ibnd_pkt(ldcp, dpkt); 3402 } else { 3403 DERR(vswp, "%s: unknown vio_subtype_env (%x)\n", __func__, env); 3404 } 3405 3406 mutex_exit(&ldcp->ldc_rxlock); 3407 mutex_enter(&ldcp->ldc_cblock); 3408 3409 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3410 } 3411 3412 #define SND_DRING_NACK(ldcp, pkt) \ 3413 pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \ 3414 pkt->tag.vio_sid = ldcp->local_session; \ 3415 (void) vsw_send_msg(ldcp, (void *)pkt, \ 3416 sizeof (vio_dring_msg_t), B_TRUE); 3417 3418 static void 3419 vsw_process_data_dring_pkt(vsw_ldc_t *ldcp, void *dpkt) 3420 { 3421 vio_dring_msg_t *dring_pkt; 3422 vnet_public_desc_t desc, *pub_addr = NULL; 3423 vsw_private_desc_t *priv_addr = NULL; 3424 dring_info_t *dp = NULL; 3425 vsw_t *vswp = ldcp->ldc_vswp; 3426 mblk_t *mp = NULL; 3427 mblk_t *bp = NULL; 3428 mblk_t *bpt = NULL; 3429 size_t nbytes = 0; 3430 uint64_t chain = 0; 3431 uint64_t len; 3432 uint32_t pos, start; 3433 uint32_t range_start, range_end; 3434 int32_t end, num, cnt = 0; 3435 int i, rv, rng_rv = 0, msg_rv = 0; 3436 boolean_t prev_desc_ack = B_FALSE; 3437 int read_attempts = 0; 3438 struct ether_header *ehp; 3439 lane_t *lp = &ldcp->lane_out; 3440 3441 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3442 3443 /* 3444 * We know this is a data/dring packet so 3445 * cast it into the correct structure. 3446 */ 3447 dring_pkt = (vio_dring_msg_t *)dpkt; 3448 3449 /* 3450 * Switch on the vio_subtype. If its INFO then we need to 3451 * process the data. If its an ACK we need to make sure 3452 * it makes sense (i.e did we send an earlier data/info), 3453 * and if its a NACK then we maybe attempt a retry. 3454 */ 3455 switch (dring_pkt->tag.vio_subtype) { 3456 case VIO_SUBTYPE_INFO: 3457 D2(vswp, "%s(%lld): VIO_SUBTYPE_INFO", __func__, ldcp->ldc_id); 3458 3459 READ_ENTER(&ldcp->lane_in.dlistrw); 3460 if ((dp = vsw_ident2dring(&ldcp->lane_in, 3461 dring_pkt->dring_ident)) == NULL) { 3462 RW_EXIT(&ldcp->lane_in.dlistrw); 3463 3464 DERR(vswp, "%s(%lld): unable to find dring from " 3465 "ident 0x%llx", __func__, ldcp->ldc_id, 3466 dring_pkt->dring_ident); 3467 3468 SND_DRING_NACK(ldcp, dring_pkt); 3469 return; 3470 } 3471 3472 start = pos = dring_pkt->start_idx; 3473 end = dring_pkt->end_idx; 3474 len = dp->num_descriptors; 3475 3476 range_start = range_end = pos; 3477 3478 D2(vswp, "%s(%lld): start index %ld : end %ld\n", 3479 __func__, ldcp->ldc_id, start, end); 3480 3481 if (end == -1) { 3482 num = -1; 3483 } else if (end >= 0) { 3484 num = end >= pos ? 
end - pos + 1: (len - pos + 1) + end; 3485 3486 /* basic sanity check */ 3487 if (end > len) { 3488 RW_EXIT(&ldcp->lane_in.dlistrw); 3489 DERR(vswp, "%s(%lld): endpoint %lld outside " 3490 "ring length %lld", __func__, 3491 ldcp->ldc_id, end, len); 3492 3493 SND_DRING_NACK(ldcp, dring_pkt); 3494 return; 3495 } 3496 } else { 3497 RW_EXIT(&ldcp->lane_in.dlistrw); 3498 DERR(vswp, "%s(%lld): invalid endpoint %lld", 3499 __func__, ldcp->ldc_id, end); 3500 SND_DRING_NACK(ldcp, dring_pkt); 3501 return; 3502 } 3503 3504 while (cnt != num) { 3505 vsw_recheck_desc: 3506 pub_addr = (vnet_public_desc_t *)dp->pub_addr + pos; 3507 3508 if ((rng_rv = vnet_dring_entry_copy(pub_addr, 3509 &desc, dp->dring_mtype, dp->handle, 3510 pos, pos)) != 0) { 3511 DERR(vswp, "%s(%lld): unable to copy " 3512 "descriptor at pos %d: err %d", 3513 __func__, pos, ldcp->ldc_id, rng_rv); 3514 ldcp->ldc_stats.ierrors++; 3515 break; 3516 } 3517 3518 /* 3519 * When given a bounded range of descriptors 3520 * to process, its an error to hit a descriptor 3521 * which is not ready. In the non-bounded case 3522 * (end_idx == -1) this simply indicates we have 3523 * reached the end of the current active range. 3524 */ 3525 if (desc.hdr.dstate != VIO_DESC_READY) { 3526 /* unbound - no error */ 3527 if (end == -1) { 3528 if (read_attempts == vsw_read_attempts) 3529 break; 3530 3531 delay(drv_usectohz(vsw_desc_delay)); 3532 read_attempts++; 3533 goto vsw_recheck_desc; 3534 } 3535 3536 /* bounded - error - so NACK back */ 3537 RW_EXIT(&ldcp->lane_in.dlistrw); 3538 DERR(vswp, "%s(%lld): descriptor not READY " 3539 "(%d)", __func__, ldcp->ldc_id, 3540 desc.hdr.dstate); 3541 SND_DRING_NACK(ldcp, dring_pkt); 3542 return; 3543 } 3544 3545 DTRACE_PROBE1(read_attempts, int, read_attempts); 3546 3547 range_end = pos; 3548 3549 /* 3550 * If we ACK'd the previous descriptor then now 3551 * record the new range start position for later 3552 * ACK's. 3553 */ 3554 if (prev_desc_ack) { 3555 range_start = pos; 3556 3557 D2(vswp, "%s(%lld): updating range start to be " 3558 "%d", __func__, ldcp->ldc_id, range_start); 3559 3560 prev_desc_ack = B_FALSE; 3561 } 3562 3563 D2(vswp, "%s(%lld): processing desc %lld at pos" 3564 " 0x%llx : dstate 0x%lx : datalen 0x%lx", 3565 __func__, ldcp->ldc_id, pos, &desc, 3566 desc.hdr.dstate, desc.nbytes); 3567 3568 if ((desc.nbytes < ETHERMIN) || 3569 (desc.nbytes > lp->mtu)) { 3570 /* invalid size; drop the packet */ 3571 ldcp->ldc_stats.ierrors++; 3572 goto vsw_process_desc_done; 3573 } 3574 3575 /* 3576 * Ensure that we ask ldc for an aligned 3577 * number of bytes. Data is padded to align on 8 3578 * byte boundary, desc.nbytes is actual data length, 3579 * i.e. minus that padding. 3580 */ 3581 nbytes = (desc.nbytes + VNET_IPALIGN + 7) & ~7; 3582 if (nbytes > ldcp->max_rxpool_size) { 3583 mp = allocb(desc.nbytes + VNET_IPALIGN + 8, 3584 BPRI_MED); 3585 } else { 3586 mp = vio_multipool_allocb(&ldcp->vmp, nbytes); 3587 if (mp == NULL) { 3588 ldcp->ldc_stats.rx_vio_allocb_fail++; 3589 /* 3590 * No free receive buffers available, 3591 * so fallback onto allocb(9F). Make 3592 * sure that we get a data buffer which 3593 * is a multiple of 8 as this is 3594 * required by ldc_mem_copy. 
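 *
 * The rounding above is nbytes = (desc.nbytes + VNET_IPALIGN + 7) & ~7,
 * i.e. the data length plus the IP alignment pad, rounded up to the
 * next multiple of 8.  For instance, a hypothetical 60 byte frame with
 * a 6 byte VNET_IPALIGN would make us ask ldc for 72 bytes.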
3595 */ 3596 DTRACE_PROBE(allocb); 3597 mp = allocb(desc.nbytes + 3598 VNET_IPALIGN + 8, BPRI_MED); 3599 } 3600 } 3601 if (mp == NULL) { 3602 DERR(vswp, "%s(%ld): allocb failed", 3603 __func__, ldcp->ldc_id); 3604 rng_rv = vnet_dring_entry_set_dstate(pub_addr, 3605 dp->dring_mtype, dp->handle, pos, pos, 3606 VIO_DESC_DONE); 3607 ldcp->ldc_stats.ierrors++; 3608 ldcp->ldc_stats.rx_allocb_fail++; 3609 break; 3610 } 3611 3612 rv = ldc_mem_copy(ldcp->ldc_handle, 3613 (caddr_t)mp->b_rptr, 0, &nbytes, 3614 desc.memcookie, desc.ncookies, LDC_COPY_IN); 3615 if (rv != 0) { 3616 DERR(vswp, "%s(%d): unable to copy in data " 3617 "from %d cookies in desc %d (rv %d)", 3618 __func__, ldcp->ldc_id, desc.ncookies, 3619 pos, rv); 3620 freemsg(mp); 3621 3622 rng_rv = vnet_dring_entry_set_dstate(pub_addr, 3623 dp->dring_mtype, dp->handle, pos, pos, 3624 VIO_DESC_DONE); 3625 ldcp->ldc_stats.ierrors++; 3626 break; 3627 } else { 3628 D2(vswp, "%s(%d): copied in %ld bytes" 3629 " using %d cookies", __func__, 3630 ldcp->ldc_id, nbytes, desc.ncookies); 3631 } 3632 3633 /* adjust the read pointer to skip over the padding */ 3634 mp->b_rptr += VNET_IPALIGN; 3635 3636 /* point to the actual end of data */ 3637 mp->b_wptr = mp->b_rptr + desc.nbytes; 3638 3639 /* update statistics */ 3640 ehp = (struct ether_header *)mp->b_rptr; 3641 if (IS_BROADCAST(ehp)) 3642 ldcp->ldc_stats.brdcstrcv++; 3643 else if (IS_MULTICAST(ehp)) 3644 ldcp->ldc_stats.multircv++; 3645 3646 ldcp->ldc_stats.ipackets++; 3647 ldcp->ldc_stats.rbytes += desc.nbytes; 3648 3649 /* 3650 * IPALIGN space can be used for VLAN_TAG 3651 */ 3652 (void) vsw_vlan_frame_pretag(ldcp->ldc_port, 3653 VSW_VNETPORT, mp); 3654 3655 /* build a chain of received packets */ 3656 if (bp == NULL) { 3657 /* first pkt */ 3658 bp = mp; 3659 bp->b_next = bp->b_prev = NULL; 3660 bpt = bp; 3661 chain = 1; 3662 } else { 3663 mp->b_next = mp->b_prev = NULL; 3664 bpt->b_next = mp; 3665 bpt = mp; 3666 chain++; 3667 } 3668 3669 vsw_process_desc_done: 3670 /* mark we are finished with this descriptor */ 3671 if ((rng_rv = vnet_dring_entry_set_dstate(pub_addr, 3672 dp->dring_mtype, dp->handle, pos, pos, 3673 VIO_DESC_DONE)) != 0) { 3674 DERR(vswp, "%s(%lld): unable to update " 3675 "dstate at pos %d: err %d", 3676 __func__, pos, ldcp->ldc_id, rng_rv); 3677 ldcp->ldc_stats.ierrors++; 3678 break; 3679 } 3680 3681 /* 3682 * Send an ACK back to peer if requested. 3683 */ 3684 if (desc.hdr.ack) { 3685 dring_pkt->start_idx = range_start; 3686 dring_pkt->end_idx = range_end; 3687 3688 DERR(vswp, "%s(%lld): processed %d %d, ACK" 3689 " requested", __func__, ldcp->ldc_id, 3690 dring_pkt->start_idx, dring_pkt->end_idx); 3691 3692 dring_pkt->dring_process_state = VIO_DP_ACTIVE; 3693 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3694 dring_pkt->tag.vio_sid = ldcp->local_session; 3695 3696 msg_rv = vsw_send_msg(ldcp, (void *)dring_pkt, 3697 sizeof (vio_dring_msg_t), B_FALSE); 3698 3699 /* 3700 * Check if ACK was successfully sent. If not 3701 * we break and deal with that below. 3702 */ 3703 if (msg_rv != 0) 3704 break; 3705 3706 prev_desc_ack = B_TRUE; 3707 range_start = pos; 3708 } 3709 3710 /* next descriptor */ 3711 pos = (pos + 1) % len; 3712 cnt++; 3713 3714 /* 3715 * Break out of loop here and stop processing to 3716 * allow some other network device (or disk) to 3717 * get access to the cpu. 
3718 */ 3719 if (chain > vsw_chain_len) { 3720 D3(vswp, "%s(%lld): switching chain of %d " 3721 "msgs", __func__, ldcp->ldc_id, chain); 3722 break; 3723 } 3724 } 3725 RW_EXIT(&ldcp->lane_in.dlistrw); 3726 3727 /* send the chain of packets to be switched */ 3728 if (bp != NULL) { 3729 DTRACE_PROBE1(vsw_rcv_msgs, int, chain); 3730 D3(vswp, "%s(%lld): switching chain of %d msgs", 3731 __func__, ldcp->ldc_id, chain); 3732 vswp->vsw_switch_frame(vswp, bp, VSW_VNETPORT, 3733 ldcp->ldc_port, NULL); 3734 } 3735 3736 /* 3737 * If when we encountered an error when attempting to 3738 * access an imported dring, initiate a connection reset. 3739 */ 3740 if (rng_rv != 0) { 3741 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3742 break; 3743 } 3744 3745 /* 3746 * If when we attempted to send the ACK we found that the 3747 * channel had been reset then now handle this. We deal with 3748 * it here as we cannot reset the channel while holding the 3749 * dlistrw lock, and we don't want to acquire/release it 3750 * continuously in the above loop, as a channel reset should 3751 * be a rare event. 3752 */ 3753 if (msg_rv == ECONNRESET) { 3754 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 3755 break; 3756 } 3757 3758 DTRACE_PROBE1(msg_cnt, int, cnt); 3759 3760 /* 3761 * We are now finished so ACK back with the state 3762 * set to STOPPING so our peer knows we are finished 3763 */ 3764 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3765 dring_pkt->tag.vio_sid = ldcp->local_session; 3766 3767 dring_pkt->dring_process_state = VIO_DP_STOPPED; 3768 3769 DTRACE_PROBE(stop_process_sent); 3770 3771 /* 3772 * We have not processed any more descriptors beyond 3773 * the last one we ACK'd. 3774 */ 3775 if (prev_desc_ack) 3776 range_start = range_end; 3777 3778 dring_pkt->start_idx = range_start; 3779 dring_pkt->end_idx = range_end; 3780 3781 D2(vswp, "%s(%lld) processed : %d : %d, now stopping", 3782 __func__, ldcp->ldc_id, dring_pkt->start_idx, 3783 dring_pkt->end_idx); 3784 3785 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 3786 sizeof (vio_dring_msg_t), B_TRUE); 3787 break; 3788 3789 case VIO_SUBTYPE_ACK: 3790 D2(vswp, "%s(%lld): VIO_SUBTYPE_ACK", __func__, ldcp->ldc_id); 3791 /* 3792 * Verify that the relevant descriptors are all 3793 * marked as DONE 3794 */ 3795 READ_ENTER(&ldcp->lane_out.dlistrw); 3796 if ((dp = vsw_ident2dring(&ldcp->lane_out, 3797 dring_pkt->dring_ident)) == NULL) { 3798 RW_EXIT(&ldcp->lane_out.dlistrw); 3799 DERR(vswp, "%s: unknown ident in ACK", __func__); 3800 return; 3801 } 3802 3803 start = end = 0; 3804 start = dring_pkt->start_idx; 3805 end = dring_pkt->end_idx; 3806 len = dp->num_descriptors; 3807 3808 3809 mutex_enter(&dp->dlock); 3810 dp->last_ack_recv = end; 3811 ldcp->ldc_stats.dring_data_acks++; 3812 mutex_exit(&dp->dlock); 3813 3814 (void) vsw_reclaim_dring(dp, start); 3815 3816 /* 3817 * If our peer is stopping processing descriptors then 3818 * we check to make sure it has processed all the descriptors 3819 * we have updated. If not then we send it a new message 3820 * to prompt it to restart. 3821 */ 3822 if (dring_pkt->dring_process_state == VIO_DP_STOPPED) { 3823 DTRACE_PROBE(stop_process_recv); 3824 D2(vswp, "%s(%lld): got stopping msg : %d : %d", 3825 __func__, ldcp->ldc_id, dring_pkt->start_idx, 3826 dring_pkt->end_idx); 3827 3828 /* 3829 * Check next descriptor in public section of ring. 3830 * If its marked as READY then we need to prompt our 3831 * peer to start processing the ring again. 
3832 */ 3833 i = (end + 1) % len; 3834 pub_addr = (vnet_public_desc_t *)dp->pub_addr + i; 3835 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i; 3836 3837 /* 3838 * Hold the restart lock across all of this to 3839 * make sure that its not possible for us to 3840 * decide that a msg needs to be sent in the future 3841 * but the sending code having already checked is 3842 * about to exit. 3843 */ 3844 mutex_enter(&dp->restart_lock); 3845 ldcp->ldc_stats.dring_stopped_acks++; 3846 mutex_enter(&priv_addr->dstate_lock); 3847 if (pub_addr->hdr.dstate == VIO_DESC_READY) { 3848 3849 mutex_exit(&priv_addr->dstate_lock); 3850 3851 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO; 3852 dring_pkt->tag.vio_sid = ldcp->local_session; 3853 3854 dring_pkt->start_idx = (end + 1) % len; 3855 dring_pkt->end_idx = -1; 3856 3857 D2(vswp, "%s(%lld) : sending restart msg:" 3858 " %d : %d", __func__, ldcp->ldc_id, 3859 dring_pkt->start_idx, dring_pkt->end_idx); 3860 3861 msg_rv = vsw_send_msg(ldcp, (void *)dring_pkt, 3862 sizeof (vio_dring_msg_t), B_FALSE); 3863 ldcp->ldc_stats.dring_data_msgs++; 3864 3865 } else { 3866 mutex_exit(&priv_addr->dstate_lock); 3867 dp->restart_reqd = B_TRUE; 3868 } 3869 mutex_exit(&dp->restart_lock); 3870 } 3871 RW_EXIT(&ldcp->lane_out.dlistrw); 3872 3873 /* only do channel reset after dropping dlistrw lock */ 3874 if (msg_rv == ECONNRESET) 3875 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 3876 3877 break; 3878 3879 case VIO_SUBTYPE_NACK: 3880 DWARN(vswp, "%s(%lld): VIO_SUBTYPE_NACK", 3881 __func__, ldcp->ldc_id); 3882 /* 3883 * Something is badly wrong if we are getting NACK's 3884 * for our data pkts. So reset the channel. 3885 */ 3886 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3887 3888 break; 3889 3890 default: 3891 DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__, 3892 ldcp->ldc_id, dring_pkt->tag.vio_subtype); 3893 } 3894 3895 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 3896 } 3897 3898 /* 3899 * dummy pkt data handler function for vnet protocol version 1.0 3900 */ 3901 static void 3902 vsw_process_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen) 3903 { 3904 _NOTE(ARGUNUSED(arg1, arg2, msglen)) 3905 } 3906 3907 /* 3908 * This function handles raw pkt data messages received over the channel. 3909 * Currently, only priority-eth-type frames are received through this mechanism. 3910 * In this case, the frame(data) is present within the message itself which 3911 * is copied into an mblk before switching it. 
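 *
 * A rough sketch of the steps below: the payload length is
 * msglen - VIO_PKT_DATA_HDRSIZE and must lie between ETHERMIN and the
 * negotiated MTU; the mblk is allocated with VLAN_TAGSZ of headroom so
 * vsw_vlan_frame_pretag() can insert a tag in place if the port needs
 * one, and the frame is then switched via vswp->vsw_switch_frame().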
3912 */ 3913 static void 3914 vsw_process_pkt_data(void *arg1, void *arg2, uint32_t msglen) 3915 { 3916 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg1; 3917 vio_raw_data_msg_t *dpkt = (vio_raw_data_msg_t *)arg2; 3918 uint32_t size; 3919 mblk_t *mp; 3920 vsw_t *vswp = ldcp->ldc_vswp; 3921 vgen_stats_t *statsp = &ldcp->ldc_stats; 3922 lane_t *lp = &ldcp->lane_out; 3923 3924 size = msglen - VIO_PKT_DATA_HDRSIZE; 3925 if (size < ETHERMIN || size > lp->mtu) { 3926 (void) atomic_inc_32(&statsp->rx_pri_fail); 3927 DWARN(vswp, "%s(%lld) invalid size(%d)\n", __func__, 3928 ldcp->ldc_id, size); 3929 return; 3930 } 3931 3932 mp = vio_multipool_allocb(&ldcp->vmp, size + VLAN_TAGSZ); 3933 if (mp == NULL) { 3934 mp = allocb(size + VLAN_TAGSZ, BPRI_MED); 3935 if (mp == NULL) { 3936 (void) atomic_inc_32(&statsp->rx_pri_fail); 3937 DWARN(vswp, "%s(%lld) allocb failure, " 3938 "unable to process priority frame\n", __func__, 3939 ldcp->ldc_id); 3940 return; 3941 } 3942 } 3943 3944 /* skip over the extra space for vlan tag */ 3945 mp->b_rptr += VLAN_TAGSZ; 3946 3947 /* copy the frame from the payload of raw data msg into the mblk */ 3948 bcopy(dpkt->data, mp->b_rptr, size); 3949 mp->b_wptr = mp->b_rptr + size; 3950 3951 /* update stats */ 3952 (void) atomic_inc_64(&statsp->rx_pri_packets); 3953 (void) atomic_add_64(&statsp->rx_pri_bytes, size); 3954 3955 /* 3956 * VLAN_TAGSZ of extra space has been pre-alloc'd if tag is needed. 3957 */ 3958 (void) vsw_vlan_frame_pretag(ldcp->ldc_port, VSW_VNETPORT, mp); 3959 3960 /* switch the frame to destination */ 3961 vswp->vsw_switch_frame(vswp, mp, VSW_VNETPORT, ldcp->ldc_port, NULL); 3962 } 3963 3964 /* 3965 * Process an in-band descriptor message (most likely from 3966 * OBP). 3967 */ 3968 static void 3969 vsw_process_data_ibnd_pkt(vsw_ldc_t *ldcp, void *pkt) 3970 { 3971 vnet_ibnd_desc_t *ibnd_desc; 3972 dring_info_t *dp = NULL; 3973 vsw_private_desc_t *priv_addr = NULL; 3974 vsw_t *vswp = ldcp->ldc_vswp; 3975 mblk_t *mp = NULL; 3976 size_t nbytes = 0; 3977 size_t off = 0; 3978 uint64_t idx = 0; 3979 uint32_t num = 1, len, datalen = 0; 3980 uint64_t ncookies = 0; 3981 int i, rv; 3982 int j = 0; 3983 3984 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3985 3986 ibnd_desc = (vnet_ibnd_desc_t *)pkt; 3987 3988 switch (ibnd_desc->hdr.tag.vio_subtype) { 3989 case VIO_SUBTYPE_INFO: 3990 D1(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3991 3992 if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV)) 3993 return; 3994 3995 /* 3996 * Data is padded to align on a 8 byte boundary, 3997 * nbytes is actual data length, i.e. minus that 3998 * padding. 3999 */ 4000 datalen = ibnd_desc->nbytes; 4001 4002 D2(vswp, "%s(%lld): processing inband desc : " 4003 ": datalen 0x%lx", __func__, ldcp->ldc_id, datalen); 4004 4005 ncookies = ibnd_desc->ncookies; 4006 4007 /* 4008 * allocb(9F) returns an aligned data block. We 4009 * need to ensure that we ask ldc for an aligned 4010 * number of bytes also. 
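 * For example, a datalen of 61 is rounded up to nbytes = 64 for the
 * ldc_mem_copy() below, while b_wptr is still set using the original
 * 61-byte datalen.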
4011 */ 4012 nbytes = datalen; 4013 if (nbytes & 0x7) { 4014 off = 8 - (nbytes & 0x7); 4015 nbytes += off; 4016 } 4017 4018 /* alloc extra space for VLAN_TAG */ 4019 mp = allocb(datalen + 8, BPRI_MED); 4020 if (mp == NULL) { 4021 DERR(vswp, "%s(%lld): allocb failed", 4022 __func__, ldcp->ldc_id); 4023 ldcp->ldc_stats.rx_allocb_fail++; 4024 return; 4025 } 4026 4027 /* skip over the extra space for VLAN_TAG */ 4028 mp->b_rptr += 8; 4029 4030 rv = ldc_mem_copy(ldcp->ldc_handle, (caddr_t)mp->b_rptr, 4031 0, &nbytes, ibnd_desc->memcookie, (uint64_t)ncookies, 4032 LDC_COPY_IN); 4033 4034 if (rv != 0) { 4035 DERR(vswp, "%s(%d): unable to copy in data from " 4036 "%d cookie(s)", __func__, ldcp->ldc_id, ncookies); 4037 freemsg(mp); 4038 ldcp->ldc_stats.ierrors++; 4039 return; 4040 } 4041 4042 D2(vswp, "%s(%d): copied in %ld bytes using %d cookies", 4043 __func__, ldcp->ldc_id, nbytes, ncookies); 4044 4045 /* point to the actual end of data */ 4046 mp->b_wptr = mp->b_rptr + datalen; 4047 ldcp->ldc_stats.ipackets++; 4048 ldcp->ldc_stats.rbytes += datalen; 4049 4050 /* 4051 * We ACK back every in-band descriptor message we process 4052 */ 4053 ibnd_desc->hdr.tag.vio_subtype = VIO_SUBTYPE_ACK; 4054 ibnd_desc->hdr.tag.vio_sid = ldcp->local_session; 4055 (void) vsw_send_msg(ldcp, (void *)ibnd_desc, 4056 sizeof (vnet_ibnd_desc_t), B_TRUE); 4057 4058 /* 4059 * there is extra space alloc'd for VLAN_TAG 4060 */ 4061 (void) vsw_vlan_frame_pretag(ldcp->ldc_port, VSW_VNETPORT, mp); 4062 4063 /* send the packet to be switched */ 4064 vswp->vsw_switch_frame(vswp, mp, VSW_VNETPORT, 4065 ldcp->ldc_port, NULL); 4066 4067 break; 4068 4069 case VIO_SUBTYPE_ACK: 4070 D1(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 4071 4072 /* Verify the ACK is valid */ 4073 idx = ibnd_desc->hdr.desc_handle; 4074 4075 if (idx >= vsw_ntxds) { 4076 cmn_err(CE_WARN, "!vsw%d: corrupted ACK received " 4077 "(idx %ld)", vswp->instance, idx); 4078 return; 4079 } 4080 4081 if ((dp = ldcp->lane_out.dringp) == NULL) { 4082 DERR(vswp, "%s: no dring found", __func__); 4083 return; 4084 } 4085 4086 len = dp->num_descriptors; 4087 /* 4088 * If the descriptor we are being ACK'ed for is not the 4089 * one we expected, then pkts were lost somwhere, either 4090 * when we tried to send a msg, or a previous ACK msg from 4091 * our peer. In either case we now reclaim the descriptors 4092 * in the range from the last ACK we received up to the 4093 * current ACK. 4094 */ 4095 if (idx != dp->last_ack_recv) { 4096 DWARN(vswp, "%s: dropped pkts detected, (%ld, %ld)", 4097 __func__, dp->last_ack_recv, idx); 4098 num = idx >= dp->last_ack_recv ? 4099 idx - dp->last_ack_recv + 1: 4100 (len - dp->last_ack_recv + 1) + idx; 4101 } 4102 4103 /* 4104 * When we sent the in-band message to our peer we 4105 * marked the copy in our private ring as READY. We now 4106 * check that the descriptor we are being ACK'ed for is in 4107 * fact READY, i.e. it is one we have shared with our peer. 4108 * 4109 * If its not we flag an error, but still reset the descr 4110 * back to FREE. 
4111 */ 4112 for (i = dp->last_ack_recv; j < num; i = (i + 1) % len, j++) { 4113 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i; 4114 mutex_enter(&priv_addr->dstate_lock); 4115 if (priv_addr->dstate != VIO_DESC_READY) { 4116 DERR(vswp, "%s: (%ld) desc at index %ld not " 4117 "READY (0x%lx)", __func__, 4118 ldcp->ldc_id, idx, priv_addr->dstate); 4119 DERR(vswp, "%s: bound %d: ncookies %ld : " 4120 "datalen %ld", __func__, 4121 priv_addr->bound, priv_addr->ncookies, 4122 priv_addr->datalen); 4123 } 4124 D2(vswp, "%s: (%lld) freeing descp at %lld", __func__, 4125 ldcp->ldc_id, idx); 4126 /* release resources associated with sent msg */ 4127 priv_addr->datalen = 0; 4128 priv_addr->dstate = VIO_DESC_FREE; 4129 mutex_exit(&priv_addr->dstate_lock); 4130 } 4131 /* update to next expected value */ 4132 dp->last_ack_recv = (idx + 1) % dp->num_descriptors; 4133 4134 break; 4135 4136 case VIO_SUBTYPE_NACK: 4137 DERR(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 4138 4139 /* 4140 * We should only get a NACK if our peer doesn't like 4141 * something about a message we have sent it. If this 4142 * happens we just release the resources associated with 4143 * the message. (We are relying on higher layers to decide 4144 * whether or not to resend. 4145 */ 4146 4147 /* limit check */ 4148 idx = ibnd_desc->hdr.desc_handle; 4149 4150 if (idx >= vsw_ntxds) { 4151 DERR(vswp, "%s: corrupted NACK received (idx %lld)", 4152 __func__, idx); 4153 return; 4154 } 4155 4156 if ((dp = ldcp->lane_out.dringp) == NULL) { 4157 DERR(vswp, "%s: no dring found", __func__); 4158 return; 4159 } 4160 4161 priv_addr = (vsw_private_desc_t *)dp->priv_addr; 4162 4163 /* move to correct location in ring */ 4164 priv_addr += idx; 4165 4166 /* release resources associated with sent msg */ 4167 mutex_enter(&priv_addr->dstate_lock); 4168 priv_addr->datalen = 0; 4169 priv_addr->dstate = VIO_DESC_FREE; 4170 mutex_exit(&priv_addr->dstate_lock); 4171 4172 break; 4173 4174 default: 4175 DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__, 4176 ldcp->ldc_id, ibnd_desc->hdr.tag.vio_subtype); 4177 } 4178 4179 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 4180 } 4181 4182 static void 4183 vsw_process_err_pkt(vsw_ldc_t *ldcp, void *epkt, vio_msg_tag_t *tagp) 4184 { 4185 _NOTE(ARGUNUSED(epkt)) 4186 4187 vsw_t *vswp = ldcp->ldc_vswp; 4188 uint16_t env = tagp->vio_subtype_env; 4189 4190 D1(vswp, "%s (%lld): enter\n", __func__, ldcp->ldc_id); 4191 4192 /* 4193 * Error vio_subtypes have yet to be defined. So for 4194 * the moment we can't do anything. 4195 */ 4196 D2(vswp, "%s: (%x) vio_subtype env", __func__, env); 4197 4198 D1(vswp, "%s (%lld): exit\n", __func__, ldcp->ldc_id); 4199 } 4200 4201 /* transmit the packet over the given port */ 4202 int 4203 vsw_portsend(vsw_port_t *port, mblk_t *mp) 4204 { 4205 vsw_ldc_list_t *ldcl = &port->p_ldclist; 4206 vsw_ldc_t *ldcp; 4207 mblk_t *mpt; 4208 int count; 4209 int status = 0; 4210 4211 READ_ENTER(&ldcl->lockrw); 4212 /* 4213 * Note for now, we have a single channel. 4214 */ 4215 ldcp = ldcl->head; 4216 if (ldcp == NULL) { 4217 DERR(port->p_vswp, "vsw_portsend: no ldc: dropping packet\n"); 4218 freemsgchain(mp); 4219 RW_EXIT(&ldcl->lockrw); 4220 return (1); 4221 } 4222 4223 count = vsw_vlan_frame_untag(port, VSW_VNETPORT, &mp, &mpt); 4224 4225 if (count != 0) { 4226 status = ldcp->tx(ldcp, mp, mpt, count); 4227 } 4228 4229 RW_EXIT(&ldcl->lockrw); 4230 return (status); 4231 } 4232 4233 /* 4234 * Break up frames into 2 seperate chains: normal and 4235 * priority, based on the frame type. 
The number of 4236 * priority frames is also counted and returned. 4237 * 4238 * Params: 4239 * vswp: pointer to the instance of vsw 4240 * np: head of packet chain to be broken 4241 * npt: tail of packet chain to be broken 4242 * 4243 * Returns: 4244 * np: head of normal data packets 4245 * npt: tail of normal data packets 4246 * hp: head of high priority packets 4247 * hpt: tail of high priority packets 4248 */ 4249 static uint32_t 4250 vsw_get_pri_packets(vsw_t *vswp, mblk_t **np, mblk_t **npt, 4251 mblk_t **hp, mblk_t **hpt) 4252 { 4253 mblk_t *tmp = NULL; 4254 mblk_t *smp = NULL; 4255 mblk_t *hmp = NULL; /* high prio pkts head */ 4256 mblk_t *hmpt = NULL; /* high prio pkts tail */ 4257 mblk_t *nmp = NULL; /* normal pkts head */ 4258 mblk_t *nmpt = NULL; /* normal pkts tail */ 4259 uint32_t count = 0; 4260 int i; 4261 struct ether_header *ehp; 4262 uint32_t num_types; 4263 uint16_t *types; 4264 4265 tmp = *np; 4266 while (tmp != NULL) { 4267 4268 smp = tmp; 4269 tmp = tmp->b_next; 4270 smp->b_next = NULL; 4271 smp->b_prev = NULL; 4272 4273 ehp = (struct ether_header *)smp->b_rptr; 4274 num_types = vswp->pri_num_types; 4275 types = vswp->pri_types; 4276 for (i = 0; i < num_types; i++) { 4277 if (ehp->ether_type == types[i]) { 4278 /* high priority frame */ 4279 4280 if (hmp != NULL) { 4281 hmpt->b_next = smp; 4282 hmpt = smp; 4283 } else { 4284 hmp = hmpt = smp; 4285 } 4286 count++; 4287 break; 4288 } 4289 } 4290 if (i == num_types) { 4291 /* normal data frame */ 4292 4293 if (nmp != NULL) { 4294 nmpt->b_next = smp; 4295 nmpt = smp; 4296 } else { 4297 nmp = nmpt = smp; 4298 } 4299 } 4300 } 4301 4302 *hp = hmp; 4303 *hpt = hmpt; 4304 *np = nmp; 4305 *npt = nmpt; 4306 4307 return (count); 4308 } 4309 4310 /* 4311 * Wrapper function to transmit normal and/or priority frames over the channel. 4312 */ 4313 static int 4314 vsw_ldctx_pri(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count) 4315 { 4316 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 4317 mblk_t *tmp; 4318 mblk_t *smp; 4319 mblk_t *hmp; /* high prio pkts head */ 4320 mblk_t *hmpt; /* high prio pkts tail */ 4321 mblk_t *nmp; /* normal pkts head */ 4322 mblk_t *nmpt; /* normal pkts tail */ 4323 uint32_t n = 0; 4324 vsw_t *vswp = ldcp->ldc_vswp; 4325 4326 ASSERT(VSW_PRI_ETH_DEFINED(vswp)); 4327 ASSERT(count != 0); 4328 4329 nmp = mp; 4330 nmpt = mpt; 4331 4332 /* gather any priority frames from the chain of packets */ 4333 n = vsw_get_pri_packets(vswp, &nmp, &nmpt, &hmp, &hmpt); 4334 4335 /* transmit priority frames */ 4336 tmp = hmp; 4337 while (tmp != NULL) { 4338 smp = tmp; 4339 tmp = tmp->b_next; 4340 smp->b_next = NULL; 4341 vsw_ldcsend_pkt(ldcp, smp); 4342 } 4343 4344 count -= n; 4345 4346 if (count == 0) { 4347 /* no normal data frames to process */ 4348 return (0); 4349 } 4350 4351 return (vsw_ldctx(ldcp, nmp, nmpt, count)); 4352 } 4353 4354 /* 4355 * Wrapper function to transmit normal frames over the channel. 4356 */ 4357 static int 4358 vsw_ldctx(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count) 4359 { 4360 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 4361 mblk_t *tmp = NULL; 4362 4363 ASSERT(count != 0); 4364 /* 4365 * If the TX thread is enabled, then queue the 4366 * ordinary frames and signal the tx thread. 4367 */ 4368 if (ldcp->tx_thread != NULL) { 4369 4370 mutex_enter(&ldcp->tx_thr_lock); 4371 4372 if ((ldcp->tx_cnt + count) >= vsw_max_tx_qcount) { 4373 /* 4374 * If we reached queue limit, 4375 * do not queue new packets, 4376 * drop them. 
4377             */
4378            ldcp->ldc_stats.tx_qfull += count;
4379            mutex_exit(&ldcp->tx_thr_lock);
4380            freemsgchain(mp);
4381            goto exit;
4382        }
4383        if (ldcp->tx_mhead == NULL) {
4384            ldcp->tx_mhead = mp;
4385            ldcp->tx_mtail = mpt;
4386            cv_signal(&ldcp->tx_thr_cv);
4387        } else {
4388            ldcp->tx_mtail->b_next = mp;
4389            ldcp->tx_mtail = mpt;
4390        }
4391        ldcp->tx_cnt += count;
4392        mutex_exit(&ldcp->tx_thr_lock);
4393    } else {
4394        while (mp != NULL) {
4395            tmp = mp->b_next;
4396            mp->b_next = mp->b_prev = NULL;
4397            (void) vsw_ldcsend(ldcp, mp, 1);
4398            mp = tmp;
4399        }
4400    }
4401
4402 exit:
4403    return (0);
4404 }
4405
4406 /*
4407  * This function transmits the frame in the payload of a raw data
4408  * (VIO_PKT_DATA) message. Thus, it provides an out-of-band path to
4409  * send special frames with high priority, without going through
4410  * the normal data path that uses the descriptor ring mechanism.
4411  */
4412 static void
4413 vsw_ldcsend_pkt(vsw_ldc_t *ldcp, mblk_t *mp)
4414 {
4415    vio_raw_data_msg_t *pkt;
4416    mblk_t *bp;
4417    mblk_t *nmp = NULL;
4418    caddr_t dst;
4419    uint32_t mblksz;
4420    uint32_t size;
4421    uint32_t nbytes;
4422    int rv;
4423    vsw_t *vswp = ldcp->ldc_vswp;
4424    vgen_stats_t *statsp = &ldcp->ldc_stats;
4425
4426    if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) ||
4427        (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) {
4428        (void) atomic_inc_32(&statsp->tx_pri_fail);
4429        DWARN(vswp, "%s(%lld) status(%d) lstate(0x%llx), dropping "
4430            "packet\n", __func__, ldcp->ldc_id, ldcp->ldc_status,
4431            ldcp->lane_out.lstate);
4432        goto send_pkt_exit;
4433    }
4434
4435    size = msgsize(mp);
4436
4437    /* is the frame bigger than the available payload len of a raw data msg? */
4438    if (size > (size_t)(ldcp->msglen - VIO_PKT_DATA_HDRSIZE)) {
4439        (void) atomic_inc_32(&statsp->tx_pri_fail);
4440        DWARN(vswp, "%s(%lld) invalid size(%d)\n", __func__,
4441            ldcp->ldc_id, size);
4442        goto send_pkt_exit;
4443    }
4444
4445    if (size < ETHERMIN)
4446        size = ETHERMIN;
4447
4448    /* alloc space for a raw data message */
4449    nmp = vio_allocb(vswp->pri_tx_vmp);
4450    if (nmp == NULL) {
4451        (void) atomic_inc_32(&statsp->tx_pri_fail);
4452        DWARN(vswp, "vio_allocb failed\n");
4453        goto send_pkt_exit;
4454    }
4455    pkt = (vio_raw_data_msg_t *)nmp->b_rptr;
4456
4457    /* copy frame into the payload of raw data message */
4458    dst = (caddr_t)pkt->data;
4459    for (bp = mp; bp != NULL; bp = bp->b_cont) {
4460        mblksz = MBLKL(bp);
4461        bcopy(bp->b_rptr, dst, mblksz);
4462        dst += mblksz;
4463    }
4464
4465    /* setup the raw data msg */
4466    pkt->tag.vio_msgtype = VIO_TYPE_DATA;
4467    pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;
4468    pkt->tag.vio_subtype_env = VIO_PKT_DATA;
4469    pkt->tag.vio_sid = ldcp->local_session;
4470    nbytes = VIO_PKT_DATA_HDRSIZE + size;
4471
4472    /* send the msg over ldc */
4473    rv = vsw_send_msg(ldcp, (void *)pkt, nbytes, B_TRUE);
4474    if (rv != 0) {
4475        (void) atomic_inc_32(&statsp->tx_pri_fail);
4476        DWARN(vswp, "%s(%lld) Error sending priority frame\n", __func__,
4477            ldcp->ldc_id);
4478        goto send_pkt_exit;
4479    }
4480
4481    /* update stats */
4482    (void) atomic_inc_64(&statsp->tx_pri_packets);
4483    (void) atomic_add_64(&statsp->tx_pri_bytes, size);
4484
4485 send_pkt_exit:
4486    if (nmp != NULL)
4487        freemsg(nmp);
4488    freemsg(mp);
4489 }
4490
4491 /*
4492  * Transmit the packet over the given LDC channel.
4493  *
4494  * The 'retries' argument indicates how many times a packet
4495  * is retried before it is dropped.
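 * (The direct transmit path passes a retry count of 1, while the
 * dedicated tx worker thread passes vsw_ldc_tx_retries.)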
Note, the retry is done 4496 * only for a resource related failure, for all other failures 4497 * the packet is dropped immediately. 4498 */ 4499 static int 4500 vsw_ldcsend(vsw_ldc_t *ldcp, mblk_t *mp, uint32_t retries) 4501 { 4502 int i; 4503 int rc; 4504 int status = 0; 4505 vsw_port_t *port = ldcp->ldc_port; 4506 dring_info_t *dp = NULL; 4507 4508 4509 for (i = 0; i < retries; ) { 4510 /* 4511 * Send the message out using the appropriate 4512 * transmit function which will free mblock when it 4513 * is finished with it. 4514 */ 4515 mutex_enter(&port->tx_lock); 4516 if (port->transmit != NULL) { 4517 status = (*port->transmit)(ldcp, mp); 4518 } 4519 if (status == LDC_TX_SUCCESS) { 4520 mutex_exit(&port->tx_lock); 4521 break; 4522 } 4523 i++; /* increment the counter here */ 4524 4525 /* If its the last retry, then update the oerror */ 4526 if ((i == retries) && (status == LDC_TX_NORESOURCES)) { 4527 ldcp->ldc_stats.oerrors++; 4528 } 4529 mutex_exit(&port->tx_lock); 4530 4531 if (status != LDC_TX_NORESOURCES) { 4532 /* 4533 * No retrying required for errors un-related 4534 * to resources. 4535 */ 4536 break; 4537 } 4538 READ_ENTER(&ldcp->lane_out.dlistrw); 4539 if (((dp = ldcp->lane_out.dringp) != NULL) && 4540 ((VSW_VER_GTEQ(ldcp, 1, 2) && 4541 (ldcp->lane_out.xfer_mode & VIO_DRING_MODE_V1_2)) || 4542 ((VSW_VER_LT(ldcp, 1, 2) && 4543 (ldcp->lane_out.xfer_mode == VIO_DRING_MODE_V1_0))))) { 4544 rc = vsw_reclaim_dring(dp, dp->end_idx); 4545 } else { 4546 /* 4547 * If there is no dring or the xfer_mode is 4548 * set to DESC_MODE(ie., OBP), then simply break here. 4549 */ 4550 RW_EXIT(&ldcp->lane_out.dlistrw); 4551 break; 4552 } 4553 RW_EXIT(&ldcp->lane_out.dlistrw); 4554 4555 /* 4556 * Delay only if none were reclaimed 4557 * and its not the last retry. 4558 */ 4559 if ((rc == 0) && (i < retries)) { 4560 delay(drv_usectohz(vsw_ldc_tx_delay)); 4561 } 4562 } 4563 freemsg(mp); 4564 return (status); 4565 } 4566 4567 /* 4568 * Send packet out via descriptor ring to a logical device. 4569 */ 4570 static int 4571 vsw_dringsend(vsw_ldc_t *ldcp, mblk_t *mp) 4572 { 4573 vio_dring_msg_t dring_pkt; 4574 dring_info_t *dp = NULL; 4575 vsw_private_desc_t *priv_desc = NULL; 4576 vnet_public_desc_t *pub = NULL; 4577 vsw_t *vswp = ldcp->ldc_vswp; 4578 mblk_t *bp; 4579 size_t n, size; 4580 caddr_t bufp; 4581 int idx; 4582 int status = LDC_TX_SUCCESS; 4583 struct ether_header *ehp = (struct ether_header *)mp->b_rptr; 4584 lane_t *lp = &ldcp->lane_out; 4585 4586 D1(vswp, "%s(%lld): enter\n", __func__, ldcp->ldc_id); 4587 4588 /* TODO: make test a macro */ 4589 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 4590 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 4591 DWARN(vswp, "%s(%lld) status(%d) lstate(0x%llx), dropping " 4592 "packet\n", __func__, ldcp->ldc_id, ldcp->ldc_status, 4593 ldcp->lane_out.lstate); 4594 ldcp->ldc_stats.oerrors++; 4595 return (LDC_TX_FAILURE); 4596 } 4597 4598 /* 4599 * Note - using first ring only, this may change 4600 * in the future. 
4601 */ 4602 READ_ENTER(&ldcp->lane_out.dlistrw); 4603 if ((dp = ldcp->lane_out.dringp) == NULL) { 4604 RW_EXIT(&ldcp->lane_out.dlistrw); 4605 DERR(vswp, "%s(%lld): no dring for outbound lane on" 4606 " channel %d", __func__, ldcp->ldc_id, ldcp->ldc_id); 4607 ldcp->ldc_stats.oerrors++; 4608 return (LDC_TX_FAILURE); 4609 } 4610 4611 size = msgsize(mp); 4612 if (size > (size_t)lp->mtu) { 4613 RW_EXIT(&ldcp->lane_out.dlistrw); 4614 DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__, 4615 ldcp->ldc_id, size); 4616 ldcp->ldc_stats.oerrors++; 4617 return (LDC_TX_FAILURE); 4618 } 4619 4620 /* 4621 * Find a free descriptor 4622 * 4623 * Note: for the moment we are assuming that we will only 4624 * have one dring going from the switch to each of its 4625 * peers. This may change in the future. 4626 */ 4627 if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) { 4628 D2(vswp, "%s(%lld): no descriptor available for ring " 4629 "at 0x%llx", __func__, ldcp->ldc_id, dp); 4630 4631 /* nothing more we can do */ 4632 status = LDC_TX_NORESOURCES; 4633 ldcp->ldc_stats.tx_no_desc++; 4634 goto vsw_dringsend_free_exit; 4635 } else { 4636 D2(vswp, "%s(%lld): free private descriptor found at pos %ld " 4637 "addr 0x%llx\n", __func__, ldcp->ldc_id, idx, priv_desc); 4638 } 4639 4640 /* copy data into the descriptor */ 4641 bufp = priv_desc->datap; 4642 bufp += VNET_IPALIGN; 4643 for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) { 4644 n = MBLKL(bp); 4645 bcopy(bp->b_rptr, bufp, n); 4646 bufp += n; 4647 } 4648 4649 priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size; 4650 4651 pub = priv_desc->descp; 4652 pub->nbytes = priv_desc->datalen; 4653 4654 /* update statistics */ 4655 if (IS_BROADCAST(ehp)) 4656 ldcp->ldc_stats.brdcstxmt++; 4657 else if (IS_MULTICAST(ehp)) 4658 ldcp->ldc_stats.multixmt++; 4659 ldcp->ldc_stats.opackets++; 4660 ldcp->ldc_stats.obytes += priv_desc->datalen; 4661 4662 mutex_enter(&priv_desc->dstate_lock); 4663 pub->hdr.dstate = VIO_DESC_READY; 4664 mutex_exit(&priv_desc->dstate_lock); 4665 4666 /* 4667 * Determine whether or not we need to send a message to our 4668 * peer prompting them to read our newly updated descriptor(s). 4669 */ 4670 mutex_enter(&dp->restart_lock); 4671 if (dp->restart_reqd) { 4672 dp->restart_reqd = B_FALSE; 4673 ldcp->ldc_stats.dring_data_msgs++; 4674 mutex_exit(&dp->restart_lock); 4675 4676 /* 4677 * Send a vio_dring_msg to peer to prompt them to read 4678 * the updated descriptor ring. 4679 */ 4680 dring_pkt.tag.vio_msgtype = VIO_TYPE_DATA; 4681 dring_pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 4682 dring_pkt.tag.vio_subtype_env = VIO_DRING_DATA; 4683 dring_pkt.tag.vio_sid = ldcp->local_session; 4684 4685 /* Note - for now using first ring */ 4686 dring_pkt.dring_ident = dp->ident; 4687 4688 /* 4689 * If last_ack_recv is -1 then we know we've not 4690 * received any ack's yet, so this must be the first 4691 * msg sent, so set the start to the begining of the ring. 
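 * (For example, once ACKs are flowing, a last_ack_recv of 5 gives a
 * start_idx of 6, modulo the ring size; end_idx is left at -1, i.e.
 * no explicit end point is advertised to the peer.)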
4692 */ 4693 mutex_enter(&dp->dlock); 4694 if (dp->last_ack_recv == -1) { 4695 dring_pkt.start_idx = 0; 4696 } else { 4697 dring_pkt.start_idx = 4698 (dp->last_ack_recv + 1) % dp->num_descriptors; 4699 } 4700 dring_pkt.end_idx = -1; 4701 mutex_exit(&dp->dlock); 4702 4703 D3(vswp, "%s(%lld): dring 0x%llx : ident 0x%llx\n", __func__, 4704 ldcp->ldc_id, dp, dring_pkt.dring_ident); 4705 D3(vswp, "%s(%lld): start %lld : end %lld :\n", 4706 __func__, ldcp->ldc_id, dring_pkt.start_idx, 4707 dring_pkt.end_idx); 4708 4709 RW_EXIT(&ldcp->lane_out.dlistrw); 4710 4711 (void) vsw_send_msg(ldcp, (void *)&dring_pkt, 4712 sizeof (vio_dring_msg_t), B_TRUE); 4713 4714 return (status); 4715 4716 } else { 4717 mutex_exit(&dp->restart_lock); 4718 D2(vswp, "%s(%lld): updating descp %d", __func__, 4719 ldcp->ldc_id, idx); 4720 } 4721 4722 vsw_dringsend_free_exit: 4723 4724 RW_EXIT(&ldcp->lane_out.dlistrw); 4725 4726 D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id); 4727 return (status); 4728 } 4729 4730 /* 4731 * Send an in-band descriptor message over ldc. 4732 */ 4733 static int 4734 vsw_descrsend(vsw_ldc_t *ldcp, mblk_t *mp) 4735 { 4736 vsw_t *vswp = ldcp->ldc_vswp; 4737 vnet_ibnd_desc_t ibnd_msg; 4738 vsw_private_desc_t *priv_desc = NULL; 4739 dring_info_t *dp = NULL; 4740 size_t n, size = 0; 4741 caddr_t bufp; 4742 mblk_t *bp; 4743 int idx, i; 4744 int status = LDC_TX_SUCCESS; 4745 static int warn_msg = 1; 4746 lane_t *lp = &ldcp->lane_out; 4747 4748 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 4749 4750 ASSERT(mp != NULL); 4751 4752 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 4753 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 4754 DERR(vswp, "%s(%lld) status(%d) state (0x%llx), dropping pkt", 4755 __func__, ldcp->ldc_id, ldcp->ldc_status, 4756 ldcp->lane_out.lstate); 4757 ldcp->ldc_stats.oerrors++; 4758 return (LDC_TX_FAILURE); 4759 } 4760 4761 /* 4762 * only expect single dring to exist, which we use 4763 * as an internal buffer, rather than a transfer channel. 
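 * The peer is handed the buffer's memory cookies in the in-band
 * descriptor message and pulls the data across with ldc_mem_copy(),
 * so the ring itself only stages outstanding transmits locally.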
4764 */ 4765 READ_ENTER(&ldcp->lane_out.dlistrw); 4766 if ((dp = ldcp->lane_out.dringp) == NULL) { 4767 DERR(vswp, "%s(%lld): no dring for outbound lane", 4768 __func__, ldcp->ldc_id); 4769 DERR(vswp, "%s(%lld) status(%d) state (0x%llx)", __func__, 4770 ldcp->ldc_id, ldcp->ldc_status, ldcp->lane_out.lstate); 4771 RW_EXIT(&ldcp->lane_out.dlistrw); 4772 ldcp->ldc_stats.oerrors++; 4773 return (LDC_TX_FAILURE); 4774 } 4775 4776 size = msgsize(mp); 4777 if (size > (size_t)lp->mtu) { 4778 RW_EXIT(&ldcp->lane_out.dlistrw); 4779 DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__, 4780 ldcp->ldc_id, size); 4781 ldcp->ldc_stats.oerrors++; 4782 return (LDC_TX_FAILURE); 4783 } 4784 4785 /* 4786 * Find a free descriptor in our buffer ring 4787 */ 4788 if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) { 4789 RW_EXIT(&ldcp->lane_out.dlistrw); 4790 if (warn_msg) { 4791 DERR(vswp, "%s(%lld): no descriptor available for ring " 4792 "at 0x%llx", __func__, ldcp->ldc_id, dp); 4793 warn_msg = 0; 4794 } 4795 4796 /* nothing more we can do */ 4797 status = LDC_TX_NORESOURCES; 4798 goto vsw_descrsend_free_exit; 4799 } else { 4800 D2(vswp, "%s(%lld): free private descriptor found at pos " 4801 "%ld addr 0x%x\n", __func__, ldcp->ldc_id, idx, priv_desc); 4802 warn_msg = 1; 4803 } 4804 4805 /* copy data into the descriptor */ 4806 bufp = priv_desc->datap; 4807 for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) { 4808 n = MBLKL(bp); 4809 bcopy(bp->b_rptr, bufp, n); 4810 bufp += n; 4811 } 4812 4813 priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size; 4814 4815 /* create and send the in-band descp msg */ 4816 ibnd_msg.hdr.tag.vio_msgtype = VIO_TYPE_DATA; 4817 ibnd_msg.hdr.tag.vio_subtype = VIO_SUBTYPE_INFO; 4818 ibnd_msg.hdr.tag.vio_subtype_env = VIO_DESC_DATA; 4819 ibnd_msg.hdr.tag.vio_sid = ldcp->local_session; 4820 4821 /* 4822 * Copy the mem cookies describing the data from the 4823 * private region of the descriptor ring into the inband 4824 * descriptor. 
4825 */ 4826 for (i = 0; i < priv_desc->ncookies; i++) { 4827 bcopy(&priv_desc->memcookie[i], &ibnd_msg.memcookie[i], 4828 sizeof (ldc_mem_cookie_t)); 4829 } 4830 4831 ibnd_msg.hdr.desc_handle = idx; 4832 ibnd_msg.ncookies = priv_desc->ncookies; 4833 ibnd_msg.nbytes = size; 4834 4835 ldcp->ldc_stats.opackets++; 4836 ldcp->ldc_stats.obytes += size; 4837 4838 RW_EXIT(&ldcp->lane_out.dlistrw); 4839 4840 (void) vsw_send_msg(ldcp, (void *)&ibnd_msg, 4841 sizeof (vnet_ibnd_desc_t), B_TRUE); 4842 4843 vsw_descrsend_free_exit: 4844 4845 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 4846 return (status); 4847 } 4848 4849 static void 4850 vsw_send_ver(void *arg) 4851 { 4852 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 4853 vsw_t *vswp = ldcp->ldc_vswp; 4854 lane_t *lp = &ldcp->lane_out; 4855 vio_ver_msg_t ver_msg; 4856 4857 D1(vswp, "%s enter", __func__); 4858 4859 ver_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 4860 ver_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 4861 ver_msg.tag.vio_subtype_env = VIO_VER_INFO; 4862 ver_msg.tag.vio_sid = ldcp->local_session; 4863 4864 if (vsw_obp_ver_proto_workaround == B_FALSE) { 4865 ver_msg.ver_major = vsw_versions[0].ver_major; 4866 ver_msg.ver_minor = vsw_versions[0].ver_minor; 4867 } else { 4868 /* use the major,minor that we've ack'd */ 4869 lane_t *lpi = &ldcp->lane_in; 4870 ver_msg.ver_major = lpi->ver_major; 4871 ver_msg.ver_minor = lpi->ver_minor; 4872 } 4873 ver_msg.dev_class = VDEV_NETWORK_SWITCH; 4874 4875 lp->lstate |= VSW_VER_INFO_SENT; 4876 lp->ver_major = ver_msg.ver_major; 4877 lp->ver_minor = ver_msg.ver_minor; 4878 4879 DUMP_TAG(ver_msg.tag); 4880 4881 (void) vsw_send_msg(ldcp, &ver_msg, sizeof (vio_ver_msg_t), B_TRUE); 4882 4883 D1(vswp, "%s (%d): exit", __func__, ldcp->ldc_id); 4884 } 4885 4886 static void 4887 vsw_send_attr(vsw_ldc_t *ldcp) 4888 { 4889 vsw_t *vswp = ldcp->ldc_vswp; 4890 lane_t *lp = &ldcp->lane_out; 4891 vnet_attr_msg_t attr_msg; 4892 4893 D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id); 4894 4895 /* 4896 * Subtype is set to INFO by default 4897 */ 4898 attr_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 4899 attr_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 4900 attr_msg.tag.vio_subtype_env = VIO_ATTR_INFO; 4901 attr_msg.tag.vio_sid = ldcp->local_session; 4902 4903 /* payload copied from default settings for lane */ 4904 attr_msg.mtu = lp->mtu; 4905 attr_msg.addr_type = lp->addr_type; 4906 attr_msg.xfer_mode = lp->xfer_mode; 4907 attr_msg.ack_freq = lp->xfer_mode; 4908 4909 READ_ENTER(&vswp->if_lockrw); 4910 attr_msg.addr = vnet_macaddr_strtoul((vswp->if_addr).ether_addr_octet); 4911 RW_EXIT(&vswp->if_lockrw); 4912 4913 ldcp->lane_out.lstate |= VSW_ATTR_INFO_SENT; 4914 4915 DUMP_TAG(attr_msg.tag); 4916 4917 (void) vsw_send_msg(ldcp, &attr_msg, sizeof (vnet_attr_msg_t), B_TRUE); 4918 4919 D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id); 4920 } 4921 4922 /* 4923 * Create dring info msg (which also results in the creation of 4924 * a dring). 4925 */ 4926 static vio_dring_reg_msg_t * 4927 vsw_create_dring_info_pkt(vsw_ldc_t *ldcp) 4928 { 4929 vio_dring_reg_msg_t *mp; 4930 dring_info_t *dp; 4931 vsw_t *vswp = ldcp->ldc_vswp; 4932 int rv; 4933 4934 D1(vswp, "vsw_create_dring_info_pkt enter\n"); 4935 4936 /* 4937 * If we can't create a dring, obviously no point sending 4938 * a message. 
4939 */ 4940 if ((dp = vsw_create_dring(ldcp)) == NULL) 4941 return (NULL); 4942 4943 /* Allocate pools of receive mblks */ 4944 rv = vsw_init_multipools(ldcp, vswp); 4945 if (rv) { 4946 DWARN(vswp, "%s: unable to create free mblk pools for" 4947 " channel %ld (rv %d)", __func__, ldcp->ldc_id, rv); 4948 vsw_free_lane_resources(ldcp, OUTBOUND); 4949 return (NULL); 4950 } 4951 4952 mp = kmem_zalloc(sizeof (vio_dring_reg_msg_t), KM_SLEEP); 4953 4954 mp->tag.vio_msgtype = VIO_TYPE_CTRL; 4955 mp->tag.vio_subtype = VIO_SUBTYPE_INFO; 4956 mp->tag.vio_subtype_env = VIO_DRING_REG; 4957 mp->tag.vio_sid = ldcp->local_session; 4958 4959 /* payload */ 4960 mp->num_descriptors = dp->num_descriptors; 4961 mp->descriptor_size = dp->descriptor_size; 4962 mp->options = dp->options; 4963 mp->ncookies = dp->ncookies; 4964 bcopy(&dp->cookie[0], &mp->cookie[0], sizeof (ldc_mem_cookie_t)); 4965 4966 mp->dring_ident = 0; 4967 4968 D1(vswp, "vsw_create_dring_info_pkt exit\n"); 4969 4970 return (mp); 4971 } 4972 4973 static void 4974 vsw_send_dring_info(vsw_ldc_t *ldcp) 4975 { 4976 vio_dring_reg_msg_t *dring_msg; 4977 vsw_t *vswp = ldcp->ldc_vswp; 4978 4979 D1(vswp, "%s: (%ld) enter", __func__, ldcp->ldc_id); 4980 4981 dring_msg = vsw_create_dring_info_pkt(ldcp); 4982 if (dring_msg == NULL) { 4983 cmn_err(CE_WARN, "!vsw%d: %s: error creating msg", 4984 vswp->instance, __func__); 4985 return; 4986 } 4987 4988 ldcp->lane_out.lstate |= VSW_DRING_INFO_SENT; 4989 4990 DUMP_TAG_PTR((vio_msg_tag_t *)dring_msg); 4991 4992 (void) vsw_send_msg(ldcp, dring_msg, 4993 sizeof (vio_dring_reg_msg_t), B_TRUE); 4994 4995 kmem_free(dring_msg, sizeof (vio_dring_reg_msg_t)); 4996 4997 D1(vswp, "%s: (%ld) exit", __func__, ldcp->ldc_id); 4998 } 4999 5000 static void 5001 vsw_send_rdx(vsw_ldc_t *ldcp) 5002 { 5003 vsw_t *vswp = ldcp->ldc_vswp; 5004 vio_rdx_msg_t rdx_msg; 5005 5006 D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id); 5007 5008 rdx_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 5009 rdx_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 5010 rdx_msg.tag.vio_subtype_env = VIO_RDX; 5011 rdx_msg.tag.vio_sid = ldcp->local_session; 5012 5013 ldcp->lane_in.lstate |= VSW_RDX_INFO_SENT; 5014 5015 DUMP_TAG(rdx_msg.tag); 5016 5017 (void) vsw_send_msg(ldcp, &rdx_msg, sizeof (vio_rdx_msg_t), B_TRUE); 5018 5019 D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id); 5020 } 5021 5022 /* 5023 * Generic routine to send message out over ldc channel. 5024 * 5025 * It is possible that when we attempt to write over the ldc channel 5026 * that we get notified that it has been reset. Depending on the value 5027 * of the handle_reset flag we either handle that event here or simply 5028 * notify the caller that the channel was reset. 
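 *
 * A sketch of the two calling patterns used in this file (illustrative
 * only; 'msg' stands for whichever message structure is being sent):
 *
 *      (void) vsw_send_msg(ldcp, &msg, sizeof (msg), B_TRUE);
 *              reset handled here via vsw_process_conn_evt()
 *      rv = vsw_send_msg(ldcp, &msg, sizeof (msg), B_FALSE);
 *              caller checks for ECONNRESET itself, typically because
 *              it still holds the dlistrw lock at the call site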
5029 */ 5030 int 5031 vsw_send_msg(vsw_ldc_t *ldcp, void *msgp, int size, boolean_t handle_reset) 5032 { 5033 int rv; 5034 size_t msglen = size; 5035 vio_msg_tag_t *tag = (vio_msg_tag_t *)msgp; 5036 vsw_t *vswp = ldcp->ldc_vswp; 5037 vio_dring_msg_t *dmsg; 5038 vio_raw_data_msg_t *rmsg; 5039 vnet_ibnd_desc_t *imsg; 5040 boolean_t data_msg = B_FALSE; 5041 5042 D1(vswp, "vsw_send_msg (%lld) enter : sending %d bytes", 5043 ldcp->ldc_id, size); 5044 5045 D2(vswp, "send_msg: type 0x%llx", tag->vio_msgtype); 5046 D2(vswp, "send_msg: stype 0x%llx", tag->vio_subtype); 5047 D2(vswp, "send_msg: senv 0x%llx", tag->vio_subtype_env); 5048 5049 mutex_enter(&ldcp->ldc_txlock); 5050 5051 if (tag->vio_subtype == VIO_SUBTYPE_INFO) { 5052 if (tag->vio_subtype_env == VIO_DRING_DATA) { 5053 dmsg = (vio_dring_msg_t *)tag; 5054 dmsg->seq_num = ldcp->lane_out.seq_num; 5055 data_msg = B_TRUE; 5056 } else if (tag->vio_subtype_env == VIO_PKT_DATA) { 5057 rmsg = (vio_raw_data_msg_t *)tag; 5058 rmsg->seq_num = ldcp->lane_out.seq_num; 5059 data_msg = B_TRUE; 5060 } else if (tag->vio_subtype_env == VIO_DESC_DATA) { 5061 imsg = (vnet_ibnd_desc_t *)tag; 5062 imsg->hdr.seq_num = ldcp->lane_out.seq_num; 5063 data_msg = B_TRUE; 5064 } 5065 } 5066 5067 do { 5068 msglen = size; 5069 rv = ldc_write(ldcp->ldc_handle, (caddr_t)msgp, &msglen); 5070 } while (rv == EWOULDBLOCK && --vsw_wretries > 0); 5071 5072 if (rv == 0 && data_msg == B_TRUE) { 5073 ldcp->lane_out.seq_num++; 5074 } 5075 5076 if ((rv != 0) || (msglen != size)) { 5077 DERR(vswp, "vsw_send_msg:ldc_write failed: chan(%lld) rv(%d) " 5078 "size (%d) msglen(%d)\n", ldcp->ldc_id, rv, size, msglen); 5079 ldcp->ldc_stats.oerrors++; 5080 } 5081 5082 mutex_exit(&ldcp->ldc_txlock); 5083 5084 /* 5085 * If channel has been reset we either handle it here or 5086 * simply report back that it has been reset and let caller 5087 * decide what to do. 5088 */ 5089 if (rv == ECONNRESET) { 5090 DWARN(vswp, "%s (%lld) channel reset", __func__, ldcp->ldc_id); 5091 5092 /* 5093 * N.B - must never be holding the dlistrw lock when 5094 * we do a reset of the channel. 5095 */ 5096 if (handle_reset) { 5097 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 5098 } 5099 } 5100 5101 return (rv); 5102 } 5103 5104 /* 5105 * Remove the specified address from the list of address maintained 5106 * in this port node. 
5107 */ 5108 mcst_addr_t * 5109 vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr) 5110 { 5111 vsw_t *vswp = NULL; 5112 vsw_port_t *port = NULL; 5113 mcst_addr_t *prev_p = NULL; 5114 mcst_addr_t *curr_p = NULL; 5115 5116 D1(NULL, "%s: enter : devtype %d : addr 0x%llx", 5117 __func__, devtype, addr); 5118 5119 if (devtype == VSW_VNETPORT) { 5120 port = (vsw_port_t *)arg; 5121 mutex_enter(&port->mca_lock); 5122 prev_p = curr_p = port->mcap; 5123 } else { 5124 vswp = (vsw_t *)arg; 5125 mutex_enter(&vswp->mca_lock); 5126 prev_p = curr_p = vswp->mcap; 5127 } 5128 5129 while (curr_p != NULL) { 5130 if (curr_p->addr == addr) { 5131 D2(NULL, "%s: address found", __func__); 5132 /* match found */ 5133 if (prev_p == curr_p) { 5134 /* list head */ 5135 if (devtype == VSW_VNETPORT) 5136 port->mcap = curr_p->nextp; 5137 else 5138 vswp->mcap = curr_p->nextp; 5139 } else { 5140 prev_p->nextp = curr_p->nextp; 5141 } 5142 break; 5143 } else { 5144 prev_p = curr_p; 5145 curr_p = curr_p->nextp; 5146 } 5147 } 5148 5149 if (devtype == VSW_VNETPORT) 5150 mutex_exit(&port->mca_lock); 5151 else 5152 mutex_exit(&vswp->mca_lock); 5153 5154 D1(NULL, "%s: exit", __func__); 5155 5156 return (curr_p); 5157 } 5158 5159 /* 5160 * Creates a descriptor ring (dring) and links it into the 5161 * link of outbound drings for this channel. 5162 * 5163 * Returns NULL if creation failed. 5164 */ 5165 static dring_info_t * 5166 vsw_create_dring(vsw_ldc_t *ldcp) 5167 { 5168 vsw_private_desc_t *priv_addr = NULL; 5169 vsw_t *vswp = ldcp->ldc_vswp; 5170 ldc_mem_info_t minfo; 5171 dring_info_t *dp, *tp; 5172 int i; 5173 5174 dp = (dring_info_t *)kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 5175 5176 mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL); 5177 5178 /* create public section of ring */ 5179 if ((ldc_mem_dring_create(vsw_ntxds, 5180 VSW_PUB_SIZE, &dp->handle)) != 0) { 5181 5182 DERR(vswp, "vsw_create_dring(%lld): ldc dring create " 5183 "failed", ldcp->ldc_id); 5184 goto create_fail_exit; 5185 } 5186 5187 ASSERT(dp->handle != NULL); 5188 5189 /* 5190 * Get the base address of the public section of the ring. 5191 */ 5192 if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) { 5193 DERR(vswp, "vsw_create_dring(%lld): dring info failed\n", 5194 ldcp->ldc_id); 5195 goto dring_fail_exit; 5196 } else { 5197 ASSERT(minfo.vaddr != 0); 5198 dp->pub_addr = minfo.vaddr; 5199 } 5200 5201 dp->num_descriptors = vsw_ntxds; 5202 dp->descriptor_size = VSW_PUB_SIZE; 5203 dp->options = VIO_TX_DRING; 5204 dp->ncookies = 1; /* guaranteed by ldc */ 5205 5206 /* 5207 * create private portion of ring 5208 */ 5209 dp->priv_addr = (vsw_private_desc_t *)kmem_zalloc( 5210 (sizeof (vsw_private_desc_t) * vsw_ntxds), KM_SLEEP); 5211 5212 if (vsw_setup_ring(ldcp, dp)) { 5213 DERR(vswp, "%s: unable to setup ring", __func__); 5214 goto dring_fail_exit; 5215 } 5216 5217 /* haven't used any descriptors yet */ 5218 dp->end_idx = 0; 5219 dp->last_ack_recv = -1; 5220 5221 /* bind dring to the channel */ 5222 if ((ldc_mem_dring_bind(ldcp->ldc_handle, dp->handle, 5223 LDC_DIRECT_MAP | LDC_SHADOW_MAP, LDC_MEM_RW, 5224 &dp->cookie[0], &dp->ncookies)) != 0) { 5225 DERR(vswp, "vsw_create_dring: unable to bind to channel " 5226 "%lld", ldcp->ldc_id); 5227 goto dring_fail_exit; 5228 } 5229 5230 mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL); 5231 dp->restart_reqd = B_TRUE; 5232 5233 /* 5234 * Only ever create rings for outgoing lane. Link it onto 5235 * end of list. 
5236 */ 5237 WRITE_ENTER(&ldcp->lane_out.dlistrw); 5238 if (ldcp->lane_out.dringp == NULL) { 5239 D2(vswp, "vsw_create_dring: adding first outbound ring"); 5240 ldcp->lane_out.dringp = dp; 5241 } else { 5242 tp = ldcp->lane_out.dringp; 5243 while (tp->next != NULL) 5244 tp = tp->next; 5245 5246 tp->next = dp; 5247 } 5248 RW_EXIT(&ldcp->lane_out.dlistrw); 5249 5250 return (dp); 5251 5252 dring_fail_exit: 5253 (void) ldc_mem_dring_destroy(dp->handle); 5254 5255 create_fail_exit: 5256 if (dp->priv_addr != NULL) { 5257 priv_addr = dp->priv_addr; 5258 for (i = 0; i < vsw_ntxds; i++) { 5259 if (priv_addr->memhandle != NULL) 5260 (void) ldc_mem_free_handle( 5261 priv_addr->memhandle); 5262 priv_addr++; 5263 } 5264 kmem_free(dp->priv_addr, 5265 (sizeof (vsw_private_desc_t) * vsw_ntxds)); 5266 } 5267 mutex_destroy(&dp->dlock); 5268 5269 kmem_free(dp, sizeof (dring_info_t)); 5270 return (NULL); 5271 } 5272 5273 /* 5274 * Create a ring consisting of just a private portion and link 5275 * it into the list of rings for the outbound lane. 5276 * 5277 * These type of rings are used primarily for temporary data 5278 * storage (i.e. as data buffers). 5279 */ 5280 void 5281 vsw_create_privring(vsw_ldc_t *ldcp) 5282 { 5283 dring_info_t *dp, *tp; 5284 vsw_t *vswp = ldcp->ldc_vswp; 5285 5286 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 5287 5288 dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 5289 5290 mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL); 5291 5292 /* no public section */ 5293 dp->pub_addr = NULL; 5294 5295 dp->priv_addr = kmem_zalloc( 5296 (sizeof (vsw_private_desc_t) * vsw_ntxds), KM_SLEEP); 5297 5298 dp->num_descriptors = vsw_ntxds; 5299 5300 if (vsw_setup_ring(ldcp, dp)) { 5301 DERR(vswp, "%s: setup of ring failed", __func__); 5302 kmem_free(dp->priv_addr, 5303 (sizeof (vsw_private_desc_t) * vsw_ntxds)); 5304 mutex_destroy(&dp->dlock); 5305 kmem_free(dp, sizeof (dring_info_t)); 5306 return; 5307 } 5308 5309 /* haven't used any descriptors yet */ 5310 dp->end_idx = 0; 5311 5312 mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL); 5313 dp->restart_reqd = B_TRUE; 5314 5315 /* 5316 * Only ever create rings for outgoing lane. Link it onto 5317 * end of list. 5318 */ 5319 WRITE_ENTER(&ldcp->lane_out.dlistrw); 5320 if (ldcp->lane_out.dringp == NULL) { 5321 D2(vswp, "%s: adding first outbound privring", __func__); 5322 ldcp->lane_out.dringp = dp; 5323 } else { 5324 tp = ldcp->lane_out.dringp; 5325 while (tp->next != NULL) 5326 tp = tp->next; 5327 5328 tp->next = dp; 5329 } 5330 RW_EXIT(&ldcp->lane_out.dlistrw); 5331 5332 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 5333 } 5334 5335 /* 5336 * Setup the descriptors in the dring. Returns 0 on success, 1 on 5337 * failure. 5338 */ 5339 int 5340 vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp) 5341 { 5342 vnet_public_desc_t *pub_addr = NULL; 5343 vsw_private_desc_t *priv_addr = NULL; 5344 vsw_t *vswp = ldcp->ldc_vswp; 5345 uint64_t *tmpp; 5346 uint64_t offset = 0; 5347 uint32_t ncookies = 0; 5348 static char *name = "vsw_setup_ring"; 5349 int i, j, nc, rv; 5350 size_t data_sz; 5351 void *data_addr; 5352 5353 priv_addr = dp->priv_addr; 5354 pub_addr = dp->pub_addr; 5355 5356 /* public section may be null but private should never be */ 5357 ASSERT(priv_addr != NULL); 5358 5359 /* 5360 * Allocate the region of memory which will be used to hold 5361 * the data the descriptors will refer to. 
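 * (With the sizing rules described below, e.g. a max_frame_size of
 * roughly 1518 bytes plus the alignment padding rounds up to a 2K
 * buffer; 2K buffers carved from the 8K-aligned base never straddle
 * an 8K page, so each descriptor needs only a single ldc cookie.)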
5362 */ 5363 data_sz = vswp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN; 5364 5365 /* 5366 * In order to ensure that the number of ldc cookies per descriptor is 5367 * limited to be within the default MAX_COOKIES (2), we take the steps 5368 * outlined below: 5369 * 5370 * Align the entire data buffer area to 8K and carve out per descriptor 5371 * data buffers starting from this 8K aligned base address. 5372 * 5373 * We round up the mtu specified to be a multiple of 2K or 4K. 5374 * For sizes up to 12K we round up the size to the next 2K. 5375 * For sizes > 12K we round up to the next 4K (otherwise sizes such as 5376 * 14K could end up needing 3 cookies, with the buffer spread across 5377 * 3 8K pages: 8K+6K, 2K+8K+2K, 6K+8K, ...). 5378 */ 5379 if (data_sz <= VNET_12K) { 5380 data_sz = VNET_ROUNDUP_2K(data_sz); 5381 } else { 5382 data_sz = VNET_ROUNDUP_4K(data_sz); 5383 } 5384 5385 dp->desc_data_sz = data_sz; 5386 5387 /* allocate extra 8K bytes for alignment */ 5388 dp->data_sz = (vsw_ntxds * data_sz) + VNET_8K; 5389 data_addr = kmem_alloc(dp->data_sz, KM_SLEEP); 5390 dp->data_addr = data_addr; 5391 5392 D2(vswp, "%s: allocated %lld bytes at 0x%llx\n", name, 5393 dp->data_sz, dp->data_addr); 5394 5395 /* align the starting address of the data area to 8K */ 5396 data_addr = (void *)VNET_ROUNDUP_8K((uintptr_t)data_addr); 5397 5398 tmpp = (uint64_t *)data_addr; 5399 offset = dp->desc_data_sz/sizeof (tmpp); 5400 5401 /* 5402 * Initialise some of the private and public (if they exist) 5403 * descriptor fields. 5404 */ 5405 for (i = 0; i < vsw_ntxds; i++) { 5406 mutex_init(&priv_addr->dstate_lock, NULL, MUTEX_DRIVER, NULL); 5407 5408 if ((ldc_mem_alloc_handle(ldcp->ldc_handle, 5409 &priv_addr->memhandle)) != 0) { 5410 DERR(vswp, "%s: alloc mem handle failed", name); 5411 goto setup_ring_cleanup; 5412 } 5413 5414 priv_addr->datap = (void *)tmpp; 5415 5416 rv = ldc_mem_bind_handle(priv_addr->memhandle, 5417 (caddr_t)priv_addr->datap, dp->desc_data_sz, 5418 LDC_SHADOW_MAP, LDC_MEM_R|LDC_MEM_W, 5419 &(priv_addr->memcookie[0]), &ncookies); 5420 if (rv != 0) { 5421 DERR(vswp, "%s(%lld): ldc_mem_bind_handle failed " 5422 "(rv %d)", name, ldcp->ldc_id, rv); 5423 goto setup_ring_cleanup; 5424 } 5425 priv_addr->bound = 1; 5426 5427 D2(vswp, "%s: %d: memcookie 0 : addr 0x%llx : size 0x%llx", 5428 name, i, priv_addr->memcookie[0].addr, 5429 priv_addr->memcookie[0].size); 5430 5431 if (ncookies >= (uint32_t)(VSW_MAX_COOKIES + 1)) { 5432 DERR(vswp, "%s(%lld) ldc_mem_bind_handle returned " 5433 "invalid num of cookies (%d) for size 0x%llx", 5434 name, ldcp->ldc_id, ncookies, VSW_RING_EL_DATA_SZ); 5435 5436 goto setup_ring_cleanup; 5437 } else { 5438 for (j = 1; j < ncookies; j++) { 5439 rv = ldc_mem_nextcookie(priv_addr->memhandle, 5440 &(priv_addr->memcookie[j])); 5441 if (rv != 0) { 5442 DERR(vswp, "%s: ldc_mem_nextcookie " 5443 "failed rv (%d)", name, rv); 5444 goto setup_ring_cleanup; 5445 } 5446 D3(vswp, "%s: memcookie %d : addr 0x%llx : " 5447 "size 0x%llx", name, j, 5448 priv_addr->memcookie[j].addr, 5449 priv_addr->memcookie[j].size); 5450 } 5451 5452 } 5453 priv_addr->ncookies = ncookies; 5454 priv_addr->dstate = VIO_DESC_FREE; 5455 5456 if (pub_addr != NULL) { 5457 5458 /* link pub and private sides */ 5459 priv_addr->descp = pub_addr; 5460 5461 pub_addr->ncookies = priv_addr->ncookies; 5462 5463 for (nc = 0; nc < pub_addr->ncookies; nc++) { 5464 bcopy(&priv_addr->memcookie[nc], 5465 &pub_addr->memcookie[nc], 5466 sizeof (ldc_mem_cookie_t)); 5467 } 5468 5469 pub_addr->hdr.dstate = VIO_DESC_FREE; 
5470 pub_addr++; 5471 } 5472 5473 /* 5474 * move to next element in the dring and the next 5475 * position in the data buffer. 5476 */ 5477 priv_addr++; 5478 tmpp += offset; 5479 } 5480 5481 return (0); 5482 5483 setup_ring_cleanup: 5484 priv_addr = dp->priv_addr; 5485 5486 for (j = 0; j < i; j++) { 5487 (void) ldc_mem_unbind_handle(priv_addr->memhandle); 5488 (void) ldc_mem_free_handle(priv_addr->memhandle); 5489 5490 mutex_destroy(&priv_addr->dstate_lock); 5491 5492 priv_addr++; 5493 } 5494 kmem_free(dp->data_addr, dp->data_sz); 5495 5496 return (1); 5497 } 5498 5499 /* 5500 * Searches the private section of a ring for a free descriptor, 5501 * starting at the location of the last free descriptor found 5502 * previously. 5503 * 5504 * Returns 0 if free descriptor is available, and updates state 5505 * of private descriptor to VIO_DESC_READY, otherwise returns 1. 5506 * 5507 * FUTURE: might need to return contiguous range of descriptors 5508 * as dring info msg assumes all will be contiguous. 5509 */ 5510 static int 5511 vsw_dring_find_free_desc(dring_info_t *dringp, 5512 vsw_private_desc_t **priv_p, int *idx) 5513 { 5514 vsw_private_desc_t *addr = NULL; 5515 int num = vsw_ntxds; 5516 int ret = 1; 5517 5518 D1(NULL, "%s enter\n", __func__); 5519 5520 ASSERT(dringp->priv_addr != NULL); 5521 5522 D2(NULL, "%s: searching ring, dringp 0x%llx : start pos %lld", 5523 __func__, dringp, dringp->end_idx); 5524 5525 addr = (vsw_private_desc_t *)dringp->priv_addr + dringp->end_idx; 5526 5527 mutex_enter(&addr->dstate_lock); 5528 if (addr->dstate == VIO_DESC_FREE) { 5529 addr->dstate = VIO_DESC_READY; 5530 *priv_p = addr; 5531 *idx = dringp->end_idx; 5532 dringp->end_idx = (dringp->end_idx + 1) % num; 5533 ret = 0; 5534 5535 } 5536 mutex_exit(&addr->dstate_lock); 5537 5538 /* ring full */ 5539 if (ret == 1) { 5540 D2(NULL, "%s: no desp free: started at %d", __func__, 5541 dringp->end_idx); 5542 } 5543 5544 D1(NULL, "%s: exit\n", __func__); 5545 5546 return (ret); 5547 } 5548 5549 /* 5550 * Map from a dring identifier to the ring itself. Returns 5551 * pointer to ring or NULL if no match found. 5552 * 5553 * Should be called with dlistrw rwlock held as reader. 5554 */ 5555 static dring_info_t * 5556 vsw_ident2dring(lane_t *lane, uint64_t ident) 5557 { 5558 dring_info_t *dp = NULL; 5559 5560 if ((dp = lane->dringp) == NULL) { 5561 return (NULL); 5562 } else { 5563 if (dp->ident == ident) 5564 return (dp); 5565 5566 while (dp != NULL) { 5567 if (dp->ident == ident) 5568 break; 5569 dp = dp->next; 5570 } 5571 } 5572 5573 return (dp); 5574 } 5575 5576 /* 5577 * Set the default lane attributes. These are copied into 5578 * the attr msg we send to our peer. If they are not acceptable 5579 * then (currently) the handshake ends. 5580 */ 5581 static void 5582 vsw_set_lane_attr(vsw_t *vswp, lane_t *lp) 5583 { 5584 bzero(lp, sizeof (lane_t)); 5585 5586 READ_ENTER(&vswp->if_lockrw); 5587 ether_copy(&(vswp->if_addr), &(lp->addr)); 5588 RW_EXIT(&vswp->if_lockrw); 5589 5590 lp->mtu = vswp->max_frame_size; 5591 lp->addr_type = ADDR_TYPE_MAC; 5592 lp->xfer_mode = VIO_DRING_MODE_V1_0; 5593 lp->ack_freq = 0; /* for shared mode */ 5594 lp->seq_num = VNET_ISS; 5595 } 5596 5597 /* 5598 * Verify that the attributes are acceptable. 5599 * 5600 * FUTURE: If some attributes are not acceptable, change them 5601 * our desired values. 
5602  */
5603 static int
5604 vsw_check_attr(vnet_attr_msg_t *pkt, vsw_ldc_t *ldcp)
5605 {
5606    int ret = 0;
5607    struct ether_addr ea;
5608    vsw_port_t *port = ldcp->ldc_port;
5609    lane_t *lp = &ldcp->lane_out;
5610
5611    D1(NULL, "vsw_check_attr enter\n");
5612
5613    if ((pkt->xfer_mode != VIO_DESC_MODE) &&
5614        (pkt->xfer_mode != lp->xfer_mode)) {
5615        D2(NULL, "vsw_check_attr: unknown mode %x\n", pkt->xfer_mode);
5616        ret = 1;
5617    }
5618
5619    /* Only MAC addresses are supported at the moment. */
5620    if ((pkt->addr_type != ADDR_TYPE_MAC) || (pkt->addr == 0)) {
5621        D2(NULL, "vsw_check_attr: invalid addr_type %x, "
5622            "or address 0x%llx\n", pkt->addr_type, pkt->addr);
5623        ret = 1;
5624    }
5625
5626    /*
5627     * The MAC address supplied by the device should match that stored
5628     * in the vsw-port OBP node. We need to decide what to do if they
5629     * don't match; for the moment just warn but don't fail.
5630     */
5631    vnet_macaddr_ultostr(pkt->addr, ea.ether_addr_octet);
5632    if (ether_cmp(&ea, &port->p_macaddr) != 0) {
5633        DERR(NULL, "vsw_check_attr: device supplied address "
5634            "0x%llx doesn't match node address 0x%llx\n",
5635            pkt->addr, port->p_macaddr);
5636    }
5637
5638    /*
5639     * Ack freq only makes sense in pkt mode; in shared
5640     * mode the ring descriptors say whether or not to
5641     * send back an ACK.
5642     */
5643    if ((VSW_VER_GTEQ(ldcp, 1, 2) &&
5644        (pkt->xfer_mode & VIO_DRING_MODE_V1_2)) ||
5645        (VSW_VER_LT(ldcp, 1, 2) &&
5646        (pkt->xfer_mode == VIO_DRING_MODE_V1_0))) {
5647        if (pkt->ack_freq > 0) {
5648            D2(NULL, "vsw_check_attr: non zero ack freq "
5649                " in SHM mode\n");
5650            ret = 1;
5651        }
5652    }
5653
5654    if (VSW_VER_LT(ldcp, 1, 4)) {
5655        /* for versions < 1.4 the mtu must match exactly */
5656        if (pkt->mtu != lp->mtu) {
5657            D2(NULL, "vsw_check_attr: invalid MTU (0x%llx)\n",
5658                pkt->mtu);
5659            ret = 1;
5660        }
5661    } else {
5662        /* for versions >= 1.4, validate that the peer's mtu is at least ETHERMAX */
5663        if (pkt->mtu < ETHERMAX) {
5664            ret = 1;
5665        }
5666    }
5667
5668    D1(NULL, "vsw_check_attr exit\n");
5669
5670    return (ret);
5671 }
5672
5673 /*
5674  * Returns 1 if there is a problem, 0 otherwise.
5675  */
5676 static int
5677 vsw_check_dring_info(vio_dring_reg_msg_t *pkt)
5678 {
5681    int ret = 0;
5682
5683    D1(NULL, "vsw_check_dring_info enter\n");
5684
5685    if ((pkt->num_descriptors == 0) ||
5686        (pkt->descriptor_size == 0) ||
5687        (pkt->ncookies != 1)) {
5688        DERR(NULL, "vsw_check_dring_info: invalid dring msg");
5689        ret = 1;
5690    }
5691
5692    D1(NULL, "vsw_check_dring_info exit\n");
5693
5694    return (ret);
5695 }
5696
5697 /*
5698  * Returns 1 if two memory cookies match. Otherwise returns 0.
5699  */
5700 static int
5701 vsw_mem_cookie_match(ldc_mem_cookie_t *m1, ldc_mem_cookie_t *m2)
5702 {
5703    if ((m1->addr != m2->addr) ||
5704        (m1->size != m2->size)) {
5705        return (0);
5706    } else {
5707        return (1);
5708    }
5709 }
5710
5711 /*
5712  * Returns 1 if the ring described in the reg message matches that
5713  * described by the dring_info structure. Otherwise returns 0.
5714  */
5715 static int
5716 vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg)
5717 {
5718    if ((msg->descriptor_size != dp->descriptor_size) ||
5719        (msg->num_descriptors != dp->num_descriptors) ||
5720        (msg->ncookies != dp->ncookies) ||
5721        !(vsw_mem_cookie_match(&msg->cookie[0], &dp->cookie[0]))) {
5722        return (0);
5723    } else {
5724        return (1);
5725    }
5727 }
5728
5729 /*
5730  * Reset and free all the resources associated with
5731  * the channel.
5732 */ 5733 static void 5734 vsw_free_lane_resources(vsw_ldc_t *ldcp, uint64_t dir) 5735 { 5736 dring_info_t *dp, *dpp; 5737 lane_t *lp = NULL; 5738 5739 ASSERT(ldcp != NULL); 5740 5741 D1(ldcp->ldc_vswp, "%s (%lld): enter", __func__, ldcp->ldc_id); 5742 5743 if (dir == INBOUND) { 5744 D2(ldcp->ldc_vswp, "%s: freeing INBOUND lane" 5745 " of channel %lld", __func__, ldcp->ldc_id); 5746 lp = &ldcp->lane_in; 5747 } else { 5748 D2(ldcp->ldc_vswp, "%s: freeing OUTBOUND lane" 5749 " of channel %lld", __func__, ldcp->ldc_id); 5750 lp = &ldcp->lane_out; 5751 } 5752 5753 lp->lstate = VSW_LANE_INACTIV; 5754 lp->seq_num = VNET_ISS; 5755 5756 if (lp->dringp) { 5757 if (dir == INBOUND) { 5758 WRITE_ENTER(&lp->dlistrw); 5759 dp = lp->dringp; 5760 while (dp != NULL) { 5761 dpp = dp->next; 5762 if (dp->handle != NULL) 5763 (void) ldc_mem_dring_unmap(dp->handle); 5764 kmem_free(dp, sizeof (dring_info_t)); 5765 dp = dpp; 5766 } 5767 RW_EXIT(&lp->dlistrw); 5768 } else { 5769 /* 5770 * unbind, destroy exported dring, free dring struct 5771 */ 5772 WRITE_ENTER(&lp->dlistrw); 5773 dp = lp->dringp; 5774 vsw_free_ring(dp); 5775 RW_EXIT(&lp->dlistrw); 5776 } 5777 lp->dringp = NULL; 5778 } 5779 5780 D1(ldcp->ldc_vswp, "%s (%lld): exit", __func__, ldcp->ldc_id); 5781 } 5782 5783 /* 5784 * Free ring and all associated resources. 5785 * 5786 * Should be called with dlistrw rwlock held as writer. 5787 */ 5788 static void 5789 vsw_free_ring(dring_info_t *dp) 5790 { 5791 vsw_private_desc_t *paddr = NULL; 5792 dring_info_t *dpp; 5793 int i; 5794 5795 while (dp != NULL) { 5796 mutex_enter(&dp->dlock); 5797 dpp = dp->next; 5798 if (dp->priv_addr != NULL) { 5799 /* 5800 * First unbind and free the memory handles 5801 * stored in each descriptor within the ring. 5802 */ 5803 for (i = 0; i < vsw_ntxds; i++) { 5804 paddr = (vsw_private_desc_t *) 5805 dp->priv_addr + i; 5806 if (paddr->memhandle != NULL) { 5807 if (paddr->bound == 1) { 5808 if (ldc_mem_unbind_handle( 5809 paddr->memhandle) != 0) { 5810 DERR(NULL, "error " 5811 "unbinding handle for " 5812 "ring 0x%llx at pos %d", 5813 dp, i); 5814 continue; 5815 } 5816 paddr->bound = 0; 5817 } 5818 5819 if (ldc_mem_free_handle( 5820 paddr->memhandle) != 0) { 5821 DERR(NULL, "error freeing " 5822 "handle for ring 0x%llx " 5823 "at pos %d", dp, i); 5824 continue; 5825 } 5826 paddr->memhandle = NULL; 5827 } 5828 mutex_destroy(&paddr->dstate_lock); 5829 } 5830 kmem_free(dp->priv_addr, 5831 (sizeof (vsw_private_desc_t) * vsw_ntxds)); 5832 } 5833 5834 /* 5835 * Now unbind and destroy the ring itself. 5836 */ 5837 if (dp->handle != NULL) { 5838 (void) ldc_mem_dring_unbind(dp->handle); 5839 (void) ldc_mem_dring_destroy(dp->handle); 5840 } 5841 5842 if (dp->data_addr != NULL) { 5843 kmem_free(dp->data_addr, dp->data_sz); 5844 } 5845 5846 mutex_exit(&dp->dlock); 5847 mutex_destroy(&dp->dlock); 5848 mutex_destroy(&dp->restart_lock); 5849 kmem_free(dp, sizeof (dring_info_t)); 5850 5851 dp = dpp; 5852 } 5853 } 5854 5855 /* 5856 * vsw_ldc_rx_worker -- A per LDC worker thread to receive data. 5857 * This thread is woken up by the LDC interrupt handler to process 5858 * LDC packets and receive data. 
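 * The worker takes ldc_cblock around vsw_process_pkt() and co-ordinates
 * with vsw_stop_rx_thread() via the VSW_WTHR_RUNNING and VSW_WTHR_STOP
 * flags in rx_thr_flags.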
5859 */ 5860 static void 5861 vsw_ldc_rx_worker(void *arg) 5862 { 5863 callb_cpr_t cprinfo; 5864 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 5865 vsw_t *vswp = ldcp->ldc_vswp; 5866 5867 D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id); 5868 CALLB_CPR_INIT(&cprinfo, &ldcp->rx_thr_lock, callb_generic_cpr, 5869 "vsw_rx_thread"); 5870 mutex_enter(&ldcp->rx_thr_lock); 5871 ldcp->rx_thr_flags |= VSW_WTHR_RUNNING; 5872 while (!(ldcp->rx_thr_flags & VSW_WTHR_STOP)) { 5873 5874 CALLB_CPR_SAFE_BEGIN(&cprinfo); 5875 /* 5876 * Wait until the data is received or a stop 5877 * request is received. 5878 */ 5879 while (!(ldcp->rx_thr_flags & 5880 (VSW_WTHR_DATARCVD | VSW_WTHR_STOP))) { 5881 cv_wait(&ldcp->rx_thr_cv, &ldcp->rx_thr_lock); 5882 } 5883 CALLB_CPR_SAFE_END(&cprinfo, &ldcp->rx_thr_lock) 5884 5885 /* 5886 * First process the stop request. 5887 */ 5888 if (ldcp->rx_thr_flags & VSW_WTHR_STOP) { 5889 D2(vswp, "%s(%lld):Rx thread stopped\n", 5890 __func__, ldcp->ldc_id); 5891 break; 5892 } 5893 ldcp->rx_thr_flags &= ~VSW_WTHR_DATARCVD; 5894 mutex_exit(&ldcp->rx_thr_lock); 5895 D1(vswp, "%s(%lld):calling vsw_process_pkt\n", 5896 __func__, ldcp->ldc_id); 5897 mutex_enter(&ldcp->ldc_cblock); 5898 vsw_process_pkt(ldcp); 5899 mutex_exit(&ldcp->ldc_cblock); 5900 mutex_enter(&ldcp->rx_thr_lock); 5901 } 5902 5903 /* 5904 * Update the run status and wakeup the thread that 5905 * has sent the stop request. 5906 */ 5907 ldcp->rx_thr_flags &= ~VSW_WTHR_RUNNING; 5908 cv_signal(&ldcp->rx_thr_cv); 5909 CALLB_CPR_EXIT(&cprinfo); 5910 D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id); 5911 thread_exit(); 5912 } 5913 5914 /* vsw_stop_rx_thread -- Co-ordinate with receive thread to stop it */ 5915 static void 5916 vsw_stop_rx_thread(vsw_ldc_t *ldcp) 5917 { 5918 vsw_t *vswp = ldcp->ldc_vswp; 5919 5920 D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id); 5921 /* 5922 * Send a stop request by setting the stop flag and 5923 * wait until the receive thread stops. 5924 */ 5925 mutex_enter(&ldcp->rx_thr_lock); 5926 if (ldcp->rx_thr_flags & VSW_WTHR_RUNNING) { 5927 ldcp->rx_thr_flags |= VSW_WTHR_STOP; 5928 cv_signal(&ldcp->rx_thr_cv); 5929 while (ldcp->rx_thr_flags & VSW_WTHR_RUNNING) { 5930 cv_wait(&ldcp->rx_thr_cv, &ldcp->rx_thr_lock); 5931 } 5932 } 5933 mutex_exit(&ldcp->rx_thr_lock); 5934 ldcp->rx_thread = NULL; 5935 D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id); 5936 } 5937 5938 /* 5939 * vsw_ldc_tx_worker -- A per LDC worker thread to transmit data. 5940 * This thread is woken up by the vsw_portsend to transmit 5941 * packets. 5942 */ 5943 static void 5944 vsw_ldc_tx_worker(void *arg) 5945 { 5946 callb_cpr_t cprinfo; 5947 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 5948 vsw_t *vswp = ldcp->ldc_vswp; 5949 mblk_t *mp; 5950 mblk_t *tmp; 5951 5952 D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id); 5953 CALLB_CPR_INIT(&cprinfo, &ldcp->tx_thr_lock, callb_generic_cpr, 5954 "vnet_tx_thread"); 5955 mutex_enter(&ldcp->tx_thr_lock); 5956 ldcp->tx_thr_flags |= VSW_WTHR_RUNNING; 5957 while (!(ldcp->tx_thr_flags & VSW_WTHR_STOP)) { 5958 5959 CALLB_CPR_SAFE_BEGIN(&cprinfo); 5960 /* 5961 * Wait until the data is received or a stop 5962 * request is received. 5963 */ 5964 while (!(ldcp->tx_thr_flags & VSW_WTHR_STOP) && 5965 (ldcp->tx_mhead == NULL)) { 5966 cv_wait(&ldcp->tx_thr_cv, &ldcp->tx_thr_lock); 5967 } 5968 CALLB_CPR_SAFE_END(&cprinfo, &ldcp->tx_thr_lock) 5969 5970 /* 5971 * First process the stop request. 
/*
 * vsw_ldc_tx_worker -- A per LDC worker thread to transmit data.
 * This thread is woken up by vsw_portsend() to transmit packets.
 */
static void
vsw_ldc_tx_worker(void *arg)
{
	callb_cpr_t	cprinfo;
	vsw_ldc_t	*ldcp = (vsw_ldc_t *)arg;
	vsw_t		*vswp = ldcp->ldc_vswp;
	mblk_t		*mp;
	mblk_t		*tmp;

	D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id);
	CALLB_CPR_INIT(&cprinfo, &ldcp->tx_thr_lock, callb_generic_cpr,
	    "vnet_tx_thread");
	mutex_enter(&ldcp->tx_thr_lock);
	ldcp->tx_thr_flags |= VSW_WTHR_RUNNING;
	while (!(ldcp->tx_thr_flags & VSW_WTHR_STOP)) {

		CALLB_CPR_SAFE_BEGIN(&cprinfo);
		/*
		 * Wait until there is data to transmit or a stop
		 * request is received.
		 */
		while (!(ldcp->tx_thr_flags & VSW_WTHR_STOP) &&
		    (ldcp->tx_mhead == NULL)) {
			cv_wait(&ldcp->tx_thr_cv, &ldcp->tx_thr_lock);
		}
		CALLB_CPR_SAFE_END(&cprinfo, &ldcp->tx_thr_lock)

		/*
		 * First process the stop request.
		 */
		if (ldcp->tx_thr_flags & VSW_WTHR_STOP) {
			D2(vswp, "%s(%lld):tx thread stopped\n",
			    __func__, ldcp->ldc_id);
			break;
		}
		mp = ldcp->tx_mhead;
		ldcp->tx_mhead = ldcp->tx_mtail = NULL;
		ldcp->tx_cnt = 0;
		mutex_exit(&ldcp->tx_thr_lock);
		D2(vswp, "%s(%lld):calling vsw_ldcsend\n",
		    __func__, ldcp->ldc_id);
		while (mp != NULL) {
			tmp = mp->b_next;
			mp->b_next = mp->b_prev = NULL;
			(void) vsw_ldcsend(ldcp, mp, vsw_ldc_tx_retries);
			mp = tmp;
		}
		mutex_enter(&ldcp->tx_thr_lock);
	}

	/*
	 * Update the run status and wakeup the thread that
	 * has sent the stop request.
	 */
	ldcp->tx_thr_flags &= ~VSW_WTHR_RUNNING;
	cv_signal(&ldcp->tx_thr_cv);
	CALLB_CPR_EXIT(&cprinfo);
	D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id);
	thread_exit();
}

/* vsw_stop_tx_thread -- Co-ordinate with the transmit thread to stop it */
static void
vsw_stop_tx_thread(vsw_ldc_t *ldcp)
{
	vsw_t	*vswp = ldcp->ldc_vswp;

	D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id);
	/*
	 * Send a stop request by setting the stop flag and
	 * wait until the transmit thread stops.
	 */
	mutex_enter(&ldcp->tx_thr_lock);
	if (ldcp->tx_thr_flags & VSW_WTHR_RUNNING) {
		ldcp->tx_thr_flags |= VSW_WTHR_STOP;
		cv_signal(&ldcp->tx_thr_cv);
		while (ldcp->tx_thr_flags & VSW_WTHR_RUNNING) {
			cv_wait(&ldcp->tx_thr_cv, &ldcp->tx_thr_lock);
		}
	}
	mutex_exit(&ldcp->tx_thr_lock);
	ldcp->tx_thread = NULL;
	D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id);
}

/* vsw_reclaim_dring -- reclaim descriptors */
static int
vsw_reclaim_dring(dring_info_t *dp, int start)
{
	int			i, j, len;
	vsw_private_desc_t	*priv_addr;
	vnet_public_desc_t	*pub_addr;

	pub_addr = (vnet_public_desc_t *)dp->pub_addr;
	priv_addr = (vsw_private_desc_t *)dp->priv_addr;
	len = dp->num_descriptors;

	D2(NULL, "%s: start index %d\n", __func__, start);

	j = 0;
	for (i = start; j < len; i = (i + 1) % len, j++) {
		pub_addr = (vnet_public_desc_t *)dp->pub_addr + i;
		priv_addr = (vsw_private_desc_t *)dp->priv_addr + i;

		mutex_enter(&priv_addr->dstate_lock);
		if (pub_addr->hdr.dstate != VIO_DESC_DONE) {
			mutex_exit(&priv_addr->dstate_lock);
			break;
		}
		pub_addr->hdr.dstate = VIO_DESC_FREE;
		priv_addr->dstate = VIO_DESC_FREE;
		/* clear all the fields */
		priv_addr->datalen = 0;
		pub_addr->hdr.ack = 0;
		mutex_exit(&priv_addr->dstate_lock);

		D3(NULL, "claiming descp:%d pub state:0x%llx priv state 0x%llx",
		    i, pub_addr->hdr.dstate, priv_addr->dstate);
	}
	return (j);
}
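/*
 * Worked example for vsw_reclaim_dring() above (illustrative values only):
 * with num_descriptors == 8 and start == 6, the loop visits descriptors
 * 6, 7, 0, 1, ... because of the (i + 1) % len increment, stops at the
 * first descriptor whose public state is not VIO_DESC_DONE, and returns
 * the number of descriptors it marked VIO_DESC_FREE.  A return value of 0
 * therefore means nothing was reclaimable at the starting index.
 */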
/*
 * Debugging routines
 */
static void
display_state(void)
{
	vsw_t		*vswp;
	vsw_port_list_t	*plist;
	vsw_port_t	*port;
	vsw_ldc_list_t	*ldcl;
	vsw_ldc_t	*ldcp;
	extern vsw_t	*vsw_head;

	cmn_err(CE_NOTE, "***** system state *****");

	for (vswp = vsw_head; vswp; vswp = vswp->next) {
		plist = &vswp->plist;
		READ_ENTER(&plist->lockrw);
		cmn_err(CE_CONT, "vsw instance %d has %d ports attached\n",
		    vswp->instance, plist->num_ports);

		for (port = plist->head; port != NULL; port = port->p_next) {
			ldcl = &port->p_ldclist;
			cmn_err(CE_CONT, "port %d : %d ldcs attached\n",
			    port->p_instance, port->num_ldcs);
			READ_ENTER(&ldcl->lockrw);
			ldcp = ldcl->head;
			for (; ldcp != NULL; ldcp = ldcp->ldc_next) {
				cmn_err(CE_CONT, "chan %lu : dev %d : "
				    "status %d : phase %u\n",
				    ldcp->ldc_id, ldcp->dev_class,
				    ldcp->ldc_status, ldcp->hphase);
				cmn_err(CE_CONT, "chan %lu : lsession %lu : "
				    "psession %lu\n", ldcp->ldc_id,
				    ldcp->local_session, ldcp->peer_session);

				cmn_err(CE_CONT, "Inbound lane:\n");
				display_lane(&ldcp->lane_in);
				cmn_err(CE_CONT, "Outbound lane:\n");
				display_lane(&ldcp->lane_out);
			}
			RW_EXIT(&ldcl->lockrw);
		}
		RW_EXIT(&plist->lockrw);
	}
	cmn_err(CE_NOTE, "***** system state *****");
}

static void
display_lane(lane_t *lp)
{
	dring_info_t	*drp;

	cmn_err(CE_CONT, "ver 0x%x:0x%x : state %lx : mtu 0x%lx\n",
	    lp->ver_major, lp->ver_minor, lp->lstate, lp->mtu);
	cmn_err(CE_CONT, "addr_type %d : addr 0x%lx : xmode %d\n",
	    lp->addr_type, lp->addr, lp->xfer_mode);
	cmn_err(CE_CONT, "dringp 0x%lx\n", (uint64_t)lp->dringp);

	cmn_err(CE_CONT, "Dring info:\n");
	for (drp = lp->dringp; drp != NULL; drp = drp->next) {
		cmn_err(CE_CONT, "\tnum_desc %u : dsize %u\n",
		    drp->num_descriptors, drp->descriptor_size);
		cmn_err(CE_CONT, "\thandle 0x%lx\n", drp->handle);
		cmn_err(CE_CONT, "\tpub_addr 0x%lx : priv_addr 0x%lx\n",
		    (uint64_t)drp->pub_addr, (uint64_t)drp->priv_addr);
		cmn_err(CE_CONT, "\tident 0x%lx : end_idx %lu\n",
		    drp->ident, drp->end_idx);
		display_ring(drp);
	}
}

static void
display_ring(dring_info_t *dringp)
{
	uint64_t		i;
	uint64_t		priv_count = 0;
	uint64_t		pub_count = 0;
	vnet_public_desc_t	*pub_addr = NULL;
	vsw_private_desc_t	*priv_addr = NULL;

	for (i = 0; i < vsw_ntxds; i++) {
		if (dringp->pub_addr != NULL) {
			pub_addr = (vnet_public_desc_t *)dringp->pub_addr + i;

			if (pub_addr->hdr.dstate == VIO_DESC_FREE)
				pub_count++;
		}

		if (dringp->priv_addr != NULL) {
			priv_addr =
			    (vsw_private_desc_t *)dringp->priv_addr + i;

			if (priv_addr->dstate == VIO_DESC_FREE)
				priv_count++;
		}
	}
	cmn_err(CE_CONT, "\t%lu elements: %lu priv free: %lu pub free\n",
	    i, priv_count, pub_count);
}
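/*
 * dump_flags() below maps the bits of a handshake state word to their
 * symbolic names (presumably the same VSW_*_SENT/_RECV flags recorded in a
 * lane's lstate as printed by display_lane() above) and logs each set flag
 * via DERR().  Like the display_*() routines, it is purely a debugging aid
 * and has no effect on channel state.
 */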
static void
dump_flags(uint64_t state)
{
	int	i;

	typedef struct flag_name {
		int	flag_val;
		char	*flag_name;
	} flag_name_t;

	flag_name_t	flags[] = {
		VSW_VER_INFO_SENT, "VSW_VER_INFO_SENT",
		VSW_VER_INFO_RECV, "VSW_VER_INFO_RECV",
		VSW_VER_ACK_RECV, "VSW_VER_ACK_RECV",
		VSW_VER_ACK_SENT, "VSW_VER_ACK_SENT",
		VSW_VER_NACK_RECV, "VSW_VER_NACK_RECV",
		VSW_VER_NACK_SENT, "VSW_VER_NACK_SENT",
		VSW_ATTR_INFO_SENT, "VSW_ATTR_INFO_SENT",
		VSW_ATTR_INFO_RECV, "VSW_ATTR_INFO_RECV",
		VSW_ATTR_ACK_SENT, "VSW_ATTR_ACK_SENT",
		VSW_ATTR_ACK_RECV, "VSW_ATTR_ACK_RECV",
		VSW_ATTR_NACK_SENT, "VSW_ATTR_NACK_SENT",
		VSW_ATTR_NACK_RECV, "VSW_ATTR_NACK_RECV",
		VSW_DRING_INFO_SENT, "VSW_DRING_INFO_SENT",
		VSW_DRING_INFO_RECV, "VSW_DRING_INFO_RECV",
		VSW_DRING_ACK_SENT, "VSW_DRING_ACK_SENT",
		VSW_DRING_ACK_RECV, "VSW_DRING_ACK_RECV",
		VSW_DRING_NACK_SENT, "VSW_DRING_NACK_SENT",
		VSW_DRING_NACK_RECV, "VSW_DRING_NACK_RECV",
		VSW_RDX_INFO_SENT, "VSW_RDX_INFO_SENT",
		VSW_RDX_INFO_RECV, "VSW_RDX_INFO_RECV",
		VSW_RDX_ACK_SENT, "VSW_RDX_ACK_SENT",
		VSW_RDX_ACK_RECV, "VSW_RDX_ACK_RECV",
		VSW_RDX_NACK_SENT, "VSW_RDX_NACK_SENT",
		VSW_RDX_NACK_RECV, "VSW_RDX_NACK_RECV",
		VSW_MCST_INFO_SENT, "VSW_MCST_INFO_SENT",
		VSW_MCST_INFO_RECV, "VSW_MCST_INFO_RECV",
		VSW_MCST_ACK_SENT, "VSW_MCST_ACK_SENT",
		VSW_MCST_ACK_RECV, "VSW_MCST_ACK_RECV",
		VSW_MCST_NACK_SENT, "VSW_MCST_NACK_SENT",
		VSW_MCST_NACK_RECV, "VSW_MCST_NACK_RECV",
		VSW_LANE_ACTIVE, "VSW_LANE_ACTIVE"};

	DERR(NULL, "DUMP_FLAGS: %llx\n", state);
	for (i = 0; i < sizeof (flags)/sizeof (flag_name_t); i++) {
		if (state & flags[i].flag_val)
			DERR(NULL, "DUMP_FLAGS %s", flags[i].flag_name);
	}
}