1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/errno.h> 29 #include <sys/debug.h> 30 #include <sys/time.h> 31 #include <sys/sysmacros.h> 32 #include <sys/systm.h> 33 #include <sys/user.h> 34 #include <sys/stropts.h> 35 #include <sys/stream.h> 36 #include <sys/strlog.h> 37 #include <sys/strsubr.h> 38 #include <sys/cmn_err.h> 39 #include <sys/cpu.h> 40 #include <sys/kmem.h> 41 #include <sys/conf.h> 42 #include <sys/ddi.h> 43 #include <sys/sunddi.h> 44 #include <sys/ksynch.h> 45 #include <sys/stat.h> 46 #include <sys/kstat.h> 47 #include <sys/vtrace.h> 48 #include <sys/strsun.h> 49 #include <sys/dlpi.h> 50 #include <sys/ethernet.h> 51 #include <net/if.h> 52 #include <sys/varargs.h> 53 #include <sys/machsystm.h> 54 #include <sys/modctl.h> 55 #include <sys/modhash.h> 56 #include <sys/mac.h> 57 #include <sys/mac_ether.h> 58 #include <sys/taskq.h> 59 #include <sys/note.h> 60 #include <sys/mach_descrip.h> 61 #include <sys/mac.h> 62 #include <sys/mdeg.h> 63 #include <sys/ldc.h> 64 #include <sys/vsw_fdb.h> 65 #include <sys/vsw.h> 66 #include <sys/vio_mailbox.h> 67 #include <sys/vnet_mailbox.h> 68 #include <sys/vnet_common.h> 69 #include <sys/vio_util.h> 70 #include <sys/sdt.h> 71 #include <sys/atomic.h> 72 #include <sys/callb.h> 73 #include <sys/vlan.h> 74 75 /* Port add/deletion/etc routines */ 76 static int vsw_port_delete(vsw_port_t *port); 77 static int vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id); 78 static int vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id); 79 static int vsw_init_ldcs(vsw_port_t *port); 80 static int vsw_uninit_ldcs(vsw_port_t *port); 81 static int vsw_ldc_init(vsw_ldc_t *ldcp); 82 static int vsw_ldc_uninit(vsw_ldc_t *ldcp); 83 static int vsw_drain_ldcs(vsw_port_t *port); 84 static int vsw_drain_port_taskq(vsw_port_t *port); 85 static void vsw_marker_task(void *); 86 static int vsw_plist_del_node(vsw_t *, vsw_port_t *port); 87 int vsw_detach_ports(vsw_t *vswp); 88 int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node); 89 mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr); 90 int vsw_port_detach(vsw_t *vswp, int p_instance); 91 int vsw_portsend(vsw_port_t *port, mblk_t *mp, mblk_t *mpt, uint32_t count); 92 int vsw_port_attach(vsw_port_t *portp); 93 vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance); 94 void vsw_vlan_unaware_port_reset(vsw_port_t *portp); 95 int vsw_send_msg(vsw_ldc_t *, void *, int, boolean_t); 96 void vsw_hio_port_reset(vsw_port_t *portp, boolean_t immediate); 97 void vsw_reset_ports(vsw_t *vswp); 98 void vsw_port_reset(vsw_port_t *portp); 99 100 /* Interrupt routines */ 
101 static uint_t vsw_ldc_cb(uint64_t cb, caddr_t arg); 102 103 /* Handshake routines */ 104 static void vsw_ldc_reinit(vsw_ldc_t *); 105 static void vsw_process_conn_evt(vsw_ldc_t *, uint16_t); 106 static void vsw_conn_task(void *); 107 static int vsw_check_flag(vsw_ldc_t *, int, uint64_t); 108 static void vsw_next_milestone(vsw_ldc_t *); 109 static int vsw_supported_version(vio_ver_msg_t *); 110 static void vsw_set_vnet_proto_ops(vsw_ldc_t *ldcp); 111 static void vsw_reset_vnet_proto_ops(vsw_ldc_t *ldcp); 112 113 /* Data processing routines */ 114 static void vsw_process_pkt(void *); 115 static void vsw_dispatch_ctrl_task(vsw_ldc_t *, void *, vio_msg_tag_t *); 116 static void vsw_process_ctrl_pkt(void *); 117 static void vsw_process_ctrl_ver_pkt(vsw_ldc_t *, void *); 118 static void vsw_process_ctrl_attr_pkt(vsw_ldc_t *, void *); 119 static void vsw_process_ctrl_mcst_pkt(vsw_ldc_t *, void *); 120 static void vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *, void *); 121 static void vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *, void *); 122 static void vsw_process_ctrl_rdx_pkt(vsw_ldc_t *, void *); 123 static void vsw_process_data_pkt(vsw_ldc_t *, void *, vio_msg_tag_t *, 124 uint32_t); 125 static void vsw_process_data_dring_pkt(vsw_ldc_t *, void *); 126 static void vsw_process_pkt_data_nop(void *, void *, uint32_t); 127 static void vsw_process_pkt_data(void *, void *, uint32_t); 128 static void vsw_process_data_ibnd_pkt(vsw_ldc_t *, void *); 129 static void vsw_process_err_pkt(vsw_ldc_t *, void *, vio_msg_tag_t *); 130 131 /* Switching/data transmit routines */ 132 static int vsw_dringsend(vsw_ldc_t *, mblk_t *); 133 static int vsw_descrsend(vsw_ldc_t *, mblk_t *); 134 static void vsw_ldcsend_pkt(vsw_ldc_t *ldcp, mblk_t *mp); 135 static int vsw_ldcsend(vsw_ldc_t *ldcp, mblk_t *mp, uint32_t retries); 136 static int vsw_ldctx_pri(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count); 137 static int vsw_ldctx(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count); 138 139 /* Packet creation routines */ 140 static void vsw_send_ver(void *); 141 static void vsw_send_attr(vsw_ldc_t *); 142 static vio_dring_reg_msg_t *vsw_create_dring_info_pkt(vsw_ldc_t *); 143 static void vsw_send_dring_info(vsw_ldc_t *); 144 static void vsw_send_rdx(vsw_ldc_t *); 145 146 /* Dring routines */ 147 static dring_info_t *vsw_create_dring(vsw_ldc_t *); 148 static void vsw_create_privring(vsw_ldc_t *); 149 static int vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp); 150 static int vsw_dring_find_free_desc(dring_info_t *, vsw_private_desc_t **, 151 int *); 152 static dring_info_t *vsw_ident2dring(lane_t *, uint64_t); 153 static int vsw_reclaim_dring(dring_info_t *dp, int start); 154 155 static void vsw_set_lane_attr(vsw_t *, lane_t *); 156 static int vsw_check_attr(vnet_attr_msg_t *, vsw_ldc_t *); 157 static int vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg); 158 static int vsw_mem_cookie_match(ldc_mem_cookie_t *, ldc_mem_cookie_t *); 159 static int vsw_check_dring_info(vio_dring_reg_msg_t *); 160 161 /* Rcv/Tx thread routines */ 162 static void vsw_stop_tx_thread(vsw_ldc_t *ldcp); 163 static void vsw_ldc_tx_worker(void *arg); 164 static void vsw_stop_rx_thread(vsw_ldc_t *ldcp); 165 static void vsw_ldc_rx_worker(void *arg); 166 167 /* Misc support routines */ 168 static caddr_t vsw_print_ethaddr(uint8_t *addr, char *ebuf); 169 static void vsw_free_lane_resources(vsw_ldc_t *, uint64_t); 170 static void vsw_free_ring(dring_info_t *); 171 static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr); 172 static int 
vsw_get_same_dest_list(struct ether_header *ehp, 173 mblk_t **rhead, mblk_t **rtail, mblk_t **mpp); 174 static mblk_t *vsw_dupmsgchain(mblk_t *mp); 175 176 /* Debugging routines */ 177 static void dump_flags(uint64_t); 178 static void display_state(void); 179 static void display_lane(lane_t *); 180 static void display_ring(dring_info_t *); 181 182 /* 183 * Functions imported from other files. 184 */ 185 extern int vsw_set_hw(vsw_t *, vsw_port_t *, int); 186 extern int vsw_unset_hw(vsw_t *, vsw_port_t *, int); 187 extern void vsw_reconfig_hw(vsw_t *); 188 extern int vsw_add_rem_mcst(vnet_mcast_msg_t *mcst_pkt, vsw_port_t *port); 189 extern void vsw_del_mcst_port(vsw_port_t *port); 190 extern int vsw_add_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg); 191 extern int vsw_del_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg); 192 extern void vsw_fdbe_add(vsw_t *vswp, void *port); 193 extern void vsw_fdbe_del(vsw_t *vswp, struct ether_addr *eaddr); 194 extern void vsw_create_vlans(void *arg, int type); 195 extern void vsw_destroy_vlans(void *arg, int type); 196 extern void vsw_vlan_add_ids(void *arg, int type); 197 extern void vsw_vlan_remove_ids(void *arg, int type); 198 extern boolean_t vsw_frame_lookup_vid(void *arg, int caller, 199 struct ether_header *ehp, uint16_t *vidp); 200 extern mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp); 201 extern uint32_t vsw_vlan_frame_untag(void *arg, int type, mblk_t **np, 202 mblk_t **npt); 203 extern boolean_t vsw_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid); 204 extern void vsw_hio_start(vsw_t *vswp, vsw_ldc_t *ldcp); 205 extern void vsw_hio_stop(vsw_t *vswp, vsw_ldc_t *ldcp); 206 extern void vsw_process_dds_msg(vsw_t *vswp, vsw_ldc_t *ldcp, void *msg); 207 extern void vsw_hio_stop_port(vsw_port_t *portp); 208 extern void vsw_publish_macaddr(vsw_t *vswp, uint8_t *addr); 209 210 #define VSW_NUM_VMPOOLS 3 /* number of vio mblk pools */ 211 212 /* 213 * Tunables used in this file. 
214 */ 215 extern int vsw_num_handshakes; 216 extern int vsw_wretries; 217 extern int vsw_desc_delay; 218 extern int vsw_read_attempts; 219 extern int vsw_ldc_tx_delay; 220 extern int vsw_ldc_tx_retries; 221 extern boolean_t vsw_ldc_rxthr_enabled; 222 extern boolean_t vsw_ldc_txthr_enabled; 223 extern uint32_t vsw_ntxds; 224 extern uint32_t vsw_max_tx_qcount; 225 extern uint32_t vsw_chain_len; 226 extern uint32_t vsw_mblk_size1; 227 extern uint32_t vsw_mblk_size2; 228 extern uint32_t vsw_mblk_size3; 229 extern uint32_t vsw_mblk_size4; 230 extern uint32_t vsw_num_mblks1; 231 extern uint32_t vsw_num_mblks2; 232 extern uint32_t vsw_num_mblks3; 233 extern uint32_t vsw_num_mblks4; 234 extern boolean_t vsw_obp_ver_proto_workaround; 235 extern uint32_t vsw_publish_macaddr_count; 236 extern boolean_t vsw_jumbo_rxpools; 237 238 #define LDC_ENTER_LOCK(ldcp) \ 239 mutex_enter(&((ldcp)->ldc_cblock));\ 240 mutex_enter(&((ldcp)->ldc_rxlock));\ 241 mutex_enter(&((ldcp)->ldc_txlock)); 242 #define LDC_EXIT_LOCK(ldcp) \ 243 mutex_exit(&((ldcp)->ldc_txlock));\ 244 mutex_exit(&((ldcp)->ldc_rxlock));\ 245 mutex_exit(&((ldcp)->ldc_cblock)); 246 247 #define VSW_VER_EQ(ldcp, major, minor) \ 248 ((ldcp)->lane_out.ver_major == (major) && \ 249 (ldcp)->lane_out.ver_minor == (minor)) 250 251 #define VSW_VER_LT(ldcp, major, minor) \ 252 (((ldcp)->lane_out.ver_major < (major)) || \ 253 ((ldcp)->lane_out.ver_major == (major) && \ 254 (ldcp)->lane_out.ver_minor < (minor))) 255 256 #define VSW_VER_GTEQ(ldcp, major, minor) \ 257 (((ldcp)->lane_out.ver_major > (major)) || \ 258 ((ldcp)->lane_out.ver_major == (major) && \ 259 (ldcp)->lane_out.ver_minor >= (minor))) 260 261 /* supported versions */ 262 static ver_sup_t vsw_versions[] = { {1, 4} }; 263 264 /* 265 * For the moment the state dump routines have their own 266 * private flag. 267 */ 268 #define DUMP_STATE 0 269 270 #if DUMP_STATE 271 272 #define DUMP_TAG(tag) \ 273 { \ 274 D1(NULL, "DUMP_TAG: type 0x%llx", (tag).vio_msgtype); \ 275 D1(NULL, "DUMP_TAG: stype 0x%llx", (tag).vio_subtype); \ 276 D1(NULL, "DUMP_TAG: senv 0x%llx", (tag).vio_subtype_env); \ 277 } 278 279 #define DUMP_TAG_PTR(tag) \ 280 { \ 281 D1(NULL, "DUMP_TAG: type 0x%llx", (tag)->vio_msgtype); \ 282 D1(NULL, "DUMP_TAG: stype 0x%llx", (tag)->vio_subtype); \ 283 D1(NULL, "DUMP_TAG: senv 0x%llx", (tag)->vio_subtype_env); \ 284 } 285 286 #define DUMP_FLAGS(flags) dump_flags(flags); 287 #define DISPLAY_STATE() display_state() 288 289 #else 290 291 #define DUMP_TAG(tag) 292 #define DUMP_TAG_PTR(tag) 293 #define DUMP_FLAGS(state) 294 #define DISPLAY_STATE() 295 296 #endif /* DUMP_STATE */ 297 298 /* 299 * Attach the specified port. 300 * 301 * Returns 0 on success, 1 on failure. 302 */ 303 int 304 vsw_port_attach(vsw_port_t *port) 305 { 306 vsw_t *vswp = port->p_vswp; 307 vsw_port_list_t *plist = &vswp->plist; 308 vsw_port_t *p, **pp; 309 int i; 310 int nids = port->num_ldcs; 311 uint64_t *ldcids; 312 313 D1(vswp, "%s: enter : port %d", __func__, port->p_instance); 314 315 /* port already exists? 
*/ 316 READ_ENTER(&plist->lockrw); 317 for (p = plist->head; p != NULL; p = p->p_next) { 318 if (p->p_instance == port->p_instance) { 319 DWARN(vswp, "%s: port instance %d already attached", 320 __func__, p->p_instance); 321 RW_EXIT(&plist->lockrw); 322 return (1); 323 } 324 } 325 RW_EXIT(&plist->lockrw); 326 327 rw_init(&port->p_ldclist.lockrw, NULL, RW_DRIVER, NULL); 328 329 mutex_init(&port->tx_lock, NULL, MUTEX_DRIVER, NULL); 330 mutex_init(&port->mca_lock, NULL, MUTEX_DRIVER, NULL); 331 332 mutex_init(&port->state_lock, NULL, MUTEX_DRIVER, NULL); 333 cv_init(&port->state_cv, NULL, CV_DRIVER, NULL); 334 port->state = VSW_PORT_INIT; 335 336 D2(vswp, "%s: %d nids", __func__, nids); 337 ldcids = port->ldc_ids; 338 for (i = 0; i < nids; i++) { 339 D2(vswp, "%s: ldcid (%llx)", __func__, (uint64_t)ldcids[i]); 340 if (vsw_ldc_attach(port, (uint64_t)ldcids[i]) != 0) { 341 DERR(vswp, "%s: ldc_attach failed", __func__); 342 343 rw_destroy(&port->p_ldclist.lockrw); 344 345 cv_destroy(&port->state_cv); 346 mutex_destroy(&port->state_lock); 347 348 mutex_destroy(&port->tx_lock); 349 mutex_destroy(&port->mca_lock); 350 kmem_free(port, sizeof (vsw_port_t)); 351 return (1); 352 } 353 } 354 355 if (vswp->switching_setup_done == B_TRUE) { 356 /* 357 * If the underlying physical device has been setup, 358 * program the mac address of this port in it. 359 * Otherwise, port macaddr will be set after the physical 360 * device is successfully setup by the timeout handler. 361 */ 362 mutex_enter(&vswp->hw_lock); 363 (void) vsw_set_hw(vswp, port, VSW_VNETPORT); 364 mutex_exit(&vswp->hw_lock); 365 } 366 367 /* create the fdb entry for this port/mac address */ 368 vsw_fdbe_add(vswp, port); 369 370 vsw_create_vlans(port, VSW_VNETPORT); 371 372 WRITE_ENTER(&plist->lockrw); 373 374 /* link it into the list of ports for this vsw instance */ 375 pp = (vsw_port_t **)(&plist->head); 376 port->p_next = *pp; 377 *pp = port; 378 plist->num_ports++; 379 380 RW_EXIT(&plist->lockrw); 381 382 /* 383 * Initialise the port and any ldc's under it. 384 */ 385 (void) vsw_init_ldcs(port); 386 387 /* announce macaddr of vnet to the physical switch */ 388 if (vsw_publish_macaddr_count != 0) { /* enabled */ 389 vsw_publish_macaddr(vswp, (uint8_t *)&(port->p_macaddr)); 390 } 391 392 D1(vswp, "%s: exit", __func__); 393 return (0); 394 } 395 396 /* 397 * Detach the specified port. 398 * 399 * Returns 0 on success, 1 on failure. 400 */ 401 int 402 vsw_port_detach(vsw_t *vswp, int p_instance) 403 { 404 vsw_port_t *port = NULL; 405 vsw_port_list_t *plist = &vswp->plist; 406 407 D1(vswp, "%s: enter: port id %d", __func__, p_instance); 408 409 WRITE_ENTER(&plist->lockrw); 410 411 if ((port = vsw_lookup_port(vswp, p_instance)) == NULL) { 412 RW_EXIT(&plist->lockrw); 413 return (1); 414 } 415 416 if (vsw_plist_del_node(vswp, port)) { 417 RW_EXIT(&plist->lockrw); 418 return (1); 419 } 420 421 /* cleanup any HybridIO for this port */ 422 vsw_hio_stop_port(port); 423 424 /* 425 * No longer need to hold writer lock on port list now 426 * that we have unlinked the target port from the list. 427 */ 428 RW_EXIT(&plist->lockrw); 429 430 /* Remove the fdb entry for this port/mac address */ 431 vsw_fdbe_del(vswp, &(port->p_macaddr)); 432 vsw_destroy_vlans(port, VSW_VNETPORT); 433 434 /* Remove any multicast addresses.. */ 435 vsw_del_mcst_port(port); 436 437 /* Remove address if was programmed into HW. */ 438 mutex_enter(&vswp->hw_lock); 439 440 /* 441 * Port's address may not have been set in hardware. 
This could 442 * happen if the underlying physical device is not yet available and 443 * vsw_setup_switching_timeout() may be in progress. 444 * We remove its addr from hardware only if it has been set before. 445 */ 446 if (port->addr_set != VSW_ADDR_UNSET) 447 (void) vsw_unset_hw(vswp, port, VSW_VNETPORT); 448 449 if (vswp->recfg_reqd) 450 vsw_reconfig_hw(vswp); 451 452 mutex_exit(&vswp->hw_lock); 453 454 if (vsw_port_delete(port)) { 455 return (1); 456 } 457 458 D1(vswp, "%s: exit: p_instance(%d)", __func__, p_instance); 459 return (0); 460 } 461 462 /* 463 * Detach all active ports. 464 * 465 * Returns 0 on success, 1 on failure. 466 */ 467 int 468 vsw_detach_ports(vsw_t *vswp) 469 { 470 vsw_port_list_t *plist = &vswp->plist; 471 vsw_port_t *port = NULL; 472 473 D1(vswp, "%s: enter", __func__); 474 475 WRITE_ENTER(&plist->lockrw); 476 477 while ((port = plist->head) != NULL) { 478 if (vsw_plist_del_node(vswp, port)) { 479 DERR(vswp, "%s: Error deleting port %d" 480 " from port list", __func__, port->p_instance); 481 RW_EXIT(&plist->lockrw); 482 return (1); 483 } 484 485 /* Remove address if was programmed into HW. */ 486 mutex_enter(&vswp->hw_lock); 487 (void) vsw_unset_hw(vswp, port, VSW_VNETPORT); 488 mutex_exit(&vswp->hw_lock); 489 490 /* Remove the fdb entry for this port/mac address */ 491 vsw_fdbe_del(vswp, &(port->p_macaddr)); 492 vsw_destroy_vlans(port, VSW_VNETPORT); 493 494 /* Remove any multicast addresses.. */ 495 vsw_del_mcst_port(port); 496 497 /* 498 * No longer need to hold the lock on the port list 499 * now that we have unlinked the target port from the 500 * list. 501 */ 502 RW_EXIT(&plist->lockrw); 503 if (vsw_port_delete(port)) { 504 DERR(vswp, "%s: Error deleting port %d", 505 __func__, port->p_instance); 506 return (1); 507 } 508 WRITE_ENTER(&plist->lockrw); 509 } 510 RW_EXIT(&plist->lockrw); 511 512 D1(vswp, "%s: exit", __func__); 513 514 return (0); 515 } 516 517 /* 518 * Delete the specified port. 519 * 520 * Returns 0 on success, 1 on failure. 521 */ 522 static int 523 vsw_port_delete(vsw_port_t *port) 524 { 525 vsw_ldc_list_t *ldcl; 526 vsw_t *vswp = port->p_vswp; 527 int num_ldcs; 528 529 D1(vswp, "%s: enter : port id %d", __func__, port->p_instance); 530 531 (void) vsw_uninit_ldcs(port); 532 533 /* 534 * Wait for any pending ctrl msg tasks which reference this 535 * port to finish. 
536 */ 537 if (vsw_drain_port_taskq(port)) 538 return (1); 539 540 /* 541 * Wait for any active callbacks to finish 542 */ 543 if (vsw_drain_ldcs(port)) 544 return (1); 545 546 ldcl = &port->p_ldclist; 547 num_ldcs = port->num_ldcs; 548 WRITE_ENTER(&ldcl->lockrw); 549 while (num_ldcs > 0) { 550 if (vsw_ldc_detach(port, ldcl->head->ldc_id) != 0) { 551 cmn_err(CE_WARN, "!vsw%d: unable to detach ldc %ld", 552 vswp->instance, ldcl->head->ldc_id); 553 RW_EXIT(&ldcl->lockrw); 554 port->num_ldcs = num_ldcs; 555 return (1); 556 } 557 num_ldcs--; 558 } 559 RW_EXIT(&ldcl->lockrw); 560 561 rw_destroy(&port->p_ldclist.lockrw); 562 563 mutex_destroy(&port->mca_lock); 564 mutex_destroy(&port->tx_lock); 565 566 cv_destroy(&port->state_cv); 567 mutex_destroy(&port->state_lock); 568 569 if (port->num_ldcs != 0) { 570 kmem_free(port->ldc_ids, port->num_ldcs * sizeof (uint64_t)); 571 port->num_ldcs = 0; 572 } 573 kmem_free(port, sizeof (vsw_port_t)); 574 575 D1(vswp, "%s: exit", __func__); 576 577 return (0); 578 } 579 580 static int 581 vsw_init_multipools(vsw_ldc_t *ldcp, vsw_t *vswp) 582 { 583 size_t data_sz; 584 int rv; 585 uint32_t sz1 = 0; 586 uint32_t sz2 = 0; 587 uint32_t sz3 = 0; 588 uint32_t sz4 = 0; 589 590 /* 591 * We round up the mtu specified to be a multiple of 2K to limit the 592 * number of rx buffer pools created for a given mtu. 593 */ 594 data_sz = vswp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN; 595 data_sz = VNET_ROUNDUP_2K(data_sz); 596 597 /* 598 * If pool sizes are specified, use them. Note that the presence of 599 * the first tunable will be used as a hint. 600 */ 601 if (vsw_mblk_size1 != 0) { 602 sz1 = vsw_mblk_size1; 603 sz2 = vsw_mblk_size2; 604 sz3 = vsw_mblk_size3; 605 sz4 = vsw_mblk_size4; 606 607 if (sz4 == 0) { /* need 3 pools */ 608 609 ldcp->max_rxpool_size = sz3; 610 rv = vio_init_multipools(&ldcp->vmp, 611 VSW_NUM_VMPOOLS, sz1, sz2, sz3, 612 vsw_num_mblks1, vsw_num_mblks2, vsw_num_mblks3); 613 614 } else { 615 616 ldcp->max_rxpool_size = sz4; 617 rv = vio_init_multipools(&ldcp->vmp, 618 VSW_NUM_VMPOOLS + 1, sz1, sz2, sz3, sz4, 619 vsw_num_mblks1, vsw_num_mblks2, vsw_num_mblks3, 620 vsw_num_mblks4); 621 622 } 623 624 return (rv); 625 } 626 627 /* 628 * Pool sizes are not specified. We select the pool sizes based on the 629 * mtu if vnet_jumbo_rxpools is enabled. 630 */ 631 if (vsw_jumbo_rxpools == B_FALSE || data_sz == VNET_2K) { 632 /* 633 * Receive buffer pool allocation based on mtu is disabled. 634 * Use the default mechanism of standard size pool allocation. 
635 */ 636 sz1 = VSW_MBLK_SZ_128; 637 sz2 = VSW_MBLK_SZ_256; 638 sz3 = VSW_MBLK_SZ_2048; 639 ldcp->max_rxpool_size = sz3; 640 641 rv = vio_init_multipools(&ldcp->vmp, VSW_NUM_VMPOOLS, 642 sz1, sz2, sz3, 643 vsw_num_mblks1, vsw_num_mblks2, vsw_num_mblks3); 644 645 return (rv); 646 } 647 648 switch (data_sz) { 649 650 case VNET_4K: 651 652 sz1 = VSW_MBLK_SZ_128; 653 sz2 = VSW_MBLK_SZ_256; 654 sz3 = VSW_MBLK_SZ_2048; 655 sz4 = sz3 << 1; /* 4K */ 656 ldcp->max_rxpool_size = sz4; 657 658 rv = vio_init_multipools(&ldcp->vmp, VSW_NUM_VMPOOLS + 1, 659 sz1, sz2, sz3, sz4, 660 vsw_num_mblks1, vsw_num_mblks2, vsw_num_mblks3, 661 vsw_num_mblks4); 662 break; 663 664 default: /* data_sz: 4K+ to 16K */ 665 666 sz1 = VSW_MBLK_SZ_256; 667 sz2 = VSW_MBLK_SZ_2048; 668 sz3 = data_sz >> 1; /* Jumbo-size/2 */ 669 sz4 = data_sz; /* Jumbo-size */ 670 ldcp->max_rxpool_size = sz4; 671 672 rv = vio_init_multipools(&ldcp->vmp, VSW_NUM_VMPOOLS + 1, 673 sz1, sz2, sz3, sz4, 674 vsw_num_mblks1, vsw_num_mblks2, vsw_num_mblks3, 675 vsw_num_mblks4); 676 break; 677 } 678 679 return (rv); 680 681 } 682 683 /* 684 * Attach a logical domain channel (ldc) under a specified port. 685 * 686 * Returns 0 on success, 1 on failure. 687 */ 688 static int 689 vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id) 690 { 691 vsw_t *vswp = port->p_vswp; 692 vsw_ldc_list_t *ldcl = &port->p_ldclist; 693 vsw_ldc_t *ldcp = NULL; 694 ldc_attr_t attr; 695 ldc_status_t istatus; 696 int status = DDI_FAILURE; 697 char kname[MAXNAMELEN]; 698 enum { PROG_init = 0x0, 699 PROG_callback = 0x1, PROG_rx_thread = 0x2, 700 PROG_tx_thread = 0x4} 701 progress; 702 703 progress = PROG_init; 704 705 D1(vswp, "%s: enter", __func__); 706 707 ldcp = kmem_zalloc(sizeof (vsw_ldc_t), KM_NOSLEEP); 708 if (ldcp == NULL) { 709 DERR(vswp, "%s: kmem_zalloc failed", __func__); 710 return (1); 711 } 712 ldcp->ldc_id = ldc_id; 713 714 mutex_init(&ldcp->ldc_txlock, NULL, MUTEX_DRIVER, NULL); 715 mutex_init(&ldcp->ldc_rxlock, NULL, MUTEX_DRIVER, NULL); 716 mutex_init(&ldcp->ldc_cblock, NULL, MUTEX_DRIVER, NULL); 717 mutex_init(&ldcp->drain_cv_lock, NULL, MUTEX_DRIVER, NULL); 718 cv_init(&ldcp->drain_cv, NULL, CV_DRIVER, NULL); 719 rw_init(&ldcp->lane_in.dlistrw, NULL, RW_DRIVER, NULL); 720 rw_init(&ldcp->lane_out.dlistrw, NULL, RW_DRIVER, NULL); 721 722 /* required for handshake with peer */ 723 ldcp->local_session = (uint64_t)ddi_get_lbolt(); 724 ldcp->peer_session = 0; 725 ldcp->session_status = 0; 726 ldcp->hss_id = 1; /* Initial handshake session id */ 727 728 (void) atomic_swap_32(&port->p_hio_capable, B_FALSE); 729 730 /* only set for outbound lane, inbound set by peer */ 731 vsw_set_lane_attr(vswp, &ldcp->lane_out); 732 733 attr.devclass = LDC_DEV_NT_SVC; 734 attr.instance = ddi_get_instance(vswp->dip); 735 attr.mode = LDC_MODE_UNRELIABLE; 736 attr.mtu = VSW_LDC_MTU; 737 status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle); 738 if (status != 0) { 739 DERR(vswp, "%s(%lld): ldc_init failed, rv (%d)", 740 __func__, ldc_id, status); 741 goto ldc_attach_fail; 742 } 743 744 if (vsw_ldc_rxthr_enabled) { 745 ldcp->rx_thr_flags = 0; 746 747 mutex_init(&ldcp->rx_thr_lock, NULL, MUTEX_DRIVER, NULL); 748 cv_init(&ldcp->rx_thr_cv, NULL, CV_DRIVER, NULL); 749 ldcp->rx_thread = thread_create(NULL, 2 * DEFAULTSTKSZ, 750 vsw_ldc_rx_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri); 751 752 progress |= PROG_rx_thread; 753 if (ldcp->rx_thread == NULL) { 754 DWARN(vswp, "%s(%lld): Failed to create worker thread", 755 __func__, ldc_id); 756 goto ldc_attach_fail; 757 } 758 } 759 760 if 
(vsw_ldc_txthr_enabled) { 761 ldcp->tx_thr_flags = 0; 762 ldcp->tx_mhead = ldcp->tx_mtail = NULL; 763 764 mutex_init(&ldcp->tx_thr_lock, NULL, MUTEX_DRIVER, NULL); 765 cv_init(&ldcp->tx_thr_cv, NULL, CV_DRIVER, NULL); 766 ldcp->tx_thread = thread_create(NULL, 2 * DEFAULTSTKSZ, 767 vsw_ldc_tx_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri); 768 769 progress |= PROG_tx_thread; 770 if (ldcp->tx_thread == NULL) { 771 DWARN(vswp, "%s(%lld): Failed to create worker thread", 772 __func__, ldc_id); 773 goto ldc_attach_fail; 774 } 775 } 776 777 status = ldc_reg_callback(ldcp->ldc_handle, vsw_ldc_cb, (caddr_t)ldcp); 778 if (status != 0) { 779 DERR(vswp, "%s(%lld): ldc_reg_callback failed, rv (%d)", 780 __func__, ldc_id, status); 781 (void) ldc_fini(ldcp->ldc_handle); 782 goto ldc_attach_fail; 783 } 784 /* 785 * allocate a message for ldc_read()s, big enough to hold ctrl and 786 * data msgs, including raw data msgs used to recv priority frames. 787 */ 788 ldcp->msglen = VIO_PKT_DATA_HDRSIZE + vswp->max_frame_size; 789 ldcp->ldcmsg = kmem_alloc(ldcp->msglen, KM_SLEEP); 790 791 progress |= PROG_callback; 792 793 mutex_init(&ldcp->status_lock, NULL, MUTEX_DRIVER, NULL); 794 795 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) { 796 DERR(vswp, "%s: ldc_status failed", __func__); 797 mutex_destroy(&ldcp->status_lock); 798 goto ldc_attach_fail; 799 } 800 801 ldcp->ldc_status = istatus; 802 ldcp->ldc_port = port; 803 ldcp->ldc_vswp = vswp; 804 805 vsw_reset_vnet_proto_ops(ldcp); 806 807 (void) sprintf(kname, "%sldc0x%lx", DRV_NAME, ldcp->ldc_id); 808 ldcp->ksp = vgen_setup_kstats(DRV_NAME, vswp->instance, 809 kname, &ldcp->ldc_stats); 810 if (ldcp->ksp == NULL) { 811 DERR(vswp, "%s: kstats setup failed", __func__); 812 goto ldc_attach_fail; 813 } 814 815 /* link it into the list of channels for this port */ 816 WRITE_ENTER(&ldcl->lockrw); 817 ldcp->ldc_next = ldcl->head; 818 ldcl->head = ldcp; 819 RW_EXIT(&ldcl->lockrw); 820 821 D1(vswp, "%s: exit", __func__); 822 return (0); 823 824 ldc_attach_fail: 825 826 if (progress & PROG_callback) { 827 (void) ldc_unreg_callback(ldcp->ldc_handle); 828 kmem_free(ldcp->ldcmsg, ldcp->msglen); 829 } 830 831 if (progress & PROG_rx_thread) { 832 if (ldcp->rx_thread != NULL) { 833 vsw_stop_rx_thread(ldcp); 834 } 835 mutex_destroy(&ldcp->rx_thr_lock); 836 cv_destroy(&ldcp->rx_thr_cv); 837 } 838 839 if (progress & PROG_tx_thread) { 840 if (ldcp->tx_thread != NULL) { 841 vsw_stop_tx_thread(ldcp); 842 } 843 mutex_destroy(&ldcp->tx_thr_lock); 844 cv_destroy(&ldcp->tx_thr_cv); 845 } 846 if (ldcp->ksp != NULL) { 847 vgen_destroy_kstats(ldcp->ksp); 848 } 849 mutex_destroy(&ldcp->ldc_txlock); 850 mutex_destroy(&ldcp->ldc_rxlock); 851 mutex_destroy(&ldcp->ldc_cblock); 852 mutex_destroy(&ldcp->drain_cv_lock); 853 854 cv_destroy(&ldcp->drain_cv); 855 856 rw_destroy(&ldcp->lane_in.dlistrw); 857 rw_destroy(&ldcp->lane_out.dlistrw); 858 859 kmem_free(ldcp, sizeof (vsw_ldc_t)); 860 861 return (1); 862 } 863 864 /* 865 * Detach a logical domain channel (ldc) belonging to a 866 * particular port. 867 * 868 * Returns 0 on success, 1 on failure. 
869 */ 870 static int 871 vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id) 872 { 873 vsw_t *vswp = port->p_vswp; 874 vsw_ldc_t *ldcp, *prev_ldcp; 875 vsw_ldc_list_t *ldcl = &port->p_ldclist; 876 int rv; 877 878 prev_ldcp = ldcl->head; 879 for (; (ldcp = prev_ldcp) != NULL; prev_ldcp = ldcp->ldc_next) { 880 if (ldcp->ldc_id == ldc_id) { 881 break; 882 } 883 } 884 885 /* specified ldc id not found */ 886 if (ldcp == NULL) { 887 DERR(vswp, "%s: ldcp = NULL", __func__); 888 return (1); 889 } 890 891 D2(vswp, "%s: detaching channel %lld", __func__, ldcp->ldc_id); 892 893 /* Stop the receive thread */ 894 if (ldcp->rx_thread != NULL) { 895 vsw_stop_rx_thread(ldcp); 896 mutex_destroy(&ldcp->rx_thr_lock); 897 cv_destroy(&ldcp->rx_thr_cv); 898 } 899 kmem_free(ldcp->ldcmsg, ldcp->msglen); 900 901 /* Stop the tx thread */ 902 if (ldcp->tx_thread != NULL) { 903 vsw_stop_tx_thread(ldcp); 904 mutex_destroy(&ldcp->tx_thr_lock); 905 cv_destroy(&ldcp->tx_thr_cv); 906 if (ldcp->tx_mhead != NULL) { 907 freemsgchain(ldcp->tx_mhead); 908 ldcp->tx_mhead = ldcp->tx_mtail = NULL; 909 ldcp->tx_cnt = 0; 910 } 911 } 912 913 /* Destory kstats */ 914 vgen_destroy_kstats(ldcp->ksp); 915 916 /* 917 * Before we can close the channel we must release any mapped 918 * resources (e.g. drings). 919 */ 920 vsw_free_lane_resources(ldcp, INBOUND); 921 vsw_free_lane_resources(ldcp, OUTBOUND); 922 923 /* 924 * If the close fails we are in serious trouble, as won't 925 * be able to delete the parent port. 926 */ 927 if ((rv = ldc_close(ldcp->ldc_handle)) != 0) { 928 DERR(vswp, "%s: error %d closing channel %lld", 929 __func__, rv, ldcp->ldc_id); 930 return (1); 931 } 932 933 (void) ldc_fini(ldcp->ldc_handle); 934 935 ldcp->ldc_status = LDC_INIT; 936 ldcp->ldc_handle = NULL; 937 ldcp->ldc_vswp = NULL; 938 939 940 /* 941 * Most likely some mblks are still in use and 942 * have not been returned to the pool. These mblks are 943 * added to the pool that is maintained in the device instance. 944 * Another attempt will be made to destroy the pool 945 * when the device detaches. 946 */ 947 vio_destroy_multipools(&ldcp->vmp, &vswp->rxh); 948 949 /* unlink it from the list */ 950 prev_ldcp = ldcp->ldc_next; 951 952 mutex_destroy(&ldcp->ldc_txlock); 953 mutex_destroy(&ldcp->ldc_rxlock); 954 mutex_destroy(&ldcp->ldc_cblock); 955 cv_destroy(&ldcp->drain_cv); 956 mutex_destroy(&ldcp->drain_cv_lock); 957 mutex_destroy(&ldcp->status_lock); 958 rw_destroy(&ldcp->lane_in.dlistrw); 959 rw_destroy(&ldcp->lane_out.dlistrw); 960 961 kmem_free(ldcp, sizeof (vsw_ldc_t)); 962 963 return (0); 964 } 965 966 /* 967 * Open and attempt to bring up the channel. Note that channel 968 * can only be brought up if peer has also opened channel. 969 * 970 * Returns 0 if can open and bring up channel, otherwise 971 * returns 1. 
972 */ 973 static int 974 vsw_ldc_init(vsw_ldc_t *ldcp) 975 { 976 vsw_t *vswp = ldcp->ldc_vswp; 977 ldc_status_t istatus = 0; 978 int rv; 979 980 D1(vswp, "%s: enter", __func__); 981 982 LDC_ENTER_LOCK(ldcp); 983 984 /* don't start at 0 in case clients don't like that */ 985 ldcp->next_ident = 1; 986 987 rv = ldc_open(ldcp->ldc_handle); 988 if (rv != 0) { 989 DERR(vswp, "%s: ldc_open failed: id(%lld) rv(%d)", 990 __func__, ldcp->ldc_id, rv); 991 LDC_EXIT_LOCK(ldcp); 992 return (1); 993 } 994 995 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) { 996 DERR(vswp, "%s: unable to get status", __func__); 997 LDC_EXIT_LOCK(ldcp); 998 return (1); 999 1000 } else if (istatus != LDC_OPEN && istatus != LDC_READY) { 1001 DERR(vswp, "%s: id (%lld) status(%d) is not OPEN/READY", 1002 __func__, ldcp->ldc_id, istatus); 1003 LDC_EXIT_LOCK(ldcp); 1004 return (1); 1005 } 1006 1007 mutex_enter(&ldcp->status_lock); 1008 ldcp->ldc_status = istatus; 1009 mutex_exit(&ldcp->status_lock); 1010 1011 rv = ldc_up(ldcp->ldc_handle); 1012 if (rv != 0) { 1013 /* 1014 * Not a fatal error for ldc_up() to fail, as peer 1015 * end point may simply not be ready yet. 1016 */ 1017 D2(vswp, "%s: ldc_up err id(%lld) rv(%d)", __func__, 1018 ldcp->ldc_id, rv); 1019 LDC_EXIT_LOCK(ldcp); 1020 return (1); 1021 } 1022 1023 /* 1024 * ldc_up() call is non-blocking so need to explicitly 1025 * check channel status to see if in fact the channel 1026 * is UP. 1027 */ 1028 mutex_enter(&ldcp->status_lock); 1029 if (ldc_status(ldcp->ldc_handle, &ldcp->ldc_status) != 0) { 1030 DERR(vswp, "%s: unable to get status", __func__); 1031 mutex_exit(&ldcp->status_lock); 1032 LDC_EXIT_LOCK(ldcp); 1033 return (1); 1034 1035 } 1036 1037 if (ldcp->ldc_status == LDC_UP) { 1038 D2(vswp, "%s: channel %ld now UP (%ld)", __func__, 1039 ldcp->ldc_id, istatus); 1040 mutex_exit(&ldcp->status_lock); 1041 LDC_EXIT_LOCK(ldcp); 1042 1043 vsw_process_conn_evt(ldcp, VSW_CONN_UP); 1044 return (0); 1045 } 1046 1047 mutex_exit(&ldcp->status_lock); 1048 LDC_EXIT_LOCK(ldcp); 1049 1050 D1(vswp, "%s: exit", __func__); 1051 return (0); 1052 } 1053 1054 /* disable callbacks on the channel */ 1055 static int 1056 vsw_ldc_uninit(vsw_ldc_t *ldcp) 1057 { 1058 vsw_t *vswp = ldcp->ldc_vswp; 1059 int rv; 1060 1061 D1(vswp, "vsw_ldc_uninit: enter: id(%lx)\n", ldcp->ldc_id); 1062 1063 LDC_ENTER_LOCK(ldcp); 1064 1065 rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE); 1066 if (rv != 0) { 1067 DERR(vswp, "vsw_ldc_uninit(%lld): error disabling " 1068 "interrupts (rv = %d)\n", ldcp->ldc_id, rv); 1069 LDC_EXIT_LOCK(ldcp); 1070 return (1); 1071 } 1072 1073 mutex_enter(&ldcp->status_lock); 1074 ldcp->ldc_status = LDC_INIT; 1075 mutex_exit(&ldcp->status_lock); 1076 1077 LDC_EXIT_LOCK(ldcp); 1078 1079 D1(vswp, "vsw_ldc_uninit: exit: id(%lx)", ldcp->ldc_id); 1080 1081 return (0); 1082 } 1083 1084 static int 1085 vsw_init_ldcs(vsw_port_t *port) 1086 { 1087 vsw_ldc_list_t *ldcl = &port->p_ldclist; 1088 vsw_ldc_t *ldcp; 1089 1090 READ_ENTER(&ldcl->lockrw); 1091 ldcp = ldcl->head; 1092 for (; ldcp != NULL; ldcp = ldcp->ldc_next) { 1093 (void) vsw_ldc_init(ldcp); 1094 } 1095 RW_EXIT(&ldcl->lockrw); 1096 1097 return (0); 1098 } 1099 1100 static int 1101 vsw_uninit_ldcs(vsw_port_t *port) 1102 { 1103 vsw_ldc_list_t *ldcl = &port->p_ldclist; 1104 vsw_ldc_t *ldcp; 1105 1106 D1(NULL, "vsw_uninit_ldcs: enter\n"); 1107 1108 READ_ENTER(&ldcl->lockrw); 1109 ldcp = ldcl->head; 1110 for (; ldcp != NULL; ldcp = ldcp->ldc_next) { 1111 (void) vsw_ldc_uninit(ldcp); 1112 } 1113 RW_EXIT(&ldcl->lockrw); 1114 1115 
D1(NULL, "vsw_uninit_ldcs: exit\n"); 1116 1117 return (0); 1118 } 1119 1120 /* 1121 * Wait until the callback(s) associated with the ldcs under the specified 1122 * port have completed. 1123 * 1124 * Prior to this function being invoked each channel under this port 1125 * should have been quiesced via ldc_set_cb_mode(DISABLE). 1126 * 1127 * A short explaination of what we are doing below.. 1128 * 1129 * The simplest approach would be to have a reference counter in 1130 * the ldc structure which is increment/decremented by the callbacks as 1131 * they use the channel. The drain function could then simply disable any 1132 * further callbacks and do a cv_wait for the ref to hit zero. Unfortunately 1133 * there is a tiny window here - before the callback is able to get the lock 1134 * on the channel it is interrupted and this function gets to execute. It 1135 * sees that the ref count is zero and believes its free to delete the 1136 * associated data structures. 1137 * 1138 * We get around this by taking advantage of the fact that before the ldc 1139 * framework invokes a callback it sets a flag to indicate that there is a 1140 * callback active (or about to become active). If when we attempt to 1141 * unregister a callback when this active flag is set then the unregister 1142 * will fail with EWOULDBLOCK. 1143 * 1144 * If the unregister fails we do a cv_timedwait. We will either be signaled 1145 * by the callback as it is exiting (note we have to wait a short period to 1146 * allow the callback to return fully to the ldc framework and it to clear 1147 * the active flag), or by the timer expiring. In either case we again attempt 1148 * the unregister. We repeat this until we can succesfully unregister the 1149 * callback. 1150 * 1151 * The reason we use a cv_timedwait rather than a simple cv_wait is to catch 1152 * the case where the callback has finished but the ldc framework has not yet 1153 * cleared the active flag. In this case we would never get a cv_signal. 1154 */ 1155 static int 1156 vsw_drain_ldcs(vsw_port_t *port) 1157 { 1158 vsw_ldc_list_t *ldcl = &port->p_ldclist; 1159 vsw_ldc_t *ldcp; 1160 vsw_t *vswp = port->p_vswp; 1161 1162 D1(vswp, "%s: enter", __func__); 1163 1164 READ_ENTER(&ldcl->lockrw); 1165 1166 ldcp = ldcl->head; 1167 1168 for (; ldcp != NULL; ldcp = ldcp->ldc_next) { 1169 /* 1170 * If we can unregister the channel callback then we 1171 * know that there is no callback either running or 1172 * scheduled to run for this channel so move on to next 1173 * channel in the list. 1174 */ 1175 mutex_enter(&ldcp->drain_cv_lock); 1176 1177 /* prompt active callbacks to quit */ 1178 ldcp->drain_state = VSW_LDC_DRAINING; 1179 1180 if ((ldc_unreg_callback(ldcp->ldc_handle)) == 0) { 1181 D2(vswp, "%s: unreg callback for chan %ld", __func__, 1182 ldcp->ldc_id); 1183 mutex_exit(&ldcp->drain_cv_lock); 1184 continue; 1185 } else { 1186 /* 1187 * If we end up here we know that either 1) a callback 1188 * is currently executing, 2) is about to start (i.e. 1189 * the ldc framework has set the active flag but 1190 * has not actually invoked the callback yet, or 3) 1191 * has finished and has returned to the ldc framework 1192 * but the ldc framework has not yet cleared the 1193 * active bit. 1194 * 1195 * Wait for it to finish. 
1196 */ 1197 while (ldc_unreg_callback(ldcp->ldc_handle) 1198 == EWOULDBLOCK) 1199 (void) cv_timedwait(&ldcp->drain_cv, 1200 &ldcp->drain_cv_lock, lbolt + hz); 1201 1202 mutex_exit(&ldcp->drain_cv_lock); 1203 D2(vswp, "%s: unreg callback for chan %ld after " 1204 "timeout", __func__, ldcp->ldc_id); 1205 } 1206 } 1207 RW_EXIT(&ldcl->lockrw); 1208 1209 D1(vswp, "%s: exit", __func__); 1210 return (0); 1211 } 1212 1213 /* 1214 * Wait until all tasks which reference this port have completed. 1215 * 1216 * Prior to this function being invoked each channel under this port 1217 * should have been quiesced via ldc_set_cb_mode(DISABLE). 1218 */ 1219 static int 1220 vsw_drain_port_taskq(vsw_port_t *port) 1221 { 1222 vsw_t *vswp = port->p_vswp; 1223 1224 D1(vswp, "%s: enter", __func__); 1225 1226 /* 1227 * Mark the port as in the process of being detached, and 1228 * dispatch a marker task to the queue so we know when all 1229 * relevant tasks have completed. 1230 */ 1231 mutex_enter(&port->state_lock); 1232 port->state = VSW_PORT_DETACHING; 1233 1234 if ((vswp->taskq_p == NULL) || 1235 (ddi_taskq_dispatch(vswp->taskq_p, vsw_marker_task, 1236 port, DDI_NOSLEEP) != DDI_SUCCESS)) { 1237 DERR(vswp, "%s: unable to dispatch marker task", 1238 __func__); 1239 mutex_exit(&port->state_lock); 1240 return (1); 1241 } 1242 1243 /* 1244 * Wait for the marker task to finish. 1245 */ 1246 while (port->state != VSW_PORT_DETACHABLE) 1247 cv_wait(&port->state_cv, &port->state_lock); 1248 1249 mutex_exit(&port->state_lock); 1250 1251 D1(vswp, "%s: exit", __func__); 1252 1253 return (0); 1254 } 1255 1256 static void 1257 vsw_marker_task(void *arg) 1258 { 1259 vsw_port_t *port = arg; 1260 vsw_t *vswp = port->p_vswp; 1261 1262 D1(vswp, "%s: enter", __func__); 1263 1264 mutex_enter(&port->state_lock); 1265 1266 /* 1267 * No further tasks should be dispatched which reference 1268 * this port so ok to mark it as safe to detach. 1269 */ 1270 port->state = VSW_PORT_DETACHABLE; 1271 1272 cv_signal(&port->state_cv); 1273 1274 mutex_exit(&port->state_lock); 1275 1276 D1(vswp, "%s: exit", __func__); 1277 } 1278 1279 vsw_port_t * 1280 vsw_lookup_port(vsw_t *vswp, int p_instance) 1281 { 1282 vsw_port_list_t *plist = &vswp->plist; 1283 vsw_port_t *port; 1284 1285 for (port = plist->head; port != NULL; port = port->p_next) { 1286 if (port->p_instance == p_instance) { 1287 D2(vswp, "vsw_lookup_port: found p_instance\n"); 1288 return (port); 1289 } 1290 } 1291 1292 return (NULL); 1293 } 1294 1295 void 1296 vsw_vlan_unaware_port_reset(vsw_port_t *portp) 1297 { 1298 vsw_ldc_list_t *ldclp; 1299 vsw_ldc_t *ldcp; 1300 1301 ldclp = &portp->p_ldclist; 1302 1303 READ_ENTER(&ldclp->lockrw); 1304 1305 /* 1306 * NOTE: for now, we will assume we have a single channel. 1307 */ 1308 if (ldclp->head == NULL) { 1309 RW_EXIT(&ldclp->lockrw); 1310 return; 1311 } 1312 ldcp = ldclp->head; 1313 1314 mutex_enter(&ldcp->ldc_cblock); 1315 1316 /* 1317 * If the peer is vlan_unaware(ver < 1.3), reset channel and terminate 1318 * the connection. See comments in vsw_set_vnet_proto_ops(). 
1319 */ 1320 if (ldcp->hphase == VSW_MILESTONE4 && VSW_VER_LT(ldcp, 1, 3) && 1321 portp->nvids != 0) { 1322 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1323 } 1324 1325 mutex_exit(&ldcp->ldc_cblock); 1326 1327 RW_EXIT(&ldclp->lockrw); 1328 } 1329 1330 void 1331 vsw_hio_port_reset(vsw_port_t *portp, boolean_t immediate) 1332 { 1333 vsw_ldc_list_t *ldclp; 1334 vsw_ldc_t *ldcp; 1335 1336 ldclp = &portp->p_ldclist; 1337 1338 READ_ENTER(&ldclp->lockrw); 1339 1340 /* 1341 * NOTE: for now, we will assume we have a single channel. 1342 */ 1343 if (ldclp->head == NULL) { 1344 RW_EXIT(&ldclp->lockrw); 1345 return; 1346 } 1347 ldcp = ldclp->head; 1348 1349 mutex_enter(&ldcp->ldc_cblock); 1350 1351 /* 1352 * If the peer is HybridIO capable (ver >= 1.3), reset channel 1353 * to trigger re-negotiation, which inturn trigger HybridIO 1354 * setup/cleanup. 1355 */ 1356 if ((ldcp->hphase == VSW_MILESTONE4) && 1357 (portp->p_hio_capable == B_TRUE)) { 1358 if (immediate == B_TRUE) { 1359 (void) ldc_down(ldcp->ldc_handle); 1360 } else { 1361 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1362 } 1363 } 1364 1365 mutex_exit(&ldcp->ldc_cblock); 1366 1367 RW_EXIT(&ldclp->lockrw); 1368 } 1369 1370 void 1371 vsw_port_reset(vsw_port_t *portp) 1372 { 1373 vsw_ldc_list_t *ldclp; 1374 vsw_ldc_t *ldcp; 1375 1376 ldclp = &portp->p_ldclist; 1377 1378 READ_ENTER(&ldclp->lockrw); 1379 1380 /* 1381 * NOTE: for now, we will assume we have a single channel. 1382 */ 1383 if (ldclp->head == NULL) { 1384 RW_EXIT(&ldclp->lockrw); 1385 return; 1386 } 1387 ldcp = ldclp->head; 1388 1389 mutex_enter(&ldcp->ldc_cblock); 1390 1391 /* 1392 * reset channel and terminate the connection. 1393 */ 1394 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1395 1396 mutex_exit(&ldcp->ldc_cblock); 1397 1398 RW_EXIT(&ldclp->lockrw); 1399 } 1400 1401 void 1402 vsw_reset_ports(vsw_t *vswp) 1403 { 1404 vsw_port_list_t *plist = &vswp->plist; 1405 vsw_port_t *portp; 1406 1407 READ_ENTER(&plist->lockrw); 1408 for (portp = plist->head; portp != NULL; portp = portp->p_next) { 1409 if ((portp->p_hio_capable) && (portp->p_hio_enabled)) { 1410 vsw_hio_stop_port(portp); 1411 } 1412 vsw_port_reset(portp); 1413 } 1414 RW_EXIT(&plist->lockrw); 1415 } 1416 1417 1418 /* 1419 * Search for and remove the specified port from the port 1420 * list. Returns 0 if able to locate and remove port, otherwise 1421 * returns 1. 1422 */ 1423 static int 1424 vsw_plist_del_node(vsw_t *vswp, vsw_port_t *port) 1425 { 1426 vsw_port_list_t *plist = &vswp->plist; 1427 vsw_port_t *curr_p, *prev_p; 1428 1429 if (plist->head == NULL) 1430 return (1); 1431 1432 curr_p = prev_p = plist->head; 1433 1434 while (curr_p != NULL) { 1435 if (curr_p == port) { 1436 if (prev_p == curr_p) { 1437 plist->head = curr_p->p_next; 1438 } else { 1439 prev_p->p_next = curr_p->p_next; 1440 } 1441 plist->num_ports--; 1442 break; 1443 } else { 1444 prev_p = curr_p; 1445 curr_p = curr_p->p_next; 1446 } 1447 } 1448 return (0); 1449 } 1450 1451 /* 1452 * Interrupt handler for ldc messages. 
1453 */ 1454 static uint_t 1455 vsw_ldc_cb(uint64_t event, caddr_t arg) 1456 { 1457 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 1458 vsw_t *vswp = ldcp->ldc_vswp; 1459 1460 D1(vswp, "%s: enter: ldcid (%lld)\n", __func__, ldcp->ldc_id); 1461 1462 mutex_enter(&ldcp->ldc_cblock); 1463 ldcp->ldc_stats.callbacks++; 1464 1465 mutex_enter(&ldcp->status_lock); 1466 if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) { 1467 mutex_exit(&ldcp->status_lock); 1468 mutex_exit(&ldcp->ldc_cblock); 1469 return (LDC_SUCCESS); 1470 } 1471 mutex_exit(&ldcp->status_lock); 1472 1473 if (event & LDC_EVT_UP) { 1474 /* 1475 * Channel has come up. 1476 */ 1477 D2(vswp, "%s: id(%ld) event(%llx) UP: status(%ld)", 1478 __func__, ldcp->ldc_id, event, ldcp->ldc_status); 1479 1480 vsw_process_conn_evt(ldcp, VSW_CONN_UP); 1481 1482 ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0); 1483 } 1484 1485 if (event & LDC_EVT_READ) { 1486 /* 1487 * Data available for reading. 1488 */ 1489 D2(vswp, "%s: id(ld) event(%llx) data READ", 1490 __func__, ldcp->ldc_id, event); 1491 1492 if (ldcp->rx_thread != NULL) { 1493 /* 1494 * If the receive thread is enabled, then 1495 * wakeup the receive thread to process the 1496 * LDC messages. 1497 */ 1498 mutex_exit(&ldcp->ldc_cblock); 1499 mutex_enter(&ldcp->rx_thr_lock); 1500 if (!(ldcp->rx_thr_flags & VSW_WTHR_DATARCVD)) { 1501 ldcp->rx_thr_flags |= VSW_WTHR_DATARCVD; 1502 cv_signal(&ldcp->rx_thr_cv); 1503 } 1504 mutex_exit(&ldcp->rx_thr_lock); 1505 mutex_enter(&ldcp->ldc_cblock); 1506 } else { 1507 vsw_process_pkt(ldcp); 1508 } 1509 1510 ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0); 1511 1512 goto vsw_cb_exit; 1513 } 1514 1515 if (event & (LDC_EVT_DOWN | LDC_EVT_RESET)) { 1516 D2(vswp, "%s: id(%ld) event (%lx) DOWN/RESET: status(%ld)", 1517 __func__, ldcp->ldc_id, event, ldcp->ldc_status); 1518 1519 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 1520 } 1521 1522 /* 1523 * Catch either LDC_EVT_WRITE which we don't support or any 1524 * unknown event. 1525 */ 1526 if (event & 1527 ~(LDC_EVT_UP | LDC_EVT_RESET | LDC_EVT_DOWN | LDC_EVT_READ)) { 1528 DERR(vswp, "%s: id(%ld) Unexpected event=(%llx) status(%ld)", 1529 __func__, ldcp->ldc_id, event, ldcp->ldc_status); 1530 } 1531 1532 vsw_cb_exit: 1533 mutex_exit(&ldcp->ldc_cblock); 1534 1535 /* 1536 * Let the drain function know we are finishing if it 1537 * is waiting. 1538 */ 1539 mutex_enter(&ldcp->drain_cv_lock); 1540 if (ldcp->drain_state == VSW_LDC_DRAINING) 1541 cv_signal(&ldcp->drain_cv); 1542 mutex_exit(&ldcp->drain_cv_lock); 1543 1544 return (LDC_SUCCESS); 1545 } 1546 1547 /* 1548 * Reinitialise data structures associated with the channel. 
1549 */ 1550 static void 1551 vsw_ldc_reinit(vsw_ldc_t *ldcp) 1552 { 1553 vsw_t *vswp = ldcp->ldc_vswp; 1554 vsw_port_t *port; 1555 vsw_ldc_list_t *ldcl; 1556 1557 D1(vswp, "%s: enter", __func__); 1558 1559 /* free receive mblk pools for the channel */ 1560 vio_destroy_multipools(&ldcp->vmp, &vswp->rxh); 1561 1562 port = ldcp->ldc_port; 1563 ldcl = &port->p_ldclist; 1564 1565 READ_ENTER(&ldcl->lockrw); 1566 1567 D2(vswp, "%s: in 0x%llx : out 0x%llx", __func__, 1568 ldcp->lane_in.lstate, ldcp->lane_out.lstate); 1569 1570 vsw_free_lane_resources(ldcp, INBOUND); 1571 vsw_free_lane_resources(ldcp, OUTBOUND); 1572 RW_EXIT(&ldcl->lockrw); 1573 1574 ldcp->lane_in.lstate = 0; 1575 ldcp->lane_out.lstate = 0; 1576 1577 /* Remove the fdb entry for this port/mac address */ 1578 vsw_fdbe_del(vswp, &(port->p_macaddr)); 1579 1580 /* remove the port from vlans it has been assigned to */ 1581 vsw_vlan_remove_ids(port, VSW_VNETPORT); 1582 1583 /* 1584 * Remove parent port from any multicast groups 1585 * it may have registered with. Client must resend 1586 * multicast add command after handshake completes. 1587 */ 1588 vsw_del_mcst_port(port); 1589 1590 ldcp->peer_session = 0; 1591 ldcp->session_status = 0; 1592 ldcp->hcnt = 0; 1593 ldcp->hphase = VSW_MILESTONE0; 1594 1595 vsw_reset_vnet_proto_ops(ldcp); 1596 1597 D1(vswp, "%s: exit", __func__); 1598 } 1599 1600 /* 1601 * Process a connection event. 1602 * 1603 * Note - care must be taken to ensure that this function is 1604 * not called with the dlistrw lock held. 1605 */ 1606 static void 1607 vsw_process_conn_evt(vsw_ldc_t *ldcp, uint16_t evt) 1608 { 1609 vsw_t *vswp = ldcp->ldc_vswp; 1610 vsw_conn_evt_t *conn = NULL; 1611 1612 D1(vswp, "%s: enter", __func__); 1613 1614 /* 1615 * Check if either a reset or restart event is pending 1616 * or in progress. If so just return. 1617 * 1618 * A VSW_CONN_RESET event originates either with a LDC_RESET_EVT 1619 * being received by the callback handler, or a ECONNRESET error 1620 * code being returned from a ldc_read() or ldc_write() call. 1621 * 1622 * A VSW_CONN_RESTART event occurs when some error checking code 1623 * decides that there is a problem with data from the channel, 1624 * and that the handshake should be restarted. 1625 */ 1626 if (((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART)) && 1627 (ldstub((uint8_t *)&ldcp->reset_active))) 1628 return; 1629 1630 /* 1631 * If it is an LDC_UP event we first check the recorded 1632 * state of the channel. If this is UP then we know that 1633 * the channel moving to the UP state has already been dealt 1634 * with and don't need to dispatch a new task. 1635 * 1636 * The reason for this check is that when we do a ldc_up(), 1637 * depending on the state of the peer, we may or may not get 1638 * a LDC_UP event. As we can't depend on getting a LDC_UP evt 1639 * every time we do ldc_up() we explicitly check the channel 1640 * status to see has it come up (ldc_up() is asynch and will 1641 * complete at some undefined time), and take the appropriate 1642 * action. 1643 * 1644 * The flip side of this is that we may get a LDC_UP event 1645 * when we have already seen that the channel is up and have 1646 * dealt with that. 
1647 */ 1648 mutex_enter(&ldcp->status_lock); 1649 if (evt == VSW_CONN_UP) { 1650 if ((ldcp->ldc_status == LDC_UP) || (ldcp->reset_active != 0)) { 1651 mutex_exit(&ldcp->status_lock); 1652 return; 1653 } 1654 } 1655 mutex_exit(&ldcp->status_lock); 1656 1657 /* 1658 * The transaction group id allows us to identify and discard 1659 * any tasks which are still pending on the taskq and refer 1660 * to the handshake session we are about to restart or reset. 1661 * These stale messages no longer have any real meaning. 1662 */ 1663 (void) atomic_inc_32(&ldcp->hss_id); 1664 1665 ASSERT(vswp->taskq_p != NULL); 1666 1667 if ((conn = kmem_zalloc(sizeof (vsw_conn_evt_t), KM_NOSLEEP)) == NULL) { 1668 cmn_err(CE_WARN, "!vsw%d: unable to allocate memory for" 1669 " connection event", vswp->instance); 1670 goto err_exit; 1671 } 1672 1673 conn->evt = evt; 1674 conn->ldcp = ldcp; 1675 1676 if (ddi_taskq_dispatch(vswp->taskq_p, vsw_conn_task, conn, 1677 DDI_NOSLEEP) != DDI_SUCCESS) { 1678 cmn_err(CE_WARN, "!vsw%d: Can't dispatch connection task", 1679 vswp->instance); 1680 1681 kmem_free(conn, sizeof (vsw_conn_evt_t)); 1682 goto err_exit; 1683 } 1684 1685 D1(vswp, "%s: exit", __func__); 1686 return; 1687 1688 err_exit: 1689 /* 1690 * Have mostly likely failed due to memory shortage. Clear the flag so 1691 * that future requests will at least be attempted and will hopefully 1692 * succeed. 1693 */ 1694 if ((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART)) 1695 ldcp->reset_active = 0; 1696 } 1697 1698 /* 1699 * Deal with events relating to a connection. Invoked from a taskq. 1700 */ 1701 static void 1702 vsw_conn_task(void *arg) 1703 { 1704 vsw_conn_evt_t *conn = (vsw_conn_evt_t *)arg; 1705 vsw_ldc_t *ldcp = NULL; 1706 vsw_port_t *portp; 1707 vsw_t *vswp = NULL; 1708 uint16_t evt; 1709 ldc_status_t curr_status; 1710 1711 ldcp = conn->ldcp; 1712 evt = conn->evt; 1713 vswp = ldcp->ldc_vswp; 1714 portp = ldcp->ldc_port; 1715 1716 D1(vswp, "%s: enter", __func__); 1717 1718 /* can safely free now have copied out data */ 1719 kmem_free(conn, sizeof (vsw_conn_evt_t)); 1720 1721 mutex_enter(&ldcp->status_lock); 1722 if (ldc_status(ldcp->ldc_handle, &curr_status) != 0) { 1723 cmn_err(CE_WARN, "!vsw%d: Unable to read status of " 1724 "channel %ld", vswp->instance, ldcp->ldc_id); 1725 mutex_exit(&ldcp->status_lock); 1726 return; 1727 } 1728 1729 /* 1730 * If we wish to restart the handshake on this channel, then if 1731 * the channel is UP we bring it DOWN to flush the underlying 1732 * ldc queue. 1733 */ 1734 if ((evt == VSW_CONN_RESTART) && (curr_status == LDC_UP)) 1735 (void) ldc_down(ldcp->ldc_handle); 1736 1737 if ((portp->p_hio_capable) && (portp->p_hio_enabled)) { 1738 vsw_hio_stop(vswp, ldcp); 1739 } 1740 1741 /* 1742 * re-init all the associated data structures. 1743 */ 1744 vsw_ldc_reinit(ldcp); 1745 1746 /* 1747 * Bring the channel back up (note it does no harm to 1748 * do this even if the channel is already UP, Just 1749 * becomes effectively a no-op). 1750 */ 1751 (void) ldc_up(ldcp->ldc_handle); 1752 1753 /* 1754 * Check if channel is now UP. This will only happen if 1755 * peer has also done a ldc_up(). 
1756 */ 1757 if (ldc_status(ldcp->ldc_handle, &curr_status) != 0) { 1758 cmn_err(CE_WARN, "!vsw%d: Unable to read status of " 1759 "channel %ld", vswp->instance, ldcp->ldc_id); 1760 mutex_exit(&ldcp->status_lock); 1761 return; 1762 } 1763 1764 ldcp->ldc_status = curr_status; 1765 1766 /* channel UP so restart handshake by sending version info */ 1767 if (curr_status == LDC_UP) { 1768 if (ldcp->hcnt++ > vsw_num_handshakes) { 1769 cmn_err(CE_WARN, "!vsw%d: exceeded number of permitted" 1770 " handshake attempts (%d) on channel %ld", 1771 vswp->instance, ldcp->hcnt, ldcp->ldc_id); 1772 mutex_exit(&ldcp->status_lock); 1773 return; 1774 } 1775 1776 if (vsw_obp_ver_proto_workaround == B_FALSE && 1777 (ddi_taskq_dispatch(vswp->taskq_p, vsw_send_ver, ldcp, 1778 DDI_NOSLEEP) != DDI_SUCCESS)) { 1779 cmn_err(CE_WARN, "!vsw%d: Can't dispatch version task", 1780 vswp->instance); 1781 1782 /* 1783 * Don't count as valid restart attempt if couldn't 1784 * send version msg. 1785 */ 1786 if (ldcp->hcnt > 0) 1787 ldcp->hcnt--; 1788 } 1789 } 1790 1791 /* 1792 * Mark that the process is complete by clearing the flag. 1793 * 1794 * Note is it possible that the taskq dispatch above may have failed, 1795 * most likely due to memory shortage. We still clear the flag so 1796 * future attempts will at least be attempted and will hopefully 1797 * succeed. 1798 */ 1799 if ((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART)) 1800 ldcp->reset_active = 0; 1801 1802 mutex_exit(&ldcp->status_lock); 1803 1804 D1(vswp, "%s: exit", __func__); 1805 } 1806 1807 /* 1808 * returns 0 if legal for event signified by flag to have 1809 * occured at the time it did. Otherwise returns 1. 1810 */ 1811 int 1812 vsw_check_flag(vsw_ldc_t *ldcp, int dir, uint64_t flag) 1813 { 1814 vsw_t *vswp = ldcp->ldc_vswp; 1815 uint64_t state; 1816 uint64_t phase; 1817 1818 if (dir == INBOUND) 1819 state = ldcp->lane_in.lstate; 1820 else 1821 state = ldcp->lane_out.lstate; 1822 1823 phase = ldcp->hphase; 1824 1825 switch (flag) { 1826 case VSW_VER_INFO_RECV: 1827 if (phase > VSW_MILESTONE0) { 1828 DERR(vswp, "vsw_check_flag (%d): VER_INFO_RECV" 1829 " when in state %d\n", ldcp->ldc_id, phase); 1830 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1831 return (1); 1832 } 1833 break; 1834 1835 case VSW_VER_ACK_RECV: 1836 case VSW_VER_NACK_RECV: 1837 if (!(state & VSW_VER_INFO_SENT)) { 1838 DERR(vswp, "vsw_check_flag (%d): spurious VER_ACK or " 1839 "VER_NACK when in state %d\n", ldcp->ldc_id, phase); 1840 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1841 return (1); 1842 } else 1843 state &= ~VSW_VER_INFO_SENT; 1844 break; 1845 1846 case VSW_ATTR_INFO_RECV: 1847 if ((phase < VSW_MILESTONE1) || (phase >= VSW_MILESTONE2)) { 1848 DERR(vswp, "vsw_check_flag (%d): ATTR_INFO_RECV" 1849 " when in state %d\n", ldcp->ldc_id, phase); 1850 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1851 return (1); 1852 } 1853 break; 1854 1855 case VSW_ATTR_ACK_RECV: 1856 case VSW_ATTR_NACK_RECV: 1857 if (!(state & VSW_ATTR_INFO_SENT)) { 1858 DERR(vswp, "vsw_check_flag (%d): spurious ATTR_ACK" 1859 " or ATTR_NACK when in state %d\n", 1860 ldcp->ldc_id, phase); 1861 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1862 return (1); 1863 } else 1864 state &= ~VSW_ATTR_INFO_SENT; 1865 break; 1866 1867 case VSW_DRING_INFO_RECV: 1868 if (phase < VSW_MILESTONE1) { 1869 DERR(vswp, "vsw_check_flag (%d): DRING_INFO_RECV" 1870 " when in state %d\n", ldcp->ldc_id, phase); 1871 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1872 return (1); 1873 } 1874 break; 1875 1876 case VSW_DRING_ACK_RECV: 
1877 case VSW_DRING_NACK_RECV: 1878 if (!(state & VSW_DRING_INFO_SENT)) { 1879 DERR(vswp, "vsw_check_flag (%d): spurious DRING_ACK " 1880 " or DRING_NACK when in state %d\n", 1881 ldcp->ldc_id, phase); 1882 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1883 return (1); 1884 } else 1885 state &= ~VSW_DRING_INFO_SENT; 1886 break; 1887 1888 case VSW_RDX_INFO_RECV: 1889 if (phase < VSW_MILESTONE3) { 1890 DERR(vswp, "vsw_check_flag (%d): RDX_INFO_RECV" 1891 " when in state %d\n", ldcp->ldc_id, phase); 1892 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1893 return (1); 1894 } 1895 break; 1896 1897 case VSW_RDX_ACK_RECV: 1898 case VSW_RDX_NACK_RECV: 1899 if (!(state & VSW_RDX_INFO_SENT)) { 1900 DERR(vswp, "vsw_check_flag (%d): spurious RDX_ACK or " 1901 "RDX_NACK when in state %d\n", ldcp->ldc_id, phase); 1902 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1903 return (1); 1904 } else 1905 state &= ~VSW_RDX_INFO_SENT; 1906 break; 1907 1908 case VSW_MCST_INFO_RECV: 1909 if (phase < VSW_MILESTONE3) { 1910 DERR(vswp, "vsw_check_flag (%d): VSW_MCST_INFO_RECV" 1911 " when in state %d\n", ldcp->ldc_id, phase); 1912 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1913 return (1); 1914 } 1915 break; 1916 1917 default: 1918 DERR(vswp, "vsw_check_flag (%lld): unknown flag (%llx)", 1919 ldcp->ldc_id, flag); 1920 return (1); 1921 } 1922 1923 if (dir == INBOUND) 1924 ldcp->lane_in.lstate = state; 1925 else 1926 ldcp->lane_out.lstate = state; 1927 1928 D1(vswp, "vsw_check_flag (chan %lld): exit", ldcp->ldc_id); 1929 1930 return (0); 1931 } 1932 1933 void 1934 vsw_next_milestone(vsw_ldc_t *ldcp) 1935 { 1936 vsw_t *vswp = ldcp->ldc_vswp; 1937 vsw_port_t *portp = ldcp->ldc_port; 1938 1939 D1(vswp, "%s (chan %lld): enter (phase %ld)", __func__, 1940 ldcp->ldc_id, ldcp->hphase); 1941 1942 DUMP_FLAGS(ldcp->lane_in.lstate); 1943 DUMP_FLAGS(ldcp->lane_out.lstate); 1944 1945 switch (ldcp->hphase) { 1946 1947 case VSW_MILESTONE0: 1948 /* 1949 * If we haven't started to handshake with our peer, 1950 * start to do so now. 1951 */ 1952 if (ldcp->lane_out.lstate == 0) { 1953 D2(vswp, "%s: (chan %lld) starting handshake " 1954 "with peer", __func__, ldcp->ldc_id); 1955 vsw_process_conn_evt(ldcp, VSW_CONN_UP); 1956 } 1957 1958 /* 1959 * Only way to pass this milestone is to have successfully 1960 * negotiated version info. 1961 */ 1962 if ((ldcp->lane_in.lstate & VSW_VER_ACK_SENT) && 1963 (ldcp->lane_out.lstate & VSW_VER_ACK_RECV)) { 1964 1965 D2(vswp, "%s: (chan %lld) leaving milestone 0", 1966 __func__, ldcp->ldc_id); 1967 1968 vsw_set_vnet_proto_ops(ldcp); 1969 1970 /* 1971 * Next milestone is passed when attribute 1972 * information has been successfully exchanged. 1973 */ 1974 ldcp->hphase = VSW_MILESTONE1; 1975 vsw_send_attr(ldcp); 1976 1977 } 1978 break; 1979 1980 case VSW_MILESTONE1: 1981 /* 1982 * Only way to pass this milestone is to have successfully 1983 * negotiated attribute information. 
1984 */ 1985 if (ldcp->lane_in.lstate & VSW_ATTR_ACK_SENT) { 1986 1987 ldcp->hphase = VSW_MILESTONE2; 1988 1989 /* 1990 * If the peer device has said it wishes to 1991 * use descriptor rings then we send it our ring 1992 * info, otherwise we just set up a private ring 1993 * which we use an internal buffer 1994 */ 1995 if ((VSW_VER_GTEQ(ldcp, 1, 2) && 1996 (ldcp->lane_in.xfer_mode & VIO_DRING_MODE_V1_2)) || 1997 (VSW_VER_LT(ldcp, 1, 2) && 1998 (ldcp->lane_in.xfer_mode == 1999 VIO_DRING_MODE_V1_0))) { 2000 vsw_send_dring_info(ldcp); 2001 } 2002 } 2003 break; 2004 2005 case VSW_MILESTONE2: 2006 /* 2007 * If peer has indicated in its attribute message that 2008 * it wishes to use descriptor rings then the only way 2009 * to pass this milestone is for us to have received 2010 * valid dring info. 2011 * 2012 * If peer is not using descriptor rings then just fall 2013 * through. 2014 */ 2015 if ((VSW_VER_GTEQ(ldcp, 1, 2) && 2016 (ldcp->lane_in.xfer_mode & VIO_DRING_MODE_V1_2)) || 2017 (VSW_VER_LT(ldcp, 1, 2) && 2018 (ldcp->lane_in.xfer_mode == 2019 VIO_DRING_MODE_V1_0))) { 2020 if (!(ldcp->lane_in.lstate & VSW_DRING_ACK_SENT)) 2021 break; 2022 } 2023 2024 D2(vswp, "%s: (chan %lld) leaving milestone 2", 2025 __func__, ldcp->ldc_id); 2026 2027 ldcp->hphase = VSW_MILESTONE3; 2028 vsw_send_rdx(ldcp); 2029 break; 2030 2031 case VSW_MILESTONE3: 2032 /* 2033 * Pass this milestone when all paramaters have been 2034 * successfully exchanged and RDX sent in both directions. 2035 * 2036 * Mark outbound lane as available to transmit data. 2037 */ 2038 if ((ldcp->lane_out.lstate & VSW_RDX_ACK_SENT) && 2039 (ldcp->lane_in.lstate & VSW_RDX_ACK_RECV)) { 2040 2041 D2(vswp, "%s: (chan %lld) leaving milestone 3", 2042 __func__, ldcp->ldc_id); 2043 D2(vswp, "%s: ** handshake complete (0x%llx : " 2044 "0x%llx) **", __func__, ldcp->lane_in.lstate, 2045 ldcp->lane_out.lstate); 2046 ldcp->lane_out.lstate |= VSW_LANE_ACTIVE; 2047 ldcp->hphase = VSW_MILESTONE4; 2048 ldcp->hcnt = 0; 2049 DISPLAY_STATE(); 2050 /* Start HIO if enabled and capable */ 2051 if ((portp->p_hio_enabled) && (portp->p_hio_capable)) { 2052 D2(vswp, "%s: start HybridIO setup", __func__); 2053 vsw_hio_start(vswp, ldcp); 2054 } 2055 } else { 2056 D2(vswp, "%s: still in milestone 3 (0x%llx : 0x%llx)", 2057 __func__, ldcp->lane_in.lstate, 2058 ldcp->lane_out.lstate); 2059 } 2060 break; 2061 2062 case VSW_MILESTONE4: 2063 D2(vswp, "%s: (chan %lld) in milestone 4", __func__, 2064 ldcp->ldc_id); 2065 break; 2066 2067 default: 2068 DERR(vswp, "%s: (chan %lld) Unknown Phase %x", __func__, 2069 ldcp->ldc_id, ldcp->hphase); 2070 } 2071 2072 D1(vswp, "%s (chan %lld): exit (phase %ld)", __func__, ldcp->ldc_id, 2073 ldcp->hphase); 2074 } 2075 2076 /* 2077 * Check if major version is supported. 2078 * 2079 * Returns 0 if finds supported major number, and if necessary 2080 * adjusts the minor field. 2081 * 2082 * Returns 1 if can't match major number exactly. Sets mjor/minor 2083 * to next lowest support values, or to zero if no other values possible. 2084 */ 2085 static int 2086 vsw_supported_version(vio_ver_msg_t *vp) 2087 { 2088 int i; 2089 2090 D1(NULL, "vsw_supported_version: enter"); 2091 2092 for (i = 0; i < VSW_NUM_VER; i++) { 2093 if (vsw_versions[i].ver_major == vp->ver_major) { 2094 /* 2095 * Matching or lower major version found. Update 2096 * minor number if necessary. 
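 *
 * For illustration only (the actual entries live in
 * vsw_versions[]): assuming the highest minor we support for
 * major 1 is 4, a peer proposing 1.5 has its minor adjusted
 * down to 4 and we return 0, while a peer proposing 1.2 is
 * accepted unchanged.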
2097 */ 2098 if (vp->ver_minor > vsw_versions[i].ver_minor) { 2099 D2(NULL, "%s: adjusting minor value from %d " 2100 "to %d", __func__, vp->ver_minor, 2101 vsw_versions[i].ver_minor); 2102 vp->ver_minor = vsw_versions[i].ver_minor; 2103 } 2104 2105 return (0); 2106 } 2107 2108 /* 2109 * If the message contains a higher major version number, set 2110 * the message's major/minor versions to the current values 2111 * and return false, so this message will get resent with 2112 * these values. 2113 */ 2114 if (vsw_versions[i].ver_major < vp->ver_major) { 2115 D2(NULL, "%s: adjusting major and minor " 2116 "values to %d, %d\n", 2117 __func__, vsw_versions[i].ver_major, 2118 vsw_versions[i].ver_minor); 2119 vp->ver_major = vsw_versions[i].ver_major; 2120 vp->ver_minor = vsw_versions[i].ver_minor; 2121 return (1); 2122 } 2123 } 2124 2125 /* No match was possible, zero out fields */ 2126 vp->ver_major = 0; 2127 vp->ver_minor = 0; 2128 2129 D1(NULL, "vsw_supported_version: exit"); 2130 2131 return (1); 2132 } 2133 2134 /* 2135 * Set vnet-protocol-version dependent functions based on version. 2136 */ 2137 static void 2138 vsw_set_vnet_proto_ops(vsw_ldc_t *ldcp) 2139 { 2140 vsw_t *vswp = ldcp->ldc_vswp; 2141 lane_t *lp = &ldcp->lane_out; 2142 2143 if (VSW_VER_GTEQ(ldcp, 1, 4)) { 2144 /* 2145 * If the version negotiated with peer is >= 1.4(Jumbo Frame 2146 * Support), set the mtu in our attributes to max_frame_size. 2147 */ 2148 lp->mtu = vswp->max_frame_size; 2149 } else if (VSW_VER_EQ(ldcp, 1, 3)) { 2150 /* 2151 * If the version negotiated with peer is == 1.3 (Vlan Tag 2152 * Support) set the attr.mtu to ETHERMAX + VLAN_TAGSZ. 2153 */ 2154 lp->mtu = ETHERMAX + VLAN_TAGSZ; 2155 } else { 2156 vsw_port_t *portp = ldcp->ldc_port; 2157 /* 2158 * Pre-1.3 peers expect max frame size of ETHERMAX. 2159 * We can negotiate that size with those peers provided only 2160 * pvid is defined for our peer and there are no vids. Then we 2161 * can send/recv only untagged frames of max size ETHERMAX. 2162 * Note that pvid of the peer can be different, as vsw has to 2163 * serve the vnet in that vlan even if itself is not assigned 2164 * to that vlan. 2165 */ 2166 if (portp->nvids == 0) { 2167 lp->mtu = ETHERMAX; 2168 } 2169 } 2170 2171 if (VSW_VER_GTEQ(ldcp, 1, 2)) { 2172 /* Versions >= 1.2 */ 2173 2174 if (VSW_PRI_ETH_DEFINED(vswp)) { 2175 /* 2176 * enable priority routines and pkt mode only if 2177 * at least one pri-eth-type is specified in MD. 2178 */ 2179 ldcp->tx = vsw_ldctx_pri; 2180 ldcp->rx_pktdata = vsw_process_pkt_data; 2181 2182 /* set xfer mode for vsw_send_attr() */ 2183 lp->xfer_mode = VIO_PKT_MODE | VIO_DRING_MODE_V1_2; 2184 } else { 2185 /* no priority eth types defined in MD */ 2186 2187 ldcp->tx = vsw_ldctx; 2188 ldcp->rx_pktdata = vsw_process_pkt_data_nop; 2189 2190 /* set xfer mode for vsw_send_attr() */ 2191 lp->xfer_mode = VIO_DRING_MODE_V1_2; 2192 } 2193 2194 } else { 2195 /* Versions prior to 1.2 */ 2196 2197 vsw_reset_vnet_proto_ops(ldcp); 2198 } 2199 } 2200 2201 /* 2202 * Reset vnet-protocol-version dependent functions to v1.0. 2203 */ 2204 static void 2205 vsw_reset_vnet_proto_ops(vsw_ldc_t *ldcp) 2206 { 2207 lane_t *lp = &ldcp->lane_out; 2208 2209 ldcp->tx = vsw_ldctx; 2210 ldcp->rx_pktdata = vsw_process_pkt_data_nop; 2211 2212 /* set xfer mode for vsw_send_attr() */ 2213 lp->xfer_mode = VIO_DRING_MODE_V1_0; 2214 } 2215 2216 /* 2217 * Main routine for processing messages received over LDC. 
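 *
 * Messages are read from the channel until ldc_read() returns
 * no data (or the channel is reset), and each message is then
 * dispatched on its vio_msgtype: control messages are handed
 * off to a taskq, data messages are processed inline, and
 * error messages go to the error handler.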
2218 */ 2219 static void 2220 vsw_process_pkt(void *arg) 2221 { 2222 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 2223 vsw_t *vswp = ldcp->ldc_vswp; 2224 size_t msglen; 2225 vio_msg_tag_t *tagp; 2226 uint64_t *ldcmsg; 2227 int rv = 0; 2228 2229 2230 D1(vswp, "%s enter: ldcid (%lld)\n", __func__, ldcp->ldc_id); 2231 2232 ASSERT(MUTEX_HELD(&ldcp->ldc_cblock)); 2233 2234 ldcmsg = ldcp->ldcmsg; 2235 /* 2236 * If channel is up read messages until channel is empty. 2237 */ 2238 do { 2239 msglen = ldcp->msglen; 2240 rv = ldc_read(ldcp->ldc_handle, (caddr_t)ldcmsg, &msglen); 2241 2242 if (rv != 0) { 2243 DERR(vswp, "%s :ldc_read err id(%lld) rv(%d) len(%d)\n", 2244 __func__, ldcp->ldc_id, rv, msglen); 2245 } 2246 2247 /* channel has been reset */ 2248 if (rv == ECONNRESET) { 2249 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 2250 break; 2251 } 2252 2253 if (msglen == 0) { 2254 D2(vswp, "%s: ldc_read id(%lld) NODATA", __func__, 2255 ldcp->ldc_id); 2256 break; 2257 } 2258 2259 D2(vswp, "%s: ldc_read id(%lld): msglen(%d)", __func__, 2260 ldcp->ldc_id, msglen); 2261 2262 /* 2263 * Figure out what sort of packet we have gotten by 2264 * examining the msg tag, and then switch it appropriately. 2265 */ 2266 tagp = (vio_msg_tag_t *)ldcmsg; 2267 2268 switch (tagp->vio_msgtype) { 2269 case VIO_TYPE_CTRL: 2270 vsw_dispatch_ctrl_task(ldcp, ldcmsg, tagp); 2271 break; 2272 case VIO_TYPE_DATA: 2273 vsw_process_data_pkt(ldcp, ldcmsg, tagp, msglen); 2274 break; 2275 case VIO_TYPE_ERR: 2276 vsw_process_err_pkt(ldcp, ldcmsg, tagp); 2277 break; 2278 default: 2279 DERR(vswp, "%s: Unknown tag(%lx) ", __func__, 2280 "id(%lx)\n", tagp->vio_msgtype, ldcp->ldc_id); 2281 break; 2282 } 2283 } while (msglen); 2284 2285 D1(vswp, "%s exit: ldcid (%lld)\n", __func__, ldcp->ldc_id); 2286 } 2287 2288 /* 2289 * Dispatch a task to process a VIO control message. 2290 */ 2291 static void 2292 vsw_dispatch_ctrl_task(vsw_ldc_t *ldcp, void *cpkt, vio_msg_tag_t *tagp) 2293 { 2294 vsw_ctrl_task_t *ctaskp = NULL; 2295 vsw_port_t *port = ldcp->ldc_port; 2296 vsw_t *vswp = port->p_vswp; 2297 2298 D1(vswp, "%s: enter", __func__); 2299 2300 /* 2301 * We need to handle RDX ACK messages in-band as once they 2302 * are exchanged it is possible that we will get an 2303 * immediate (legitimate) data packet. 2304 */ 2305 if ((tagp->vio_subtype_env == VIO_RDX) && 2306 (tagp->vio_subtype == VIO_SUBTYPE_ACK)) { 2307 2308 if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_ACK_RECV)) 2309 return; 2310 2311 ldcp->lane_in.lstate |= VSW_RDX_ACK_RECV; 2312 D2(vswp, "%s (%ld) handling RDX_ACK in place " 2313 "(ostate 0x%llx : hphase %d)", __func__, 2314 ldcp->ldc_id, ldcp->lane_in.lstate, ldcp->hphase); 2315 vsw_next_milestone(ldcp); 2316 return; 2317 } 2318 2319 ctaskp = kmem_alloc(sizeof (vsw_ctrl_task_t), KM_NOSLEEP); 2320 2321 if (ctaskp == NULL) { 2322 DERR(vswp, "%s: unable to alloc space for ctrl msg", __func__); 2323 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2324 return; 2325 } 2326 2327 ctaskp->ldcp = ldcp; 2328 bcopy((def_msg_t *)cpkt, &ctaskp->pktp, sizeof (def_msg_t)); 2329 ctaskp->hss_id = ldcp->hss_id; 2330 2331 /* 2332 * Dispatch task to processing taskq if port is not in 2333 * the process of being detached. 
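 *
 * Note that the packet was copied into the task structure
 * above; the receive buffer (ldcp->ldcmsg) is reused for the
 * next ldc_read() and so cannot be referenced once the task
 * runs asynchronously. The current hss_id is captured with it
 * so that packets belonging to an earlier handshake session
 * can be detected and discarded in vsw_process_ctrl_pkt().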
2334 */ 2335 mutex_enter(&port->state_lock); 2336 if (port->state == VSW_PORT_INIT) { 2337 if ((vswp->taskq_p == NULL) || 2338 (ddi_taskq_dispatch(vswp->taskq_p, vsw_process_ctrl_pkt, 2339 ctaskp, DDI_NOSLEEP) != DDI_SUCCESS)) { 2340 mutex_exit(&port->state_lock); 2341 DERR(vswp, "%s: unable to dispatch task to taskq", 2342 __func__); 2343 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2344 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2345 return; 2346 } 2347 } else { 2348 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2349 DWARN(vswp, "%s: port %d detaching, not dispatching " 2350 "task", __func__, port->p_instance); 2351 } 2352 2353 mutex_exit(&port->state_lock); 2354 2355 D2(vswp, "%s: dispatched task to taskq for chan %d", __func__, 2356 ldcp->ldc_id); 2357 D1(vswp, "%s: exit", __func__); 2358 } 2359 2360 /* 2361 * Process a VIO ctrl message. Invoked from taskq. 2362 */ 2363 static void 2364 vsw_process_ctrl_pkt(void *arg) 2365 { 2366 vsw_ctrl_task_t *ctaskp = (vsw_ctrl_task_t *)arg; 2367 vsw_ldc_t *ldcp = ctaskp->ldcp; 2368 vsw_t *vswp = ldcp->ldc_vswp; 2369 vio_msg_tag_t tag; 2370 uint16_t env; 2371 2372 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 2373 2374 bcopy(&ctaskp->pktp, &tag, sizeof (vio_msg_tag_t)); 2375 env = tag.vio_subtype_env; 2376 2377 /* stale pkt check */ 2378 if (ctaskp->hss_id < ldcp->hss_id) { 2379 DWARN(vswp, "%s: discarding stale packet belonging to earlier" 2380 " (%ld) handshake session", __func__, ctaskp->hss_id); 2381 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2382 return; 2383 } 2384 2385 /* session id check */ 2386 if (ldcp->session_status & VSW_PEER_SESSION) { 2387 if (ldcp->peer_session != tag.vio_sid) { 2388 DERR(vswp, "%s (chan %d): invalid session id (%llx)", 2389 __func__, ldcp->ldc_id, tag.vio_sid); 2390 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2391 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2392 return; 2393 } 2394 } 2395 2396 /* 2397 * Switch on vio_subtype envelope, then let lower routines 2398 * decide if its an INFO, ACK or NACK packet. 2399 */ 2400 switch (env) { 2401 case VIO_VER_INFO: 2402 vsw_process_ctrl_ver_pkt(ldcp, &ctaskp->pktp); 2403 break; 2404 case VIO_DRING_REG: 2405 vsw_process_ctrl_dring_reg_pkt(ldcp, &ctaskp->pktp); 2406 break; 2407 case VIO_DRING_UNREG: 2408 vsw_process_ctrl_dring_unreg_pkt(ldcp, &ctaskp->pktp); 2409 break; 2410 case VIO_ATTR_INFO: 2411 vsw_process_ctrl_attr_pkt(ldcp, &ctaskp->pktp); 2412 break; 2413 case VNET_MCAST_INFO: 2414 vsw_process_ctrl_mcst_pkt(ldcp, &ctaskp->pktp); 2415 break; 2416 case VIO_RDX: 2417 vsw_process_ctrl_rdx_pkt(ldcp, &ctaskp->pktp); 2418 break; 2419 case VIO_DDS_INFO: 2420 vsw_process_dds_msg(vswp, ldcp, &ctaskp->pktp); 2421 break; 2422 default: 2423 DERR(vswp, "%s: unknown vio_subtype_env (%x)\n", __func__, env); 2424 } 2425 2426 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2427 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 2428 } 2429 2430 /* 2431 * Version negotiation. We can end up here either because our peer 2432 * has responded to a handshake message we have sent it, or our peer 2433 * has initiated a handshake with us. If its the former then can only 2434 * be ACK or NACK, if its the later can only be INFO. 2435 * 2436 * If its an ACK we move to the next stage of the handshake, namely 2437 * attribute exchange. If its a NACK we see if we can specify another 2438 * version, if we can't we stop. 
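 *
 * For illustration (the versions we actually support are in
 * vsw_versions[]): if we send an INFO proposing 1.4 and the
 * peer NACKs it with 1.3, and we also support 1.3, we resend
 * an INFO at 1.3; if the peer NACKs with 0.0, negotiation has
 * failed and the handshake goes no further.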
2439 * 2440 * If it is an INFO we reset all params associated with communication 2441 * in that direction over this channel (remember connection is 2442 * essentially 2 independent simplex channels). 2443 */ 2444 void 2445 vsw_process_ctrl_ver_pkt(vsw_ldc_t *ldcp, void *pkt) 2446 { 2447 vio_ver_msg_t *ver_pkt; 2448 vsw_t *vswp = ldcp->ldc_vswp; 2449 2450 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 2451 2452 /* 2453 * We know this is a ctrl/version packet so 2454 * cast it into the correct structure. 2455 */ 2456 ver_pkt = (vio_ver_msg_t *)pkt; 2457 2458 switch (ver_pkt->tag.vio_subtype) { 2459 case VIO_SUBTYPE_INFO: 2460 D2(vswp, "vsw_process_ctrl_ver_pkt: VIO_SUBTYPE_INFO\n"); 2461 2462 /* 2463 * Record the session id, which we will use from now 2464 * until we see another VER_INFO msg. Even then the 2465 * session id in most cases will be unchanged, execpt 2466 * if channel was reset. 2467 */ 2468 if ((ldcp->session_status & VSW_PEER_SESSION) && 2469 (ldcp->peer_session != ver_pkt->tag.vio_sid)) { 2470 DERR(vswp, "%s: updating session id for chan %lld " 2471 "from %llx to %llx", __func__, ldcp->ldc_id, 2472 ldcp->peer_session, ver_pkt->tag.vio_sid); 2473 } 2474 2475 ldcp->peer_session = ver_pkt->tag.vio_sid; 2476 ldcp->session_status |= VSW_PEER_SESSION; 2477 2478 /* Legal message at this time ? */ 2479 if (vsw_check_flag(ldcp, INBOUND, VSW_VER_INFO_RECV)) 2480 return; 2481 2482 /* 2483 * First check the device class. Currently only expect 2484 * to be talking to a network device. In the future may 2485 * also talk to another switch. 2486 */ 2487 if (ver_pkt->dev_class != VDEV_NETWORK) { 2488 DERR(vswp, "%s: illegal device class %d", __func__, 2489 ver_pkt->dev_class); 2490 2491 ver_pkt->tag.vio_sid = ldcp->local_session; 2492 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2493 2494 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 2495 2496 (void) vsw_send_msg(ldcp, (void *)ver_pkt, 2497 sizeof (vio_ver_msg_t), B_TRUE); 2498 2499 ldcp->lane_in.lstate |= VSW_VER_NACK_SENT; 2500 vsw_next_milestone(ldcp); 2501 return; 2502 } else { 2503 ldcp->dev_class = ver_pkt->dev_class; 2504 } 2505 2506 /* 2507 * Now check the version. 2508 */ 2509 if (vsw_supported_version(ver_pkt) == 0) { 2510 /* 2511 * Support this major version and possibly 2512 * adjusted minor version. 2513 */ 2514 2515 D2(vswp, "%s: accepted ver %d:%d", __func__, 2516 ver_pkt->ver_major, ver_pkt->ver_minor); 2517 2518 /* Store accepted values */ 2519 ldcp->lane_in.ver_major = ver_pkt->ver_major; 2520 ldcp->lane_in.ver_minor = ver_pkt->ver_minor; 2521 2522 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 2523 2524 ldcp->lane_in.lstate |= VSW_VER_ACK_SENT; 2525 2526 if (vsw_obp_ver_proto_workaround == B_TRUE) { 2527 /* 2528 * Send a version info message 2529 * using the accepted version that 2530 * we are about to ack. Also note that 2531 * we send our ver info before we ack. 2532 * Otherwise, as soon as receiving the 2533 * ack, obp sends attr info msg, which 2534 * breaks vsw_check_flag() invoked 2535 * from vsw_process_ctrl_attr_pkt(); 2536 * as we also need VSW_VER_ACK_RECV to 2537 * be set in lane_out.lstate, before 2538 * we can receive attr info. 2539 */ 2540 vsw_send_ver(ldcp); 2541 } 2542 } else { 2543 /* 2544 * NACK back with the next lower major/minor 2545 * pairing we support (if don't suuport any more 2546 * versions then they will be set to zero. 
2547 */ 2548 2549 D2(vswp, "%s: replying with ver %d:%d", __func__, 2550 ver_pkt->ver_major, ver_pkt->ver_minor); 2551 2552 /* Store updated values */ 2553 ldcp->lane_in.ver_major = ver_pkt->ver_major; 2554 ldcp->lane_in.ver_minor = ver_pkt->ver_minor; 2555 2556 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2557 2558 ldcp->lane_in.lstate |= VSW_VER_NACK_SENT; 2559 } 2560 2561 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 2562 ver_pkt->tag.vio_sid = ldcp->local_session; 2563 (void) vsw_send_msg(ldcp, (void *)ver_pkt, 2564 sizeof (vio_ver_msg_t), B_TRUE); 2565 2566 vsw_next_milestone(ldcp); 2567 break; 2568 2569 case VIO_SUBTYPE_ACK: 2570 D2(vswp, "%s: VIO_SUBTYPE_ACK\n", __func__); 2571 2572 if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_ACK_RECV)) 2573 return; 2574 2575 /* Store updated values */ 2576 ldcp->lane_out.ver_major = ver_pkt->ver_major; 2577 ldcp->lane_out.ver_minor = ver_pkt->ver_minor; 2578 2579 ldcp->lane_out.lstate |= VSW_VER_ACK_RECV; 2580 vsw_next_milestone(ldcp); 2581 2582 break; 2583 2584 case VIO_SUBTYPE_NACK: 2585 D2(vswp, "%s: VIO_SUBTYPE_NACK\n", __func__); 2586 2587 if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_NACK_RECV)) 2588 return; 2589 2590 /* 2591 * If our peer sent us a NACK with the ver fields set to 2592 * zero then there is nothing more we can do. Otherwise see 2593 * if we support either the version suggested, or a lesser 2594 * one. 2595 */ 2596 if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) { 2597 DERR(vswp, "%s: peer unable to negotiate any " 2598 "further.", __func__); 2599 ldcp->lane_out.lstate |= VSW_VER_NACK_RECV; 2600 vsw_next_milestone(ldcp); 2601 return; 2602 } 2603 2604 /* 2605 * Check to see if we support this major version or 2606 * a lower one. If we don't then maj/min will be set 2607 * to zero. 2608 */ 2609 (void) vsw_supported_version(ver_pkt); 2610 if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) { 2611 /* Nothing more we can do */ 2612 DERR(vswp, "%s: version negotiation failed.\n", 2613 __func__); 2614 ldcp->lane_out.lstate |= VSW_VER_NACK_RECV; 2615 vsw_next_milestone(ldcp); 2616 } else { 2617 /* found a supported major version */ 2618 ldcp->lane_out.ver_major = ver_pkt->ver_major; 2619 ldcp->lane_out.ver_minor = ver_pkt->ver_minor; 2620 2621 D2(vswp, "%s: resending with updated values (%x, %x)", 2622 __func__, ver_pkt->ver_major, ver_pkt->ver_minor); 2623 2624 ldcp->lane_out.lstate |= VSW_VER_INFO_SENT; 2625 ver_pkt->tag.vio_sid = ldcp->local_session; 2626 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO; 2627 2628 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 2629 2630 (void) vsw_send_msg(ldcp, (void *)ver_pkt, 2631 sizeof (vio_ver_msg_t), B_TRUE); 2632 2633 vsw_next_milestone(ldcp); 2634 2635 } 2636 break; 2637 2638 default: 2639 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 2640 ver_pkt->tag.vio_subtype); 2641 } 2642 2643 D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id); 2644 } 2645 2646 /* 2647 * Process an attribute packet. We can end up here either because our peer 2648 * has ACK/NACK'ed back to an earlier ATTR msg we had sent it, or our 2649 * peer has sent us an attribute INFO message 2650 * 2651 * If its an ACK we then move to the next stage of the handshake which 2652 * is to send our descriptor ring info to our peer. If its a NACK then 2653 * there is nothing more we can (currently) do. 2654 * 2655 * If we get a valid/acceptable INFO packet (and we have already negotiated 2656 * a version) we ACK back and set channel state to ATTR_RECV, otherwise we 2657 * NACK back and reset channel state to INACTIV. 
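 *
 * For versions >= 1.4 the MTU itself is negotiated: each side
 * replies with the minimum of its own MTU and the peer's, so
 * both ends converge on the same value (for example, if we
 * advertise 9000 and the peer advertises 1500, both sides
 * settle on 1500); a mismatch between the value we computed
 * and the value the peer ACKs fails the handshake.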
2658 * 2659 * FUTURE: in time we will probably negotiate over attributes, but for 2660 * the moment unacceptable attributes are regarded as a fatal error. 2661 * 2662 */ 2663 void 2664 vsw_process_ctrl_attr_pkt(vsw_ldc_t *ldcp, void *pkt) 2665 { 2666 vnet_attr_msg_t *attr_pkt; 2667 vsw_t *vswp = ldcp->ldc_vswp; 2668 vsw_port_t *port = ldcp->ldc_port; 2669 uint64_t macaddr = 0; 2670 lane_t *lane_out = &ldcp->lane_out; 2671 lane_t *lane_in = &ldcp->lane_in; 2672 uint32_t mtu; 2673 boolean_t ack = B_TRUE; 2674 int i; 2675 2676 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 2677 2678 /* 2679 * We know this is a ctrl/attr packet so 2680 * cast it into the correct structure. 2681 */ 2682 attr_pkt = (vnet_attr_msg_t *)pkt; 2683 2684 switch (attr_pkt->tag.vio_subtype) { 2685 case VIO_SUBTYPE_INFO: 2686 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 2687 2688 if (vsw_check_flag(ldcp, INBOUND, VSW_ATTR_INFO_RECV)) 2689 return; 2690 2691 /* 2692 * If the attributes are unacceptable then we NACK back. 2693 */ 2694 if (vsw_check_attr(attr_pkt, ldcp)) { 2695 ack = B_FALSE; 2696 2697 DERR(vswp, "%s (chan %d): invalid attributes", 2698 __func__, ldcp->ldc_id); 2699 2700 } else { 2701 2702 if (VSW_VER_GTEQ(ldcp, 1, 4)) { 2703 /* 2704 * Versions >= 1.4: 2705 * The mtu is negotiated down to the 2706 * minimum of our mtu and peer's mtu. 2707 */ 2708 mtu = MIN(attr_pkt->mtu, vswp->max_frame_size); 2709 2710 /* 2711 * If we have received an ack for the attr info 2712 * that we sent, then check if the mtu computed 2713 * above matches the mtu that the peer had ack'd 2714 * (saved in local hparams). If they don't 2715 * match, we fail the handshake. 2716 */ 2717 if (lane_out->lstate & VSW_ATTR_ACK_RECV) { 2718 if (mtu != lane_out->mtu) { 2719 /* send NACK */ 2720 ack = B_FALSE; 2721 } 2722 } else { 2723 /* 2724 * Save the mtu computed above in our 2725 * attr parameters, so it gets sent in 2726 * the attr info from us to the peer. 2727 */ 2728 lane_out->mtu = mtu; 2729 } 2730 } 2731 2732 } 2733 2734 if (ack == B_FALSE) { 2735 2736 vsw_free_lane_resources(ldcp, INBOUND); 2737 2738 attr_pkt->tag.vio_sid = ldcp->local_session; 2739 attr_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2740 2741 DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt); 2742 ldcp->lane_in.lstate |= VSW_ATTR_NACK_SENT; 2743 (void) vsw_send_msg(ldcp, (void *)attr_pkt, 2744 sizeof (vnet_attr_msg_t), B_TRUE); 2745 2746 vsw_next_milestone(ldcp); 2747 return; 2748 } 2749 2750 /* 2751 * Otherwise store attributes for this lane and update 2752 * lane state. 
2753 */ 2754 lane_in->mtu = attr_pkt->mtu; 2755 lane_in->addr = attr_pkt->addr; 2756 lane_in->addr_type = attr_pkt->addr_type; 2757 lane_in->xfer_mode = attr_pkt->xfer_mode; 2758 lane_in->ack_freq = attr_pkt->ack_freq; 2759 2760 if (VSW_VER_GTEQ(ldcp, 1, 4)) { 2761 /* save the MIN mtu in the msg to be replied */ 2762 attr_pkt->mtu = mtu; 2763 } 2764 2765 macaddr = lane_in->addr; 2766 for (i = ETHERADDRL - 1; i >= 0; i--) { 2767 port->p_macaddr.ether_addr_octet[i] = macaddr & 0xFF; 2768 macaddr >>= 8; 2769 } 2770 2771 /* create the fdb entry for this port/mac address */ 2772 vsw_fdbe_add(vswp, port); 2773 2774 /* add the port to the specified vlans */ 2775 vsw_vlan_add_ids(port, VSW_VNETPORT); 2776 2777 /* setup device specifc xmit routines */ 2778 mutex_enter(&port->tx_lock); 2779 if ((VSW_VER_GTEQ(ldcp, 1, 2) && 2780 (lane_in->xfer_mode & VIO_DRING_MODE_V1_2)) || 2781 (VSW_VER_LT(ldcp, 1, 2) && 2782 (lane_in->xfer_mode == VIO_DRING_MODE_V1_0))) { 2783 D2(vswp, "%s: mode = VIO_DRING_MODE", __func__); 2784 port->transmit = vsw_dringsend; 2785 } else if (lane_in->xfer_mode == VIO_DESC_MODE) { 2786 D2(vswp, "%s: mode = VIO_DESC_MODE", __func__); 2787 vsw_create_privring(ldcp); 2788 port->transmit = vsw_descrsend; 2789 lane_out->xfer_mode = VIO_DESC_MODE; 2790 } 2791 2792 /* 2793 * HybridIO is supported only vnet, not by OBP. 2794 * So, set hio_capable to true only when in DRING mode. 2795 */ 2796 if (VSW_VER_GTEQ(ldcp, 1, 3) && 2797 (lane_in->xfer_mode != VIO_DESC_MODE)) { 2798 (void) atomic_swap_32(&port->p_hio_capable, B_TRUE); 2799 } else { 2800 (void) atomic_swap_32(&port->p_hio_capable, B_FALSE); 2801 } 2802 2803 mutex_exit(&port->tx_lock); 2804 2805 attr_pkt->tag.vio_sid = ldcp->local_session; 2806 attr_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 2807 2808 DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt); 2809 2810 lane_in->lstate |= VSW_ATTR_ACK_SENT; 2811 2812 (void) vsw_send_msg(ldcp, (void *)attr_pkt, 2813 sizeof (vnet_attr_msg_t), B_TRUE); 2814 2815 vsw_next_milestone(ldcp); 2816 break; 2817 2818 case VIO_SUBTYPE_ACK: 2819 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 2820 2821 if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_ACK_RECV)) 2822 return; 2823 2824 if (VSW_VER_GTEQ(ldcp, 1, 4)) { 2825 /* 2826 * Versions >= 1.4: 2827 * The ack msg sent by the peer contains the minimum of 2828 * our mtu (that we had sent in our attr info) and the 2829 * peer's mtu. 2830 * 2831 * If we have sent an ack for the attr info msg from 2832 * the peer, check if the mtu that was computed then 2833 * (saved in lane_out params) matches the mtu that the 2834 * peer has ack'd. If they don't match, we fail the 2835 * handshake. 2836 */ 2837 if (lane_in->lstate & VSW_ATTR_ACK_SENT) { 2838 if (lane_out->mtu != attr_pkt->mtu) { 2839 return; 2840 } 2841 } else { 2842 /* 2843 * If the mtu ack'd by the peer is > our mtu 2844 * fail handshake. Otherwise, save the mtu, so 2845 * we can validate it when we receive attr info 2846 * from our peer. 
2847 */ 2848 if (attr_pkt->mtu > lane_out->mtu) { 2849 return; 2850 } 2851 if (attr_pkt->mtu <= lane_out->mtu) { 2852 lane_out->mtu = attr_pkt->mtu; 2853 } 2854 } 2855 } 2856 2857 lane_out->lstate |= VSW_ATTR_ACK_RECV; 2858 vsw_next_milestone(ldcp); 2859 break; 2860 2861 case VIO_SUBTYPE_NACK: 2862 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 2863 2864 if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_NACK_RECV)) 2865 return; 2866 2867 lane_out->lstate |= VSW_ATTR_NACK_RECV; 2868 vsw_next_milestone(ldcp); 2869 break; 2870 2871 default: 2872 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 2873 attr_pkt->tag.vio_subtype); 2874 } 2875 2876 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 2877 } 2878 2879 /* 2880 * Process a dring info packet. We can end up here either because our peer 2881 * has ACK/NACK'ed back to an earlier DRING msg we had sent it, or our 2882 * peer has sent us a dring INFO message. 2883 * 2884 * If we get a valid/acceptable INFO packet (and we have already negotiated 2885 * a version) we ACK back and update the lane state, otherwise we NACK back. 2886 * 2887 * FUTURE: nothing to stop client from sending us info on multiple dring's 2888 * but for the moment we will just use the first one we are given. 2889 * 2890 */ 2891 void 2892 vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *ldcp, void *pkt) 2893 { 2894 vio_dring_reg_msg_t *dring_pkt; 2895 vsw_t *vswp = ldcp->ldc_vswp; 2896 ldc_mem_info_t minfo; 2897 dring_info_t *dp, *dbp; 2898 int dring_found = 0; 2899 2900 /* 2901 * We know this is a ctrl/dring packet so 2902 * cast it into the correct structure. 2903 */ 2904 dring_pkt = (vio_dring_reg_msg_t *)pkt; 2905 2906 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 2907 2908 switch (dring_pkt->tag.vio_subtype) { 2909 case VIO_SUBTYPE_INFO: 2910 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 2911 2912 if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV)) 2913 return; 2914 2915 /* 2916 * If the dring params are unacceptable then we NACK back. 2917 */ 2918 if (vsw_check_dring_info(dring_pkt)) { 2919 2920 DERR(vswp, "%s (%lld): invalid dring info", 2921 __func__, ldcp->ldc_id); 2922 2923 vsw_free_lane_resources(ldcp, INBOUND); 2924 2925 dring_pkt->tag.vio_sid = ldcp->local_session; 2926 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2927 2928 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 2929 2930 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 2931 2932 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 2933 sizeof (vio_dring_reg_msg_t), B_TRUE); 2934 2935 vsw_next_milestone(ldcp); 2936 return; 2937 } 2938 2939 /* 2940 * Otherwise, attempt to map in the dring using the 2941 * cookie. If that succeeds we send back a unique dring 2942 * identifier that the sending side will use in future 2943 * to refer to this descriptor ring. 2944 */ 2945 dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 2946 2947 dp->num_descriptors = dring_pkt->num_descriptors; 2948 dp->descriptor_size = dring_pkt->descriptor_size; 2949 dp->options = dring_pkt->options; 2950 dp->ncookies = dring_pkt->ncookies; 2951 2952 /* 2953 * Note: should only get one cookie. Enforced in 2954 * the ldc layer. 
2955 */ 2956 bcopy(&dring_pkt->cookie[0], &dp->cookie[0], 2957 sizeof (ldc_mem_cookie_t)); 2958 2959 D2(vswp, "%s: num_desc %ld : desc_size %ld", __func__, 2960 dp->num_descriptors, dp->descriptor_size); 2961 D2(vswp, "%s: options 0x%lx: ncookies %ld", __func__, 2962 dp->options, dp->ncookies); 2963 2964 if ((ldc_mem_dring_map(ldcp->ldc_handle, &dp->cookie[0], 2965 dp->ncookies, dp->num_descriptors, dp->descriptor_size, 2966 LDC_DIRECT_MAP, &(dp->handle))) != 0) { 2967 2968 DERR(vswp, "%s: dring_map failed\n", __func__); 2969 2970 kmem_free(dp, sizeof (dring_info_t)); 2971 vsw_free_lane_resources(ldcp, INBOUND); 2972 2973 dring_pkt->tag.vio_sid = ldcp->local_session; 2974 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2975 2976 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 2977 2978 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 2979 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 2980 sizeof (vio_dring_reg_msg_t), B_TRUE); 2981 2982 vsw_next_milestone(ldcp); 2983 return; 2984 } 2985 2986 if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) { 2987 2988 DERR(vswp, "%s: dring_addr failed\n", __func__); 2989 2990 kmem_free(dp, sizeof (dring_info_t)); 2991 vsw_free_lane_resources(ldcp, INBOUND); 2992 2993 dring_pkt->tag.vio_sid = ldcp->local_session; 2994 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2995 2996 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 2997 2998 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 2999 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 3000 sizeof (vio_dring_reg_msg_t), B_TRUE); 3001 3002 vsw_next_milestone(ldcp); 3003 return; 3004 } else { 3005 /* store the address of the pub part of ring */ 3006 dp->pub_addr = minfo.vaddr; 3007 3008 /* cache the dring mtype */ 3009 dp->dring_mtype = minfo.mtype; 3010 } 3011 3012 /* no private section as we are importing */ 3013 dp->priv_addr = NULL; 3014 3015 /* 3016 * Using simple mono increasing int for ident at 3017 * the moment. 3018 */ 3019 dp->ident = ldcp->next_ident; 3020 ldcp->next_ident++; 3021 3022 dp->end_idx = 0; 3023 dp->next = NULL; 3024 3025 /* 3026 * Link it onto the end of the list of drings 3027 * for this lane. 3028 */ 3029 if (ldcp->lane_in.dringp == NULL) { 3030 D2(vswp, "%s: adding first INBOUND dring", __func__); 3031 ldcp->lane_in.dringp = dp; 3032 } else { 3033 dbp = ldcp->lane_in.dringp; 3034 3035 while (dbp->next != NULL) 3036 dbp = dbp->next; 3037 3038 dbp->next = dp; 3039 } 3040 3041 /* acknowledge it */ 3042 dring_pkt->tag.vio_sid = ldcp->local_session; 3043 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3044 dring_pkt->dring_ident = dp->ident; 3045 3046 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 3047 sizeof (vio_dring_reg_msg_t), B_TRUE); 3048 3049 ldcp->lane_in.lstate |= VSW_DRING_ACK_SENT; 3050 vsw_next_milestone(ldcp); 3051 break; 3052 3053 case VIO_SUBTYPE_ACK: 3054 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3055 3056 if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_ACK_RECV)) 3057 return; 3058 3059 /* 3060 * Peer is acknowledging our dring info and will have 3061 * sent us a dring identifier which we will use to 3062 * refer to this ring w.r.t. our peer. 3063 */ 3064 dp = ldcp->lane_out.dringp; 3065 if (dp != NULL) { 3066 /* 3067 * Find the ring this ident should be associated 3068 * with. 
3069 */ 3070 if (vsw_dring_match(dp, dring_pkt)) { 3071 dring_found = 1; 3072 3073 } else while (dp != NULL) { 3074 if (vsw_dring_match(dp, dring_pkt)) { 3075 dring_found = 1; 3076 break; 3077 } 3078 dp = dp->next; 3079 } 3080 3081 if (dring_found == 0) { 3082 DERR(NULL, "%s: unrecognised ring cookie", 3083 __func__); 3084 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3085 return; 3086 } 3087 3088 } else { 3089 DERR(vswp, "%s: DRING ACK received but no drings " 3090 "allocated", __func__); 3091 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3092 return; 3093 } 3094 3095 /* store ident */ 3096 dp->ident = dring_pkt->dring_ident; 3097 ldcp->lane_out.lstate |= VSW_DRING_ACK_RECV; 3098 vsw_next_milestone(ldcp); 3099 break; 3100 3101 case VIO_SUBTYPE_NACK: 3102 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3103 3104 if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_NACK_RECV)) 3105 return; 3106 3107 ldcp->lane_out.lstate |= VSW_DRING_NACK_RECV; 3108 vsw_next_milestone(ldcp); 3109 break; 3110 3111 default: 3112 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 3113 dring_pkt->tag.vio_subtype); 3114 } 3115 3116 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 3117 } 3118 3119 /* 3120 * Process a request from peer to unregister a dring. 3121 * 3122 * For the moment we just restart the handshake if our 3123 * peer endpoint attempts to unregister a dring. 3124 */ 3125 void 3126 vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *ldcp, void *pkt) 3127 { 3128 vsw_t *vswp = ldcp->ldc_vswp; 3129 vio_dring_unreg_msg_t *dring_pkt; 3130 3131 /* 3132 * We know this is a ctrl/dring packet so 3133 * cast it into the correct structure. 3134 */ 3135 dring_pkt = (vio_dring_unreg_msg_t *)pkt; 3136 3137 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3138 3139 switch (dring_pkt->tag.vio_subtype) { 3140 case VIO_SUBTYPE_INFO: 3141 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3142 3143 DWARN(vswp, "%s: restarting handshake..", __func__); 3144 break; 3145 3146 case VIO_SUBTYPE_ACK: 3147 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3148 3149 DWARN(vswp, "%s: restarting handshake..", __func__); 3150 break; 3151 3152 case VIO_SUBTYPE_NACK: 3153 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3154 3155 DWARN(vswp, "%s: restarting handshake..", __func__); 3156 break; 3157 3158 default: 3159 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 3160 dring_pkt->tag.vio_subtype); 3161 } 3162 3163 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3164 3165 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3166 } 3167 3168 #define SND_MCST_NACK(ldcp, pkt) \ 3169 pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \ 3170 pkt->tag.vio_sid = ldcp->local_session; \ 3171 (void) vsw_send_msg(ldcp, (void *)pkt, \ 3172 sizeof (vnet_mcast_msg_t), B_TRUE); 3173 3174 /* 3175 * Process a multicast request from a vnet. 3176 * 3177 * Vnet's specify a multicast address that they are interested in. This 3178 * address is used as a key into the hash table which forms the multicast 3179 * forwarding database (mFDB). 3180 * 3181 * The table keys are the multicast addresses, while the table entries 3182 * are pointers to lists of ports which wish to receive packets for the 3183 * specified multicast address. 3184 * 3185 * When a multicast packet is being switched we use the address as a key 3186 * into the hash table, and then walk the appropriate port list forwarding 3187 * the pkt to each port in turn. 
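 *
 * Addresses are only accepted if they really are multicast
 * addresses (the group bit in the first octet must be set);
 * anything else, or a failure to update the mFDB, is NACK'd
 * back to the requesting vnet.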
3188 * 3189 * If a vnet is no longer interested in a particular multicast grouping 3190 * we simply find the correct location in the hash table and then delete 3191 * the relevant port from the port list. 3192 * 3193 * To deal with the case whereby a port is being deleted without first 3194 * removing itself from the lists in the hash table, we maintain a list 3195 * of multicast addresses the port has registered an interest in, within 3196 * the port structure itself. We then simply walk that list of addresses 3197 * using them as keys into the hash table and remove the port from the 3198 * appropriate lists. 3199 */ 3200 static void 3201 vsw_process_ctrl_mcst_pkt(vsw_ldc_t *ldcp, void *pkt) 3202 { 3203 vnet_mcast_msg_t *mcst_pkt; 3204 vsw_port_t *port = ldcp->ldc_port; 3205 vsw_t *vswp = ldcp->ldc_vswp; 3206 int i; 3207 3208 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3209 3210 /* 3211 * We know this is a ctrl/mcast packet so 3212 * cast it into the correct structure. 3213 */ 3214 mcst_pkt = (vnet_mcast_msg_t *)pkt; 3215 3216 switch (mcst_pkt->tag.vio_subtype) { 3217 case VIO_SUBTYPE_INFO: 3218 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3219 3220 /* 3221 * Check if in correct state to receive a multicast 3222 * message (i.e. handshake complete). If not reset 3223 * the handshake. 3224 */ 3225 if (vsw_check_flag(ldcp, INBOUND, VSW_MCST_INFO_RECV)) 3226 return; 3227 3228 /* 3229 * Before attempting to add or remove address check 3230 * that they are valid multicast addresses. 3231 * If not, then NACK back. 3232 */ 3233 for (i = 0; i < mcst_pkt->count; i++) { 3234 if ((mcst_pkt->mca[i].ether_addr_octet[0] & 01) != 1) { 3235 DERR(vswp, "%s: invalid multicast address", 3236 __func__); 3237 SND_MCST_NACK(ldcp, mcst_pkt); 3238 return; 3239 } 3240 } 3241 3242 /* 3243 * Now add/remove the addresses. If this fails we 3244 * NACK back. 3245 */ 3246 if (vsw_add_rem_mcst(mcst_pkt, port) != 0) { 3247 SND_MCST_NACK(ldcp, mcst_pkt); 3248 return; 3249 } 3250 3251 mcst_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3252 mcst_pkt->tag.vio_sid = ldcp->local_session; 3253 3254 DUMP_TAG_PTR((vio_msg_tag_t *)mcst_pkt); 3255 3256 (void) vsw_send_msg(ldcp, (void *)mcst_pkt, 3257 sizeof (vnet_mcast_msg_t), B_TRUE); 3258 break; 3259 3260 case VIO_SUBTYPE_ACK: 3261 DWARN(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3262 3263 /* 3264 * We shouldn't ever get a multicast ACK message as 3265 * at the moment we never request multicast addresses 3266 * to be set on some other device. This may change in 3267 * the future if we have cascading switches. 3268 */ 3269 if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_ACK_RECV)) 3270 return; 3271 3272 /* Do nothing */ 3273 break; 3274 3275 case VIO_SUBTYPE_NACK: 3276 DWARN(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3277 3278 /* 3279 * We shouldn't get a multicast NACK packet for the 3280 * same reasons as we shouldn't get a ACK packet. 3281 */ 3282 if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_NACK_RECV)) 3283 return; 3284 3285 /* Do nothing */ 3286 break; 3287 3288 default: 3289 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 3290 mcst_pkt->tag.vio_subtype); 3291 } 3292 3293 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3294 } 3295 3296 static void 3297 vsw_process_ctrl_rdx_pkt(vsw_ldc_t *ldcp, void *pkt) 3298 { 3299 vio_rdx_msg_t *rdx_pkt; 3300 vsw_t *vswp = ldcp->ldc_vswp; 3301 3302 /* 3303 * We know this is a ctrl/rdx packet so 3304 * cast it into the correct structure. 
3305 */ 3306 rdx_pkt = (vio_rdx_msg_t *)pkt; 3307 3308 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 3309 3310 switch (rdx_pkt->tag.vio_subtype) { 3311 case VIO_SUBTYPE_INFO: 3312 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3313 3314 if (vsw_check_flag(ldcp, OUTBOUND, VSW_RDX_INFO_RECV)) 3315 return; 3316 3317 rdx_pkt->tag.vio_sid = ldcp->local_session; 3318 rdx_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3319 3320 DUMP_TAG_PTR((vio_msg_tag_t *)rdx_pkt); 3321 3322 ldcp->lane_out.lstate |= VSW_RDX_ACK_SENT; 3323 3324 (void) vsw_send_msg(ldcp, (void *)rdx_pkt, 3325 sizeof (vio_rdx_msg_t), B_TRUE); 3326 3327 vsw_next_milestone(ldcp); 3328 break; 3329 3330 case VIO_SUBTYPE_ACK: 3331 /* 3332 * Should be handled in-band by callback handler. 3333 */ 3334 DERR(vswp, "%s: Unexpected VIO_SUBTYPE_ACK", __func__); 3335 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3336 break; 3337 3338 case VIO_SUBTYPE_NACK: 3339 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3340 3341 if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_NACK_RECV)) 3342 return; 3343 3344 ldcp->lane_in.lstate |= VSW_RDX_NACK_RECV; 3345 vsw_next_milestone(ldcp); 3346 break; 3347 3348 default: 3349 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 3350 rdx_pkt->tag.vio_subtype); 3351 } 3352 3353 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3354 } 3355 3356 static void 3357 vsw_process_data_pkt(vsw_ldc_t *ldcp, void *dpkt, vio_msg_tag_t *tagp, 3358 uint32_t msglen) 3359 { 3360 uint16_t env = tagp->vio_subtype_env; 3361 vsw_t *vswp = ldcp->ldc_vswp; 3362 3363 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3364 3365 /* session id check */ 3366 if (ldcp->session_status & VSW_PEER_SESSION) { 3367 if (ldcp->peer_session != tagp->vio_sid) { 3368 DERR(vswp, "%s (chan %d): invalid session id (%llx)", 3369 __func__, ldcp->ldc_id, tagp->vio_sid); 3370 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3371 return; 3372 } 3373 } 3374 3375 /* 3376 * It is an error for us to be getting data packets 3377 * before the handshake has completed. 3378 */ 3379 if (ldcp->hphase != VSW_MILESTONE4) { 3380 DERR(vswp, "%s: got data packet before handshake complete " 3381 "hphase %d (%x: %x)", __func__, ldcp->hphase, 3382 ldcp->lane_in.lstate, ldcp->lane_out.lstate); 3383 DUMP_FLAGS(ldcp->lane_in.lstate); 3384 DUMP_FLAGS(ldcp->lane_out.lstate); 3385 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3386 return; 3387 } 3388 3389 /* 3390 * To reduce the locking contention, release the 3391 * ldc_cblock here and re-acquire it once we are done 3392 * receiving packets. 3393 */ 3394 mutex_exit(&ldcp->ldc_cblock); 3395 mutex_enter(&ldcp->ldc_rxlock); 3396 3397 /* 3398 * Switch on vio_subtype envelope, then let lower routines 3399 * decide if its an INFO, ACK or NACK packet. 
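 *
 * Three data envelopes are handled here: VIO_DRING_DATA
 * (descriptor ring mode), VIO_PKT_DATA (raw/priority frames,
 * via the version-dependent rx_pktdata routine) and
 * VIO_DESC_DATA (in-band descriptors, typically from OBP).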
3400 */ 3401 if (env == VIO_DRING_DATA) { 3402 vsw_process_data_dring_pkt(ldcp, dpkt); 3403 } else if (env == VIO_PKT_DATA) { 3404 ldcp->rx_pktdata(ldcp, dpkt, msglen); 3405 } else if (env == VIO_DESC_DATA) { 3406 vsw_process_data_ibnd_pkt(ldcp, dpkt); 3407 } else { 3408 DERR(vswp, "%s: unknown vio_subtype_env (%x)\n", __func__, env); 3409 } 3410 3411 mutex_exit(&ldcp->ldc_rxlock); 3412 mutex_enter(&ldcp->ldc_cblock); 3413 3414 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3415 } 3416 3417 #define SND_DRING_NACK(ldcp, pkt) \ 3418 pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \ 3419 pkt->tag.vio_sid = ldcp->local_session; \ 3420 (void) vsw_send_msg(ldcp, (void *)pkt, \ 3421 sizeof (vio_dring_msg_t), B_TRUE); 3422 3423 static void 3424 vsw_process_data_dring_pkt(vsw_ldc_t *ldcp, void *dpkt) 3425 { 3426 vio_dring_msg_t *dring_pkt; 3427 vnet_public_desc_t desc, *pub_addr = NULL; 3428 vsw_private_desc_t *priv_addr = NULL; 3429 dring_info_t *dp = NULL; 3430 vsw_t *vswp = ldcp->ldc_vswp; 3431 mblk_t *mp = NULL; 3432 mblk_t *bp = NULL; 3433 mblk_t *bpt = NULL; 3434 size_t nbytes = 0; 3435 uint64_t chain = 0; 3436 uint64_t len; 3437 uint32_t pos, start; 3438 uint32_t range_start, range_end; 3439 int32_t end, num, cnt = 0; 3440 int i, rv, rng_rv = 0, msg_rv = 0; 3441 boolean_t prev_desc_ack = B_FALSE; 3442 int read_attempts = 0; 3443 struct ether_header *ehp; 3444 lane_t *lp = &ldcp->lane_out; 3445 3446 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3447 3448 /* 3449 * We know this is a data/dring packet so 3450 * cast it into the correct structure. 3451 */ 3452 dring_pkt = (vio_dring_msg_t *)dpkt; 3453 3454 /* 3455 * Switch on the vio_subtype. If its INFO then we need to 3456 * process the data. If its an ACK we need to make sure 3457 * it makes sense (i.e did we send an earlier data/info), 3458 * and if its a NACK then we maybe attempt a retry. 3459 */ 3460 switch (dring_pkt->tag.vio_subtype) { 3461 case VIO_SUBTYPE_INFO: 3462 D2(vswp, "%s(%lld): VIO_SUBTYPE_INFO", __func__, ldcp->ldc_id); 3463 3464 READ_ENTER(&ldcp->lane_in.dlistrw); 3465 if ((dp = vsw_ident2dring(&ldcp->lane_in, 3466 dring_pkt->dring_ident)) == NULL) { 3467 RW_EXIT(&ldcp->lane_in.dlistrw); 3468 3469 DERR(vswp, "%s(%lld): unable to find dring from " 3470 "ident 0x%llx", __func__, ldcp->ldc_id, 3471 dring_pkt->dring_ident); 3472 3473 SND_DRING_NACK(ldcp, dring_pkt); 3474 return; 3475 } 3476 3477 start = pos = dring_pkt->start_idx; 3478 end = dring_pkt->end_idx; 3479 len = dp->num_descriptors; 3480 3481 range_start = range_end = pos; 3482 3483 D2(vswp, "%s(%lld): start index %ld : end %ld\n", 3484 __func__, ldcp->ldc_id, start, end); 3485 3486 if (end == -1) { 3487 num = -1; 3488 } else if (end >= 0) { 3489 num = end >= pos ? 
end - pos + 1: (len - pos + 1) + end; 3490 3491 /* basic sanity check */ 3492 if (end > len) { 3493 RW_EXIT(&ldcp->lane_in.dlistrw); 3494 DERR(vswp, "%s(%lld): endpoint %lld outside " 3495 "ring length %lld", __func__, 3496 ldcp->ldc_id, end, len); 3497 3498 SND_DRING_NACK(ldcp, dring_pkt); 3499 return; 3500 } 3501 } else { 3502 RW_EXIT(&ldcp->lane_in.dlistrw); 3503 DERR(vswp, "%s(%lld): invalid endpoint %lld", 3504 __func__, ldcp->ldc_id, end); 3505 SND_DRING_NACK(ldcp, dring_pkt); 3506 return; 3507 } 3508 3509 while (cnt != num) { 3510 vsw_recheck_desc: 3511 pub_addr = (vnet_public_desc_t *)dp->pub_addr + pos; 3512 3513 if ((rng_rv = vnet_dring_entry_copy(pub_addr, 3514 &desc, dp->dring_mtype, dp->handle, 3515 pos, pos)) != 0) { 3516 DERR(vswp, "%s(%lld): unable to copy " 3517 "descriptor at pos %d: err %d", 3518 __func__, pos, ldcp->ldc_id, rng_rv); 3519 ldcp->ldc_stats.ierrors++; 3520 break; 3521 } 3522 3523 /* 3524 * When given a bounded range of descriptors 3525 * to process, its an error to hit a descriptor 3526 * which is not ready. In the non-bounded case 3527 * (end_idx == -1) this simply indicates we have 3528 * reached the end of the current active range. 3529 */ 3530 if (desc.hdr.dstate != VIO_DESC_READY) { 3531 /* unbound - no error */ 3532 if (end == -1) { 3533 if (read_attempts == vsw_read_attempts) 3534 break; 3535 3536 delay(drv_usectohz(vsw_desc_delay)); 3537 read_attempts++; 3538 goto vsw_recheck_desc; 3539 } 3540 3541 /* bounded - error - so NACK back */ 3542 RW_EXIT(&ldcp->lane_in.dlistrw); 3543 DERR(vswp, "%s(%lld): descriptor not READY " 3544 "(%d)", __func__, ldcp->ldc_id, 3545 desc.hdr.dstate); 3546 SND_DRING_NACK(ldcp, dring_pkt); 3547 return; 3548 } 3549 3550 DTRACE_PROBE1(read_attempts, int, read_attempts); 3551 3552 range_end = pos; 3553 3554 /* 3555 * If we ACK'd the previous descriptor then now 3556 * record the new range start position for later 3557 * ACK's. 3558 */ 3559 if (prev_desc_ack) { 3560 range_start = pos; 3561 3562 D2(vswp, "%s(%lld): updating range start to be " 3563 "%d", __func__, ldcp->ldc_id, range_start); 3564 3565 prev_desc_ack = B_FALSE; 3566 } 3567 3568 D2(vswp, "%s(%lld): processing desc %lld at pos" 3569 " 0x%llx : dstate 0x%lx : datalen 0x%lx", 3570 __func__, ldcp->ldc_id, pos, &desc, 3571 desc.hdr.dstate, desc.nbytes); 3572 3573 if ((desc.nbytes < ETHERMIN) || 3574 (desc.nbytes > lp->mtu)) { 3575 /* invalid size; drop the packet */ 3576 ldcp->ldc_stats.ierrors++; 3577 goto vsw_process_desc_done; 3578 } 3579 3580 /* 3581 * Ensure that we ask ldc for an aligned 3582 * number of bytes. Data is padded to align on 8 3583 * byte boundary, desc.nbytes is actual data length, 3584 * i.e. minus that padding. 3585 */ 3586 nbytes = (desc.nbytes + VNET_IPALIGN + 7) & ~7; 3587 if (nbytes > ldcp->max_rxpool_size) { 3588 mp = allocb(desc.nbytes + VNET_IPALIGN + 8, 3589 BPRI_MED); 3590 } else { 3591 mp = vio_multipool_allocb(&ldcp->vmp, nbytes); 3592 if (mp == NULL) { 3593 ldcp->ldc_stats.rx_vio_allocb_fail++; 3594 /* 3595 * No free receive buffers available, 3596 * so fallback onto allocb(9F). Make 3597 * sure that we get a data buffer which 3598 * is a multiple of 8 as this is 3599 * required by ldc_mem_copy. 
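 *
 * The copy length (nbytes) was already rounded up above to a
 * multiple of 8 covering desc.nbytes plus VNET_IPALIGN bytes
 * of headroom; allocating desc.nbytes + VNET_IPALIGN + 8 here
 * guarantees the buffer is large enough for that rounded-up
 * length.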
3600 */ 3601 DTRACE_PROBE(allocb); 3602 mp = allocb(desc.nbytes + 3603 VNET_IPALIGN + 8, BPRI_MED); 3604 } 3605 } 3606 if (mp == NULL) { 3607 DERR(vswp, "%s(%ld): allocb failed", 3608 __func__, ldcp->ldc_id); 3609 rng_rv = vnet_dring_entry_set_dstate(pub_addr, 3610 dp->dring_mtype, dp->handle, pos, pos, 3611 VIO_DESC_DONE); 3612 ldcp->ldc_stats.ierrors++; 3613 ldcp->ldc_stats.rx_allocb_fail++; 3614 break; 3615 } 3616 3617 rv = ldc_mem_copy(ldcp->ldc_handle, 3618 (caddr_t)mp->b_rptr, 0, &nbytes, 3619 desc.memcookie, desc.ncookies, LDC_COPY_IN); 3620 if (rv != 0) { 3621 DERR(vswp, "%s(%d): unable to copy in data " 3622 "from %d cookies in desc %d (rv %d)", 3623 __func__, ldcp->ldc_id, desc.ncookies, 3624 pos, rv); 3625 freemsg(mp); 3626 3627 rng_rv = vnet_dring_entry_set_dstate(pub_addr, 3628 dp->dring_mtype, dp->handle, pos, pos, 3629 VIO_DESC_DONE); 3630 ldcp->ldc_stats.ierrors++; 3631 break; 3632 } else { 3633 D2(vswp, "%s(%d): copied in %ld bytes" 3634 " using %d cookies", __func__, 3635 ldcp->ldc_id, nbytes, desc.ncookies); 3636 } 3637 3638 /* adjust the read pointer to skip over the padding */ 3639 mp->b_rptr += VNET_IPALIGN; 3640 3641 /* point to the actual end of data */ 3642 mp->b_wptr = mp->b_rptr + desc.nbytes; 3643 3644 /* update statistics */ 3645 ehp = (struct ether_header *)mp->b_rptr; 3646 if (IS_BROADCAST(ehp)) 3647 ldcp->ldc_stats.brdcstrcv++; 3648 else if (IS_MULTICAST(ehp)) 3649 ldcp->ldc_stats.multircv++; 3650 3651 ldcp->ldc_stats.ipackets++; 3652 ldcp->ldc_stats.rbytes += desc.nbytes; 3653 3654 /* 3655 * IPALIGN space can be used for VLAN_TAG 3656 */ 3657 (void) vsw_vlan_frame_pretag(ldcp->ldc_port, 3658 VSW_VNETPORT, mp); 3659 3660 /* build a chain of received packets */ 3661 if (bp == NULL) { 3662 /* first pkt */ 3663 bp = mp; 3664 bp->b_next = bp->b_prev = NULL; 3665 bpt = bp; 3666 chain = 1; 3667 } else { 3668 mp->b_next = mp->b_prev = NULL; 3669 bpt->b_next = mp; 3670 bpt = mp; 3671 chain++; 3672 } 3673 3674 vsw_process_desc_done: 3675 /* mark we are finished with this descriptor */ 3676 if ((rng_rv = vnet_dring_entry_set_dstate(pub_addr, 3677 dp->dring_mtype, dp->handle, pos, pos, 3678 VIO_DESC_DONE)) != 0) { 3679 DERR(vswp, "%s(%lld): unable to update " 3680 "dstate at pos %d: err %d", 3681 __func__, pos, ldcp->ldc_id, rng_rv); 3682 ldcp->ldc_stats.ierrors++; 3683 break; 3684 } 3685 3686 /* 3687 * Send an ACK back to peer if requested. 3688 */ 3689 if (desc.hdr.ack) { 3690 dring_pkt->start_idx = range_start; 3691 dring_pkt->end_idx = range_end; 3692 3693 DERR(vswp, "%s(%lld): processed %d %d, ACK" 3694 " requested", __func__, ldcp->ldc_id, 3695 dring_pkt->start_idx, dring_pkt->end_idx); 3696 3697 dring_pkt->dring_process_state = VIO_DP_ACTIVE; 3698 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3699 dring_pkt->tag.vio_sid = ldcp->local_session; 3700 3701 msg_rv = vsw_send_msg(ldcp, (void *)dring_pkt, 3702 sizeof (vio_dring_msg_t), B_FALSE); 3703 3704 /* 3705 * Check if ACK was successfully sent. If not 3706 * we break and deal with that below. 3707 */ 3708 if (msg_rv != 0) 3709 break; 3710 3711 prev_desc_ack = B_TRUE; 3712 range_start = pos; 3713 } 3714 3715 /* next descriptor */ 3716 pos = (pos + 1) % len; 3717 cnt++; 3718 3719 /* 3720 * Break out of loop here and stop processing to 3721 * allow some other network device (or disk) to 3722 * get access to the cpu. 
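 *
 * After breaking out, the ACK sent below with state
 * VIO_DP_STOPPED tells the peer how far we got; if the peer
 * still has READY descriptors outstanding it will send a new
 * INFO message to restart us (the sender side of that exchange
 * is the VIO_SUBTYPE_ACK case further down).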
3723 */ 3724 if (chain > vsw_chain_len) { 3725 D3(vswp, "%s(%lld): switching chain of %d " 3726 "msgs", __func__, ldcp->ldc_id, chain); 3727 break; 3728 } 3729 } 3730 RW_EXIT(&ldcp->lane_in.dlistrw); 3731 3732 /* send the chain of packets to be switched */ 3733 if (bp != NULL) { 3734 DTRACE_PROBE1(vsw_rcv_msgs, int, chain); 3735 D3(vswp, "%s(%lld): switching chain of %d msgs", 3736 __func__, ldcp->ldc_id, chain); 3737 vswp->vsw_switch_frame(vswp, bp, VSW_VNETPORT, 3738 ldcp->ldc_port, NULL); 3739 } 3740 3741 /* 3742 * If when we encountered an error when attempting to 3743 * access an imported dring, initiate a connection reset. 3744 */ 3745 if (rng_rv != 0) { 3746 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3747 break; 3748 } 3749 3750 /* 3751 * If when we attempted to send the ACK we found that the 3752 * channel had been reset then now handle this. We deal with 3753 * it here as we cannot reset the channel while holding the 3754 * dlistrw lock, and we don't want to acquire/release it 3755 * continuously in the above loop, as a channel reset should 3756 * be a rare event. 3757 */ 3758 if (msg_rv == ECONNRESET) { 3759 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 3760 break; 3761 } 3762 3763 DTRACE_PROBE1(msg_cnt, int, cnt); 3764 3765 /* 3766 * We are now finished so ACK back with the state 3767 * set to STOPPING so our peer knows we are finished 3768 */ 3769 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3770 dring_pkt->tag.vio_sid = ldcp->local_session; 3771 3772 dring_pkt->dring_process_state = VIO_DP_STOPPED; 3773 3774 DTRACE_PROBE(stop_process_sent); 3775 3776 /* 3777 * We have not processed any more descriptors beyond 3778 * the last one we ACK'd. 3779 */ 3780 if (prev_desc_ack) 3781 range_start = range_end; 3782 3783 dring_pkt->start_idx = range_start; 3784 dring_pkt->end_idx = range_end; 3785 3786 D2(vswp, "%s(%lld) processed : %d : %d, now stopping", 3787 __func__, ldcp->ldc_id, dring_pkt->start_idx, 3788 dring_pkt->end_idx); 3789 3790 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 3791 sizeof (vio_dring_msg_t), B_TRUE); 3792 break; 3793 3794 case VIO_SUBTYPE_ACK: 3795 D2(vswp, "%s(%lld): VIO_SUBTYPE_ACK", __func__, ldcp->ldc_id); 3796 /* 3797 * Verify that the relevant descriptors are all 3798 * marked as DONE 3799 */ 3800 READ_ENTER(&ldcp->lane_out.dlistrw); 3801 if ((dp = vsw_ident2dring(&ldcp->lane_out, 3802 dring_pkt->dring_ident)) == NULL) { 3803 RW_EXIT(&ldcp->lane_out.dlistrw); 3804 DERR(vswp, "%s: unknown ident in ACK", __func__); 3805 return; 3806 } 3807 3808 start = end = 0; 3809 start = dring_pkt->start_idx; 3810 end = dring_pkt->end_idx; 3811 len = dp->num_descriptors; 3812 3813 3814 mutex_enter(&dp->dlock); 3815 dp->last_ack_recv = end; 3816 ldcp->ldc_stats.dring_data_acks++; 3817 mutex_exit(&dp->dlock); 3818 3819 (void) vsw_reclaim_dring(dp, start); 3820 3821 /* 3822 * If our peer is stopping processing descriptors then 3823 * we check to make sure it has processed all the descriptors 3824 * we have updated. If not then we send it a new message 3825 * to prompt it to restart. 3826 */ 3827 if (dring_pkt->dring_process_state == VIO_DP_STOPPED) { 3828 DTRACE_PROBE(stop_process_recv); 3829 D2(vswp, "%s(%lld): got stopping msg : %d : %d", 3830 __func__, ldcp->ldc_id, dring_pkt->start_idx, 3831 dring_pkt->end_idx); 3832 3833 /* 3834 * Check next descriptor in public section of ring. 3835 * If its marked as READY then we need to prompt our 3836 * peer to start processing the ring again. 
3837 */ 3838 i = (end + 1) % len; 3839 pub_addr = (vnet_public_desc_t *)dp->pub_addr + i; 3840 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i; 3841 3842 /* 3843 * Hold the restart lock across all of this to 3844 * make sure that its not possible for us to 3845 * decide that a msg needs to be sent in the future 3846 * but the sending code having already checked is 3847 * about to exit. 3848 */ 3849 mutex_enter(&dp->restart_lock); 3850 ldcp->ldc_stats.dring_stopped_acks++; 3851 mutex_enter(&priv_addr->dstate_lock); 3852 if (pub_addr->hdr.dstate == VIO_DESC_READY) { 3853 3854 mutex_exit(&priv_addr->dstate_lock); 3855 3856 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO; 3857 dring_pkt->tag.vio_sid = ldcp->local_session; 3858 3859 dring_pkt->start_idx = (end + 1) % len; 3860 dring_pkt->end_idx = -1; 3861 3862 D2(vswp, "%s(%lld) : sending restart msg:" 3863 " %d : %d", __func__, ldcp->ldc_id, 3864 dring_pkt->start_idx, dring_pkt->end_idx); 3865 3866 msg_rv = vsw_send_msg(ldcp, (void *)dring_pkt, 3867 sizeof (vio_dring_msg_t), B_FALSE); 3868 ldcp->ldc_stats.dring_data_msgs++; 3869 3870 } else { 3871 mutex_exit(&priv_addr->dstate_lock); 3872 dp->restart_reqd = B_TRUE; 3873 } 3874 mutex_exit(&dp->restart_lock); 3875 } 3876 RW_EXIT(&ldcp->lane_out.dlistrw); 3877 3878 /* only do channel reset after dropping dlistrw lock */ 3879 if (msg_rv == ECONNRESET) 3880 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 3881 3882 break; 3883 3884 case VIO_SUBTYPE_NACK: 3885 DWARN(vswp, "%s(%lld): VIO_SUBTYPE_NACK", 3886 __func__, ldcp->ldc_id); 3887 /* 3888 * Something is badly wrong if we are getting NACK's 3889 * for our data pkts. So reset the channel. 3890 */ 3891 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3892 3893 break; 3894 3895 default: 3896 DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__, 3897 ldcp->ldc_id, dring_pkt->tag.vio_subtype); 3898 } 3899 3900 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 3901 } 3902 3903 /* 3904 * dummy pkt data handler function for vnet protocol version 1.0 3905 */ 3906 static void 3907 vsw_process_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen) 3908 { 3909 _NOTE(ARGUNUSED(arg1, arg2, msglen)) 3910 } 3911 3912 /* 3913 * This function handles raw pkt data messages received over the channel. 3914 * Currently, only priority-eth-type frames are received through this mechanism. 3915 * In this case, the frame(data) is present within the message itself which 3916 * is copied into an mblk before switching it. 
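 *
 * The payload length is simply the message length minus the
 * raw-data header (VIO_PKT_DATA_HDRSIZE); it is copied into an
 * mblk allocated with VLAN_TAGSZ of headroom so that a tag can
 * be inserted later without a further copy.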
3917 */ 3918 static void 3919 vsw_process_pkt_data(void *arg1, void *arg2, uint32_t msglen) 3920 { 3921 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg1; 3922 vio_raw_data_msg_t *dpkt = (vio_raw_data_msg_t *)arg2; 3923 uint32_t size; 3924 mblk_t *mp; 3925 vsw_t *vswp = ldcp->ldc_vswp; 3926 vgen_stats_t *statsp = &ldcp->ldc_stats; 3927 lane_t *lp = &ldcp->lane_out; 3928 3929 size = msglen - VIO_PKT_DATA_HDRSIZE; 3930 if (size < ETHERMIN || size > lp->mtu) { 3931 (void) atomic_inc_32(&statsp->rx_pri_fail); 3932 DWARN(vswp, "%s(%lld) invalid size(%d)\n", __func__, 3933 ldcp->ldc_id, size); 3934 return; 3935 } 3936 3937 mp = vio_multipool_allocb(&ldcp->vmp, size + VLAN_TAGSZ); 3938 if (mp == NULL) { 3939 mp = allocb(size + VLAN_TAGSZ, BPRI_MED); 3940 if (mp == NULL) { 3941 (void) atomic_inc_32(&statsp->rx_pri_fail); 3942 DWARN(vswp, "%s(%lld) allocb failure, " 3943 "unable to process priority frame\n", __func__, 3944 ldcp->ldc_id); 3945 return; 3946 } 3947 } 3948 3949 /* skip over the extra space for vlan tag */ 3950 mp->b_rptr += VLAN_TAGSZ; 3951 3952 /* copy the frame from the payload of raw data msg into the mblk */ 3953 bcopy(dpkt->data, mp->b_rptr, size); 3954 mp->b_wptr = mp->b_rptr + size; 3955 3956 /* update stats */ 3957 (void) atomic_inc_64(&statsp->rx_pri_packets); 3958 (void) atomic_add_64(&statsp->rx_pri_bytes, size); 3959 3960 /* 3961 * VLAN_TAGSZ of extra space has been pre-alloc'd if tag is needed. 3962 */ 3963 (void) vsw_vlan_frame_pretag(ldcp->ldc_port, VSW_VNETPORT, mp); 3964 3965 /* switch the frame to destination */ 3966 vswp->vsw_switch_frame(vswp, mp, VSW_VNETPORT, ldcp->ldc_port, NULL); 3967 } 3968 3969 /* 3970 * Process an in-band descriptor message (most likely from 3971 * OBP). 3972 */ 3973 static void 3974 vsw_process_data_ibnd_pkt(vsw_ldc_t *ldcp, void *pkt) 3975 { 3976 vnet_ibnd_desc_t *ibnd_desc; 3977 dring_info_t *dp = NULL; 3978 vsw_private_desc_t *priv_addr = NULL; 3979 vsw_t *vswp = ldcp->ldc_vswp; 3980 mblk_t *mp = NULL; 3981 size_t nbytes = 0; 3982 size_t off = 0; 3983 uint64_t idx = 0; 3984 uint32_t num = 1, len, datalen = 0; 3985 uint64_t ncookies = 0; 3986 int i, rv; 3987 int j = 0; 3988 3989 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3990 3991 ibnd_desc = (vnet_ibnd_desc_t *)pkt; 3992 3993 switch (ibnd_desc->hdr.tag.vio_subtype) { 3994 case VIO_SUBTYPE_INFO: 3995 D1(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3996 3997 if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV)) 3998 return; 3999 4000 /* 4001 * Data is padded to align on a 8 byte boundary, 4002 * nbytes is actual data length, i.e. minus that 4003 * padding. 4004 */ 4005 datalen = ibnd_desc->nbytes; 4006 4007 D2(vswp, "%s(%lld): processing inband desc : " 4008 ": datalen 0x%lx", __func__, ldcp->ldc_id, datalen); 4009 4010 ncookies = ibnd_desc->ncookies; 4011 4012 /* 4013 * allocb(9F) returns an aligned data block. We 4014 * need to ensure that we ask ldc for an aligned 4015 * number of bytes also. 
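 *
 * A small worked example of the round-up done below (the value is
 * illustrative): for datalen = 60, nbytes & 0x7 is 4, so
 * off = 8 - 4 = 4 and nbytes becomes 64. ldc_mem_copy() is then
 * asked for 64 bytes, while b_wptr is still set from datalen (60),
 * so the alignment padding never becomes part of the switched
 * packet.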
4016 */ 4017 nbytes = datalen; 4018 if (nbytes & 0x7) { 4019 off = 8 - (nbytes & 0x7); 4020 nbytes += off; 4021 } 4022 4023 /* alloc extra space for VLAN_TAG */ 4024 mp = allocb(datalen + 8, BPRI_MED); 4025 if (mp == NULL) { 4026 DERR(vswp, "%s(%lld): allocb failed", 4027 __func__, ldcp->ldc_id); 4028 ldcp->ldc_stats.rx_allocb_fail++; 4029 return; 4030 } 4031 4032 /* skip over the extra space for VLAN_TAG */ 4033 mp->b_rptr += 8; 4034 4035 rv = ldc_mem_copy(ldcp->ldc_handle, (caddr_t)mp->b_rptr, 4036 0, &nbytes, ibnd_desc->memcookie, (uint64_t)ncookies, 4037 LDC_COPY_IN); 4038 4039 if (rv != 0) { 4040 DERR(vswp, "%s(%d): unable to copy in data from " 4041 "%d cookie(s)", __func__, ldcp->ldc_id, ncookies); 4042 freemsg(mp); 4043 ldcp->ldc_stats.ierrors++; 4044 return; 4045 } 4046 4047 D2(vswp, "%s(%d): copied in %ld bytes using %d cookies", 4048 __func__, ldcp->ldc_id, nbytes, ncookies); 4049 4050 /* point to the actual end of data */ 4051 mp->b_wptr = mp->b_rptr + datalen; 4052 ldcp->ldc_stats.ipackets++; 4053 ldcp->ldc_stats.rbytes += datalen; 4054 4055 /* 4056 * We ACK back every in-band descriptor message we process 4057 */ 4058 ibnd_desc->hdr.tag.vio_subtype = VIO_SUBTYPE_ACK; 4059 ibnd_desc->hdr.tag.vio_sid = ldcp->local_session; 4060 (void) vsw_send_msg(ldcp, (void *)ibnd_desc, 4061 sizeof (vnet_ibnd_desc_t), B_TRUE); 4062 4063 /* 4064 * there is extra space alloc'd for VLAN_TAG 4065 */ 4066 (void) vsw_vlan_frame_pretag(ldcp->ldc_port, VSW_VNETPORT, mp); 4067 4068 /* send the packet to be switched */ 4069 vswp->vsw_switch_frame(vswp, mp, VSW_VNETPORT, 4070 ldcp->ldc_port, NULL); 4071 4072 break; 4073 4074 case VIO_SUBTYPE_ACK: 4075 D1(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 4076 4077 /* Verify the ACK is valid */ 4078 idx = ibnd_desc->hdr.desc_handle; 4079 4080 if (idx >= vsw_ntxds) { 4081 cmn_err(CE_WARN, "!vsw%d: corrupted ACK received " 4082 "(idx %ld)", vswp->instance, idx); 4083 return; 4084 } 4085 4086 if ((dp = ldcp->lane_out.dringp) == NULL) { 4087 DERR(vswp, "%s: no dring found", __func__); 4088 return; 4089 } 4090 4091 len = dp->num_descriptors; 4092 /* 4093 * If the descriptor we are being ACK'ed for is not the 4094 * one we expected, then pkts were lost somwhere, either 4095 * when we tried to send a msg, or a previous ACK msg from 4096 * our peer. In either case we now reclaim the descriptors 4097 * in the range from the last ACK we received up to the 4098 * current ACK. 4099 */ 4100 if (idx != dp->last_ack_recv) { 4101 DWARN(vswp, "%s: dropped pkts detected, (%ld, %ld)", 4102 __func__, dp->last_ack_recv, idx); 4103 num = idx >= dp->last_ack_recv ? 4104 idx - dp->last_ack_recv + 1: 4105 (len - dp->last_ack_recv + 1) + idx; 4106 } 4107 4108 /* 4109 * When we sent the in-band message to our peer we 4110 * marked the copy in our private ring as READY. We now 4111 * check that the descriptor we are being ACK'ed for is in 4112 * fact READY, i.e. it is one we have shared with our peer. 4113 * 4114 * If its not we flag an error, but still reset the descr 4115 * back to FREE. 
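 *
 * The reclaim loop below walks 'num' slots starting at
 * dp->last_ack_recv, wrapping modulo the ring size. As a purely
 * illustrative example: with len = 256, last_ack_recv = 250 and an
 * ACK for idx = 3, num = (len - last_ack_recv + 1) + idx = 10, so
 * slots 250..255 and 0..3 are all returned to VIO_DESC_FREE.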
4116 */ 4117 for (i = dp->last_ack_recv; j < num; i = (i + 1) % len, j++) { 4118 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i; 4119 mutex_enter(&priv_addr->dstate_lock); 4120 if (priv_addr->dstate != VIO_DESC_READY) { 4121 DERR(vswp, "%s: (%ld) desc at index %ld not " 4122 "READY (0x%lx)", __func__, 4123 ldcp->ldc_id, idx, priv_addr->dstate); 4124 DERR(vswp, "%s: bound %d: ncookies %ld : " 4125 "datalen %ld", __func__, 4126 priv_addr->bound, priv_addr->ncookies, 4127 priv_addr->datalen); 4128 } 4129 D2(vswp, "%s: (%lld) freeing descp at %lld", __func__, 4130 ldcp->ldc_id, idx); 4131 /* release resources associated with sent msg */ 4132 priv_addr->datalen = 0; 4133 priv_addr->dstate = VIO_DESC_FREE; 4134 mutex_exit(&priv_addr->dstate_lock); 4135 } 4136 /* update to next expected value */ 4137 dp->last_ack_recv = (idx + 1) % dp->num_descriptors; 4138 4139 break; 4140 4141 case VIO_SUBTYPE_NACK: 4142 DERR(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 4143 4144 /* 4145 * We should only get a NACK if our peer doesn't like 4146 * something about a message we have sent it. If this 4147 * happens we just release the resources associated with 4148 * the message. (We are relying on higher layers to decide 4149 * whether or not to resend. 4150 */ 4151 4152 /* limit check */ 4153 idx = ibnd_desc->hdr.desc_handle; 4154 4155 if (idx >= vsw_ntxds) { 4156 DERR(vswp, "%s: corrupted NACK received (idx %lld)", 4157 __func__, idx); 4158 return; 4159 } 4160 4161 if ((dp = ldcp->lane_out.dringp) == NULL) { 4162 DERR(vswp, "%s: no dring found", __func__); 4163 return; 4164 } 4165 4166 priv_addr = (vsw_private_desc_t *)dp->priv_addr; 4167 4168 /* move to correct location in ring */ 4169 priv_addr += idx; 4170 4171 /* release resources associated with sent msg */ 4172 mutex_enter(&priv_addr->dstate_lock); 4173 priv_addr->datalen = 0; 4174 priv_addr->dstate = VIO_DESC_FREE; 4175 mutex_exit(&priv_addr->dstate_lock); 4176 4177 break; 4178 4179 default: 4180 DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__, 4181 ldcp->ldc_id, ibnd_desc->hdr.tag.vio_subtype); 4182 } 4183 4184 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 4185 } 4186 4187 static void 4188 vsw_process_err_pkt(vsw_ldc_t *ldcp, void *epkt, vio_msg_tag_t *tagp) 4189 { 4190 _NOTE(ARGUNUSED(epkt)) 4191 4192 vsw_t *vswp = ldcp->ldc_vswp; 4193 uint16_t env = tagp->vio_subtype_env; 4194 4195 D1(vswp, "%s (%lld): enter\n", __func__, ldcp->ldc_id); 4196 4197 /* 4198 * Error vio_subtypes have yet to be defined. So for 4199 * the moment we can't do anything. 4200 */ 4201 D2(vswp, "%s: (%x) vio_subtype env", __func__, env); 4202 4203 D1(vswp, "%s (%lld): exit\n", __func__, ldcp->ldc_id); 4204 } 4205 4206 /* transmit the packet over the given port */ 4207 int 4208 vsw_portsend(vsw_port_t *port, mblk_t *mp, mblk_t *mpt, uint32_t count) 4209 { 4210 vsw_ldc_list_t *ldcl = &port->p_ldclist; 4211 vsw_ldc_t *ldcp; 4212 int status = 0; 4213 uint32_t n; 4214 4215 READ_ENTER(&ldcl->lockrw); 4216 /* 4217 * Note for now, we have a single channel. 
4218 */ 4219 ldcp = ldcl->head; 4220 if (ldcp == NULL) { 4221 DERR(port->p_vswp, "vsw_portsend: no ldc: dropping packet\n"); 4222 freemsgchain(mp); 4223 RW_EXIT(&ldcl->lockrw); 4224 return (1); 4225 } 4226 4227 n = vsw_vlan_frame_untag(port, VSW_VNETPORT, &mp, &mpt); 4228 4229 count -= n; 4230 if (count == 0) { 4231 goto vsw_portsend_exit; 4232 } 4233 4234 status = ldcp->tx(ldcp, mp, mpt, count); 4235 4236 vsw_portsend_exit: 4237 RW_EXIT(&ldcl->lockrw); 4238 4239 return (status); 4240 } 4241 4242 /* 4243 * Break up frames into 2 seperate chains: normal and 4244 * priority, based on the frame type. The number of 4245 * priority frames is also counted and returned. 4246 * 4247 * Params: 4248 * vswp: pointer to the instance of vsw 4249 * np: head of packet chain to be broken 4250 * npt: tail of packet chain to be broken 4251 * 4252 * Returns: 4253 * np: head of normal data packets 4254 * npt: tail of normal data packets 4255 * hp: head of high priority packets 4256 * hpt: tail of high priority packets 4257 */ 4258 static uint32_t 4259 vsw_get_pri_packets(vsw_t *vswp, mblk_t **np, mblk_t **npt, 4260 mblk_t **hp, mblk_t **hpt) 4261 { 4262 mblk_t *tmp = NULL; 4263 mblk_t *smp = NULL; 4264 mblk_t *hmp = NULL; /* high prio pkts head */ 4265 mblk_t *hmpt = NULL; /* high prio pkts tail */ 4266 mblk_t *nmp = NULL; /* normal pkts head */ 4267 mblk_t *nmpt = NULL; /* normal pkts tail */ 4268 uint32_t count = 0; 4269 int i; 4270 struct ether_header *ehp; 4271 uint32_t num_types; 4272 uint16_t *types; 4273 4274 tmp = *np; 4275 while (tmp != NULL) { 4276 4277 smp = tmp; 4278 tmp = tmp->b_next; 4279 smp->b_next = NULL; 4280 smp->b_prev = NULL; 4281 4282 ehp = (struct ether_header *)smp->b_rptr; 4283 num_types = vswp->pri_num_types; 4284 types = vswp->pri_types; 4285 for (i = 0; i < num_types; i++) { 4286 if (ehp->ether_type == types[i]) { 4287 /* high priority frame */ 4288 4289 if (hmp != NULL) { 4290 hmpt->b_next = smp; 4291 hmpt = smp; 4292 } else { 4293 hmp = hmpt = smp; 4294 } 4295 count++; 4296 break; 4297 } 4298 } 4299 if (i == num_types) { 4300 /* normal data frame */ 4301 4302 if (nmp != NULL) { 4303 nmpt->b_next = smp; 4304 nmpt = smp; 4305 } else { 4306 nmp = nmpt = smp; 4307 } 4308 } 4309 } 4310 4311 *hp = hmp; 4312 *hpt = hmpt; 4313 *np = nmp; 4314 *npt = nmpt; 4315 4316 return (count); 4317 } 4318 4319 /* 4320 * Wrapper function to transmit normal and/or priority frames over the channel. 4321 */ 4322 static int 4323 vsw_ldctx_pri(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count) 4324 { 4325 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 4326 mblk_t *tmp; 4327 mblk_t *smp; 4328 mblk_t *hmp; /* high prio pkts head */ 4329 mblk_t *hmpt; /* high prio pkts tail */ 4330 mblk_t *nmp; /* normal pkts head */ 4331 mblk_t *nmpt; /* normal pkts tail */ 4332 uint32_t n = 0; 4333 vsw_t *vswp = ldcp->ldc_vswp; 4334 4335 ASSERT(VSW_PRI_ETH_DEFINED(vswp)); 4336 ASSERT(count != 0); 4337 4338 nmp = mp; 4339 nmpt = mpt; 4340 4341 /* gather any priority frames from the chain of packets */ 4342 n = vsw_get_pri_packets(vswp, &nmp, &nmpt, &hmp, &hmpt); 4343 4344 /* transmit priority frames */ 4345 tmp = hmp; 4346 while (tmp != NULL) { 4347 smp = tmp; 4348 tmp = tmp->b_next; 4349 smp->b_next = NULL; 4350 vsw_ldcsend_pkt(ldcp, smp); 4351 } 4352 4353 count -= n; 4354 4355 if (count == 0) { 4356 /* no normal data frames to process */ 4357 return (0); 4358 } 4359 4360 return (vsw_ldctx(ldcp, nmp, nmpt, count)); 4361 } 4362 4363 /* 4364 * Wrapper function to transmit normal frames over the channel. 
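 *
 * When vsw_ldctx_pri() above is the transmit entry point, priority
 * frames never reach this routine; they are stripped out first by
 * vsw_get_pri_packets(). As a purely hypothetical example, if
 * pri_types held the single ethertype 0x8906, a chain of
 * { IPv4, 0x8906, IPv4 } frames would arrive here as just the two
 * IPv4 frames, with 'count' already reduced by one for the frame
 * that went out via vsw_ldcsend_pkt().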
4365 */ 4366 static int 4367 vsw_ldctx(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count) 4368 { 4369 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 4370 mblk_t *tmp = NULL; 4371 4372 ASSERT(count != 0); 4373 /* 4374 * If the TX thread is enabled, then queue the 4375 * ordinary frames and signal the tx thread. 4376 */ 4377 if (ldcp->tx_thread != NULL) { 4378 4379 mutex_enter(&ldcp->tx_thr_lock); 4380 4381 if ((ldcp->tx_cnt + count) >= vsw_max_tx_qcount) { 4382 /* 4383 * If we reached queue limit, 4384 * do not queue new packets, 4385 * drop them. 4386 */ 4387 ldcp->ldc_stats.tx_qfull += count; 4388 mutex_exit(&ldcp->tx_thr_lock); 4389 freemsgchain(mp); 4390 goto exit; 4391 } 4392 if (ldcp->tx_mhead == NULL) { 4393 ldcp->tx_mhead = mp; 4394 ldcp->tx_mtail = mpt; 4395 cv_signal(&ldcp->tx_thr_cv); 4396 } else { 4397 ldcp->tx_mtail->b_next = mp; 4398 ldcp->tx_mtail = mpt; 4399 } 4400 ldcp->tx_cnt += count; 4401 mutex_exit(&ldcp->tx_thr_lock); 4402 } else { 4403 while (mp != NULL) { 4404 tmp = mp->b_next; 4405 mp->b_next = mp->b_prev = NULL; 4406 (void) vsw_ldcsend(ldcp, mp, 1); 4407 mp = tmp; 4408 } 4409 } 4410 4411 exit: 4412 return (0); 4413 } 4414 4415 /* 4416 * This function transmits the frame in the payload of a raw data 4417 * (VIO_PKT_DATA) message. Thus, it provides an Out-Of-Band path to 4418 * send special frames with high priorities, without going through 4419 * the normal data path which uses descriptor ring mechanism. 4420 */ 4421 static void 4422 vsw_ldcsend_pkt(vsw_ldc_t *ldcp, mblk_t *mp) 4423 { 4424 vio_raw_data_msg_t *pkt; 4425 mblk_t *bp; 4426 mblk_t *nmp = NULL; 4427 caddr_t dst; 4428 uint32_t mblksz; 4429 uint32_t size; 4430 uint32_t nbytes; 4431 int rv; 4432 vsw_t *vswp = ldcp->ldc_vswp; 4433 vgen_stats_t *statsp = &ldcp->ldc_stats; 4434 4435 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 4436 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 4437 (void) atomic_inc_32(&statsp->tx_pri_fail); 4438 DWARN(vswp, "%s(%lld) status(%d) lstate(0x%llx), dropping " 4439 "packet\n", __func__, ldcp->ldc_id, ldcp->ldc_status, 4440 ldcp->lane_out.lstate); 4441 goto send_pkt_exit; 4442 } 4443 4444 size = msgsize(mp); 4445 4446 /* frame size bigger than available payload len of raw data msg ? 
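 * (the payload capacity of a raw data message is
 * ldcp->msglen - VIO_PKT_DATA_HDRSIZE; as an illustrative figure,
 * a 2048 byte msglen leaves 2048 - VIO_PKT_DATA_HDRSIZE bytes, and
 * an oversize frame is counted in tx_pri_fail and dropped rather
 * than fragmented)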
*/ 4447 if (size > (size_t)(ldcp->msglen - VIO_PKT_DATA_HDRSIZE)) { 4448 (void) atomic_inc_32(&statsp->tx_pri_fail); 4449 DWARN(vswp, "%s(%lld) invalid size(%d)\n", __func__, 4450 ldcp->ldc_id, size); 4451 goto send_pkt_exit; 4452 } 4453 4454 if (size < ETHERMIN) 4455 size = ETHERMIN; 4456 4457 /* alloc space for a raw data message */ 4458 nmp = vio_allocb(vswp->pri_tx_vmp); 4459 if (nmp == NULL) { 4460 (void) atomic_inc_32(&statsp->tx_pri_fail); 4461 DWARN(vswp, "vio_allocb failed\n"); 4462 goto send_pkt_exit; 4463 } 4464 pkt = (vio_raw_data_msg_t *)nmp->b_rptr; 4465 4466 /* copy frame into the payload of raw data message */ 4467 dst = (caddr_t)pkt->data; 4468 for (bp = mp; bp != NULL; bp = bp->b_cont) { 4469 mblksz = MBLKL(bp); 4470 bcopy(bp->b_rptr, dst, mblksz); 4471 dst += mblksz; 4472 } 4473 4474 /* setup the raw data msg */ 4475 pkt->tag.vio_msgtype = VIO_TYPE_DATA; 4476 pkt->tag.vio_subtype = VIO_SUBTYPE_INFO; 4477 pkt->tag.vio_subtype_env = VIO_PKT_DATA; 4478 pkt->tag.vio_sid = ldcp->local_session; 4479 nbytes = VIO_PKT_DATA_HDRSIZE + size; 4480 4481 /* send the msg over ldc */ 4482 rv = vsw_send_msg(ldcp, (void *)pkt, nbytes, B_TRUE); 4483 if (rv != 0) { 4484 (void) atomic_inc_32(&statsp->tx_pri_fail); 4485 DWARN(vswp, "%s(%lld) Error sending priority frame\n", __func__, 4486 ldcp->ldc_id); 4487 goto send_pkt_exit; 4488 } 4489 4490 /* update stats */ 4491 (void) atomic_inc_64(&statsp->tx_pri_packets); 4492 (void) atomic_add_64(&statsp->tx_pri_bytes, size); 4493 4494 send_pkt_exit: 4495 if (nmp != NULL) 4496 freemsg(nmp); 4497 freemsg(mp); 4498 } 4499 4500 /* 4501 * Transmit the packet over the given LDC channel. 4502 * 4503 * The 'retries' argument indicates how many times a packet 4504 * is retried before it is dropped. Note, the retry is done 4505 * only for a resource related failure, for all other failures 4506 * the packet is dropped immediately. 4507 */ 4508 static int 4509 vsw_ldcsend(vsw_ldc_t *ldcp, mblk_t *mp, uint32_t retries) 4510 { 4511 int i; 4512 int rc; 4513 int status = 0; 4514 vsw_port_t *port = ldcp->ldc_port; 4515 dring_info_t *dp = NULL; 4516 4517 4518 for (i = 0; i < retries; ) { 4519 /* 4520 * Send the message out using the appropriate 4521 * transmit function which will free the mblk when it 4522 * is finished with it. 4523 */ 4524 mutex_enter(&port->tx_lock); 4525 if (port->transmit != NULL) { 4526 status = (*port->transmit)(ldcp, mp); 4527 } 4528 if (status == LDC_TX_SUCCESS) { 4529 mutex_exit(&port->tx_lock); 4530 break; 4531 } 4532 i++; /* increment the counter here */ 4533 4534 /* If it's the last retry, then update the oerrors stat */ 4535 if ((i == retries) && (status == LDC_TX_NORESOURCES)) { 4536 ldcp->ldc_stats.oerrors++; 4537 } 4538 mutex_exit(&port->tx_lock); 4539 4540 if (status != LDC_TX_NORESOURCES) { 4541 /* 4542 * No retrying required for errors unrelated 4543 * to resources. 4544 */ 4545 break; 4546 } 4547 READ_ENTER(&ldcp->lane_out.dlistrw); 4548 if (((dp = ldcp->lane_out.dringp) != NULL) && 4549 ((VSW_VER_GTEQ(ldcp, 1, 2) && 4550 (ldcp->lane_out.xfer_mode & VIO_DRING_MODE_V1_2)) || 4551 ((VSW_VER_LT(ldcp, 1, 2) && 4552 (ldcp->lane_out.xfer_mode == VIO_DRING_MODE_V1_0))))) { 4553 rc = vsw_reclaim_dring(dp, dp->end_idx); 4554 } else { 4555 /* 4556 * If there is no dring or the xfer_mode is 4557 * set to DESC_MODE (i.e., OBP), then simply break here. 4558 */ 4559 RW_EXIT(&ldcp->lane_out.dlistrw); 4560 break; 4561 } 4562 RW_EXIT(&ldcp->lane_out.dlistrw); 4563 4564 /* 4565 * Delay only if none were reclaimed 4566 * and it's not the last retry.
4567 */ 4568 if ((rc == 0) && (i < retries)) { 4569 delay(drv_usectohz(vsw_ldc_tx_delay)); 4570 } 4571 } 4572 freemsg(mp); 4573 return (status); 4574 } 4575 4576 /* 4577 * Send packet out via descriptor ring to a logical device. 4578 */ 4579 static int 4580 vsw_dringsend(vsw_ldc_t *ldcp, mblk_t *mp) 4581 { 4582 vio_dring_msg_t dring_pkt; 4583 dring_info_t *dp = NULL; 4584 vsw_private_desc_t *priv_desc = NULL; 4585 vnet_public_desc_t *pub = NULL; 4586 vsw_t *vswp = ldcp->ldc_vswp; 4587 mblk_t *bp; 4588 size_t n, size; 4589 caddr_t bufp; 4590 int idx; 4591 int status = LDC_TX_SUCCESS; 4592 struct ether_header *ehp = (struct ether_header *)mp->b_rptr; 4593 lane_t *lp = &ldcp->lane_out; 4594 4595 D1(vswp, "%s(%lld): enter\n", __func__, ldcp->ldc_id); 4596 4597 /* TODO: make test a macro */ 4598 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 4599 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 4600 DWARN(vswp, "%s(%lld) status(%d) lstate(0x%llx), dropping " 4601 "packet\n", __func__, ldcp->ldc_id, ldcp->ldc_status, 4602 ldcp->lane_out.lstate); 4603 ldcp->ldc_stats.oerrors++; 4604 return (LDC_TX_FAILURE); 4605 } 4606 4607 /* 4608 * Note - using first ring only, this may change 4609 * in the future. 4610 */ 4611 READ_ENTER(&ldcp->lane_out.dlistrw); 4612 if ((dp = ldcp->lane_out.dringp) == NULL) { 4613 RW_EXIT(&ldcp->lane_out.dlistrw); 4614 DERR(vswp, "%s(%lld): no dring for outbound lane on" 4615 " channel %d", __func__, ldcp->ldc_id, ldcp->ldc_id); 4616 ldcp->ldc_stats.oerrors++; 4617 return (LDC_TX_FAILURE); 4618 } 4619 4620 size = msgsize(mp); 4621 if (size > (size_t)lp->mtu) { 4622 RW_EXIT(&ldcp->lane_out.dlistrw); 4623 DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__, 4624 ldcp->ldc_id, size); 4625 ldcp->ldc_stats.oerrors++; 4626 return (LDC_TX_FAILURE); 4627 } 4628 4629 /* 4630 * Find a free descriptor 4631 * 4632 * Note: for the moment we are assuming that we will only 4633 * have one dring going from the switch to each of its 4634 * peers. This may change in the future. 4635 */ 4636 if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) { 4637 D2(vswp, "%s(%lld): no descriptor available for ring " 4638 "at 0x%llx", __func__, ldcp->ldc_id, dp); 4639 4640 /* nothing more we can do */ 4641 status = LDC_TX_NORESOURCES; 4642 ldcp->ldc_stats.tx_no_desc++; 4643 goto vsw_dringsend_free_exit; 4644 } else { 4645 D2(vswp, "%s(%lld): free private descriptor found at pos %ld " 4646 "addr 0x%llx\n", __func__, ldcp->ldc_id, idx, priv_desc); 4647 } 4648 4649 /* copy data into the descriptor */ 4650 bufp = priv_desc->datap; 4651 bufp += VNET_IPALIGN; 4652 for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) { 4653 n = MBLKL(bp); 4654 bcopy(bp->b_rptr, bufp, n); 4655 bufp += n; 4656 } 4657 4658 priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size; 4659 4660 pub = priv_desc->descp; 4661 pub->nbytes = priv_desc->datalen; 4662 4663 /* update statistics */ 4664 if (IS_BROADCAST(ehp)) 4665 ldcp->ldc_stats.brdcstxmt++; 4666 else if (IS_MULTICAST(ehp)) 4667 ldcp->ldc_stats.multixmt++; 4668 ldcp->ldc_stats.opackets++; 4669 ldcp->ldc_stats.obytes += priv_desc->datalen; 4670 4671 mutex_enter(&priv_desc->dstate_lock); 4672 pub->hdr.dstate = VIO_DESC_READY; 4673 mutex_exit(&priv_desc->dstate_lock); 4674 4675 /* 4676 * Determine whether or not we need to send a message to our 4677 * peer prompting them to read our newly updated descriptor(s). 
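 *
 * In outline (this only restates the code below): the prompt is
 * sent only when dp->restart_reqd is set, which happens at ring
 * creation time and again in the VIO_SUBTYPE_ACK handler when the
 * peer reported VIO_DP_STOPPED with nothing left in READY state.
 * The message that then goes out looks like:
 *
 *	start_idx = (last_ack_recv == -1) ? 0 :
 *	    (last_ack_recv + 1) % num_descriptors;
 *	end_idx = -1;	(leaves the range open ended)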
4678 */ 4679 mutex_enter(&dp->restart_lock); 4680 if (dp->restart_reqd) { 4681 dp->restart_reqd = B_FALSE; 4682 ldcp->ldc_stats.dring_data_msgs++; 4683 mutex_exit(&dp->restart_lock); 4684 4685 /* 4686 * Send a vio_dring_msg to peer to prompt them to read 4687 * the updated descriptor ring. 4688 */ 4689 dring_pkt.tag.vio_msgtype = VIO_TYPE_DATA; 4690 dring_pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 4691 dring_pkt.tag.vio_subtype_env = VIO_DRING_DATA; 4692 dring_pkt.tag.vio_sid = ldcp->local_session; 4693 4694 /* Note - for now using first ring */ 4695 dring_pkt.dring_ident = dp->ident; 4696 4697 /* 4698 * If last_ack_recv is -1 then we know we've not 4699 * received any ack's yet, so this must be the first 4700 * msg sent, so set the start to the begining of the ring. 4701 */ 4702 mutex_enter(&dp->dlock); 4703 if (dp->last_ack_recv == -1) { 4704 dring_pkt.start_idx = 0; 4705 } else { 4706 dring_pkt.start_idx = 4707 (dp->last_ack_recv + 1) % dp->num_descriptors; 4708 } 4709 dring_pkt.end_idx = -1; 4710 mutex_exit(&dp->dlock); 4711 4712 D3(vswp, "%s(%lld): dring 0x%llx : ident 0x%llx\n", __func__, 4713 ldcp->ldc_id, dp, dring_pkt.dring_ident); 4714 D3(vswp, "%s(%lld): start %lld : end %lld :\n", 4715 __func__, ldcp->ldc_id, dring_pkt.start_idx, 4716 dring_pkt.end_idx); 4717 4718 RW_EXIT(&ldcp->lane_out.dlistrw); 4719 4720 (void) vsw_send_msg(ldcp, (void *)&dring_pkt, 4721 sizeof (vio_dring_msg_t), B_TRUE); 4722 4723 return (status); 4724 4725 } else { 4726 mutex_exit(&dp->restart_lock); 4727 D2(vswp, "%s(%lld): updating descp %d", __func__, 4728 ldcp->ldc_id, idx); 4729 } 4730 4731 vsw_dringsend_free_exit: 4732 4733 RW_EXIT(&ldcp->lane_out.dlistrw); 4734 4735 D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id); 4736 return (status); 4737 } 4738 4739 /* 4740 * Send an in-band descriptor message over ldc. 4741 */ 4742 static int 4743 vsw_descrsend(vsw_ldc_t *ldcp, mblk_t *mp) 4744 { 4745 vsw_t *vswp = ldcp->ldc_vswp; 4746 vnet_ibnd_desc_t ibnd_msg; 4747 vsw_private_desc_t *priv_desc = NULL; 4748 dring_info_t *dp = NULL; 4749 size_t n, size = 0; 4750 caddr_t bufp; 4751 mblk_t *bp; 4752 int idx, i; 4753 int status = LDC_TX_SUCCESS; 4754 static int warn_msg = 1; 4755 lane_t *lp = &ldcp->lane_out; 4756 4757 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 4758 4759 ASSERT(mp != NULL); 4760 4761 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 4762 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 4763 DERR(vswp, "%s(%lld) status(%d) state (0x%llx), dropping pkt", 4764 __func__, ldcp->ldc_id, ldcp->ldc_status, 4765 ldcp->lane_out.lstate); 4766 ldcp->ldc_stats.oerrors++; 4767 return (LDC_TX_FAILURE); 4768 } 4769 4770 /* 4771 * only expect single dring to exist, which we use 4772 * as an internal buffer, rather than a transfer channel. 
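 *
 * In other words, in in-band (VIO_DESC_MODE) operation this ring is
 * never registered with the peer; the LDC memory cookies for each
 * buffer travel inside the vnet_ibnd_desc_t message itself, roughly
 * (a restatement of the copy done further below):
 *
 *	ibnd_msg.memcookie[0 .. ncookies - 1] = priv_desc->memcookie[];
 *	ibnd_msg.ncookies = priv_desc->ncookies;
 *	ibnd_msg.nbytes = size;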
4773 */ 4774 READ_ENTER(&ldcp->lane_out.dlistrw); 4775 if ((dp = ldcp->lane_out.dringp) == NULL) { 4776 DERR(vswp, "%s(%lld): no dring for outbound lane", 4777 __func__, ldcp->ldc_id); 4778 DERR(vswp, "%s(%lld) status(%d) state (0x%llx)", __func__, 4779 ldcp->ldc_id, ldcp->ldc_status, ldcp->lane_out.lstate); 4780 RW_EXIT(&ldcp->lane_out.dlistrw); 4781 ldcp->ldc_stats.oerrors++; 4782 return (LDC_TX_FAILURE); 4783 } 4784 4785 size = msgsize(mp); 4786 if (size > (size_t)lp->mtu) { 4787 RW_EXIT(&ldcp->lane_out.dlistrw); 4788 DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__, 4789 ldcp->ldc_id, size); 4790 ldcp->ldc_stats.oerrors++; 4791 return (LDC_TX_FAILURE); 4792 } 4793 4794 /* 4795 * Find a free descriptor in our buffer ring 4796 */ 4797 if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) { 4798 RW_EXIT(&ldcp->lane_out.dlistrw); 4799 if (warn_msg) { 4800 DERR(vswp, "%s(%lld): no descriptor available for ring " 4801 "at 0x%llx", __func__, ldcp->ldc_id, dp); 4802 warn_msg = 0; 4803 } 4804 4805 /* nothing more we can do */ 4806 status = LDC_TX_NORESOURCES; 4807 goto vsw_descrsend_free_exit; 4808 } else { 4809 D2(vswp, "%s(%lld): free private descriptor found at pos " 4810 "%ld addr 0x%x\n", __func__, ldcp->ldc_id, idx, priv_desc); 4811 warn_msg = 1; 4812 } 4813 4814 /* copy data into the descriptor */ 4815 bufp = priv_desc->datap; 4816 for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) { 4817 n = MBLKL(bp); 4818 bcopy(bp->b_rptr, bufp, n); 4819 bufp += n; 4820 } 4821 4822 priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size; 4823 4824 /* create and send the in-band descp msg */ 4825 ibnd_msg.hdr.tag.vio_msgtype = VIO_TYPE_DATA; 4826 ibnd_msg.hdr.tag.vio_subtype = VIO_SUBTYPE_INFO; 4827 ibnd_msg.hdr.tag.vio_subtype_env = VIO_DESC_DATA; 4828 ibnd_msg.hdr.tag.vio_sid = ldcp->local_session; 4829 4830 /* 4831 * Copy the mem cookies describing the data from the 4832 * private region of the descriptor ring into the inband 4833 * descriptor. 
4834 */ 4835 for (i = 0; i < priv_desc->ncookies; i++) { 4836 bcopy(&priv_desc->memcookie[i], &ibnd_msg.memcookie[i], 4837 sizeof (ldc_mem_cookie_t)); 4838 } 4839 4840 ibnd_msg.hdr.desc_handle = idx; 4841 ibnd_msg.ncookies = priv_desc->ncookies; 4842 ibnd_msg.nbytes = size; 4843 4844 ldcp->ldc_stats.opackets++; 4845 ldcp->ldc_stats.obytes += size; 4846 4847 RW_EXIT(&ldcp->lane_out.dlistrw); 4848 4849 (void) vsw_send_msg(ldcp, (void *)&ibnd_msg, 4850 sizeof (vnet_ibnd_desc_t), B_TRUE); 4851 4852 vsw_descrsend_free_exit: 4853 4854 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 4855 return (status); 4856 } 4857 4858 static void 4859 vsw_send_ver(void *arg) 4860 { 4861 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 4862 vsw_t *vswp = ldcp->ldc_vswp; 4863 lane_t *lp = &ldcp->lane_out; 4864 vio_ver_msg_t ver_msg; 4865 4866 D1(vswp, "%s enter", __func__); 4867 4868 ver_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 4869 ver_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 4870 ver_msg.tag.vio_subtype_env = VIO_VER_INFO; 4871 ver_msg.tag.vio_sid = ldcp->local_session; 4872 4873 if (vsw_obp_ver_proto_workaround == B_FALSE) { 4874 ver_msg.ver_major = vsw_versions[0].ver_major; 4875 ver_msg.ver_minor = vsw_versions[0].ver_minor; 4876 } else { 4877 /* use the major,minor that we've ack'd */ 4878 lane_t *lpi = &ldcp->lane_in; 4879 ver_msg.ver_major = lpi->ver_major; 4880 ver_msg.ver_minor = lpi->ver_minor; 4881 } 4882 ver_msg.dev_class = VDEV_NETWORK_SWITCH; 4883 4884 lp->lstate |= VSW_VER_INFO_SENT; 4885 lp->ver_major = ver_msg.ver_major; 4886 lp->ver_minor = ver_msg.ver_minor; 4887 4888 DUMP_TAG(ver_msg.tag); 4889 4890 (void) vsw_send_msg(ldcp, &ver_msg, sizeof (vio_ver_msg_t), B_TRUE); 4891 4892 D1(vswp, "%s (%d): exit", __func__, ldcp->ldc_id); 4893 } 4894 4895 static void 4896 vsw_send_attr(vsw_ldc_t *ldcp) 4897 { 4898 vsw_t *vswp = ldcp->ldc_vswp; 4899 lane_t *lp = &ldcp->lane_out; 4900 vnet_attr_msg_t attr_msg; 4901 4902 D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id); 4903 4904 /* 4905 * Subtype is set to INFO by default 4906 */ 4907 attr_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 4908 attr_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 4909 attr_msg.tag.vio_subtype_env = VIO_ATTR_INFO; 4910 attr_msg.tag.vio_sid = ldcp->local_session; 4911 4912 /* payload copied from default settings for lane */ 4913 attr_msg.mtu = lp->mtu; 4914 attr_msg.addr_type = lp->addr_type; 4915 attr_msg.xfer_mode = lp->xfer_mode; 4916 attr_msg.ack_freq = lp->xfer_mode; 4917 4918 READ_ENTER(&vswp->if_lockrw); 4919 attr_msg.addr = vnet_macaddr_strtoul((vswp->if_addr).ether_addr_octet); 4920 RW_EXIT(&vswp->if_lockrw); 4921 4922 ldcp->lane_out.lstate |= VSW_ATTR_INFO_SENT; 4923 4924 DUMP_TAG(attr_msg.tag); 4925 4926 (void) vsw_send_msg(ldcp, &attr_msg, sizeof (vnet_attr_msg_t), B_TRUE); 4927 4928 D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id); 4929 } 4930 4931 /* 4932 * Create dring info msg (which also results in the creation of 4933 * a dring). 4934 */ 4935 static vio_dring_reg_msg_t * 4936 vsw_create_dring_info_pkt(vsw_ldc_t *ldcp) 4937 { 4938 vio_dring_reg_msg_t *mp; 4939 dring_info_t *dp; 4940 vsw_t *vswp = ldcp->ldc_vswp; 4941 int rv; 4942 4943 D1(vswp, "vsw_create_dring_info_pkt enter\n"); 4944 4945 /* 4946 * If we can't create a dring, obviously no point sending 4947 * a message. 
4948 */ 4949 if ((dp = vsw_create_dring(ldcp)) == NULL) 4950 return (NULL); 4951 4952 /* Allocate pools of receive mblks */ 4953 rv = vsw_init_multipools(ldcp, vswp); 4954 if (rv) { 4955 DWARN(vswp, "%s: unable to create free mblk pools for" 4956 " channel %ld (rv %d)", __func__, ldcp->ldc_id, rv); 4957 vsw_free_lane_resources(ldcp, OUTBOUND); 4958 return (NULL); 4959 } 4960 4961 mp = kmem_zalloc(sizeof (vio_dring_reg_msg_t), KM_SLEEP); 4962 4963 mp->tag.vio_msgtype = VIO_TYPE_CTRL; 4964 mp->tag.vio_subtype = VIO_SUBTYPE_INFO; 4965 mp->tag.vio_subtype_env = VIO_DRING_REG; 4966 mp->tag.vio_sid = ldcp->local_session; 4967 4968 /* payload */ 4969 mp->num_descriptors = dp->num_descriptors; 4970 mp->descriptor_size = dp->descriptor_size; 4971 mp->options = dp->options; 4972 mp->ncookies = dp->ncookies; 4973 bcopy(&dp->cookie[0], &mp->cookie[0], sizeof (ldc_mem_cookie_t)); 4974 4975 mp->dring_ident = 0; 4976 4977 D1(vswp, "vsw_create_dring_info_pkt exit\n"); 4978 4979 return (mp); 4980 } 4981 4982 static void 4983 vsw_send_dring_info(vsw_ldc_t *ldcp) 4984 { 4985 vio_dring_reg_msg_t *dring_msg; 4986 vsw_t *vswp = ldcp->ldc_vswp; 4987 4988 D1(vswp, "%s: (%ld) enter", __func__, ldcp->ldc_id); 4989 4990 dring_msg = vsw_create_dring_info_pkt(ldcp); 4991 if (dring_msg == NULL) { 4992 cmn_err(CE_WARN, "!vsw%d: %s: error creating msg", 4993 vswp->instance, __func__); 4994 return; 4995 } 4996 4997 ldcp->lane_out.lstate |= VSW_DRING_INFO_SENT; 4998 4999 DUMP_TAG_PTR((vio_msg_tag_t *)dring_msg); 5000 5001 (void) vsw_send_msg(ldcp, dring_msg, 5002 sizeof (vio_dring_reg_msg_t), B_TRUE); 5003 5004 kmem_free(dring_msg, sizeof (vio_dring_reg_msg_t)); 5005 5006 D1(vswp, "%s: (%ld) exit", __func__, ldcp->ldc_id); 5007 } 5008 5009 static void 5010 vsw_send_rdx(vsw_ldc_t *ldcp) 5011 { 5012 vsw_t *vswp = ldcp->ldc_vswp; 5013 vio_rdx_msg_t rdx_msg; 5014 5015 D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id); 5016 5017 rdx_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 5018 rdx_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 5019 rdx_msg.tag.vio_subtype_env = VIO_RDX; 5020 rdx_msg.tag.vio_sid = ldcp->local_session; 5021 5022 ldcp->lane_in.lstate |= VSW_RDX_INFO_SENT; 5023 5024 DUMP_TAG(rdx_msg.tag); 5025 5026 (void) vsw_send_msg(ldcp, &rdx_msg, sizeof (vio_rdx_msg_t), B_TRUE); 5027 5028 D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id); 5029 } 5030 5031 /* 5032 * Generic routine to send message out over ldc channel. 5033 * 5034 * It is possible that when we attempt to write over the ldc channel 5035 * that we get notified that it has been reset. Depending on the value 5036 * of the handle_reset flag we either handle that event here or simply 5037 * notify the caller that the channel was reset. 
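 *
 * Both calling patterns appear elsewhere in this file; in sketch
 * form:
 *
 *	(void) vsw_send_msg(ldcp, &msg, sizeof (msg), B_TRUE);
 *		simple case, an ECONNRESET is handled right here via
 *		vsw_process_conn_evt(ldcp, VSW_CONN_RESET);
 *
 *	rv = vsw_send_msg(ldcp, &msg, sizeof (msg), B_FALSE);
 *	...
 *	RW_EXIT(&ldcp->lane_out.dlistrw);
 *	if (rv == ECONNRESET)
 *		vsw_process_conn_evt(ldcp, VSW_CONN_RESET);
 *		used where the caller still holds the dlistrw rwlock,
 *		which must never be held across a channel reset.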
5038 */ 5039 int 5040 vsw_send_msg(vsw_ldc_t *ldcp, void *msgp, int size, boolean_t handle_reset) 5041 { 5042 int rv; 5043 size_t msglen = size; 5044 vio_msg_tag_t *tag = (vio_msg_tag_t *)msgp; 5045 vsw_t *vswp = ldcp->ldc_vswp; 5046 vio_dring_msg_t *dmsg; 5047 vio_raw_data_msg_t *rmsg; 5048 vnet_ibnd_desc_t *imsg; 5049 boolean_t data_msg = B_FALSE; 5050 5051 D1(vswp, "vsw_send_msg (%lld) enter : sending %d bytes", 5052 ldcp->ldc_id, size); 5053 5054 D2(vswp, "send_msg: type 0x%llx", tag->vio_msgtype); 5055 D2(vswp, "send_msg: stype 0x%llx", tag->vio_subtype); 5056 D2(vswp, "send_msg: senv 0x%llx", tag->vio_subtype_env); 5057 5058 mutex_enter(&ldcp->ldc_txlock); 5059 5060 if (tag->vio_subtype == VIO_SUBTYPE_INFO) { 5061 if (tag->vio_subtype_env == VIO_DRING_DATA) { 5062 dmsg = (vio_dring_msg_t *)tag; 5063 dmsg->seq_num = ldcp->lane_out.seq_num; 5064 data_msg = B_TRUE; 5065 } else if (tag->vio_subtype_env == VIO_PKT_DATA) { 5066 rmsg = (vio_raw_data_msg_t *)tag; 5067 rmsg->seq_num = ldcp->lane_out.seq_num; 5068 data_msg = B_TRUE; 5069 } else if (tag->vio_subtype_env == VIO_DESC_DATA) { 5070 imsg = (vnet_ibnd_desc_t *)tag; 5071 imsg->hdr.seq_num = ldcp->lane_out.seq_num; 5072 data_msg = B_TRUE; 5073 } 5074 } 5075 5076 do { 5077 msglen = size; 5078 rv = ldc_write(ldcp->ldc_handle, (caddr_t)msgp, &msglen); 5079 } while (rv == EWOULDBLOCK && --vsw_wretries > 0); 5080 5081 if (rv == 0 && data_msg == B_TRUE) { 5082 ldcp->lane_out.seq_num++; 5083 } 5084 5085 if ((rv != 0) || (msglen != size)) { 5086 DERR(vswp, "vsw_send_msg:ldc_write failed: chan(%lld) rv(%d) " 5087 "size (%d) msglen(%d)\n", ldcp->ldc_id, rv, size, msglen); 5088 ldcp->ldc_stats.oerrors++; 5089 } 5090 5091 mutex_exit(&ldcp->ldc_txlock); 5092 5093 /* 5094 * If channel has been reset we either handle it here or 5095 * simply report back that it has been reset and let caller 5096 * decide what to do. 5097 */ 5098 if (rv == ECONNRESET) { 5099 DWARN(vswp, "%s (%lld) channel reset", __func__, ldcp->ldc_id); 5100 5101 /* 5102 * N.B - must never be holding the dlistrw lock when 5103 * we do a reset of the channel. 5104 */ 5105 if (handle_reset) { 5106 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 5107 } 5108 } 5109 5110 return (rv); 5111 } 5112 5113 /* 5114 * Remove the specified address from the list of address maintained 5115 * in this port node. 
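 *
 * A minimal usage sketch (variable names are illustrative): the
 * matching entry is unlinked and returned still allocated, so the
 * caller is expected to release it, e.g. with kmem_free(); a NULL
 * return means the address was not on the list.
 *
 *	mcst_addr_t *p = vsw_del_addr(VSW_VNETPORT, portp, addr);
 *	if (p != NULL)
 *		kmem_free(p, sizeof (mcst_addr_t));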
5116 */ 5117 mcst_addr_t * 5118 vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr) 5119 { 5120 vsw_t *vswp = NULL; 5121 vsw_port_t *port = NULL; 5122 mcst_addr_t *prev_p = NULL; 5123 mcst_addr_t *curr_p = NULL; 5124 5125 D1(NULL, "%s: enter : devtype %d : addr 0x%llx", 5126 __func__, devtype, addr); 5127 5128 if (devtype == VSW_VNETPORT) { 5129 port = (vsw_port_t *)arg; 5130 mutex_enter(&port->mca_lock); 5131 prev_p = curr_p = port->mcap; 5132 } else { 5133 vswp = (vsw_t *)arg; 5134 mutex_enter(&vswp->mca_lock); 5135 prev_p = curr_p = vswp->mcap; 5136 } 5137 5138 while (curr_p != NULL) { 5139 if (curr_p->addr == addr) { 5140 D2(NULL, "%s: address found", __func__); 5141 /* match found */ 5142 if (prev_p == curr_p) { 5143 /* list head */ 5144 if (devtype == VSW_VNETPORT) 5145 port->mcap = curr_p->nextp; 5146 else 5147 vswp->mcap = curr_p->nextp; 5148 } else { 5149 prev_p->nextp = curr_p->nextp; 5150 } 5151 break; 5152 } else { 5153 prev_p = curr_p; 5154 curr_p = curr_p->nextp; 5155 } 5156 } 5157 5158 if (devtype == VSW_VNETPORT) 5159 mutex_exit(&port->mca_lock); 5160 else 5161 mutex_exit(&vswp->mca_lock); 5162 5163 D1(NULL, "%s: exit", __func__); 5164 5165 return (curr_p); 5166 } 5167 5168 /* 5169 * Creates a descriptor ring (dring) and links it into the 5170 * link of outbound drings for this channel. 5171 * 5172 * Returns NULL if creation failed. 5173 */ 5174 static dring_info_t * 5175 vsw_create_dring(vsw_ldc_t *ldcp) 5176 { 5177 vsw_private_desc_t *priv_addr = NULL; 5178 vsw_t *vswp = ldcp->ldc_vswp; 5179 ldc_mem_info_t minfo; 5180 dring_info_t *dp, *tp; 5181 int i; 5182 5183 dp = (dring_info_t *)kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 5184 5185 mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL); 5186 5187 /* create public section of ring */ 5188 if ((ldc_mem_dring_create(vsw_ntxds, 5189 VSW_PUB_SIZE, &dp->handle)) != 0) { 5190 5191 DERR(vswp, "vsw_create_dring(%lld): ldc dring create " 5192 "failed", ldcp->ldc_id); 5193 goto create_fail_exit; 5194 } 5195 5196 ASSERT(dp->handle != NULL); 5197 5198 /* 5199 * Get the base address of the public section of the ring. 5200 */ 5201 if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) { 5202 DERR(vswp, "vsw_create_dring(%lld): dring info failed\n", 5203 ldcp->ldc_id); 5204 goto dring_fail_exit; 5205 } else { 5206 ASSERT(minfo.vaddr != 0); 5207 dp->pub_addr = minfo.vaddr; 5208 } 5209 5210 dp->num_descriptors = vsw_ntxds; 5211 dp->descriptor_size = VSW_PUB_SIZE; 5212 dp->options = VIO_TX_DRING; 5213 dp->ncookies = 1; /* guaranteed by ldc */ 5214 5215 /* 5216 * create private portion of ring 5217 */ 5218 dp->priv_addr = (vsw_private_desc_t *)kmem_zalloc( 5219 (sizeof (vsw_private_desc_t) * vsw_ntxds), KM_SLEEP); 5220 5221 if (vsw_setup_ring(ldcp, dp)) { 5222 DERR(vswp, "%s: unable to setup ring", __func__); 5223 goto dring_fail_exit; 5224 } 5225 5226 /* haven't used any descriptors yet */ 5227 dp->end_idx = 0; 5228 dp->last_ack_recv = -1; 5229 5230 /* bind dring to the channel */ 5231 if ((ldc_mem_dring_bind(ldcp->ldc_handle, dp->handle, 5232 LDC_DIRECT_MAP | LDC_SHADOW_MAP, LDC_MEM_RW, 5233 &dp->cookie[0], &dp->ncookies)) != 0) { 5234 DERR(vswp, "vsw_create_dring: unable to bind to channel " 5235 "%lld", ldcp->ldc_id); 5236 goto dring_fail_exit; 5237 } 5238 5239 mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL); 5240 dp->restart_reqd = B_TRUE; 5241 5242 /* 5243 * Only ever create rings for outgoing lane. Link it onto 5244 * end of list. 
5245 */ 5246 WRITE_ENTER(&ldcp->lane_out.dlistrw); 5247 if (ldcp->lane_out.dringp == NULL) { 5248 D2(vswp, "vsw_create_dring: adding first outbound ring"); 5249 ldcp->lane_out.dringp = dp; 5250 } else { 5251 tp = ldcp->lane_out.dringp; 5252 while (tp->next != NULL) 5253 tp = tp->next; 5254 5255 tp->next = dp; 5256 } 5257 RW_EXIT(&ldcp->lane_out.dlistrw); 5258 5259 return (dp); 5260 5261 dring_fail_exit: 5262 (void) ldc_mem_dring_destroy(dp->handle); 5263 5264 create_fail_exit: 5265 if (dp->priv_addr != NULL) { 5266 priv_addr = dp->priv_addr; 5267 for (i = 0; i < vsw_ntxds; i++) { 5268 if (priv_addr->memhandle != NULL) 5269 (void) ldc_mem_free_handle( 5270 priv_addr->memhandle); 5271 priv_addr++; 5272 } 5273 kmem_free(dp->priv_addr, 5274 (sizeof (vsw_private_desc_t) * vsw_ntxds)); 5275 } 5276 mutex_destroy(&dp->dlock); 5277 5278 kmem_free(dp, sizeof (dring_info_t)); 5279 return (NULL); 5280 } 5281 5282 /* 5283 * Create a ring consisting of just a private portion and link 5284 * it into the list of rings for the outbound lane. 5285 * 5286 * These type of rings are used primarily for temporary data 5287 * storage (i.e. as data buffers). 5288 */ 5289 void 5290 vsw_create_privring(vsw_ldc_t *ldcp) 5291 { 5292 dring_info_t *dp, *tp; 5293 vsw_t *vswp = ldcp->ldc_vswp; 5294 5295 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 5296 5297 dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 5298 5299 mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL); 5300 5301 /* no public section */ 5302 dp->pub_addr = NULL; 5303 5304 dp->priv_addr = kmem_zalloc( 5305 (sizeof (vsw_private_desc_t) * vsw_ntxds), KM_SLEEP); 5306 5307 dp->num_descriptors = vsw_ntxds; 5308 5309 if (vsw_setup_ring(ldcp, dp)) { 5310 DERR(vswp, "%s: setup of ring failed", __func__); 5311 kmem_free(dp->priv_addr, 5312 (sizeof (vsw_private_desc_t) * vsw_ntxds)); 5313 mutex_destroy(&dp->dlock); 5314 kmem_free(dp, sizeof (dring_info_t)); 5315 return; 5316 } 5317 5318 /* haven't used any descriptors yet */ 5319 dp->end_idx = 0; 5320 5321 mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL); 5322 dp->restart_reqd = B_TRUE; 5323 5324 /* 5325 * Only ever create rings for outgoing lane. Link it onto 5326 * end of list. 5327 */ 5328 WRITE_ENTER(&ldcp->lane_out.dlistrw); 5329 if (ldcp->lane_out.dringp == NULL) { 5330 D2(vswp, "%s: adding first outbound privring", __func__); 5331 ldcp->lane_out.dringp = dp; 5332 } else { 5333 tp = ldcp->lane_out.dringp; 5334 while (tp->next != NULL) 5335 tp = tp->next; 5336 5337 tp->next = dp; 5338 } 5339 RW_EXIT(&ldcp->lane_out.dlistrw); 5340 5341 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 5342 } 5343 5344 /* 5345 * Setup the descriptors in the dring. Returns 0 on success, 1 on 5346 * failure. 5347 */ 5348 int 5349 vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp) 5350 { 5351 vnet_public_desc_t *pub_addr = NULL; 5352 vsw_private_desc_t *priv_addr = NULL; 5353 vsw_t *vswp = ldcp->ldc_vswp; 5354 uint64_t *tmpp; 5355 uint64_t offset = 0; 5356 uint32_t ncookies = 0; 5357 static char *name = "vsw_setup_ring"; 5358 int i, j, nc, rv; 5359 size_t data_sz; 5360 void *data_addr; 5361 5362 priv_addr = dp->priv_addr; 5363 pub_addr = dp->pub_addr; 5364 5365 /* public section may be null but private should never be */ 5366 ASSERT(priv_addr != NULL); 5367 5368 /* 5369 * Allocate the region of memory which will be used to hold 5370 * the data the descriptors will refer to. 
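 *
 * As a worked example (the figures are illustrative, not the
 * defaults): if max_frame_size + VNET_IPALIGN + VNET_LDCALIGN came
 * to roughly 1.6K, data_sz would round up to 2K; with
 * vsw_ntxds = 256 the area allocated below would be 256 * 2K + 8K,
 * and the extra 8K lets the per-descriptor buffers be carved from
 * an 8K aligned base so that each one spans at most two 8K pages
 * (and therefore needs at most two LDC cookies).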
5371 */ 5372 data_sz = vswp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN; 5373 5374 /* 5375 * In order to ensure that the number of ldc cookies per descriptor is 5376 * limited to be within the default MAX_COOKIES (2), we take the steps 5377 * outlined below: 5378 * 5379 * Align the entire data buffer area to 8K and carve out per descriptor 5380 * data buffers starting from this 8K aligned base address. 5381 * 5382 * We round up the mtu specified to be a multiple of 2K or 4K. 5383 * For sizes up to 12K we round up the size to the next 2K. 5384 * For sizes > 12K we round up to the next 4K (otherwise sizes such as 5385 * 14K could end up needing 3 cookies, with the buffer spread across 5386 * 3 8K pages: 8K+6K, 2K+8K+2K, 6K+8K, ...). 5387 */ 5388 if (data_sz <= VNET_12K) { 5389 data_sz = VNET_ROUNDUP_2K(data_sz); 5390 } else { 5391 data_sz = VNET_ROUNDUP_4K(data_sz); 5392 } 5393 5394 dp->desc_data_sz = data_sz; 5395 5396 /* allocate extra 8K bytes for alignment */ 5397 dp->data_sz = (vsw_ntxds * data_sz) + VNET_8K; 5398 data_addr = kmem_alloc(dp->data_sz, KM_SLEEP); 5399 dp->data_addr = data_addr; 5400 5401 D2(vswp, "%s: allocated %lld bytes at 0x%llx\n", name, 5402 dp->data_sz, dp->data_addr); 5403 5404 /* align the starting address of the data area to 8K */ 5405 data_addr = (void *)VNET_ROUNDUP_8K((uintptr_t)data_addr); 5406 5407 tmpp = (uint64_t *)data_addr; 5408 offset = dp->desc_data_sz/sizeof (tmpp); 5409 5410 /* 5411 * Initialise some of the private and public (if they exist) 5412 * descriptor fields. 5413 */ 5414 for (i = 0; i < vsw_ntxds; i++) { 5415 mutex_init(&priv_addr->dstate_lock, NULL, MUTEX_DRIVER, NULL); 5416 5417 if ((ldc_mem_alloc_handle(ldcp->ldc_handle, 5418 &priv_addr->memhandle)) != 0) { 5419 DERR(vswp, "%s: alloc mem handle failed", name); 5420 goto setup_ring_cleanup; 5421 } 5422 5423 priv_addr->datap = (void *)tmpp; 5424 5425 rv = ldc_mem_bind_handle(priv_addr->memhandle, 5426 (caddr_t)priv_addr->datap, dp->desc_data_sz, 5427 LDC_SHADOW_MAP, LDC_MEM_R|LDC_MEM_W, 5428 &(priv_addr->memcookie[0]), &ncookies); 5429 if (rv != 0) { 5430 DERR(vswp, "%s(%lld): ldc_mem_bind_handle failed " 5431 "(rv %d)", name, ldcp->ldc_id, rv); 5432 goto setup_ring_cleanup; 5433 } 5434 priv_addr->bound = 1; 5435 5436 D2(vswp, "%s: %d: memcookie 0 : addr 0x%llx : size 0x%llx", 5437 name, i, priv_addr->memcookie[0].addr, 5438 priv_addr->memcookie[0].size); 5439 5440 if (ncookies >= (uint32_t)(VSW_MAX_COOKIES + 1)) { 5441 DERR(vswp, "%s(%lld) ldc_mem_bind_handle returned " 5442 "invalid num of cookies (%d) for size 0x%llx", 5443 name, ldcp->ldc_id, ncookies, VSW_RING_EL_DATA_SZ); 5444 5445 goto setup_ring_cleanup; 5446 } else { 5447 for (j = 1; j < ncookies; j++) { 5448 rv = ldc_mem_nextcookie(priv_addr->memhandle, 5449 &(priv_addr->memcookie[j])); 5450 if (rv != 0) { 5451 DERR(vswp, "%s: ldc_mem_nextcookie " 5452 "failed rv (%d)", name, rv); 5453 goto setup_ring_cleanup; 5454 } 5455 D3(vswp, "%s: memcookie %d : addr 0x%llx : " 5456 "size 0x%llx", name, j, 5457 priv_addr->memcookie[j].addr, 5458 priv_addr->memcookie[j].size); 5459 } 5460 5461 } 5462 priv_addr->ncookies = ncookies; 5463 priv_addr->dstate = VIO_DESC_FREE; 5464 5465 if (pub_addr != NULL) { 5466 5467 /* link pub and private sides */ 5468 priv_addr->descp = pub_addr; 5469 5470 pub_addr->ncookies = priv_addr->ncookies; 5471 5472 for (nc = 0; nc < pub_addr->ncookies; nc++) { 5473 bcopy(&priv_addr->memcookie[nc], 5474 &pub_addr->memcookie[nc], 5475 sizeof (ldc_mem_cookie_t)); 5476 } 5477 5478 pub_addr->hdr.dstate = VIO_DESC_FREE; 
5479 pub_addr++; 5480 } 5481 5482 /* 5483 * move to next element in the dring and the next 5484 * position in the data buffer. 5485 */ 5486 priv_addr++; 5487 tmpp += offset; 5488 } 5489 5490 return (0); 5491 5492 setup_ring_cleanup: 5493 priv_addr = dp->priv_addr; 5494 5495 for (j = 0; j < i; j++) { 5496 (void) ldc_mem_unbind_handle(priv_addr->memhandle); 5497 (void) ldc_mem_free_handle(priv_addr->memhandle); 5498 5499 mutex_destroy(&priv_addr->dstate_lock); 5500 5501 priv_addr++; 5502 } 5503 kmem_free(dp->data_addr, dp->data_sz); 5504 5505 return (1); 5506 } 5507 5508 /* 5509 * Searches the private section of a ring for a free descriptor, 5510 * starting at the location of the last free descriptor found 5511 * previously. 5512 * 5513 * Returns 0 if free descriptor is available, and updates state 5514 * of private descriptor to VIO_DESC_READY, otherwise returns 1. 5515 * 5516 * FUTURE: might need to return contiguous range of descriptors 5517 * as dring info msg assumes all will be contiguous. 5518 */ 5519 static int 5520 vsw_dring_find_free_desc(dring_info_t *dringp, 5521 vsw_private_desc_t **priv_p, int *idx) 5522 { 5523 vsw_private_desc_t *addr = NULL; 5524 int num = vsw_ntxds; 5525 int ret = 1; 5526 5527 D1(NULL, "%s enter\n", __func__); 5528 5529 ASSERT(dringp->priv_addr != NULL); 5530 5531 D2(NULL, "%s: searching ring, dringp 0x%llx : start pos %lld", 5532 __func__, dringp, dringp->end_idx); 5533 5534 addr = (vsw_private_desc_t *)dringp->priv_addr + dringp->end_idx; 5535 5536 mutex_enter(&addr->dstate_lock); 5537 if (addr->dstate == VIO_DESC_FREE) { 5538 addr->dstate = VIO_DESC_READY; 5539 *priv_p = addr; 5540 *idx = dringp->end_idx; 5541 dringp->end_idx = (dringp->end_idx + 1) % num; 5542 ret = 0; 5543 5544 } 5545 mutex_exit(&addr->dstate_lock); 5546 5547 /* ring full */ 5548 if (ret == 1) { 5549 D2(NULL, "%s: no desp free: started at %d", __func__, 5550 dringp->end_idx); 5551 } 5552 5553 D1(NULL, "%s: exit\n", __func__); 5554 5555 return (ret); 5556 } 5557 5558 /* 5559 * Map from a dring identifier to the ring itself. Returns 5560 * pointer to ring or NULL if no match found. 5561 * 5562 * Should be called with dlistrw rwlock held as reader. 5563 */ 5564 static dring_info_t * 5565 vsw_ident2dring(lane_t *lane, uint64_t ident) 5566 { 5567 dring_info_t *dp = NULL; 5568 5569 if ((dp = lane->dringp) == NULL) { 5570 return (NULL); 5571 } else { 5572 if (dp->ident == ident) 5573 return (dp); 5574 5575 while (dp != NULL) { 5576 if (dp->ident == ident) 5577 break; 5578 dp = dp->next; 5579 } 5580 } 5581 5582 return (dp); 5583 } 5584 5585 /* 5586 * Set the default lane attributes. These are copied into 5587 * the attr msg we send to our peer. If they are not acceptable 5588 * then (currently) the handshake ends. 5589 */ 5590 static void 5591 vsw_set_lane_attr(vsw_t *vswp, lane_t *lp) 5592 { 5593 bzero(lp, sizeof (lane_t)); 5594 5595 READ_ENTER(&vswp->if_lockrw); 5596 ether_copy(&(vswp->if_addr), &(lp->addr)); 5597 RW_EXIT(&vswp->if_lockrw); 5598 5599 lp->mtu = vswp->max_frame_size; 5600 lp->addr_type = ADDR_TYPE_MAC; 5601 lp->xfer_mode = VIO_DRING_MODE_V1_0; 5602 lp->ack_freq = 0; /* for shared mode */ 5603 lp->seq_num = VNET_ISS; 5604 } 5605 5606 /* 5607 * Verify that the attributes are acceptable. 5608 * 5609 * FUTURE: If some attributes are not acceptable, change them 5610 * our desired values. 
5611 */ 5612 static int 5613 vsw_check_attr(vnet_attr_msg_t *pkt, vsw_ldc_t *ldcp) 5614 { 5615 int ret = 0; 5616 struct ether_addr ea; 5617 vsw_port_t *port = ldcp->ldc_port; 5618 lane_t *lp = &ldcp->lane_out; 5619 5620 D1(NULL, "vsw_check_attr enter\n"); 5621 5622 if ((pkt->xfer_mode != VIO_DESC_MODE) && 5623 (pkt->xfer_mode != lp->xfer_mode)) { 5624 D2(NULL, "vsw_check_attr: unknown mode %x\n", pkt->xfer_mode); 5625 ret = 1; 5626 } 5627 5628 /* Only support MAC addresses at the moment. */ 5629 if ((pkt->addr_type != ADDR_TYPE_MAC) || (pkt->addr == 0)) { 5630 D2(NULL, "vsw_check_attr: invalid addr_type %x, " 5631 "or address 0x%llx\n", pkt->addr_type, pkt->addr); 5632 ret = 1; 5633 } 5634 5635 /* 5636 * MAC address supplied by device should match that stored 5637 * in the vsw-port OBP node. Need to decide what to do if they 5638 * don't match, for the moment just warn but don't fail. 5639 */ 5640 vnet_macaddr_ultostr(pkt->addr, ea.ether_addr_octet); 5641 if (ether_cmp(&ea, &port->p_macaddr) != 0) { 5642 DERR(NULL, "vsw_check_attr: device supplied address " 5643 "0x%llx doesn't match node address 0x%llx\n", 5644 pkt->addr, port->p_macaddr); 5645 } 5646 5647 /* 5648 * Ack freq only makes sense in pkt mode, in shared 5649 * mode the ring descriptors say whether or not to 5650 * send back an ACK. 5651 */ 5652 if ((VSW_VER_GTEQ(ldcp, 1, 2) && 5653 (pkt->xfer_mode & VIO_DRING_MODE_V1_2)) || 5654 (VSW_VER_LT(ldcp, 1, 2) && 5655 (pkt->xfer_mode == VIO_DRING_MODE_V1_0))) { 5656 if (pkt->ack_freq > 0) { 5657 D2(NULL, "vsw_check_attr: non-zero ack freq " 5658 "in SHM mode\n"); 5659 ret = 1; 5660 } 5661 } 5662 5663 if (VSW_VER_LT(ldcp, 1, 4)) { 5664 /* versions < 1.4, mtu must match */ 5665 if (pkt->mtu != lp->mtu) { 5666 D2(NULL, "vsw_check_attr: invalid MTU (0x%llx)\n", 5667 pkt->mtu); 5668 ret = 1; 5669 } 5670 } else { 5671 /* Ver >= 1.4, validate that the peer's mtu is at least ETHERMAX */ 5672 if (pkt->mtu < ETHERMAX) { 5673 ret = 1; 5674 } 5675 } 5676 5677 D1(NULL, "vsw_check_attr exit\n"); 5678 5679 return (ret); 5680 } 5681 5682 /* 5683 * Returns 1 if there is a problem, 0 otherwise. 5684 */ 5685 static int 5686 vsw_check_dring_info(vio_dring_reg_msg_t *pkt) 5687 { 5688 5689 5690 int ret = 0; 5691 5692 D1(NULL, "vsw_check_dring_info enter\n"); 5693 5694 if ((pkt->num_descriptors == 0) || 5695 (pkt->descriptor_size == 0) || 5696 (pkt->ncookies != 1)) { 5697 DERR(NULL, "vsw_check_dring_info: invalid dring msg"); 5698 ret = 1; 5699 } 5700 5701 D1(NULL, "vsw_check_dring_info exit\n"); 5702 5703 return (ret); 5704 } 5705 5706 /* 5707 * Returns 1 if two memory cookies match. Otherwise returns 0. 5708 */ 5709 static int 5710 vsw_mem_cookie_match(ldc_mem_cookie_t *m1, ldc_mem_cookie_t *m2) 5711 { 5712 if ((m1->addr != m2->addr) || 5713 (m1->size != m2->size)) { 5714 return (0); 5715 } else { 5716 return (1); 5717 } 5718 } 5719 5720 /* 5721 * Returns 1 if the ring described in the reg message matches that 5722 * described by the dring_info structure. Otherwise returns 0.
5723 */ 5724 static int 5725 vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg) 5726 { 5727 if ((msg->descriptor_size != dp->descriptor_size) || 5728 (msg->num_descriptors != dp->num_descriptors) || 5729 (msg->ncookies != dp->ncookies) || 5730 !(vsw_mem_cookie_match(&msg->cookie[0], &dp->cookie[0]))) { 5731 return (0); 5732 } else { 5733 return (1); 5734 } 5735 5736 } 5737 5738 static caddr_t 5739 vsw_print_ethaddr(uint8_t *a, char *ebuf) 5740 { 5741 (void) sprintf(ebuf, "%x:%x:%x:%x:%x:%x", 5742 a[0], a[1], a[2], a[3], a[4], a[5]); 5743 return (ebuf); 5744 } 5745 5746 /* 5747 * Reset and free all the resources associated with 5748 * the channel. 5749 */ 5750 static void 5751 vsw_free_lane_resources(vsw_ldc_t *ldcp, uint64_t dir) 5752 { 5753 dring_info_t *dp, *dpp; 5754 lane_t *lp = NULL; 5755 5756 ASSERT(ldcp != NULL); 5757 5758 D1(ldcp->ldc_vswp, "%s (%lld): enter", __func__, ldcp->ldc_id); 5759 5760 if (dir == INBOUND) { 5761 D2(ldcp->ldc_vswp, "%s: freeing INBOUND lane" 5762 " of channel %lld", __func__, ldcp->ldc_id); 5763 lp = &ldcp->lane_in; 5764 } else { 5765 D2(ldcp->ldc_vswp, "%s: freeing OUTBOUND lane" 5766 " of channel %lld", __func__, ldcp->ldc_id); 5767 lp = &ldcp->lane_out; 5768 } 5769 5770 lp->lstate = VSW_LANE_INACTIV; 5771 lp->seq_num = VNET_ISS; 5772 5773 if (lp->dringp) { 5774 if (dir == INBOUND) { 5775 WRITE_ENTER(&lp->dlistrw); 5776 dp = lp->dringp; 5777 while (dp != NULL) { 5778 dpp = dp->next; 5779 if (dp->handle != NULL) 5780 (void) ldc_mem_dring_unmap(dp->handle); 5781 kmem_free(dp, sizeof (dring_info_t)); 5782 dp = dpp; 5783 } 5784 RW_EXIT(&lp->dlistrw); 5785 } else { 5786 /* 5787 * unbind, destroy exported dring, free dring struct 5788 */ 5789 WRITE_ENTER(&lp->dlistrw); 5790 dp = lp->dringp; 5791 vsw_free_ring(dp); 5792 RW_EXIT(&lp->dlistrw); 5793 } 5794 lp->dringp = NULL; 5795 } 5796 5797 D1(ldcp->ldc_vswp, "%s (%lld): exit", __func__, ldcp->ldc_id); 5798 } 5799 5800 /* 5801 * Free ring and all associated resources. 5802 * 5803 * Should be called with dlistrw rwlock held as writer. 5804 */ 5805 static void 5806 vsw_free_ring(dring_info_t *dp) 5807 { 5808 vsw_private_desc_t *paddr = NULL; 5809 dring_info_t *dpp; 5810 int i; 5811 5812 while (dp != NULL) { 5813 mutex_enter(&dp->dlock); 5814 dpp = dp->next; 5815 if (dp->priv_addr != NULL) { 5816 /* 5817 * First unbind and free the memory handles 5818 * stored in each descriptor within the ring. 5819 */ 5820 for (i = 0; i < vsw_ntxds; i++) { 5821 paddr = (vsw_private_desc_t *) 5822 dp->priv_addr + i; 5823 if (paddr->memhandle != NULL) { 5824 if (paddr->bound == 1) { 5825 if (ldc_mem_unbind_handle( 5826 paddr->memhandle) != 0) { 5827 DERR(NULL, "error " 5828 "unbinding handle for " 5829 "ring 0x%llx at pos %d", 5830 dp, i); 5831 continue; 5832 } 5833 paddr->bound = 0; 5834 } 5835 5836 if (ldc_mem_free_handle( 5837 paddr->memhandle) != 0) { 5838 DERR(NULL, "error freeing " 5839 "handle for ring 0x%llx " 5840 "at pos %d", dp, i); 5841 continue; 5842 } 5843 paddr->memhandle = NULL; 5844 } 5845 mutex_destroy(&paddr->dstate_lock); 5846 } 5847 kmem_free(dp->priv_addr, 5848 (sizeof (vsw_private_desc_t) * vsw_ntxds)); 5849 } 5850 5851 /* 5852 * Now unbind and destroy the ring itself. 
5853 */ 5854 if (dp->handle != NULL) { 5855 (void) ldc_mem_dring_unbind(dp->handle); 5856 (void) ldc_mem_dring_destroy(dp->handle); 5857 } 5858 5859 if (dp->data_addr != NULL) { 5860 kmem_free(dp->data_addr, dp->data_sz); 5861 } 5862 5863 mutex_exit(&dp->dlock); 5864 mutex_destroy(&dp->dlock); 5865 mutex_destroy(&dp->restart_lock); 5866 kmem_free(dp, sizeof (dring_info_t)); 5867 5868 dp = dpp; 5869 } 5870 } 5871 5872 /* 5873 * vsw_ldc_rx_worker -- A per LDC worker thread to receive data. 5874 * This thread is woken up by the LDC interrupt handler to process 5875 * LDC packets and receive data. 5876 */ 5877 static void 5878 vsw_ldc_rx_worker(void *arg) 5879 { 5880 callb_cpr_t cprinfo; 5881 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 5882 vsw_t *vswp = ldcp->ldc_vswp; 5883 5884 D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id); 5885 CALLB_CPR_INIT(&cprinfo, &ldcp->rx_thr_lock, callb_generic_cpr, 5886 "vsw_rx_thread"); 5887 mutex_enter(&ldcp->rx_thr_lock); 5888 ldcp->rx_thr_flags |= VSW_WTHR_RUNNING; 5889 while (!(ldcp->rx_thr_flags & VSW_WTHR_STOP)) { 5890 5891 CALLB_CPR_SAFE_BEGIN(&cprinfo); 5892 /* 5893 * Wait until the data is received or a stop 5894 * request is received. 5895 */ 5896 while (!(ldcp->rx_thr_flags & 5897 (VSW_WTHR_DATARCVD | VSW_WTHR_STOP))) { 5898 cv_wait(&ldcp->rx_thr_cv, &ldcp->rx_thr_lock); 5899 } 5900 CALLB_CPR_SAFE_END(&cprinfo, &ldcp->rx_thr_lock) 5901 5902 /* 5903 * First process the stop request. 5904 */ 5905 if (ldcp->rx_thr_flags & VSW_WTHR_STOP) { 5906 D2(vswp, "%s(%lld):Rx thread stopped\n", 5907 __func__, ldcp->ldc_id); 5908 break; 5909 } 5910 ldcp->rx_thr_flags &= ~VSW_WTHR_DATARCVD; 5911 mutex_exit(&ldcp->rx_thr_lock); 5912 D1(vswp, "%s(%lld):calling vsw_process_pkt\n", 5913 __func__, ldcp->ldc_id); 5914 mutex_enter(&ldcp->ldc_cblock); 5915 vsw_process_pkt(ldcp); 5916 mutex_exit(&ldcp->ldc_cblock); 5917 mutex_enter(&ldcp->rx_thr_lock); 5918 } 5919 5920 /* 5921 * Update the run status and wakeup the thread that 5922 * has sent the stop request. 5923 */ 5924 ldcp->rx_thr_flags &= ~VSW_WTHR_RUNNING; 5925 cv_signal(&ldcp->rx_thr_cv); 5926 CALLB_CPR_EXIT(&cprinfo); 5927 D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id); 5928 thread_exit(); 5929 } 5930 5931 /* vsw_stop_rx_thread -- Co-ordinate with receive thread to stop it */ 5932 static void 5933 vsw_stop_rx_thread(vsw_ldc_t *ldcp) 5934 { 5935 vsw_t *vswp = ldcp->ldc_vswp; 5936 5937 D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id); 5938 /* 5939 * Send a stop request by setting the stop flag and 5940 * wait until the receive thread stops. 5941 */ 5942 mutex_enter(&ldcp->rx_thr_lock); 5943 if (ldcp->rx_thr_flags & VSW_WTHR_RUNNING) { 5944 ldcp->rx_thr_flags |= VSW_WTHR_STOP; 5945 cv_signal(&ldcp->rx_thr_cv); 5946 while (ldcp->rx_thr_flags & VSW_WTHR_RUNNING) { 5947 cv_wait(&ldcp->rx_thr_cv, &ldcp->rx_thr_lock); 5948 } 5949 } 5950 mutex_exit(&ldcp->rx_thr_lock); 5951 ldcp->rx_thread = NULL; 5952 D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id); 5953 } 5954 5955 /* 5956 * vsw_ldc_tx_worker -- A per LDC worker thread to transmit data. 5957 * This thread is woken up by the vsw_portsend to transmit 5958 * packets. 
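 *
 * The handoff with vsw_ldctx() is, in outline:
 *
 *	producer (vsw_ldctx)			consumer (this thread)
 *	mutex_enter(&tx_thr_lock)		cv_wait(&tx_thr_cv, ...)
 *	append mp..mpt to tx_mhead/tx_mtail	mp = tx_mhead
 *	cv_signal(&tx_thr_cv)			tx_mhead = tx_mtail = NULL
 *	mutex_exit(&tx_thr_lock)		drop lock, vsw_ldcsend() each
 *
 * The producer only signals when the queue was previously empty
 * (tx_mhead == NULL), since a non-empty queue means this thread is
 * not blocked in cv_wait() and will drain the new frames anyway.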
/*
 * vsw_ldc_tx_worker -- A per LDC worker thread to transmit data.
 * This thread is woken up by vsw_portsend() to transmit
 * packets.
 */
static void
vsw_ldc_tx_worker(void *arg)
{
	callb_cpr_t	cprinfo;
	vsw_ldc_t	*ldcp = (vsw_ldc_t *)arg;
	vsw_t		*vswp = ldcp->ldc_vswp;
	mblk_t		*mp;
	mblk_t		*tmp;

	D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id);
	CALLB_CPR_INIT(&cprinfo, &ldcp->tx_thr_lock, callb_generic_cpr,
	    "vnet_tx_thread");
	mutex_enter(&ldcp->tx_thr_lock);
	ldcp->tx_thr_flags |= VSW_WTHR_RUNNING;
	while (!(ldcp->tx_thr_flags & VSW_WTHR_STOP)) {

		CALLB_CPR_SAFE_BEGIN(&cprinfo);
		/*
		 * Wait until there is data to be transmitted or a stop
		 * request is received.
		 */
		while (!(ldcp->tx_thr_flags & VSW_WTHR_STOP) &&
		    (ldcp->tx_mhead == NULL)) {
			cv_wait(&ldcp->tx_thr_cv, &ldcp->tx_thr_lock);
		}
		CALLB_CPR_SAFE_END(&cprinfo, &ldcp->tx_thr_lock)

		/*
		 * First process the stop request.
		 */
		if (ldcp->tx_thr_flags & VSW_WTHR_STOP) {
			D2(vswp, "%s(%lld):tx thread stopped\n",
			    __func__, ldcp->ldc_id);
			break;
		}
		mp = ldcp->tx_mhead;
		ldcp->tx_mhead = ldcp->tx_mtail = NULL;
		ldcp->tx_cnt = 0;
		mutex_exit(&ldcp->tx_thr_lock);
		D2(vswp, "%s(%lld):calling vsw_ldcsend\n",
		    __func__, ldcp->ldc_id);
		while (mp != NULL) {
			tmp = mp->b_next;
			mp->b_next = mp->b_prev = NULL;
			(void) vsw_ldcsend(ldcp, mp, vsw_ldc_tx_retries);
			mp = tmp;
		}
		mutex_enter(&ldcp->tx_thr_lock);
	}

	/*
	 * Update the run status and wakeup the thread that
	 * has sent the stop request.
	 */
	ldcp->tx_thr_flags &= ~VSW_WTHR_RUNNING;
	cv_signal(&ldcp->tx_thr_cv);
	CALLB_CPR_EXIT(&cprinfo);
	D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id);
	thread_exit();
}
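/*
 * Illustrative sketch (not part of the original source): the tx worker
 * above drains the mblk chain queued on tx_mhead/tx_mtail, so the
 * producer side (vsw_portsend() when it defers transmission to this
 * thread) is expected to queue packets and signal it roughly as
 * follows, where mp/mpt/count are the head, tail and length of an
 * mblk chain linked through b_next:
 *
 *	mutex_enter(&ldcp->tx_thr_lock);
 *	if (ldcp->tx_mhead == NULL) {
 *		ldcp->tx_mhead = mp;
 *	} else {
 *		ldcp->tx_mtail->b_next = mp;
 *	}
 *	ldcp->tx_mtail = mpt;
 *	ldcp->tx_cnt += count;
 *	cv_signal(&ldcp->tx_thr_cv);
 *	mutex_exit(&ldcp->tx_thr_lock);
 */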
/* vsw_stop_tx_thread -- Co-ordinate with the transmit thread to stop it */
static void
vsw_stop_tx_thread(vsw_ldc_t *ldcp)
{
	vsw_t	*vswp = ldcp->ldc_vswp;

	D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id);
	/*
	 * Send a stop request by setting the stop flag and
	 * wait until the transmit thread stops.
	 */
	mutex_enter(&ldcp->tx_thr_lock);
	if (ldcp->tx_thr_flags & VSW_WTHR_RUNNING) {
		ldcp->tx_thr_flags |= VSW_WTHR_STOP;
		cv_signal(&ldcp->tx_thr_cv);
		while (ldcp->tx_thr_flags & VSW_WTHR_RUNNING) {
			cv_wait(&ldcp->tx_thr_cv, &ldcp->tx_thr_lock);
		}
	}
	mutex_exit(&ldcp->tx_thr_lock);
	ldcp->tx_thread = NULL;
	D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id);
}

/* vsw_reclaim_dring -- reclaim descriptors */
static int
vsw_reclaim_dring(dring_info_t *dp, int start)
{
	int			i, j, len;
	vsw_private_desc_t	*priv_addr;
	vnet_public_desc_t	*pub_addr;

	pub_addr = (vnet_public_desc_t *)dp->pub_addr;
	priv_addr = (vsw_private_desc_t *)dp->priv_addr;
	len = dp->num_descriptors;

	D2(NULL, "%s: start index %d\n", __func__, start);

	j = 0;
	for (i = start; j < len; i = (i + 1) % len, j++) {
		pub_addr = (vnet_public_desc_t *)dp->pub_addr + i;
		priv_addr = (vsw_private_desc_t *)dp->priv_addr + i;

		mutex_enter(&priv_addr->dstate_lock);
		if (pub_addr->hdr.dstate != VIO_DESC_DONE) {
			mutex_exit(&priv_addr->dstate_lock);
			break;
		}
		pub_addr->hdr.dstate = VIO_DESC_FREE;
		priv_addr->dstate = VIO_DESC_FREE;
		/* clear all the fields */
		priv_addr->datalen = 0;
		pub_addr->hdr.ack = 0;
		mutex_exit(&priv_addr->dstate_lock);

		D3(NULL, "claiming descp:%d pub state:0x%llx priv state 0x%llx",
		    i, pub_addr->hdr.dstate, priv_addr->dstate);
	}
	return (j);
}
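/*
 * Worked example (illustrative only): with dp->num_descriptors == 8
 * and start == 6, vsw_reclaim_dring() visits slots 6, 7, 0, 1, ... in
 * that order.  If slots 6, 7 and 0 are VIO_DESC_DONE but slot 1 is
 * not (for example still VIO_DESC_ACCEPTED by the peer), the first
 * three descriptors are returned to VIO_DESC_FREE and the function
 * returns 3, the number of descriptors reclaimed.
 */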
/*
 * Debugging routines
 */
static void
display_state(void)
{
	vsw_t		*vswp;
	vsw_port_list_t	*plist;
	vsw_port_t	*port;
	vsw_ldc_list_t	*ldcl;
	vsw_ldc_t	*ldcp;
	extern vsw_t	*vsw_head;

	cmn_err(CE_NOTE, "***** system state *****");

	for (vswp = vsw_head; vswp; vswp = vswp->next) {
		plist = &vswp->plist;
		READ_ENTER(&plist->lockrw);
		cmn_err(CE_CONT, "vsw instance %d has %d ports attached\n",
		    vswp->instance, plist->num_ports);

		for (port = plist->head; port != NULL; port = port->p_next) {
			ldcl = &port->p_ldclist;
			cmn_err(CE_CONT, "port %d : %d ldcs attached\n",
			    port->p_instance, port->num_ldcs);
			READ_ENTER(&ldcl->lockrw);
			ldcp = ldcl->head;
			for (; ldcp != NULL; ldcp = ldcp->ldc_next) {
				cmn_err(CE_CONT, "chan %lu : dev %d : "
				    "status %d : phase %u\n",
				    ldcp->ldc_id, ldcp->dev_class,
				    ldcp->ldc_status, ldcp->hphase);
				cmn_err(CE_CONT, "chan %lu : lsession %lu : "
				    "psession %lu\n", ldcp->ldc_id,
				    ldcp->local_session, ldcp->peer_session);

				cmn_err(CE_CONT, "Inbound lane:\n");
				display_lane(&ldcp->lane_in);
				cmn_err(CE_CONT, "Outbound lane:\n");
				display_lane(&ldcp->lane_out);
			}
			RW_EXIT(&ldcl->lockrw);
		}
		RW_EXIT(&plist->lockrw);
	}
	cmn_err(CE_NOTE, "***** system state *****");
}

static void
display_lane(lane_t *lp)
{
	dring_info_t	*drp;

	cmn_err(CE_CONT, "ver 0x%x:0x%x : state %lx : mtu 0x%lx\n",
	    lp->ver_major, lp->ver_minor, lp->lstate, lp->mtu);
	cmn_err(CE_CONT, "addr_type %d : addr 0x%lx : xmode %d\n",
	    lp->addr_type, lp->addr, lp->xfer_mode);
	cmn_err(CE_CONT, "dringp 0x%lx\n", (uint64_t)lp->dringp);

	cmn_err(CE_CONT, "Dring info:\n");
	for (drp = lp->dringp; drp != NULL; drp = drp->next) {
		cmn_err(CE_CONT, "\tnum_desc %u : dsize %u\n",
		    drp->num_descriptors, drp->descriptor_size);
		cmn_err(CE_CONT, "\thandle 0x%lx\n", drp->handle);
		cmn_err(CE_CONT, "\tpub_addr 0x%lx : priv_addr 0x%lx\n",
		    (uint64_t)drp->pub_addr, (uint64_t)drp->priv_addr);
		cmn_err(CE_CONT, "\tident 0x%lx : end_idx %lu\n",
		    drp->ident, drp->end_idx);
		display_ring(drp);
	}
}

static void
display_ring(dring_info_t *dringp)
{
	uint64_t		i;
	uint64_t		priv_count = 0;
	uint64_t		pub_count = 0;
	vnet_public_desc_t	*pub_addr = NULL;
	vsw_private_desc_t	*priv_addr = NULL;

	for (i = 0; i < vsw_ntxds; i++) {
		if (dringp->pub_addr != NULL) {
			pub_addr = (vnet_public_desc_t *)dringp->pub_addr + i;

			if (pub_addr->hdr.dstate == VIO_DESC_FREE)
				pub_count++;
		}

		if (dringp->priv_addr != NULL) {
			priv_addr = (vsw_private_desc_t *)dringp->priv_addr + i;

			if (priv_addr->dstate == VIO_DESC_FREE)
				priv_count++;
		}
	}
	cmn_err(CE_CONT, "\t%lu elements: %lu priv free: %lu pub free\n",
	    i, priv_count, pub_count);
}

static void
dump_flags(uint64_t state)
{
	int	i;

	typedef struct flag_name {
		int	flag_val;
		char	*flag_name;
	} flag_name_t;

	flag_name_t	flags[] = {
		VSW_VER_INFO_SENT, "VSW_VER_INFO_SENT",
		VSW_VER_INFO_RECV, "VSW_VER_INFO_RECV",
		VSW_VER_ACK_RECV, "VSW_VER_ACK_RECV",
		VSW_VER_ACK_SENT, "VSW_VER_ACK_SENT",
		VSW_VER_NACK_RECV, "VSW_VER_NACK_RECV",
		VSW_VER_NACK_SENT, "VSW_VER_NACK_SENT",
		VSW_ATTR_INFO_SENT, "VSW_ATTR_INFO_SENT",
		VSW_ATTR_INFO_RECV, "VSW_ATTR_INFO_RECV",
		VSW_ATTR_ACK_SENT, "VSW_ATTR_ACK_SENT",
		VSW_ATTR_ACK_RECV, "VSW_ATTR_ACK_RECV",
		VSW_ATTR_NACK_SENT, "VSW_ATTR_NACK_SENT",
		VSW_ATTR_NACK_RECV, "VSW_ATTR_NACK_RECV",
		VSW_DRING_INFO_SENT, "VSW_DRING_INFO_SENT",
		VSW_DRING_INFO_RECV, "VSW_DRING_INFO_RECV",
		VSW_DRING_ACK_SENT, "VSW_DRING_ACK_SENT",
		VSW_DRING_ACK_RECV, "VSW_DRING_ACK_RECV",
		VSW_DRING_NACK_SENT, "VSW_DRING_NACK_SENT",
		VSW_DRING_NACK_RECV, "VSW_DRING_NACK_RECV",
		VSW_RDX_INFO_SENT, "VSW_RDX_INFO_SENT",
		VSW_RDX_INFO_RECV, "VSW_RDX_INFO_RECV",
		VSW_RDX_ACK_SENT, "VSW_RDX_ACK_SENT",
		VSW_RDX_ACK_RECV, "VSW_RDX_ACK_RECV",
		VSW_RDX_NACK_SENT, "VSW_RDX_NACK_SENT",
		VSW_RDX_NACK_RECV, "VSW_RDX_NACK_RECV",
		VSW_MCST_INFO_SENT, "VSW_MCST_INFO_SENT",
		VSW_MCST_INFO_RECV, "VSW_MCST_INFO_RECV",
		VSW_MCST_ACK_SENT, "VSW_MCST_ACK_SENT",
		VSW_MCST_ACK_RECV, "VSW_MCST_ACK_RECV",
		VSW_MCST_NACK_SENT, "VSW_MCST_NACK_SENT",
		VSW_MCST_NACK_RECV, "VSW_MCST_NACK_RECV",
		VSW_LANE_ACTIVE, "VSW_LANE_ACTIVE"};

	DERR(NULL, "DUMP_FLAGS: %llx\n", state);
	for (i = 0; i < sizeof (flags)/sizeof (flag_name_t); i++) {
		if (state & flags[i].flag_val)
			DERR(NULL, "DUMP_FLAGS %s", flags[i].flag_name);
	}
}
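/*
 * Usage note (illustrative only): dump_flags() decodes a lane state
 * word into its symbolic handshake flags.  For example, a call such as
 *
 *	dump_flags(ldcp->lane_out.lstate);
 *
 * made while VSW_VER_INFO_SENT and VSW_VER_ACK_RECV are set would emit,
 * via DERR():
 *
 *	DUMP_FLAGS: <lstate in hex>
 *	DUMP_FLAGS VSW_VER_INFO_SENT
 *	DUMP_FLAGS VSW_VER_ACK_RECV
 */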