1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/errno.h> 29 #include <sys/debug.h> 30 #include <sys/time.h> 31 #include <sys/sysmacros.h> 32 #include <sys/systm.h> 33 #include <sys/user.h> 34 #include <sys/stropts.h> 35 #include <sys/stream.h> 36 #include <sys/strlog.h> 37 #include <sys/strsubr.h> 38 #include <sys/cmn_err.h> 39 #include <sys/cpu.h> 40 #include <sys/kmem.h> 41 #include <sys/conf.h> 42 #include <sys/ddi.h> 43 #include <sys/sunddi.h> 44 #include <sys/ksynch.h> 45 #include <sys/stat.h> 46 #include <sys/kstat.h> 47 #include <sys/vtrace.h> 48 #include <sys/strsun.h> 49 #include <sys/dlpi.h> 50 #include <sys/ethernet.h> 51 #include <net/if.h> 52 #include <sys/varargs.h> 53 #include <sys/machsystm.h> 54 #include <sys/modctl.h> 55 #include <sys/modhash.h> 56 #include <sys/mac.h> 57 #include <sys/mac_ether.h> 58 #include <sys/taskq.h> 59 #include <sys/note.h> 60 #include <sys/mach_descrip.h> 61 #include <sys/mdeg.h> 62 #include <sys/ldc.h> 63 #include <sys/vsw_fdb.h> 64 #include <sys/vsw.h> 65 #include <sys/vio_mailbox.h> 66 #include <sys/vnet_mailbox.h> 67 #include <sys/vnet_common.h> 68 #include <sys/vio_util.h> 69 #include <sys/sdt.h> 70 #include <sys/atomic.h> 71 #include <sys/callb.h> 72 #include <sys/vlan.h> 73 74 /* Port add/deletion/etc routines */ 75 static void vsw_port_delete(vsw_port_t *port); 76 static int vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id); 77 static void vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id); 78 static int vsw_init_ldcs(vsw_port_t *port); 79 static void vsw_uninit_ldcs(vsw_port_t *port); 80 static int vsw_ldc_init(vsw_ldc_t *ldcp); 81 static void vsw_ldc_uninit(vsw_ldc_t *ldcp); 82 static void vsw_drain_ldcs(vsw_port_t *port); 83 static void vsw_drain_port_taskq(vsw_port_t *port); 84 static void vsw_marker_task(void *); 85 static int vsw_plist_del_node(vsw_t *, vsw_port_t *port); 86 void vsw_detach_ports(vsw_t *vswp); 87 int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node); 88 mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr); 89 int vsw_port_detach(vsw_t *vswp, int p_instance); 90 int vsw_portsend(vsw_port_t *port, mblk_t *mp); 91 int vsw_port_attach(vsw_port_t *portp); 92 vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance); 93 void vsw_vlan_unaware_port_reset(vsw_port_t *portp); 94 int vsw_send_msg(vsw_ldc_t *, void *, int, boolean_t); 95 void vsw_hio_port_reset(vsw_port_t *portp, boolean_t immediate); 96 void vsw_reset_ports(vsw_t *vswp); 97 void vsw_port_reset(vsw_port_t *portp); 98 void vsw_physlink_update_ports(vsw_t *vswp); 99 static void 
vsw_port_physlink_update(vsw_port_t *portp); 100 101 /* Interrupt routines */ 102 static uint_t vsw_ldc_cb(uint64_t cb, caddr_t arg); 103 104 /* Handshake routines */ 105 static void vsw_ldc_reinit(vsw_ldc_t *); 106 static void vsw_process_conn_evt(vsw_ldc_t *, uint16_t); 107 static void vsw_conn_task(void *); 108 static int vsw_check_flag(vsw_ldc_t *, int, uint64_t); 109 static void vsw_next_milestone(vsw_ldc_t *); 110 static int vsw_supported_version(vio_ver_msg_t *); 111 static void vsw_set_vnet_proto_ops(vsw_ldc_t *ldcp); 112 static void vsw_reset_vnet_proto_ops(vsw_ldc_t *ldcp); 113 114 /* Data processing routines */ 115 static void vsw_process_pkt(void *); 116 static void vsw_dispatch_ctrl_task(vsw_ldc_t *, void *, vio_msg_tag_t *); 117 static void vsw_process_ctrl_pkt(void *); 118 static void vsw_process_ctrl_ver_pkt(vsw_ldc_t *, void *); 119 static void vsw_process_ctrl_attr_pkt(vsw_ldc_t *, void *); 120 static void vsw_process_ctrl_mcst_pkt(vsw_ldc_t *, void *); 121 static void vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *, void *); 122 static void vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *, void *); 123 static void vsw_process_ctrl_rdx_pkt(vsw_ldc_t *, void *); 124 static void vsw_process_physlink_msg(vsw_ldc_t *, void *); 125 static void vsw_process_data_pkt(vsw_ldc_t *, void *, vio_msg_tag_t *, 126 uint32_t); 127 static void vsw_process_data_dring_pkt(vsw_ldc_t *, void *); 128 static void vsw_process_pkt_data_nop(void *, void *, uint32_t); 129 static void vsw_process_pkt_data(void *, void *, uint32_t); 130 static void vsw_process_data_ibnd_pkt(vsw_ldc_t *, void *); 131 static void vsw_process_err_pkt(vsw_ldc_t *, void *, vio_msg_tag_t *); 132 133 /* Switching/data transmit routines */ 134 static int vsw_dringsend(vsw_ldc_t *, mblk_t *); 135 static int vsw_descrsend(vsw_ldc_t *, mblk_t *); 136 static void vsw_ldcsend_pkt(vsw_ldc_t *ldcp, mblk_t *mp); 137 static int vsw_ldcsend(vsw_ldc_t *ldcp, mblk_t *mp, uint32_t retries); 138 static int vsw_ldctx_pri(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count); 139 static int vsw_ldctx(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count); 140 141 /* Packet creation routines */ 142 static void vsw_send_ver(void *); 143 static void vsw_send_attr(vsw_ldc_t *); 144 static vio_dring_reg_msg_t *vsw_create_dring_info_pkt(vsw_ldc_t *); 145 static void vsw_send_dring_info(vsw_ldc_t *); 146 static void vsw_send_rdx(vsw_ldc_t *); 147 static void vsw_send_physlink_msg(vsw_ldc_t *ldcp, link_state_t plink_state); 148 149 /* Dring routines */ 150 static dring_info_t *vsw_create_dring(vsw_ldc_t *); 151 static void vsw_create_privring(vsw_ldc_t *); 152 static int vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp); 153 static int vsw_dring_find_free_desc(dring_info_t *, vsw_private_desc_t **, 154 int *); 155 static dring_info_t *vsw_ident2dring(lane_t *, uint64_t); 156 static int vsw_reclaim_dring(dring_info_t *dp, int start); 157 158 static void vsw_set_lane_attr(vsw_t *, lane_t *); 159 static int vsw_check_attr(vnet_attr_msg_t *, vsw_ldc_t *); 160 static int vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg); 161 static int vsw_mem_cookie_match(ldc_mem_cookie_t *, ldc_mem_cookie_t *); 162 static int vsw_check_dring_info(vio_dring_reg_msg_t *); 163 164 /* Rcv/Tx thread routines */ 165 static void vsw_stop_tx_thread(vsw_ldc_t *ldcp); 166 static void vsw_ldc_tx_worker(void *arg); 167 static void vsw_stop_rx_thread(vsw_ldc_t *ldcp); 168 static void vsw_ldc_rx_worker(void *arg); 169 170 /* Misc support routines */ 171 static void 
vsw_free_lane_resources(vsw_ldc_t *, uint64_t); 172 static void vsw_free_ring(dring_info_t *); 173 static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr); 174 static int vsw_get_same_dest_list(struct ether_header *ehp, 175 mblk_t **rhead, mblk_t **rtail, mblk_t **mpp); 176 static mblk_t *vsw_dupmsgchain(mblk_t *mp); 177 178 /* Debugging routines */ 179 static void dump_flags(uint64_t); 180 static void display_state(void); 181 static void display_lane(lane_t *); 182 static void display_ring(dring_info_t *); 183 184 /* 185 * Functions imported from other files. 186 */ 187 extern int vsw_set_hw(vsw_t *, vsw_port_t *, int); 188 extern void vsw_unset_hw(vsw_t *, vsw_port_t *, int); 189 extern int vsw_add_rem_mcst(vnet_mcast_msg_t *mcst_pkt, vsw_port_t *port); 190 extern void vsw_del_mcst_port(vsw_port_t *port); 191 extern int vsw_add_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg); 192 extern int vsw_del_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg); 193 extern void vsw_fdbe_add(vsw_t *vswp, void *port); 194 extern void vsw_fdbe_del(vsw_t *vswp, struct ether_addr *eaddr); 195 extern void vsw_create_vlans(void *arg, int type); 196 extern void vsw_destroy_vlans(void *arg, int type); 197 extern void vsw_vlan_add_ids(void *arg, int type); 198 extern void vsw_vlan_remove_ids(void *arg, int type); 199 extern boolean_t vsw_frame_lookup_vid(void *arg, int caller, 200 struct ether_header *ehp, uint16_t *vidp); 201 extern mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp); 202 extern uint32_t vsw_vlan_frame_untag(void *arg, int type, mblk_t **np, 203 mblk_t **npt); 204 extern boolean_t vsw_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid); 205 extern void vsw_hio_start(vsw_t *vswp, vsw_ldc_t *ldcp); 206 extern void vsw_hio_stop(vsw_t *vswp, vsw_ldc_t *ldcp); 207 extern void vsw_process_dds_msg(vsw_t *vswp, vsw_ldc_t *ldcp, void *msg); 208 extern void vsw_hio_stop_port(vsw_port_t *portp); 209 extern void vsw_publish_macaddr(vsw_t *vswp, vsw_port_t *portp); 210 extern int vsw_mac_client_init(vsw_t *vswp, vsw_port_t *port, int type); 211 extern void vsw_mac_client_cleanup(vsw_t *vswp, vsw_port_t *port, int type); 212 213 214 #define VSW_NUM_VMPOOLS 3 /* number of vio mblk pools */ 215 216 /* 217 * Tunables used in this file. 
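 * These tunables are declared extern here and defined elsewhere in the
 * driver; they control LDC open/close retry counts and delays, rx/tx worker
 * thread usage, descriptor counts, and receive mblk pool geometry. On a
 * running system they can typically be overridden at boot via /etc/system,
 * for example (hypothetical value):
 *
 *	set vsw:vsw_ldc_tx_retries = 10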
218 */ 219 extern int vsw_num_handshakes; 220 extern int vsw_wretries; 221 extern int vsw_desc_delay; 222 extern int vsw_read_attempts; 223 extern int vsw_ldc_tx_delay; 224 extern int vsw_ldc_tx_retries; 225 extern int vsw_ldc_retries; 226 extern int vsw_ldc_delay; 227 extern boolean_t vsw_ldc_rxthr_enabled; 228 extern boolean_t vsw_ldc_txthr_enabled; 229 extern uint32_t vsw_ntxds; 230 extern uint32_t vsw_max_tx_qcount; 231 extern uint32_t vsw_chain_len; 232 extern uint32_t vsw_mblk_size1; 233 extern uint32_t vsw_mblk_size2; 234 extern uint32_t vsw_mblk_size3; 235 extern uint32_t vsw_mblk_size4; 236 extern uint32_t vsw_num_mblks1; 237 extern uint32_t vsw_num_mblks2; 238 extern uint32_t vsw_num_mblks3; 239 extern uint32_t vsw_num_mblks4; 240 extern boolean_t vsw_obp_ver_proto_workaround; 241 extern uint32_t vsw_publish_macaddr_count; 242 extern boolean_t vsw_jumbo_rxpools; 243 244 #define LDC_ENTER_LOCK(ldcp) \ 245 mutex_enter(&((ldcp)->ldc_cblock));\ 246 mutex_enter(&((ldcp)->ldc_rxlock));\ 247 mutex_enter(&((ldcp)->ldc_txlock)); 248 #define LDC_EXIT_LOCK(ldcp) \ 249 mutex_exit(&((ldcp)->ldc_txlock));\ 250 mutex_exit(&((ldcp)->ldc_rxlock));\ 251 mutex_exit(&((ldcp)->ldc_cblock)); 252 253 #define VSW_VER_EQ(ldcp, major, minor) \ 254 ((ldcp)->lane_out.ver_major == (major) && \ 255 (ldcp)->lane_out.ver_minor == (minor)) 256 257 #define VSW_VER_LT(ldcp, major, minor) \ 258 (((ldcp)->lane_out.ver_major < (major)) || \ 259 ((ldcp)->lane_out.ver_major == (major) && \ 260 (ldcp)->lane_out.ver_minor < (minor))) 261 262 #define VSW_VER_GTEQ(ldcp, major, minor) \ 263 (((ldcp)->lane_out.ver_major > (major)) || \ 264 ((ldcp)->lane_out.ver_major == (major) && \ 265 (ldcp)->lane_out.ver_minor >= (minor))) 266 267 /* 268 * VIO Protocol Version Info: 269 * 270 * The version specified below represents the version of protocol currently 271 * supported in the driver. It means the driver can negotiate with peers with 272 * versions <= this version. Here is a summary of the feature(s) that are 273 * supported at each version of the protocol: 274 * 275 * 1.0 Basic VIO protocol. 276 * 1.1 vDisk protocol update (no virtual network update). 277 * 1.2 Support for priority frames (priority-ether-types). 278 * 1.3 VLAN and HybridIO support. 279 * 1.4 Jumbo Frame support. 280 * 1.5 Link State Notification support with optional support 281 * for Physical Link information. 282 */ 283 static ver_sup_t vsw_versions[] = { {1, 5} }; 284 285 /* 286 * For the moment the state dump routines have their own 287 * private flag. 288 */ 289 #define DUMP_STATE 0 290 291 #if DUMP_STATE 292 293 #define DUMP_TAG(tag) \ 294 { \ 295 D1(NULL, "DUMP_TAG: type 0x%llx", (tag).vio_msgtype); \ 296 D1(NULL, "DUMP_TAG: stype 0x%llx", (tag).vio_subtype); \ 297 D1(NULL, "DUMP_TAG: senv 0x%llx", (tag).vio_subtype_env); \ 298 } 299 300 #define DUMP_TAG_PTR(tag) \ 301 { \ 302 D1(NULL, "DUMP_TAG: type 0x%llx", (tag)->vio_msgtype); \ 303 D1(NULL, "DUMP_TAG: stype 0x%llx", (tag)->vio_subtype); \ 304 D1(NULL, "DUMP_TAG: senv 0x%llx", (tag)->vio_subtype_env); \ 305 } 306 307 #define DUMP_FLAGS(flags) dump_flags(flags); 308 #define DISPLAY_STATE() display_state() 309 310 #else 311 312 #define DUMP_TAG(tag) 313 #define DUMP_TAG_PTR(tag) 314 #define DUMP_FLAGS(state) 315 #define DISPLAY_STATE() 316 317 #endif /* DUMP_STATE */ 318 319 /* 320 * Attach the specified port. 321 * 322 * Returns 0 on success, 1 on failure. 
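 * The caller is expected to have allocated the vsw_port_t and filled in
 * p_vswp, p_instance, num_ldcs and ldc_ids before calling. On failure the
 * port structure itself is freed here.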
323 */ 324 int 325 vsw_port_attach(vsw_port_t *port) 326 { 327 vsw_t *vswp = port->p_vswp; 328 vsw_port_list_t *plist = &vswp->plist; 329 vsw_port_t *p, **pp; 330 int i; 331 int nids = port->num_ldcs; 332 uint64_t *ldcids; 333 int rv; 334 335 D1(vswp, "%s: enter : port %d", __func__, port->p_instance); 336 337 /* port already exists? */ 338 READ_ENTER(&plist->lockrw); 339 for (p = plist->head; p != NULL; p = p->p_next) { 340 if (p->p_instance == port->p_instance) { 341 DWARN(vswp, "%s: port instance %d already attached", 342 __func__, p->p_instance); 343 RW_EXIT(&plist->lockrw); 344 return (1); 345 } 346 } 347 RW_EXIT(&plist->lockrw); 348 349 rw_init(&port->p_ldclist.lockrw, NULL, RW_DRIVER, NULL); 350 351 mutex_init(&port->tx_lock, NULL, MUTEX_DRIVER, NULL); 352 mutex_init(&port->mca_lock, NULL, MUTEX_DRIVER, NULL); 353 rw_init(&port->maccl_rwlock, NULL, RW_DRIVER, NULL); 354 355 mutex_init(&port->state_lock, NULL, MUTEX_DRIVER, NULL); 356 cv_init(&port->state_cv, NULL, CV_DRIVER, NULL); 357 port->state = VSW_PORT_INIT; 358 359 D2(vswp, "%s: %d nids", __func__, nids); 360 ldcids = port->ldc_ids; 361 for (i = 0; i < nids; i++) { 362 D2(vswp, "%s: ldcid (%llx)", __func__, (uint64_t)ldcids[i]); 363 if (vsw_ldc_attach(port, (uint64_t)ldcids[i]) != 0) { 364 DERR(vswp, "%s: ldc_attach failed", __func__); 365 goto exit_error; 366 } 367 } 368 369 if (vswp->switching_setup_done == B_TRUE) { 370 /* 371 * If the underlying network device has been setup, 372 * then open a mac client and porgram the mac address 373 * for this port. 374 */ 375 rv = vsw_mac_client_init(vswp, port, VSW_VNETPORT); 376 if (rv != 0) { 377 goto exit_error; 378 } 379 } 380 381 /* create the fdb entry for this port/mac address */ 382 vsw_fdbe_add(vswp, port); 383 384 vsw_create_vlans(port, VSW_VNETPORT); 385 386 WRITE_ENTER(&plist->lockrw); 387 388 /* link it into the list of ports for this vsw instance */ 389 pp = (vsw_port_t **)(&plist->head); 390 port->p_next = *pp; 391 *pp = port; 392 plist->num_ports++; 393 394 RW_EXIT(&plist->lockrw); 395 396 /* 397 * Initialise the port and any ldc's under it. 398 */ 399 (void) vsw_init_ldcs(port); 400 401 /* announce macaddr of vnet to the physical switch */ 402 if (vsw_publish_macaddr_count != 0) { /* enabled */ 403 vsw_publish_macaddr(vswp, port); 404 } 405 406 D1(vswp, "%s: exit", __func__); 407 return (0); 408 409 exit_error: 410 rw_destroy(&port->p_ldclist.lockrw); 411 412 cv_destroy(&port->state_cv); 413 mutex_destroy(&port->state_lock); 414 415 rw_destroy(&port->maccl_rwlock); 416 mutex_destroy(&port->tx_lock); 417 mutex_destroy(&port->mca_lock); 418 kmem_free(port, sizeof (vsw_port_t)); 419 return (1); 420 } 421 422 /* 423 * Detach the specified port. 424 * 425 * Returns 0 on success, 1 on failure. 426 */ 427 int 428 vsw_port_detach(vsw_t *vswp, int p_instance) 429 { 430 vsw_port_t *port = NULL; 431 vsw_port_list_t *plist = &vswp->plist; 432 433 D1(vswp, "%s: enter: port id %d", __func__, p_instance); 434 435 WRITE_ENTER(&plist->lockrw); 436 437 if ((port = vsw_lookup_port(vswp, p_instance)) == NULL) { 438 RW_EXIT(&plist->lockrw); 439 return (1); 440 } 441 442 if (vsw_plist_del_node(vswp, port)) { 443 RW_EXIT(&plist->lockrw); 444 return (1); 445 } 446 447 /* cleanup any HybridIO for this port */ 448 vsw_hio_stop_port(port); 449 450 /* 451 * No longer need to hold writer lock on port list now 452 * that we have unlinked the target port from the list. 
453 */ 454 RW_EXIT(&plist->lockrw); 455 456 /* Cleanup and close the mac client */ 457 vsw_mac_client_cleanup(vswp, port, VSW_VNETPORT); 458 459 /* Remove the fdb entry for this port/mac address */ 460 vsw_fdbe_del(vswp, &(port->p_macaddr)); 461 vsw_destroy_vlans(port, VSW_VNETPORT); 462 463 /* Remove any multicast addresses.. */ 464 vsw_del_mcst_port(port); 465 466 vsw_port_delete(port); 467 468 D1(vswp, "%s: exit: p_instance(%d)", __func__, p_instance); 469 return (0); 470 } 471 472 /* 473 * Detach all active ports. 474 */ 475 void 476 vsw_detach_ports(vsw_t *vswp) 477 { 478 vsw_port_list_t *plist = &vswp->plist; 479 vsw_port_t *port = NULL; 480 481 D1(vswp, "%s: enter", __func__); 482 483 WRITE_ENTER(&plist->lockrw); 484 485 while ((port = plist->head) != NULL) { 486 (void) vsw_plist_del_node(vswp, port); 487 488 /* cleanup any HybridIO for this port */ 489 vsw_hio_stop_port(port); 490 491 /* Cleanup and close the mac client */ 492 vsw_mac_client_cleanup(vswp, port, VSW_VNETPORT); 493 494 /* Remove the fdb entry for this port/mac address */ 495 vsw_fdbe_del(vswp, &(port->p_macaddr)); 496 vsw_destroy_vlans(port, VSW_VNETPORT); 497 498 /* Remove any multicast addresses.. */ 499 vsw_del_mcst_port(port); 500 501 /* 502 * No longer need to hold the lock on the port list 503 * now that we have unlinked the target port from the 504 * list. 505 */ 506 RW_EXIT(&plist->lockrw); 507 vsw_port_delete(port); 508 WRITE_ENTER(&plist->lockrw); 509 } 510 RW_EXIT(&plist->lockrw); 511 512 D1(vswp, "%s: exit", __func__); 513 } 514 515 /* 516 * Delete the specified port. 517 */ 518 static void 519 vsw_port_delete(vsw_port_t *port) 520 { 521 vsw_ldc_list_t *ldcl; 522 vsw_t *vswp = port->p_vswp; 523 int num_ldcs; 524 525 D1(vswp, "%s: enter : port id %d", __func__, port->p_instance); 526 527 vsw_uninit_ldcs(port); 528 529 /* 530 * Wait for any pending ctrl msg tasks which reference this 531 * port to finish. 532 */ 533 vsw_drain_port_taskq(port); 534 535 /* 536 * Wait for any active callbacks to finish 537 */ 538 vsw_drain_ldcs(port); 539 540 ldcl = &port->p_ldclist; 541 num_ldcs = port->num_ldcs; 542 WRITE_ENTER(&ldcl->lockrw); 543 while (num_ldcs > 0) { 544 vsw_ldc_detach(port, ldcl->head->ldc_id); 545 num_ldcs--; 546 } 547 RW_EXIT(&ldcl->lockrw); 548 549 rw_destroy(&port->p_ldclist.lockrw); 550 551 rw_destroy(&port->maccl_rwlock); 552 mutex_destroy(&port->mca_lock); 553 mutex_destroy(&port->tx_lock); 554 555 cv_destroy(&port->state_cv); 556 mutex_destroy(&port->state_lock); 557 558 if (port->num_ldcs != 0) { 559 kmem_free(port->ldc_ids, port->num_ldcs * sizeof (uint64_t)); 560 port->num_ldcs = 0; 561 } 562 563 if (port->nvids != 0) { 564 kmem_free(port->vids, sizeof (vsw_vlanid_t) * port->nvids); 565 } 566 567 kmem_free(port, sizeof (vsw_port_t)); 568 569 D1(vswp, "%s: exit", __func__); 570 } 571 572 static int 573 vsw_init_multipools(vsw_ldc_t *ldcp, vsw_t *vswp) 574 { 575 size_t data_sz; 576 int rv; 577 uint32_t sz1 = 0; 578 uint32_t sz2 = 0; 579 uint32_t sz3 = 0; 580 uint32_t sz4 = 0; 581 582 /* 583 * We round up the mtu specified to be a multiple of 2K to limit the 584 * number of rx buffer pools created for a given mtu. 585 */ 586 data_sz = vswp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN; 587 data_sz = VNET_ROUNDUP_2K(data_sz); 588 589 /* 590 * If pool sizes are specified, use them. Note that the presence of 591 * the first tunable will be used as a hint. 
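 * (If vsw_mblk_size4 is zero only three pools are created; otherwise a
 * fourth pool of that size is added.)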
592 */ 593 if (vsw_mblk_size1 != 0) { 594 sz1 = vsw_mblk_size1; 595 sz2 = vsw_mblk_size2; 596 sz3 = vsw_mblk_size3; 597 sz4 = vsw_mblk_size4; 598 599 if (sz4 == 0) { /* need 3 pools */ 600 601 ldcp->max_rxpool_size = sz3; 602 rv = vio_init_multipools(&ldcp->vmp, 603 VSW_NUM_VMPOOLS, sz1, sz2, sz3, 604 vsw_num_mblks1, vsw_num_mblks2, vsw_num_mblks3); 605 606 } else { 607 608 ldcp->max_rxpool_size = sz4; 609 rv = vio_init_multipools(&ldcp->vmp, 610 VSW_NUM_VMPOOLS + 1, sz1, sz2, sz3, sz4, 611 vsw_num_mblks1, vsw_num_mblks2, vsw_num_mblks3, 612 vsw_num_mblks4); 613 614 } 615 616 return (rv); 617 } 618 619 /* 620 * Pool sizes are not specified. We select the pool sizes based on the 621 * mtu if vnet_jumbo_rxpools is enabled. 622 */ 623 if (vsw_jumbo_rxpools == B_FALSE || data_sz == VNET_2K) { 624 /* 625 * Receive buffer pool allocation based on mtu is disabled. 626 * Use the default mechanism of standard size pool allocation. 627 */ 628 sz1 = VSW_MBLK_SZ_128; 629 sz2 = VSW_MBLK_SZ_256; 630 sz3 = VSW_MBLK_SZ_2048; 631 ldcp->max_rxpool_size = sz3; 632 633 rv = vio_init_multipools(&ldcp->vmp, VSW_NUM_VMPOOLS, 634 sz1, sz2, sz3, 635 vsw_num_mblks1, vsw_num_mblks2, vsw_num_mblks3); 636 637 return (rv); 638 } 639 640 switch (data_sz) { 641 642 case VNET_4K: 643 644 sz1 = VSW_MBLK_SZ_128; 645 sz2 = VSW_MBLK_SZ_256; 646 sz3 = VSW_MBLK_SZ_2048; 647 sz4 = sz3 << 1; /* 4K */ 648 ldcp->max_rxpool_size = sz4; 649 650 rv = vio_init_multipools(&ldcp->vmp, VSW_NUM_VMPOOLS + 1, 651 sz1, sz2, sz3, sz4, 652 vsw_num_mblks1, vsw_num_mblks2, vsw_num_mblks3, 653 vsw_num_mblks4); 654 break; 655 656 default: /* data_sz: 4K+ to 16K */ 657 658 sz1 = VSW_MBLK_SZ_256; 659 sz2 = VSW_MBLK_SZ_2048; 660 sz3 = data_sz >> 1; /* Jumbo-size/2 */ 661 sz4 = data_sz; /* Jumbo-size */ 662 ldcp->max_rxpool_size = sz4; 663 664 rv = vio_init_multipools(&ldcp->vmp, VSW_NUM_VMPOOLS + 1, 665 sz1, sz2, sz3, sz4, 666 vsw_num_mblks1, vsw_num_mblks2, vsw_num_mblks3, 667 vsw_num_mblks4); 668 break; 669 } 670 671 return (rv); 672 673 } 674 675 /* 676 * Attach a logical domain channel (ldc) under a specified port. 677 * 678 * Returns 0 on success, 1 on failure. 
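 * On failure, any resources already allocated (tracked via the progress
 * bitmask below) are released before returning.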
679 */ 680 static int 681 vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id) 682 { 683 vsw_t *vswp = port->p_vswp; 684 vsw_ldc_list_t *ldcl = &port->p_ldclist; 685 vsw_ldc_t *ldcp = NULL; 686 ldc_attr_t attr; 687 ldc_status_t istatus; 688 int status = DDI_FAILURE; 689 char kname[MAXNAMELEN]; 690 enum { PROG_init = 0x0, 691 PROG_callback = 0x1, PROG_rx_thread = 0x2, 692 PROG_tx_thread = 0x4} 693 progress; 694 695 progress = PROG_init; 696 697 D1(vswp, "%s: enter", __func__); 698 699 ldcp = kmem_zalloc(sizeof (vsw_ldc_t), KM_NOSLEEP); 700 if (ldcp == NULL) { 701 DERR(vswp, "%s: kmem_zalloc failed", __func__); 702 return (1); 703 } 704 ldcp->ldc_id = ldc_id; 705 706 mutex_init(&ldcp->ldc_txlock, NULL, MUTEX_DRIVER, NULL); 707 mutex_init(&ldcp->ldc_rxlock, NULL, MUTEX_DRIVER, NULL); 708 mutex_init(&ldcp->ldc_cblock, NULL, MUTEX_DRIVER, NULL); 709 mutex_init(&ldcp->drain_cv_lock, NULL, MUTEX_DRIVER, NULL); 710 cv_init(&ldcp->drain_cv, NULL, CV_DRIVER, NULL); 711 rw_init(&ldcp->lane_in.dlistrw, NULL, RW_DRIVER, NULL); 712 rw_init(&ldcp->lane_out.dlistrw, NULL, RW_DRIVER, NULL); 713 714 /* required for handshake with peer */ 715 ldcp->local_session = (uint64_t)ddi_get_lbolt(); 716 ldcp->peer_session = 0; 717 ldcp->session_status = 0; 718 ldcp->hss_id = 1; /* Initial handshake session id */ 719 720 (void) atomic_swap_32(&port->p_hio_capable, B_FALSE); 721 722 /* only set for outbound lane, inbound set by peer */ 723 vsw_set_lane_attr(vswp, &ldcp->lane_out); 724 725 attr.devclass = LDC_DEV_NT_SVC; 726 attr.instance = ddi_get_instance(vswp->dip); 727 attr.mode = LDC_MODE_UNRELIABLE; 728 attr.mtu = VSW_LDC_MTU; 729 status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle); 730 if (status != 0) { 731 DERR(vswp, "%s(%lld): ldc_init failed, rv (%d)", 732 __func__, ldc_id, status); 733 goto ldc_attach_fail; 734 } 735 736 if (vsw_ldc_rxthr_enabled) { 737 ldcp->rx_thr_flags = 0; 738 739 mutex_init(&ldcp->rx_thr_lock, NULL, MUTEX_DRIVER, NULL); 740 cv_init(&ldcp->rx_thr_cv, NULL, CV_DRIVER, NULL); 741 ldcp->rx_thread = thread_create(NULL, 2 * DEFAULTSTKSZ, 742 vsw_ldc_rx_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri); 743 744 progress |= PROG_rx_thread; 745 if (ldcp->rx_thread == NULL) { 746 DWARN(vswp, "%s(%lld): Failed to create worker thread", 747 __func__, ldc_id); 748 goto ldc_attach_fail; 749 } 750 } 751 752 if (vsw_ldc_txthr_enabled) { 753 ldcp->tx_thr_flags = 0; 754 ldcp->tx_mhead = ldcp->tx_mtail = NULL; 755 756 mutex_init(&ldcp->tx_thr_lock, NULL, MUTEX_DRIVER, NULL); 757 cv_init(&ldcp->tx_thr_cv, NULL, CV_DRIVER, NULL); 758 ldcp->tx_thread = thread_create(NULL, 2 * DEFAULTSTKSZ, 759 vsw_ldc_tx_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri); 760 761 progress |= PROG_tx_thread; 762 if (ldcp->tx_thread == NULL) { 763 DWARN(vswp, "%s(%lld): Failed to create worker thread", 764 __func__, ldc_id); 765 goto ldc_attach_fail; 766 } 767 } 768 769 status = ldc_reg_callback(ldcp->ldc_handle, vsw_ldc_cb, (caddr_t)ldcp); 770 if (status != 0) { 771 DERR(vswp, "%s(%lld): ldc_reg_callback failed, rv (%d)", 772 __func__, ldc_id, status); 773 (void) ldc_fini(ldcp->ldc_handle); 774 goto ldc_attach_fail; 775 } 776 /* 777 * allocate a message for ldc_read()s, big enough to hold ctrl and 778 * data msgs, including raw data msgs used to recv priority frames. 
779 */ 780 ldcp->msglen = VIO_PKT_DATA_HDRSIZE + vswp->max_frame_size; 781 ldcp->ldcmsg = kmem_alloc(ldcp->msglen, KM_SLEEP); 782 783 progress |= PROG_callback; 784 785 mutex_init(&ldcp->status_lock, NULL, MUTEX_DRIVER, NULL); 786 787 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) { 788 DERR(vswp, "%s: ldc_status failed", __func__); 789 mutex_destroy(&ldcp->status_lock); 790 goto ldc_attach_fail; 791 } 792 793 ldcp->ldc_status = istatus; 794 ldcp->ldc_port = port; 795 ldcp->ldc_vswp = vswp; 796 797 vsw_reset_vnet_proto_ops(ldcp); 798 799 (void) sprintf(kname, "%sldc0x%lx", DRV_NAME, ldcp->ldc_id); 800 ldcp->ksp = vgen_setup_kstats(DRV_NAME, vswp->instance, 801 kname, &ldcp->ldc_stats); 802 if (ldcp->ksp == NULL) { 803 DERR(vswp, "%s: kstats setup failed", __func__); 804 goto ldc_attach_fail; 805 } 806 807 /* link it into the list of channels for this port */ 808 WRITE_ENTER(&ldcl->lockrw); 809 ldcp->ldc_next = ldcl->head; 810 ldcl->head = ldcp; 811 RW_EXIT(&ldcl->lockrw); 812 813 D1(vswp, "%s: exit", __func__); 814 return (0); 815 816 ldc_attach_fail: 817 818 if (progress & PROG_callback) { 819 (void) ldc_unreg_callback(ldcp->ldc_handle); 820 kmem_free(ldcp->ldcmsg, ldcp->msglen); 821 } 822 823 if (progress & PROG_rx_thread) { 824 if (ldcp->rx_thread != NULL) { 825 vsw_stop_rx_thread(ldcp); 826 } 827 mutex_destroy(&ldcp->rx_thr_lock); 828 cv_destroy(&ldcp->rx_thr_cv); 829 } 830 831 if (progress & PROG_tx_thread) { 832 if (ldcp->tx_thread != NULL) { 833 vsw_stop_tx_thread(ldcp); 834 } 835 mutex_destroy(&ldcp->tx_thr_lock); 836 cv_destroy(&ldcp->tx_thr_cv); 837 } 838 if (ldcp->ksp != NULL) { 839 vgen_destroy_kstats(ldcp->ksp); 840 } 841 mutex_destroy(&ldcp->ldc_txlock); 842 mutex_destroy(&ldcp->ldc_rxlock); 843 mutex_destroy(&ldcp->ldc_cblock); 844 mutex_destroy(&ldcp->drain_cv_lock); 845 846 cv_destroy(&ldcp->drain_cv); 847 848 rw_destroy(&ldcp->lane_in.dlistrw); 849 rw_destroy(&ldcp->lane_out.dlistrw); 850 851 kmem_free(ldcp, sizeof (vsw_ldc_t)); 852 853 return (1); 854 } 855 856 /* 857 * Detach a logical domain channel (ldc) belonging to a 858 * particular port. 859 */ 860 static void 861 vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id) 862 { 863 vsw_t *vswp = port->p_vswp; 864 vsw_ldc_t *ldcp, *prev_ldcp; 865 vsw_ldc_list_t *ldcl = &port->p_ldclist; 866 int rv; 867 int retries = 0; 868 869 prev_ldcp = ldcl->head; 870 for (; (ldcp = prev_ldcp) != NULL; prev_ldcp = ldcp->ldc_next) { 871 if (ldcp->ldc_id == ldc_id) { 872 break; 873 } 874 } 875 876 /* specified ldc id not found */ 877 ASSERT(ldcp != NULL); 878 879 D2(vswp, "%s: detaching channel %lld", __func__, ldcp->ldc_id); 880 881 /* Stop the receive thread */ 882 if (ldcp->rx_thread != NULL) { 883 vsw_stop_rx_thread(ldcp); 884 mutex_destroy(&ldcp->rx_thr_lock); 885 cv_destroy(&ldcp->rx_thr_cv); 886 } 887 kmem_free(ldcp->ldcmsg, ldcp->msglen); 888 889 /* Stop the tx thread */ 890 if (ldcp->tx_thread != NULL) { 891 vsw_stop_tx_thread(ldcp); 892 mutex_destroy(&ldcp->tx_thr_lock); 893 cv_destroy(&ldcp->tx_thr_cv); 894 if (ldcp->tx_mhead != NULL) { 895 freemsgchain(ldcp->tx_mhead); 896 ldcp->tx_mhead = ldcp->tx_mtail = NULL; 897 ldcp->tx_cnt = 0; 898 } 899 } 900 901 /* Destroy kstats */ 902 vgen_destroy_kstats(ldcp->ksp); 903 904 /* 905 * Before we can close the channel we must release any mapped 906 * resources (e.g. drings). 907 */ 908 vsw_free_lane_resources(ldcp, INBOUND); 909 vsw_free_lane_resources(ldcp, OUTBOUND); 910 911 /* 912 * Close the channel, retry on EAGAIN.
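 * (up to vsw_ldc_retries attempts, waiting vsw_ldc_delay microseconds
 * between attempts).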
913 */ 914 while ((rv = ldc_close(ldcp->ldc_handle)) == EAGAIN) { 915 if (++retries > vsw_ldc_retries) { 916 break; 917 } 918 drv_usecwait(vsw_ldc_delay); 919 } 920 if (rv != 0) { 921 cmn_err(CE_NOTE, 922 "!vsw%d: Error(%d) closing the channel(0x%lx)\n", 923 vswp->instance, rv, ldcp->ldc_id); 924 } 925 926 (void) ldc_fini(ldcp->ldc_handle); 927 928 ldcp->ldc_status = LDC_INIT; 929 ldcp->ldc_handle = NULL; 930 ldcp->ldc_vswp = NULL; 931 932 933 /* 934 * Most likely some mblks are still in use and 935 * have not been returned to the pool. These mblks are 936 * added to the pool that is maintained in the device instance. 937 * Another attempt will be made to destroy the pool 938 * when the device detaches. 939 */ 940 vio_destroy_multipools(&ldcp->vmp, &vswp->rxh); 941 942 /* unlink it from the list */ 943 prev_ldcp = ldcp->ldc_next; 944 945 mutex_destroy(&ldcp->ldc_txlock); 946 mutex_destroy(&ldcp->ldc_rxlock); 947 mutex_destroy(&ldcp->ldc_cblock); 948 cv_destroy(&ldcp->drain_cv); 949 mutex_destroy(&ldcp->drain_cv_lock); 950 mutex_destroy(&ldcp->status_lock); 951 rw_destroy(&ldcp->lane_in.dlistrw); 952 rw_destroy(&ldcp->lane_out.dlistrw); 953 954 kmem_free(ldcp, sizeof (vsw_ldc_t)); 955 } 956 957 /* 958 * Open and attempt to bring up the channel. Note that channel 959 * can only be brought up if peer has also opened channel. 960 * 961 * Returns 0 if can open and bring up channel, otherwise 962 * returns 1. 963 */ 964 static int 965 vsw_ldc_init(vsw_ldc_t *ldcp) 966 { 967 vsw_t *vswp = ldcp->ldc_vswp; 968 ldc_status_t istatus = 0; 969 int rv; 970 971 D1(vswp, "%s: enter", __func__); 972 973 LDC_ENTER_LOCK(ldcp); 974 975 /* don't start at 0 in case clients don't like that */ 976 ldcp->next_ident = 1; 977 978 rv = ldc_open(ldcp->ldc_handle); 979 if (rv != 0) { 980 DERR(vswp, "%s: ldc_open failed: id(%lld) rv(%d)", 981 __func__, ldcp->ldc_id, rv); 982 LDC_EXIT_LOCK(ldcp); 983 return (1); 984 } 985 986 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) { 987 DERR(vswp, "%s: unable to get status", __func__); 988 LDC_EXIT_LOCK(ldcp); 989 return (1); 990 991 } else if (istatus != LDC_OPEN && istatus != LDC_READY) { 992 DERR(vswp, "%s: id (%lld) status(%d) is not OPEN/READY", 993 __func__, ldcp->ldc_id, istatus); 994 LDC_EXIT_LOCK(ldcp); 995 return (1); 996 } 997 998 mutex_enter(&ldcp->status_lock); 999 ldcp->ldc_status = istatus; 1000 mutex_exit(&ldcp->status_lock); 1001 1002 rv = ldc_up(ldcp->ldc_handle); 1003 if (rv != 0) { 1004 /* 1005 * Not a fatal error for ldc_up() to fail, as peer 1006 * end point may simply not be ready yet. 1007 */ 1008 D2(vswp, "%s: ldc_up err id(%lld) rv(%d)", __func__, 1009 ldcp->ldc_id, rv); 1010 LDC_EXIT_LOCK(ldcp); 1011 return (1); 1012 } 1013 1014 /* 1015 * ldc_up() call is non-blocking so need to explicitly 1016 * check channel status to see if in fact the channel 1017 * is UP. 
1018 */ 1019 mutex_enter(&ldcp->status_lock); 1020 if (ldc_status(ldcp->ldc_handle, &ldcp->ldc_status) != 0) { 1021 DERR(vswp, "%s: unable to get status", __func__); 1022 mutex_exit(&ldcp->status_lock); 1023 LDC_EXIT_LOCK(ldcp); 1024 return (1); 1025 1026 } 1027 1028 if (ldcp->ldc_status == LDC_UP) { 1029 D2(vswp, "%s: channel %ld now UP (%ld)", __func__, 1030 ldcp->ldc_id, istatus); 1031 mutex_exit(&ldcp->status_lock); 1032 LDC_EXIT_LOCK(ldcp); 1033 1034 vsw_process_conn_evt(ldcp, VSW_CONN_UP); 1035 return (0); 1036 } 1037 1038 mutex_exit(&ldcp->status_lock); 1039 LDC_EXIT_LOCK(ldcp); 1040 1041 D1(vswp, "%s: exit", __func__); 1042 return (0); 1043 } 1044 1045 /* disable callbacks on the channel */ 1046 static void 1047 vsw_ldc_uninit(vsw_ldc_t *ldcp) 1048 { 1049 vsw_t *vswp = ldcp->ldc_vswp; 1050 int rv; 1051 1052 D1(vswp, "vsw_ldc_uninit: enter: id(%lx)\n", ldcp->ldc_id); 1053 1054 LDC_ENTER_LOCK(ldcp); 1055 1056 rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE); 1057 if (rv != 0) { 1058 cmn_err(CE_NOTE, "!vsw_ldc_uninit(%ld): error disabling " 1059 "interrupts (rv = %d)\n", ldcp->ldc_id, rv); 1060 } 1061 1062 mutex_enter(&ldcp->status_lock); 1063 ldcp->ldc_status = LDC_INIT; 1064 mutex_exit(&ldcp->status_lock); 1065 1066 LDC_EXIT_LOCK(ldcp); 1067 1068 D1(vswp, "vsw_ldc_uninit: exit: id(%lx)", ldcp->ldc_id); 1069 } 1070 1071 static int 1072 vsw_init_ldcs(vsw_port_t *port) 1073 { 1074 vsw_ldc_list_t *ldcl = &port->p_ldclist; 1075 vsw_ldc_t *ldcp; 1076 1077 READ_ENTER(&ldcl->lockrw); 1078 ldcp = ldcl->head; 1079 for (; ldcp != NULL; ldcp = ldcp->ldc_next) { 1080 (void) vsw_ldc_init(ldcp); 1081 } 1082 RW_EXIT(&ldcl->lockrw); 1083 1084 return (0); 1085 } 1086 1087 static void 1088 vsw_uninit_ldcs(vsw_port_t *port) 1089 { 1090 vsw_ldc_list_t *ldcl = &port->p_ldclist; 1091 vsw_ldc_t *ldcp; 1092 1093 D1(NULL, "vsw_uninit_ldcs: enter\n"); 1094 1095 READ_ENTER(&ldcl->lockrw); 1096 ldcp = ldcl->head; 1097 for (; ldcp != NULL; ldcp = ldcp->ldc_next) { 1098 vsw_ldc_uninit(ldcp); 1099 } 1100 RW_EXIT(&ldcl->lockrw); 1101 1102 D1(NULL, "vsw_uninit_ldcs: exit\n"); 1103 } 1104 1105 /* 1106 * Wait until the callback(s) associated with the ldcs under the specified 1107 * port have completed. 1108 * 1109 * Prior to this function being invoked each channel under this port 1110 * should have been quiesced via ldc_set_cb_mode(DISABLE). 1111 * 1112 * A short explanation of what we are doing below: 1113 * 1114 * The simplest approach would be to have a reference counter in 1115 * the ldc structure which is incremented/decremented by the callbacks as 1116 * they use the channel. The drain function could then simply disable any 1117 * further callbacks and do a cv_wait for the ref to hit zero. Unfortunately 1118 * there is a tiny window here - before the callback is able to get the lock 1119 * on the channel it is interrupted and this function gets to execute. It 1120 * sees that the ref count is zero and believes it's free to delete the 1121 * associated data structures. 1122 * 1123 * We get around this by taking advantage of the fact that before the ldc 1124 * framework invokes a callback it sets a flag to indicate that there is a 1125 * callback active (or about to become active). If we attempt to 1126 * unregister a callback while this active flag is set, the unregister 1127 * will fail with EWOULDBLOCK. 1128 * 1129 * If the unregister fails we do a cv_timedwait.
We will either be signaled 1130 * by the callback as it is exiting (note we have to wait a short period to 1131 * allow the callback to return fully to the ldc framework and it to clear 1132 * the active flag), or by the timer expiring. In either case we again attempt 1133 * the unregister. We repeat this until we can succesfully unregister the 1134 * callback. 1135 * 1136 * The reason we use a cv_timedwait rather than a simple cv_wait is to catch 1137 * the case where the callback has finished but the ldc framework has not yet 1138 * cleared the active flag. In this case we would never get a cv_signal. 1139 */ 1140 static void 1141 vsw_drain_ldcs(vsw_port_t *port) 1142 { 1143 vsw_ldc_list_t *ldcl = &port->p_ldclist; 1144 vsw_ldc_t *ldcp; 1145 vsw_t *vswp = port->p_vswp; 1146 1147 D1(vswp, "%s: enter", __func__); 1148 1149 READ_ENTER(&ldcl->lockrw); 1150 1151 ldcp = ldcl->head; 1152 1153 for (; ldcp != NULL; ldcp = ldcp->ldc_next) { 1154 /* 1155 * If we can unregister the channel callback then we 1156 * know that there is no callback either running or 1157 * scheduled to run for this channel so move on to next 1158 * channel in the list. 1159 */ 1160 mutex_enter(&ldcp->drain_cv_lock); 1161 1162 /* prompt active callbacks to quit */ 1163 ldcp->drain_state = VSW_LDC_DRAINING; 1164 1165 if ((ldc_unreg_callback(ldcp->ldc_handle)) == 0) { 1166 D2(vswp, "%s: unreg callback for chan %ld", __func__, 1167 ldcp->ldc_id); 1168 mutex_exit(&ldcp->drain_cv_lock); 1169 continue; 1170 } else { 1171 /* 1172 * If we end up here we know that either 1) a callback 1173 * is currently executing, 2) is about to start (i.e. 1174 * the ldc framework has set the active flag but 1175 * has not actually invoked the callback yet, or 3) 1176 * has finished and has returned to the ldc framework 1177 * but the ldc framework has not yet cleared the 1178 * active bit. 1179 * 1180 * Wait for it to finish. 1181 */ 1182 while (ldc_unreg_callback(ldcp->ldc_handle) 1183 == EWOULDBLOCK) 1184 (void) cv_timedwait(&ldcp->drain_cv, 1185 &ldcp->drain_cv_lock, lbolt + hz); 1186 1187 mutex_exit(&ldcp->drain_cv_lock); 1188 D2(vswp, "%s: unreg callback for chan %ld after " 1189 "timeout", __func__, ldcp->ldc_id); 1190 } 1191 } 1192 RW_EXIT(&ldcl->lockrw); 1193 1194 D1(vswp, "%s: exit", __func__); 1195 } 1196 1197 /* 1198 * Wait until all tasks which reference this port have completed. 1199 * 1200 * Prior to this function being invoked each channel under this port 1201 * should have been quiesced via ldc_set_cb_mode(DISABLE). 1202 */ 1203 static void 1204 vsw_drain_port_taskq(vsw_port_t *port) 1205 { 1206 vsw_t *vswp = port->p_vswp; 1207 1208 D1(vswp, "%s: enter", __func__); 1209 1210 /* 1211 * Mark the port as in the process of being detached, and 1212 * dispatch a marker task to the queue so we know when all 1213 * relevant tasks have completed. 1214 */ 1215 mutex_enter(&port->state_lock); 1216 port->state = VSW_PORT_DETACHING; 1217 1218 if ((vswp->taskq_p == NULL) || 1219 (ddi_taskq_dispatch(vswp->taskq_p, vsw_marker_task, 1220 port, DDI_NOSLEEP) != DDI_SUCCESS)) { 1221 cmn_err(CE_NOTE, "!vsw%d: unable to dispatch marker task", 1222 vswp->instance); 1223 mutex_exit(&port->state_lock); 1224 return; 1225 } 1226 1227 /* 1228 * Wait for the marker task to finish. 
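 * The marker task is dispatched behind any tasks already queued for this
 * port; when it runs it marks the port VSW_PORT_DETACHABLE and signals
 * state_cv.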
1229 */ 1230 while (port->state != VSW_PORT_DETACHABLE) 1231 cv_wait(&port->state_cv, &port->state_lock); 1232 1233 mutex_exit(&port->state_lock); 1234 1235 D1(vswp, "%s: exit", __func__); 1236 } 1237 1238 static void 1239 vsw_marker_task(void *arg) 1240 { 1241 vsw_port_t *port = arg; 1242 vsw_t *vswp = port->p_vswp; 1243 1244 D1(vswp, "%s: enter", __func__); 1245 1246 mutex_enter(&port->state_lock); 1247 1248 /* 1249 * No further tasks should be dispatched which reference 1250 * this port so ok to mark it as safe to detach. 1251 */ 1252 port->state = VSW_PORT_DETACHABLE; 1253 1254 cv_signal(&port->state_cv); 1255 1256 mutex_exit(&port->state_lock); 1257 1258 D1(vswp, "%s: exit", __func__); 1259 } 1260 1261 vsw_port_t * 1262 vsw_lookup_port(vsw_t *vswp, int p_instance) 1263 { 1264 vsw_port_list_t *plist = &vswp->plist; 1265 vsw_port_t *port; 1266 1267 for (port = plist->head; port != NULL; port = port->p_next) { 1268 if (port->p_instance == p_instance) { 1269 D2(vswp, "vsw_lookup_port: found p_instance\n"); 1270 return (port); 1271 } 1272 } 1273 1274 return (NULL); 1275 } 1276 1277 void 1278 vsw_vlan_unaware_port_reset(vsw_port_t *portp) 1279 { 1280 vsw_ldc_list_t *ldclp; 1281 vsw_ldc_t *ldcp; 1282 1283 ldclp = &portp->p_ldclist; 1284 1285 READ_ENTER(&ldclp->lockrw); 1286 1287 /* 1288 * NOTE: for now, we will assume we have a single channel. 1289 */ 1290 if (ldclp->head == NULL) { 1291 RW_EXIT(&ldclp->lockrw); 1292 return; 1293 } 1294 ldcp = ldclp->head; 1295 1296 mutex_enter(&ldcp->ldc_cblock); 1297 1298 /* 1299 * If the peer is vlan_unaware(ver < 1.3), reset channel and terminate 1300 * the connection. See comments in vsw_set_vnet_proto_ops(). 1301 */ 1302 if (ldcp->hphase == VSW_MILESTONE4 && VSW_VER_LT(ldcp, 1, 3) && 1303 portp->nvids != 0) { 1304 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1305 } 1306 1307 mutex_exit(&ldcp->ldc_cblock); 1308 1309 RW_EXIT(&ldclp->lockrw); 1310 } 1311 1312 void 1313 vsw_hio_port_reset(vsw_port_t *portp, boolean_t immediate) 1314 { 1315 vsw_ldc_list_t *ldclp; 1316 vsw_ldc_t *ldcp; 1317 1318 ldclp = &portp->p_ldclist; 1319 1320 READ_ENTER(&ldclp->lockrw); 1321 1322 /* 1323 * NOTE: for now, we will assume we have a single channel. 1324 */ 1325 if (ldclp->head == NULL) { 1326 RW_EXIT(&ldclp->lockrw); 1327 return; 1328 } 1329 ldcp = ldclp->head; 1330 1331 mutex_enter(&ldcp->ldc_cblock); 1332 1333 /* 1334 * If the peer is HybridIO capable (ver >= 1.3), reset channel 1335 * to trigger re-negotiation, which inturn trigger HybridIO 1336 * setup/cleanup. 1337 */ 1338 if ((ldcp->hphase == VSW_MILESTONE4) && 1339 (portp->p_hio_capable == B_TRUE)) { 1340 if (immediate == B_TRUE) { 1341 (void) ldc_down(ldcp->ldc_handle); 1342 } else { 1343 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1344 } 1345 } 1346 1347 mutex_exit(&ldcp->ldc_cblock); 1348 1349 RW_EXIT(&ldclp->lockrw); 1350 } 1351 1352 void 1353 vsw_port_reset(vsw_port_t *portp) 1354 { 1355 vsw_ldc_list_t *ldclp; 1356 vsw_ldc_t *ldcp; 1357 1358 ldclp = &portp->p_ldclist; 1359 1360 READ_ENTER(&ldclp->lockrw); 1361 1362 /* 1363 * NOTE: for now, we will assume we have a single channel. 1364 */ 1365 if (ldclp->head == NULL) { 1366 RW_EXIT(&ldclp->lockrw); 1367 return; 1368 } 1369 ldcp = ldclp->head; 1370 1371 mutex_enter(&ldcp->ldc_cblock); 1372 1373 /* 1374 * reset channel and terminate the connection. 
1375 */ 1376 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1377 1378 mutex_exit(&ldcp->ldc_cblock); 1379 1380 RW_EXIT(&ldclp->lockrw); 1381 } 1382 1383 void 1384 vsw_reset_ports(vsw_t *vswp) 1385 { 1386 vsw_port_list_t *plist = &vswp->plist; 1387 vsw_port_t *portp; 1388 1389 READ_ENTER(&plist->lockrw); 1390 for (portp = plist->head; portp != NULL; portp = portp->p_next) { 1391 if ((portp->p_hio_capable) && (portp->p_hio_enabled)) { 1392 vsw_hio_stop_port(portp); 1393 } 1394 vsw_port_reset(portp); 1395 } 1396 RW_EXIT(&plist->lockrw); 1397 } 1398 1399 static void 1400 vsw_send_physlink_msg(vsw_ldc_t *ldcp, link_state_t plink_state) 1401 { 1402 vnet_physlink_msg_t msg; 1403 vnet_physlink_msg_t *msgp = &msg; 1404 uint32_t physlink_info = 0; 1405 1406 if (plink_state == LINK_STATE_UP) { 1407 physlink_info |= VNET_PHYSLINK_STATE_UP; 1408 } else { 1409 physlink_info |= VNET_PHYSLINK_STATE_DOWN; 1410 } 1411 1412 msgp->tag.vio_msgtype = VIO_TYPE_CTRL; 1413 msgp->tag.vio_subtype = VIO_SUBTYPE_INFO; 1414 msgp->tag.vio_subtype_env = VNET_PHYSLINK_INFO; 1415 msgp->tag.vio_sid = ldcp->local_session; 1416 msgp->physlink_info = physlink_info; 1417 1418 (void) vsw_send_msg(ldcp, msgp, sizeof (msg), B_TRUE); 1419 } 1420 1421 static void 1422 vsw_port_physlink_update(vsw_port_t *portp) 1423 { 1424 vsw_ldc_list_t *ldclp; 1425 vsw_ldc_t *ldcp; 1426 vsw_t *vswp; 1427 1428 vswp = portp->p_vswp; 1429 ldclp = &portp->p_ldclist; 1430 1431 READ_ENTER(&ldclp->lockrw); 1432 1433 /* 1434 * NOTE: for now, we will assume we have a single channel. 1435 */ 1436 if (ldclp->head == NULL) { 1437 RW_EXIT(&ldclp->lockrw); 1438 return; 1439 } 1440 ldcp = ldclp->head; 1441 1442 mutex_enter(&ldcp->ldc_cblock); 1443 1444 /* 1445 * If handshake has completed successfully and if the vnet device 1446 * has negotiated to get physical link state updates, send a message 1447 * with the current state. 1448 */ 1449 if (ldcp->hphase == VSW_MILESTONE4 && ldcp->pls_negotiated == B_TRUE) { 1450 vsw_send_physlink_msg(ldcp, vswp->phys_link_state); 1451 } 1452 1453 mutex_exit(&ldcp->ldc_cblock); 1454 1455 RW_EXIT(&ldclp->lockrw); 1456 } 1457 1458 void 1459 vsw_physlink_update_ports(vsw_t *vswp) 1460 { 1461 vsw_port_list_t *plist = &vswp->plist; 1462 vsw_port_t *portp; 1463 1464 READ_ENTER(&plist->lockrw); 1465 for (portp = plist->head; portp != NULL; portp = portp->p_next) { 1466 vsw_port_physlink_update(portp); 1467 } 1468 RW_EXIT(&plist->lockrw); 1469 } 1470 1471 /* 1472 * Search for and remove the specified port from the port 1473 * list. Returns 0 if able to locate and remove port, otherwise 1474 * returns 1. 1475 */ 1476 static int 1477 vsw_plist_del_node(vsw_t *vswp, vsw_port_t *port) 1478 { 1479 vsw_port_list_t *plist = &vswp->plist; 1480 vsw_port_t *curr_p, *prev_p; 1481 1482 if (plist->head == NULL) 1483 return (1); 1484 1485 curr_p = prev_p = plist->head; 1486 1487 while (curr_p != NULL) { 1488 if (curr_p == port) { 1489 if (prev_p == curr_p) { 1490 plist->head = curr_p->p_next; 1491 } else { 1492 prev_p->p_next = curr_p->p_next; 1493 } 1494 plist->num_ports--; 1495 break; 1496 } else { 1497 prev_p = curr_p; 1498 curr_p = curr_p->p_next; 1499 } 1500 } 1501 return (0); 1502 } 1503 1504 /* 1505 * Interrupt handler for ldc messages. 
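 * Registered with the LDC framework via ldc_reg_callback(); processes UP,
 * READ and RESET/DOWN events while holding ldc_cblock.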
1506 */ 1507 static uint_t 1508 vsw_ldc_cb(uint64_t event, caddr_t arg) 1509 { 1510 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 1511 vsw_t *vswp = ldcp->ldc_vswp; 1512 1513 D1(vswp, "%s: enter: ldcid (%lld)\n", __func__, ldcp->ldc_id); 1514 1515 mutex_enter(&ldcp->ldc_cblock); 1516 ldcp->ldc_stats.callbacks++; 1517 1518 mutex_enter(&ldcp->status_lock); 1519 if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) { 1520 mutex_exit(&ldcp->status_lock); 1521 mutex_exit(&ldcp->ldc_cblock); 1522 return (LDC_SUCCESS); 1523 } 1524 mutex_exit(&ldcp->status_lock); 1525 1526 if (event & LDC_EVT_UP) { 1527 /* 1528 * Channel has come up. 1529 */ 1530 D2(vswp, "%s: id(%ld) event(%llx) UP: status(%ld)", 1531 __func__, ldcp->ldc_id, event, ldcp->ldc_status); 1532 1533 vsw_process_conn_evt(ldcp, VSW_CONN_UP); 1534 1535 ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0); 1536 } 1537 1538 if (event & LDC_EVT_READ) { 1539 /* 1540 * Data available for reading. 1541 */ 1542 D2(vswp, "%s: id(ld) event(%llx) data READ", 1543 __func__, ldcp->ldc_id, event); 1544 1545 if (ldcp->rx_thread != NULL) { 1546 /* 1547 * If the receive thread is enabled, then 1548 * wakeup the receive thread to process the 1549 * LDC messages. 1550 */ 1551 mutex_exit(&ldcp->ldc_cblock); 1552 mutex_enter(&ldcp->rx_thr_lock); 1553 if (!(ldcp->rx_thr_flags & VSW_WTHR_DATARCVD)) { 1554 ldcp->rx_thr_flags |= VSW_WTHR_DATARCVD; 1555 cv_signal(&ldcp->rx_thr_cv); 1556 } 1557 mutex_exit(&ldcp->rx_thr_lock); 1558 mutex_enter(&ldcp->ldc_cblock); 1559 } else { 1560 vsw_process_pkt(ldcp); 1561 } 1562 1563 ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0); 1564 1565 goto vsw_cb_exit; 1566 } 1567 1568 if (event & (LDC_EVT_DOWN | LDC_EVT_RESET)) { 1569 D2(vswp, "%s: id(%ld) event (%lx) DOWN/RESET: status(%ld)", 1570 __func__, ldcp->ldc_id, event, ldcp->ldc_status); 1571 1572 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 1573 } 1574 1575 /* 1576 * Catch either LDC_EVT_WRITE which we don't support or any 1577 * unknown event. 1578 */ 1579 if (event & 1580 ~(LDC_EVT_UP | LDC_EVT_RESET | LDC_EVT_DOWN | LDC_EVT_READ)) { 1581 DERR(vswp, "%s: id(%ld) Unexpected event=(%llx) status(%ld)", 1582 __func__, ldcp->ldc_id, event, ldcp->ldc_status); 1583 } 1584 1585 vsw_cb_exit: 1586 mutex_exit(&ldcp->ldc_cblock); 1587 1588 /* 1589 * Let the drain function know we are finishing if it 1590 * is waiting. 1591 */ 1592 mutex_enter(&ldcp->drain_cv_lock); 1593 if (ldcp->drain_state == VSW_LDC_DRAINING) 1594 cv_signal(&ldcp->drain_cv); 1595 mutex_exit(&ldcp->drain_cv_lock); 1596 1597 return (LDC_SUCCESS); 1598 } 1599 1600 /* 1601 * Reinitialise data structures associated with the channel. 
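 * Called while restarting a connection (from vsw_conn_task()); frees the
 * lane resources, removes the port's fdb/vlan/multicast state and resets
 * the handshake back to VSW_MILESTONE0.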
1602 */ 1603 static void 1604 vsw_ldc_reinit(vsw_ldc_t *ldcp) 1605 { 1606 vsw_t *vswp = ldcp->ldc_vswp; 1607 vsw_port_t *port; 1608 vsw_ldc_list_t *ldcl; 1609 1610 D1(vswp, "%s: enter", __func__); 1611 1612 /* free receive mblk pools for the channel */ 1613 vio_destroy_multipools(&ldcp->vmp, &vswp->rxh); 1614 1615 port = ldcp->ldc_port; 1616 ldcl = &port->p_ldclist; 1617 1618 READ_ENTER(&ldcl->lockrw); 1619 1620 D2(vswp, "%s: in 0x%llx : out 0x%llx", __func__, 1621 ldcp->lane_in.lstate, ldcp->lane_out.lstate); 1622 1623 vsw_free_lane_resources(ldcp, INBOUND); 1624 vsw_free_lane_resources(ldcp, OUTBOUND); 1625 RW_EXIT(&ldcl->lockrw); 1626 1627 ldcp->lane_in.lstate = 0; 1628 ldcp->lane_out.lstate = 0; 1629 1630 /* Remove the fdb entry for this port/mac address */ 1631 vsw_fdbe_del(vswp, &(port->p_macaddr)); 1632 1633 /* remove the port from vlans it has been assigned to */ 1634 vsw_vlan_remove_ids(port, VSW_VNETPORT); 1635 1636 /* 1637 * Remove parent port from any multicast groups 1638 * it may have registered with. Client must resend 1639 * multicast add command after handshake completes. 1640 */ 1641 vsw_del_mcst_port(port); 1642 1643 ldcp->peer_session = 0; 1644 ldcp->session_status = 0; 1645 ldcp->hcnt = 0; 1646 ldcp->hphase = VSW_MILESTONE0; 1647 1648 vsw_reset_vnet_proto_ops(ldcp); 1649 1650 D1(vswp, "%s: exit", __func__); 1651 } 1652 1653 /* 1654 * Process a connection event. 1655 * 1656 * Note - care must be taken to ensure that this function is 1657 * not called with the dlistrw lock held. 1658 */ 1659 static void 1660 vsw_process_conn_evt(vsw_ldc_t *ldcp, uint16_t evt) 1661 { 1662 vsw_t *vswp = ldcp->ldc_vswp; 1663 vsw_conn_evt_t *conn = NULL; 1664 1665 D1(vswp, "%s: enter", __func__); 1666 1667 /* 1668 * Check if either a reset or restart event is pending 1669 * or in progress. If so just return. 1670 * 1671 * A VSW_CONN_RESET event originates either with a LDC_RESET_EVT 1672 * being received by the callback handler, or a ECONNRESET error 1673 * code being returned from a ldc_read() or ldc_write() call. 1674 * 1675 * A VSW_CONN_RESTART event occurs when some error checking code 1676 * decides that there is a problem with data from the channel, 1677 * and that the handshake should be restarted. 1678 */ 1679 if (((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART)) && 1680 (ldstub((uint8_t *)&ldcp->reset_active))) 1681 return; 1682 1683 /* 1684 * If it is an LDC_UP event we first check the recorded 1685 * state of the channel. If this is UP then we know that 1686 * the channel moving to the UP state has already been dealt 1687 * with and don't need to dispatch a new task. 1688 * 1689 * The reason for this check is that when we do a ldc_up(), 1690 * depending on the state of the peer, we may or may not get 1691 * a LDC_UP event. As we can't depend on getting a LDC_UP evt 1692 * every time we do ldc_up() we explicitly check the channel 1693 * status to see has it come up (ldc_up() is asynch and will 1694 * complete at some undefined time), and take the appropriate 1695 * action. 1696 * 1697 * The flip side of this is that we may get a LDC_UP event 1698 * when we have already seen that the channel is up and have 1699 * dealt with that. 
1700 */ 1701 mutex_enter(&ldcp->status_lock); 1702 if (evt == VSW_CONN_UP) { 1703 if ((ldcp->ldc_status == LDC_UP) || (ldcp->reset_active != 0)) { 1704 mutex_exit(&ldcp->status_lock); 1705 return; 1706 } 1707 } 1708 mutex_exit(&ldcp->status_lock); 1709 1710 /* 1711 * The transaction group id allows us to identify and discard 1712 * any tasks which are still pending on the taskq and refer 1713 * to the handshake session we are about to restart or reset. 1714 * These stale messages no longer have any real meaning. 1715 */ 1716 (void) atomic_inc_32(&ldcp->hss_id); 1717 1718 ASSERT(vswp->taskq_p != NULL); 1719 1720 if ((conn = kmem_zalloc(sizeof (vsw_conn_evt_t), KM_NOSLEEP)) == NULL) { 1721 cmn_err(CE_WARN, "!vsw%d: unable to allocate memory for" 1722 " connection event", vswp->instance); 1723 goto err_exit; 1724 } 1725 1726 conn->evt = evt; 1727 conn->ldcp = ldcp; 1728 1729 if (ddi_taskq_dispatch(vswp->taskq_p, vsw_conn_task, conn, 1730 DDI_NOSLEEP) != DDI_SUCCESS) { 1731 cmn_err(CE_WARN, "!vsw%d: Can't dispatch connection task", 1732 vswp->instance); 1733 1734 kmem_free(conn, sizeof (vsw_conn_evt_t)); 1735 goto err_exit; 1736 } 1737 1738 D1(vswp, "%s: exit", __func__); 1739 return; 1740 1741 err_exit: 1742 /* 1743 * Have mostly likely failed due to memory shortage. Clear the flag so 1744 * that future requests will at least be attempted and will hopefully 1745 * succeed. 1746 */ 1747 if ((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART)) 1748 ldcp->reset_active = 0; 1749 } 1750 1751 /* 1752 * Deal with events relating to a connection. Invoked from a taskq. 1753 */ 1754 static void 1755 vsw_conn_task(void *arg) 1756 { 1757 vsw_conn_evt_t *conn = (vsw_conn_evt_t *)arg; 1758 vsw_ldc_t *ldcp = NULL; 1759 vsw_port_t *portp; 1760 vsw_t *vswp = NULL; 1761 uint16_t evt; 1762 ldc_status_t curr_status; 1763 1764 ldcp = conn->ldcp; 1765 evt = conn->evt; 1766 vswp = ldcp->ldc_vswp; 1767 portp = ldcp->ldc_port; 1768 1769 D1(vswp, "%s: enter", __func__); 1770 1771 /* can safely free now have copied out data */ 1772 kmem_free(conn, sizeof (vsw_conn_evt_t)); 1773 1774 mutex_enter(&ldcp->status_lock); 1775 if (ldc_status(ldcp->ldc_handle, &curr_status) != 0) { 1776 cmn_err(CE_WARN, "!vsw%d: Unable to read status of " 1777 "channel %ld", vswp->instance, ldcp->ldc_id); 1778 mutex_exit(&ldcp->status_lock); 1779 return; 1780 } 1781 1782 /* 1783 * If we wish to restart the handshake on this channel, then if 1784 * the channel is UP we bring it DOWN to flush the underlying 1785 * ldc queue. 1786 */ 1787 if ((evt == VSW_CONN_RESTART) && (curr_status == LDC_UP)) 1788 (void) ldc_down(ldcp->ldc_handle); 1789 1790 if ((portp->p_hio_capable) && (portp->p_hio_enabled)) { 1791 vsw_hio_stop(vswp, ldcp); 1792 } 1793 1794 /* 1795 * re-init all the associated data structures. 1796 */ 1797 vsw_ldc_reinit(ldcp); 1798 1799 /* 1800 * Bring the channel back up (note it does no harm to 1801 * do this even if the channel is already UP, Just 1802 * becomes effectively a no-op). 1803 */ 1804 (void) ldc_up(ldcp->ldc_handle); 1805 1806 /* 1807 * Check if channel is now UP. This will only happen if 1808 * peer has also done a ldc_up(). 
1809 */ 1810 if (ldc_status(ldcp->ldc_handle, &curr_status) != 0) { 1811 cmn_err(CE_WARN, "!vsw%d: Unable to read status of " 1812 "channel %ld", vswp->instance, ldcp->ldc_id); 1813 mutex_exit(&ldcp->status_lock); 1814 return; 1815 } 1816 1817 ldcp->ldc_status = curr_status; 1818 1819 /* channel UP so restart handshake by sending version info */ 1820 if (curr_status == LDC_UP) { 1821 if (ldcp->hcnt++ > vsw_num_handshakes) { 1822 cmn_err(CE_WARN, "!vsw%d: exceeded number of permitted" 1823 " handshake attempts (%d) on channel %ld", 1824 vswp->instance, ldcp->hcnt, ldcp->ldc_id); 1825 mutex_exit(&ldcp->status_lock); 1826 return; 1827 } 1828 1829 if (vsw_obp_ver_proto_workaround == B_FALSE && 1830 (ddi_taskq_dispatch(vswp->taskq_p, vsw_send_ver, ldcp, 1831 DDI_NOSLEEP) != DDI_SUCCESS)) { 1832 cmn_err(CE_WARN, "!vsw%d: Can't dispatch version task", 1833 vswp->instance); 1834 1835 /* 1836 * Don't count as valid restart attempt if couldn't 1837 * send version msg. 1838 */ 1839 if (ldcp->hcnt > 0) 1840 ldcp->hcnt--; 1841 } 1842 } 1843 1844 /* 1845 * Mark that the process is complete by clearing the flag. 1846 * 1847 * Note is it possible that the taskq dispatch above may have failed, 1848 * most likely due to memory shortage. We still clear the flag so 1849 * future attempts will at least be attempted and will hopefully 1850 * succeed. 1851 */ 1852 if ((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART)) 1853 ldcp->reset_active = 0; 1854 1855 mutex_exit(&ldcp->status_lock); 1856 1857 D1(vswp, "%s: exit", __func__); 1858 } 1859 1860 /* 1861 * returns 0 if legal for event signified by flag to have 1862 * occured at the time it did. Otherwise returns 1. 1863 */ 1864 int 1865 vsw_check_flag(vsw_ldc_t *ldcp, int dir, uint64_t flag) 1866 { 1867 vsw_t *vswp = ldcp->ldc_vswp; 1868 uint64_t state; 1869 uint64_t phase; 1870 1871 if (dir == INBOUND) 1872 state = ldcp->lane_in.lstate; 1873 else 1874 state = ldcp->lane_out.lstate; 1875 1876 phase = ldcp->hphase; 1877 1878 switch (flag) { 1879 case VSW_VER_INFO_RECV: 1880 if (phase > VSW_MILESTONE0) { 1881 DERR(vswp, "vsw_check_flag (%d): VER_INFO_RECV" 1882 " when in state %d\n", ldcp->ldc_id, phase); 1883 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1884 return (1); 1885 } 1886 break; 1887 1888 case VSW_VER_ACK_RECV: 1889 case VSW_VER_NACK_RECV: 1890 if (!(state & VSW_VER_INFO_SENT)) { 1891 DERR(vswp, "vsw_check_flag (%d): spurious VER_ACK or " 1892 "VER_NACK when in state %d\n", ldcp->ldc_id, phase); 1893 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1894 return (1); 1895 } else 1896 state &= ~VSW_VER_INFO_SENT; 1897 break; 1898 1899 case VSW_ATTR_INFO_RECV: 1900 if ((phase < VSW_MILESTONE1) || (phase >= VSW_MILESTONE2)) { 1901 DERR(vswp, "vsw_check_flag (%d): ATTR_INFO_RECV" 1902 " when in state %d\n", ldcp->ldc_id, phase); 1903 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1904 return (1); 1905 } 1906 break; 1907 1908 case VSW_ATTR_ACK_RECV: 1909 case VSW_ATTR_NACK_RECV: 1910 if (!(state & VSW_ATTR_INFO_SENT)) { 1911 DERR(vswp, "vsw_check_flag (%d): spurious ATTR_ACK" 1912 " or ATTR_NACK when in state %d\n", 1913 ldcp->ldc_id, phase); 1914 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1915 return (1); 1916 } else 1917 state &= ~VSW_ATTR_INFO_SENT; 1918 break; 1919 1920 case VSW_DRING_INFO_RECV: 1921 if (phase < VSW_MILESTONE1) { 1922 DERR(vswp, "vsw_check_flag (%d): DRING_INFO_RECV" 1923 " when in state %d\n", ldcp->ldc_id, phase); 1924 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1925 return (1); 1926 } 1927 break; 1928 1929 case VSW_DRING_ACK_RECV: 
1930 case VSW_DRING_NACK_RECV: 1931 if (!(state & VSW_DRING_INFO_SENT)) { 1932 DERR(vswp, "vsw_check_flag (%d): spurious DRING_ACK " 1933 " or DRING_NACK when in state %d\n", 1934 ldcp->ldc_id, phase); 1935 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1936 return (1); 1937 } else 1938 state &= ~VSW_DRING_INFO_SENT; 1939 break; 1940 1941 case VSW_RDX_INFO_RECV: 1942 if (phase < VSW_MILESTONE3) { 1943 DERR(vswp, "vsw_check_flag (%d): RDX_INFO_RECV" 1944 " when in state %d\n", ldcp->ldc_id, phase); 1945 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1946 return (1); 1947 } 1948 break; 1949 1950 case VSW_RDX_ACK_RECV: 1951 case VSW_RDX_NACK_RECV: 1952 if (!(state & VSW_RDX_INFO_SENT)) { 1953 DERR(vswp, "vsw_check_flag (%d): spurious RDX_ACK or " 1954 "RDX_NACK when in state %d\n", ldcp->ldc_id, phase); 1955 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1956 return (1); 1957 } else 1958 state &= ~VSW_RDX_INFO_SENT; 1959 break; 1960 1961 case VSW_MCST_INFO_RECV: 1962 if (phase < VSW_MILESTONE3) { 1963 DERR(vswp, "vsw_check_flag (%d): VSW_MCST_INFO_RECV" 1964 " when in state %d\n", ldcp->ldc_id, phase); 1965 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 1966 return (1); 1967 } 1968 break; 1969 1970 default: 1971 DERR(vswp, "vsw_check_flag (%lld): unknown flag (%llx)", 1972 ldcp->ldc_id, flag); 1973 return (1); 1974 } 1975 1976 if (dir == INBOUND) 1977 ldcp->lane_in.lstate = state; 1978 else 1979 ldcp->lane_out.lstate = state; 1980 1981 D1(vswp, "vsw_check_flag (chan %lld): exit", ldcp->ldc_id); 1982 1983 return (0); 1984 } 1985 1986 void 1987 vsw_next_milestone(vsw_ldc_t *ldcp) 1988 { 1989 vsw_t *vswp = ldcp->ldc_vswp; 1990 vsw_port_t *portp = ldcp->ldc_port; 1991 1992 D1(vswp, "%s (chan %lld): enter (phase %ld)", __func__, 1993 ldcp->ldc_id, ldcp->hphase); 1994 1995 DUMP_FLAGS(ldcp->lane_in.lstate); 1996 DUMP_FLAGS(ldcp->lane_out.lstate); 1997 1998 switch (ldcp->hphase) { 1999 2000 case VSW_MILESTONE0: 2001 /* 2002 * If we haven't started to handshake with our peer, 2003 * start to do so now. 2004 */ 2005 if (ldcp->lane_out.lstate == 0) { 2006 D2(vswp, "%s: (chan %lld) starting handshake " 2007 "with peer", __func__, ldcp->ldc_id); 2008 vsw_process_conn_evt(ldcp, VSW_CONN_UP); 2009 } 2010 2011 /* 2012 * Only way to pass this milestone is to have successfully 2013 * negotiated version info. 2014 */ 2015 if ((ldcp->lane_in.lstate & VSW_VER_ACK_SENT) && 2016 (ldcp->lane_out.lstate & VSW_VER_ACK_RECV)) { 2017 2018 D2(vswp, "%s: (chan %lld) leaving milestone 0", 2019 __func__, ldcp->ldc_id); 2020 2021 vsw_set_vnet_proto_ops(ldcp); 2022 2023 /* 2024 * Next milestone is passed when attribute 2025 * information has been successfully exchanged. 2026 */ 2027 ldcp->hphase = VSW_MILESTONE1; 2028 vsw_send_attr(ldcp); 2029 2030 } 2031 break; 2032 2033 case VSW_MILESTONE1: 2034 /* 2035 * Only way to pass this milestone is to have successfully 2036 * negotiated attribute information. 
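* (For reference, the progression implemented by this routine is: MILESTONE0, version negotiation; MILESTONE1, attribute exchange; MILESTONE2, dring registration, which is skipped when the peer is not using descriptor rings; MILESTONE3, RDX exchange; MILESTONE4, handshake complete and data transfer permitted.)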
2037 */ 2038 if (ldcp->lane_in.lstate & VSW_ATTR_ACK_SENT) { 2039 2040 ldcp->hphase = VSW_MILESTONE2; 2041 2042 /* 2043 * If the peer device has said it wishes to 2044 * use descriptor rings then we send it our ring 2045 * info, otherwise we just set up a private ring 2046 * which we use an internal buffer 2047 */ 2048 if ((VSW_VER_GTEQ(ldcp, 1, 2) && 2049 (ldcp->lane_in.xfer_mode & VIO_DRING_MODE_V1_2)) || 2050 (VSW_VER_LT(ldcp, 1, 2) && 2051 (ldcp->lane_in.xfer_mode == 2052 VIO_DRING_MODE_V1_0))) { 2053 vsw_send_dring_info(ldcp); 2054 } 2055 } 2056 break; 2057 2058 case VSW_MILESTONE2: 2059 /* 2060 * If peer has indicated in its attribute message that 2061 * it wishes to use descriptor rings then the only way 2062 * to pass this milestone is for us to have received 2063 * valid dring info. 2064 * 2065 * If peer is not using descriptor rings then just fall 2066 * through. 2067 */ 2068 if ((VSW_VER_GTEQ(ldcp, 1, 2) && 2069 (ldcp->lane_in.xfer_mode & VIO_DRING_MODE_V1_2)) || 2070 (VSW_VER_LT(ldcp, 1, 2) && 2071 (ldcp->lane_in.xfer_mode == 2072 VIO_DRING_MODE_V1_0))) { 2073 if (!(ldcp->lane_in.lstate & VSW_DRING_ACK_SENT)) 2074 break; 2075 } 2076 2077 D2(vswp, "%s: (chan %lld) leaving milestone 2", 2078 __func__, ldcp->ldc_id); 2079 2080 ldcp->hphase = VSW_MILESTONE3; 2081 vsw_send_rdx(ldcp); 2082 break; 2083 2084 case VSW_MILESTONE3: 2085 /* 2086 * Pass this milestone when all paramaters have been 2087 * successfully exchanged and RDX sent in both directions. 2088 * 2089 * Mark outbound lane as available to transmit data. 2090 */ 2091 if ((ldcp->lane_out.lstate & VSW_RDX_ACK_SENT) && 2092 (ldcp->lane_in.lstate & VSW_RDX_ACK_RECV)) { 2093 2094 D2(vswp, "%s: (chan %lld) leaving milestone 3", 2095 __func__, ldcp->ldc_id); 2096 D2(vswp, "%s: ** handshake complete (0x%llx : " 2097 "0x%llx) **", __func__, ldcp->lane_in.lstate, 2098 ldcp->lane_out.lstate); 2099 ldcp->lane_out.lstate |= VSW_LANE_ACTIVE; 2100 ldcp->hphase = VSW_MILESTONE4; 2101 ldcp->hcnt = 0; 2102 DISPLAY_STATE(); 2103 /* Start HIO if enabled and capable */ 2104 if ((portp->p_hio_enabled) && (portp->p_hio_capable)) { 2105 D2(vswp, "%s: start HybridIO setup", __func__); 2106 vsw_hio_start(vswp, ldcp); 2107 } 2108 2109 if (ldcp->pls_negotiated == B_TRUE) { 2110 /* 2111 * The vnet device has negotiated to get phys 2112 * link updates. Now that the handshake with 2113 * the vnet device is complete, send an initial 2114 * update with the current physical link state. 2115 */ 2116 vsw_send_physlink_msg(ldcp, 2117 vswp->phys_link_state); 2118 } 2119 2120 } else { 2121 D2(vswp, "%s: still in milestone 3 (0x%llx : 0x%llx)", 2122 __func__, ldcp->lane_in.lstate, 2123 ldcp->lane_out.lstate); 2124 } 2125 break; 2126 2127 case VSW_MILESTONE4: 2128 D2(vswp, "%s: (chan %lld) in milestone 4", __func__, 2129 ldcp->ldc_id); 2130 break; 2131 2132 default: 2133 DERR(vswp, "%s: (chan %lld) Unknown Phase %x", __func__, 2134 ldcp->ldc_id, ldcp->hphase); 2135 } 2136 2137 D1(vswp, "%s (chan %lld): exit (phase %ld)", __func__, ldcp->ldc_id, 2138 ldcp->hphase); 2139 } 2140 2141 /* 2142 * Check if major version is supported. 2143 * 2144 * Returns 0 if finds supported major number, and if necessary 2145 * adjusts the minor field. 2146 * 2147 * Returns 1 if can't match major number exactly. Sets mjor/minor 2148 * to next lowest support values, or to zero if no other values possible. 
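* For example, if the highest entry in vsw_versions[] is, say, 1.5: a peer proposing 2.0 has its message rewritten to 1.5 and 1 is returned, so the caller replies with the lower version; a peer proposing 1.7 matches on major 1, has its minor clamped down to 5 and 0 is returned; and a peer proposing a major lower than anything in the table has both fields zeroed and 1 is returned.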
2149 */ 2150 static int 2151 vsw_supported_version(vio_ver_msg_t *vp) 2152 { 2153 int i; 2154 2155 D1(NULL, "vsw_supported_version: enter"); 2156 2157 for (i = 0; i < VSW_NUM_VER; i++) { 2158 if (vsw_versions[i].ver_major == vp->ver_major) { 2159 /* 2160 * Matching or lower major version found. Update 2161 * minor number if necessary. 2162 */ 2163 if (vp->ver_minor > vsw_versions[i].ver_minor) { 2164 D2(NULL, "%s: adjusting minor value from %d " 2165 "to %d", __func__, vp->ver_minor, 2166 vsw_versions[i].ver_minor); 2167 vp->ver_minor = vsw_versions[i].ver_minor; 2168 } 2169 2170 return (0); 2171 } 2172 2173 /* 2174 * If the message contains a higher major version number, set 2175 * the message's major/minor versions to the current values 2176 * and return false, so this message will get resent with 2177 * these values. 2178 */ 2179 if (vsw_versions[i].ver_major < vp->ver_major) { 2180 D2(NULL, "%s: adjusting major and minor " 2181 "values to %d, %d\n", 2182 __func__, vsw_versions[i].ver_major, 2183 vsw_versions[i].ver_minor); 2184 vp->ver_major = vsw_versions[i].ver_major; 2185 vp->ver_minor = vsw_versions[i].ver_minor; 2186 return (1); 2187 } 2188 } 2189 2190 /* No match was possible, zero out fields */ 2191 vp->ver_major = 0; 2192 vp->ver_minor = 0; 2193 2194 D1(NULL, "vsw_supported_version: exit"); 2195 2196 return (1); 2197 } 2198 2199 /* 2200 * Set vnet-protocol-version dependent functions based on version. 2201 */ 2202 static void 2203 vsw_set_vnet_proto_ops(vsw_ldc_t *ldcp) 2204 { 2205 vsw_t *vswp = ldcp->ldc_vswp; 2206 lane_t *lp = &ldcp->lane_out; 2207 2208 if (VSW_VER_GTEQ(ldcp, 1, 4)) { 2209 /* 2210 * If the version negotiated with peer is >= 1.4(Jumbo Frame 2211 * Support), set the mtu in our attributes to max_frame_size. 2212 */ 2213 lp->mtu = vswp->max_frame_size; 2214 } else if (VSW_VER_EQ(ldcp, 1, 3)) { 2215 /* 2216 * If the version negotiated with peer is == 1.3 (Vlan Tag 2217 * Support) set the attr.mtu to ETHERMAX + VLAN_TAGSZ. 2218 */ 2219 lp->mtu = ETHERMAX + VLAN_TAGSZ; 2220 } else { 2221 vsw_port_t *portp = ldcp->ldc_port; 2222 /* 2223 * Pre-1.3 peers expect max frame size of ETHERMAX. 2224 * We can negotiate that size with those peers provided only 2225 * pvid is defined for our peer and there are no vids. Then we 2226 * can send/recv only untagged frames of max size ETHERMAX. 2227 * Note that pvid of the peer can be different, as vsw has to 2228 * serve the vnet in that vlan even if itself is not assigned 2229 * to that vlan. 2230 */ 2231 if (portp->nvids == 0) { 2232 lp->mtu = ETHERMAX; 2233 } 2234 } 2235 2236 if (VSW_VER_GTEQ(ldcp, 1, 2)) { 2237 /* Versions >= 1.2 */ 2238 2239 if (VSW_PRI_ETH_DEFINED(vswp)) { 2240 /* 2241 * enable priority routines and pkt mode only if 2242 * at least one pri-eth-type is specified in MD. 2243 */ 2244 ldcp->tx = vsw_ldctx_pri; 2245 ldcp->rx_pktdata = vsw_process_pkt_data; 2246 2247 /* set xfer mode for vsw_send_attr() */ 2248 lp->xfer_mode = VIO_PKT_MODE | VIO_DRING_MODE_V1_2; 2249 } else { 2250 /* no priority eth types defined in MD */ 2251 2252 ldcp->tx = vsw_ldctx; 2253 ldcp->rx_pktdata = vsw_process_pkt_data_nop; 2254 2255 /* set xfer mode for vsw_send_attr() */ 2256 lp->xfer_mode = VIO_DRING_MODE_V1_2; 2257 } 2258 2259 } else { 2260 /* Versions prior to 1.2 */ 2261 2262 vsw_reset_vnet_proto_ops(ldcp); 2263 } 2264 } 2265 2266 /* 2267 * Reset vnet-protocol-version dependent functions to v1.0. 
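* (The reset selects the plain transmit routine (vsw_ldctx) and the no-op raw-data handler (vsw_process_pkt_data_nop), and advertises VIO_DRING_MODE_V1_0 in the attribute exchange; vsw_set_vnet_proto_ops() above falls back to it when the negotiated version is prior to 1.2.)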
2268 */ 2269 static void 2270 vsw_reset_vnet_proto_ops(vsw_ldc_t *ldcp) 2271 { 2272 lane_t *lp = &ldcp->lane_out; 2273 2274 ldcp->tx = vsw_ldctx; 2275 ldcp->rx_pktdata = vsw_process_pkt_data_nop; 2276 2277 /* set xfer mode for vsw_send_attr() */ 2278 lp->xfer_mode = VIO_DRING_MODE_V1_0; 2279 } 2280 2281 /* 2282 * Main routine for processing messages received over LDC. 2283 */ 2284 static void 2285 vsw_process_pkt(void *arg) 2286 { 2287 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 2288 vsw_t *vswp = ldcp->ldc_vswp; 2289 size_t msglen; 2290 vio_msg_tag_t *tagp; 2291 uint64_t *ldcmsg; 2292 int rv = 0; 2293 2294 2295 D1(vswp, "%s enter: ldcid (%lld)\n", __func__, ldcp->ldc_id); 2296 2297 ASSERT(MUTEX_HELD(&ldcp->ldc_cblock)); 2298 2299 ldcmsg = ldcp->ldcmsg; 2300 /* 2301 * If channel is up read messages until channel is empty. 2302 */ 2303 do { 2304 msglen = ldcp->msglen; 2305 rv = ldc_read(ldcp->ldc_handle, (caddr_t)ldcmsg, &msglen); 2306 2307 if (rv != 0) { 2308 DERR(vswp, "%s :ldc_read err id(%lld) rv(%d) len(%d)\n", 2309 __func__, ldcp->ldc_id, rv, msglen); 2310 } 2311 2312 /* channel has been reset */ 2313 if (rv == ECONNRESET) { 2314 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 2315 break; 2316 } 2317 2318 if (msglen == 0) { 2319 D2(vswp, "%s: ldc_read id(%lld) NODATA", __func__, 2320 ldcp->ldc_id); 2321 break; 2322 } 2323 2324 D2(vswp, "%s: ldc_read id(%lld): msglen(%d)", __func__, 2325 ldcp->ldc_id, msglen); 2326 2327 /* 2328 * Figure out what sort of packet we have gotten by 2329 * examining the msg tag, and then switch it appropriately. 2330 */ 2331 tagp = (vio_msg_tag_t *)ldcmsg; 2332 2333 switch (tagp->vio_msgtype) { 2334 case VIO_TYPE_CTRL: 2335 vsw_dispatch_ctrl_task(ldcp, ldcmsg, tagp); 2336 break; 2337 case VIO_TYPE_DATA: 2338 vsw_process_data_pkt(ldcp, ldcmsg, tagp, msglen); 2339 break; 2340 case VIO_TYPE_ERR: 2341 vsw_process_err_pkt(ldcp, ldcmsg, tagp); 2342 break; 2343 default: 2344 DERR(vswp, "%s: Unknown tag(%lx) ", __func__, 2345 "id(%lx)\n", tagp->vio_msgtype, ldcp->ldc_id); 2346 break; 2347 } 2348 } while (msglen); 2349 2350 D1(vswp, "%s exit: ldcid (%lld)\n", __func__, ldcp->ldc_id); 2351 } 2352 2353 /* 2354 * Dispatch a task to process a VIO control message. 2355 */ 2356 static void 2357 vsw_dispatch_ctrl_task(vsw_ldc_t *ldcp, void *cpkt, vio_msg_tag_t *tagp) 2358 { 2359 vsw_ctrl_task_t *ctaskp = NULL; 2360 vsw_port_t *port = ldcp->ldc_port; 2361 vsw_t *vswp = port->p_vswp; 2362 2363 D1(vswp, "%s: enter", __func__); 2364 2365 /* 2366 * We need to handle RDX ACK messages in-band as once they 2367 * are exchanged it is possible that we will get an 2368 * immediate (legitimate) data packet. 
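* All other control messages are copied into a vsw_ctrl_task_t and dispatched to the taskq below, which moves their processing out of the LDC callback context; the copy records the current hss_id, which lets vsw_process_ctrl_pkt() discard messages left over from an earlier handshake session.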
2369 */ 2370 if ((tagp->vio_subtype_env == VIO_RDX) && 2371 (tagp->vio_subtype == VIO_SUBTYPE_ACK)) { 2372 2373 if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_ACK_RECV)) 2374 return; 2375 2376 ldcp->lane_in.lstate |= VSW_RDX_ACK_RECV; 2377 D2(vswp, "%s (%ld) handling RDX_ACK in place " 2378 "(ostate 0x%llx : hphase %d)", __func__, 2379 ldcp->ldc_id, ldcp->lane_in.lstate, ldcp->hphase); 2380 vsw_next_milestone(ldcp); 2381 return; 2382 } 2383 2384 ctaskp = kmem_alloc(sizeof (vsw_ctrl_task_t), KM_NOSLEEP); 2385 2386 if (ctaskp == NULL) { 2387 DERR(vswp, "%s: unable to alloc space for ctrl msg", __func__); 2388 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2389 return; 2390 } 2391 2392 ctaskp->ldcp = ldcp; 2393 bcopy((def_msg_t *)cpkt, &ctaskp->pktp, sizeof (def_msg_t)); 2394 ctaskp->hss_id = ldcp->hss_id; 2395 2396 /* 2397 * Dispatch task to processing taskq if port is not in 2398 * the process of being detached. 2399 */ 2400 mutex_enter(&port->state_lock); 2401 if (port->state == VSW_PORT_INIT) { 2402 if ((vswp->taskq_p == NULL) || 2403 (ddi_taskq_dispatch(vswp->taskq_p, vsw_process_ctrl_pkt, 2404 ctaskp, DDI_NOSLEEP) != DDI_SUCCESS)) { 2405 mutex_exit(&port->state_lock); 2406 DERR(vswp, "%s: unable to dispatch task to taskq", 2407 __func__); 2408 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2409 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2410 return; 2411 } 2412 } else { 2413 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2414 DWARN(vswp, "%s: port %d detaching, not dispatching " 2415 "task", __func__, port->p_instance); 2416 } 2417 2418 mutex_exit(&port->state_lock); 2419 2420 D2(vswp, "%s: dispatched task to taskq for chan %d", __func__, 2421 ldcp->ldc_id); 2422 D1(vswp, "%s: exit", __func__); 2423 } 2424 2425 /* 2426 * Process a VIO ctrl message. Invoked from taskq. 2427 */ 2428 static void 2429 vsw_process_ctrl_pkt(void *arg) 2430 { 2431 vsw_ctrl_task_t *ctaskp = (vsw_ctrl_task_t *)arg; 2432 vsw_ldc_t *ldcp = ctaskp->ldcp; 2433 vsw_t *vswp = ldcp->ldc_vswp; 2434 vio_msg_tag_t tag; 2435 uint16_t env; 2436 2437 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 2438 2439 bcopy(&ctaskp->pktp, &tag, sizeof (vio_msg_tag_t)); 2440 env = tag.vio_subtype_env; 2441 2442 /* stale pkt check */ 2443 if (ctaskp->hss_id < ldcp->hss_id) { 2444 DWARN(vswp, "%s: discarding stale packet belonging to earlier" 2445 " (%ld) handshake session", __func__, ctaskp->hss_id); 2446 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2447 return; 2448 } 2449 2450 /* session id check */ 2451 if (ldcp->session_status & VSW_PEER_SESSION) { 2452 if (ldcp->peer_session != tag.vio_sid) { 2453 DERR(vswp, "%s (chan %d): invalid session id (%llx)", 2454 __func__, ldcp->ldc_id, tag.vio_sid); 2455 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2456 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 2457 return; 2458 } 2459 } 2460 2461 /* 2462 * Switch on vio_subtype envelope, then let lower routines 2463 * decide if its an INFO, ACK or NACK packet. 
2464 */ 2465 switch (env) { 2466 case VIO_VER_INFO: 2467 vsw_process_ctrl_ver_pkt(ldcp, &ctaskp->pktp); 2468 break; 2469 case VIO_DRING_REG: 2470 vsw_process_ctrl_dring_reg_pkt(ldcp, &ctaskp->pktp); 2471 break; 2472 case VIO_DRING_UNREG: 2473 vsw_process_ctrl_dring_unreg_pkt(ldcp, &ctaskp->pktp); 2474 break; 2475 case VIO_ATTR_INFO: 2476 vsw_process_ctrl_attr_pkt(ldcp, &ctaskp->pktp); 2477 break; 2478 case VNET_MCAST_INFO: 2479 vsw_process_ctrl_mcst_pkt(ldcp, &ctaskp->pktp); 2480 break; 2481 case VIO_RDX: 2482 vsw_process_ctrl_rdx_pkt(ldcp, &ctaskp->pktp); 2483 break; 2484 case VIO_DDS_INFO: 2485 vsw_process_dds_msg(vswp, ldcp, &ctaskp->pktp); 2486 break; 2487 2488 case VNET_PHYSLINK_INFO: 2489 vsw_process_physlink_msg(ldcp, &ctaskp->pktp); 2490 break; 2491 default: 2492 DERR(vswp, "%s: unknown vio_subtype_env (%x)\n", __func__, env); 2493 } 2494 2495 kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 2496 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 2497 } 2498 2499 /* 2500 * Version negotiation. We can end up here either because our peer 2501 * has responded to a handshake message we have sent it, or our peer 2502 * has initiated a handshake with us. If it's the former then it can 2503 * only be an ACK or NACK; if it's the latter it can only be an INFO. 2504 * 2505 * If it's an ACK we move to the next stage of the handshake, namely 2506 * attribute exchange. If it's a NACK we see if we can specify another 2507 * version; if we can't we stop. 2508 * 2509 * If it is an INFO we reset all params associated with communication 2510 * in that direction over this channel (remember the connection is 2511 * essentially 2 independent simplex channels). 2512 */ 2513 void 2514 vsw_process_ctrl_ver_pkt(vsw_ldc_t *ldcp, void *pkt) 2515 { 2516 vio_ver_msg_t *ver_pkt; 2517 vsw_t *vswp = ldcp->ldc_vswp; 2518 2519 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 2520 2521 /* 2522 * We know this is a ctrl/version packet so 2523 * cast it into the correct structure. 2524 */ 2525 ver_pkt = (vio_ver_msg_t *)pkt; 2526 2527 switch (ver_pkt->tag.vio_subtype) { 2528 case VIO_SUBTYPE_INFO: 2529 D2(vswp, "vsw_process_ctrl_ver_pkt: VIO_SUBTYPE_INFO\n"); 2530 2531 /* 2532 * Record the session id, which we will use from now 2533 * until we see another VER_INFO msg. Even then the 2534 * session id in most cases will be unchanged, except 2535 * if the channel was reset. 2536 */ 2537 if ((ldcp->session_status & VSW_PEER_SESSION) && 2538 (ldcp->peer_session != ver_pkt->tag.vio_sid)) { 2539 DERR(vswp, "%s: updating session id for chan %lld " 2540 "from %llx to %llx", __func__, ldcp->ldc_id, 2541 ldcp->peer_session, ver_pkt->tag.vio_sid); 2542 } 2543 2544 ldcp->peer_session = ver_pkt->tag.vio_sid; 2545 ldcp->session_status |= VSW_PEER_SESSION; 2546 2547 /* Legal message at this time ? */ 2548 if (vsw_check_flag(ldcp, INBOUND, VSW_VER_INFO_RECV)) 2549 return; 2550 2551 /* 2552 * First check the device class. Currently we only expect 2553 * to be talking to a network device; in the future we may 2554 * also talk to another switch.
2555 */ 2556 if (ver_pkt->dev_class != VDEV_NETWORK) { 2557 DERR(vswp, "%s: illegal device class %d", __func__, 2558 ver_pkt->dev_class); 2559 2560 ver_pkt->tag.vio_sid = ldcp->local_session; 2561 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2562 2563 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 2564 2565 (void) vsw_send_msg(ldcp, (void *)ver_pkt, 2566 sizeof (vio_ver_msg_t), B_TRUE); 2567 2568 ldcp->lane_in.lstate |= VSW_VER_NACK_SENT; 2569 vsw_next_milestone(ldcp); 2570 return; 2571 } else { 2572 ldcp->dev_class = ver_pkt->dev_class; 2573 } 2574 2575 /* 2576 * Now check the version. 2577 */ 2578 if (vsw_supported_version(ver_pkt) == 0) { 2579 /* 2580 * Support this major version and possibly 2581 * adjusted minor version. 2582 */ 2583 2584 D2(vswp, "%s: accepted ver %d:%d", __func__, 2585 ver_pkt->ver_major, ver_pkt->ver_minor); 2586 2587 /* Store accepted values */ 2588 ldcp->lane_in.ver_major = ver_pkt->ver_major; 2589 ldcp->lane_in.ver_minor = ver_pkt->ver_minor; 2590 2591 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 2592 2593 ldcp->lane_in.lstate |= VSW_VER_ACK_SENT; 2594 2595 if (vsw_obp_ver_proto_workaround == B_TRUE) { 2596 /* 2597 * Send a version info message 2598 * using the accepted version that 2599 * we are about to ack. Also note that 2600 * we send our ver info before we ack. 2601 * Otherwise, as soon as receiving the 2602 * ack, obp sends attr info msg, which 2603 * breaks vsw_check_flag() invoked 2604 * from vsw_process_ctrl_attr_pkt(); 2605 * as we also need VSW_VER_ACK_RECV to 2606 * be set in lane_out.lstate, before 2607 * we can receive attr info. 2608 */ 2609 vsw_send_ver(ldcp); 2610 } 2611 } else { 2612 /* 2613 * NACK back with the next lower major/minor 2614 * pairing we support (if don't suuport any more 2615 * versions then they will be set to zero. 2616 */ 2617 2618 D2(vswp, "%s: replying with ver %d:%d", __func__, 2619 ver_pkt->ver_major, ver_pkt->ver_minor); 2620 2621 /* Store updated values */ 2622 ldcp->lane_in.ver_major = ver_pkt->ver_major; 2623 ldcp->lane_in.ver_minor = ver_pkt->ver_minor; 2624 2625 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2626 2627 ldcp->lane_in.lstate |= VSW_VER_NACK_SENT; 2628 } 2629 2630 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 2631 ver_pkt->tag.vio_sid = ldcp->local_session; 2632 (void) vsw_send_msg(ldcp, (void *)ver_pkt, 2633 sizeof (vio_ver_msg_t), B_TRUE); 2634 2635 vsw_next_milestone(ldcp); 2636 break; 2637 2638 case VIO_SUBTYPE_ACK: 2639 D2(vswp, "%s: VIO_SUBTYPE_ACK\n", __func__); 2640 2641 if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_ACK_RECV)) 2642 return; 2643 2644 /* Store updated values */ 2645 ldcp->lane_out.ver_major = ver_pkt->ver_major; 2646 ldcp->lane_out.ver_minor = ver_pkt->ver_minor; 2647 2648 ldcp->lane_out.lstate |= VSW_VER_ACK_RECV; 2649 vsw_next_milestone(ldcp); 2650 2651 break; 2652 2653 case VIO_SUBTYPE_NACK: 2654 D2(vswp, "%s: VIO_SUBTYPE_NACK\n", __func__); 2655 2656 if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_NACK_RECV)) 2657 return; 2658 2659 /* 2660 * If our peer sent us a NACK with the ver fields set to 2661 * zero then there is nothing more we can do. Otherwise see 2662 * if we support either the version suggested, or a lesser 2663 * one. 2664 */ 2665 if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) { 2666 DERR(vswp, "%s: peer unable to negotiate any " 2667 "further.", __func__); 2668 ldcp->lane_out.lstate |= VSW_VER_NACK_RECV; 2669 vsw_next_milestone(ldcp); 2670 return; 2671 } 2672 2673 /* 2674 * Check to see if we support this major version or 2675 * a lower one. 
If we don't then maj/min will be set 2676 * to zero. 2677 */ 2678 (void) vsw_supported_version(ver_pkt); 2679 if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) { 2680 /* Nothing more we can do */ 2681 DERR(vswp, "%s: version negotiation failed.\n", 2682 __func__); 2683 ldcp->lane_out.lstate |= VSW_VER_NACK_RECV; 2684 vsw_next_milestone(ldcp); 2685 } else { 2686 /* found a supported major version */ 2687 ldcp->lane_out.ver_major = ver_pkt->ver_major; 2688 ldcp->lane_out.ver_minor = ver_pkt->ver_minor; 2689 2690 D2(vswp, "%s: resending with updated values (%x, %x)", 2691 __func__, ver_pkt->ver_major, ver_pkt->ver_minor); 2692 2693 ldcp->lane_out.lstate |= VSW_VER_INFO_SENT; 2694 ver_pkt->tag.vio_sid = ldcp->local_session; 2695 ver_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO; 2696 2697 DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 2698 2699 (void) vsw_send_msg(ldcp, (void *)ver_pkt, 2700 sizeof (vio_ver_msg_t), B_TRUE); 2701 2702 vsw_next_milestone(ldcp); 2703 2704 } 2705 break; 2706 2707 default: 2708 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 2709 ver_pkt->tag.vio_subtype); 2710 } 2711 2712 D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id); 2713 } 2714 2715 /* 2716 * Process an attribute packet. We can end up here either because our peer 2717 * has ACK/NACK'ed back to an earlier ATTR msg we had sent it, or our 2718 * peer has sent us an attribute INFO message 2719 * 2720 * If its an ACK we then move to the next stage of the handshake which 2721 * is to send our descriptor ring info to our peer. If its a NACK then 2722 * there is nothing more we can (currently) do. 2723 * 2724 * If we get a valid/acceptable INFO packet (and we have already negotiated 2725 * a version) we ACK back and set channel state to ATTR_RECV, otherwise we 2726 * NACK back and reset channel state to INACTIV. 2727 * 2728 * FUTURE: in time we will probably negotiate over attributes, but for 2729 * the moment unacceptable attributes are regarded as a fatal error. 2730 * 2731 */ 2732 void 2733 vsw_process_ctrl_attr_pkt(vsw_ldc_t *ldcp, void *pkt) 2734 { 2735 vnet_attr_msg_t *attr_pkt; 2736 vsw_t *vswp = ldcp->ldc_vswp; 2737 vsw_port_t *port = ldcp->ldc_port; 2738 uint64_t macaddr = 0; 2739 lane_t *lane_out = &ldcp->lane_out; 2740 lane_t *lane_in = &ldcp->lane_in; 2741 uint32_t mtu; 2742 boolean_t ack = B_TRUE; 2743 int i; 2744 2745 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 2746 2747 /* 2748 * We know this is a ctrl/attr packet so 2749 * cast it into the correct structure. 2750 */ 2751 attr_pkt = (vnet_attr_msg_t *)pkt; 2752 2753 switch (attr_pkt->tag.vio_subtype) { 2754 case VIO_SUBTYPE_INFO: 2755 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 2756 2757 if (vsw_check_flag(ldcp, INBOUND, VSW_ATTR_INFO_RECV)) 2758 return; 2759 2760 /* 2761 * If the attributes are unacceptable then we NACK back. 2762 */ 2763 if (vsw_check_attr(attr_pkt, ldcp)) { 2764 ack = B_FALSE; 2765 2766 DERR(vswp, "%s (chan %d): invalid attributes", 2767 __func__, ldcp->ldc_id); 2768 2769 } else { 2770 2771 if (VSW_VER_GTEQ(ldcp, 1, 4)) { 2772 /* 2773 * Versions >= 1.4: 2774 * The mtu is negotiated down to the 2775 * minimum of our mtu and peer's mtu. 2776 */ 2777 mtu = MIN(attr_pkt->mtu, vswp->max_frame_size); 2778 2779 /* 2780 * If we have received an ack for the attr info 2781 * that we sent, then check if the mtu computed 2782 * above matches the mtu that the peer had ack'd 2783 * (saved in local hparams). If they don't 2784 * match, we fail the handshake. 
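* For example, if our max_frame_size is (say) 9216 bytes and the peer advertises 1518 in its attr info, both ends should converge on MIN(9216, 1518) = 1518; a mismatch between the value computed here and the value the peer previously ack'd means the two ends disagree on the mtu, and the attr info is NACK'd.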
2785 */ 2786 if (lane_out->lstate & VSW_ATTR_ACK_RECV) { 2787 if (mtu != lane_out->mtu) { 2788 /* send NACK */ 2789 ack = B_FALSE; 2790 } 2791 } else { 2792 /* 2793 * Save the mtu computed above in our 2794 * attr parameters, so it gets sent in 2795 * the attr info from us to the peer. 2796 */ 2797 lane_out->mtu = mtu; 2798 } 2799 } 2800 2801 } 2802 2803 if (ack == B_FALSE) { 2804 2805 vsw_free_lane_resources(ldcp, INBOUND); 2806 2807 attr_pkt->tag.vio_sid = ldcp->local_session; 2808 attr_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 2809 2810 DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt); 2811 ldcp->lane_in.lstate |= VSW_ATTR_NACK_SENT; 2812 (void) vsw_send_msg(ldcp, (void *)attr_pkt, 2813 sizeof (vnet_attr_msg_t), B_TRUE); 2814 2815 vsw_next_milestone(ldcp); 2816 return; 2817 } 2818 2819 /* 2820 * Otherwise store attributes for this lane and update 2821 * lane state. 2822 */ 2823 lane_in->mtu = attr_pkt->mtu; 2824 lane_in->addr = attr_pkt->addr; 2825 lane_in->addr_type = attr_pkt->addr_type; 2826 lane_in->xfer_mode = attr_pkt->xfer_mode; 2827 lane_in->ack_freq = attr_pkt->ack_freq; 2828 lane_in->physlink_update = attr_pkt->physlink_update; 2829 2830 /* 2831 * Check if the client has requested physlink state updates. 2832 * If there is a physical device bound to this vswitch (L2 2833 * mode), set the ack bits to indicate it is supported. 2834 * Otherwise, set the nack bits. 2835 */ 2836 if (VSW_VER_GTEQ(ldcp, 1, 5)) { /* Protocol ver >= 1.5 */ 2837 2838 /* Does the vnet need phys link state updates ? */ 2839 if ((lane_in->physlink_update & 2840 PHYSLINK_UPDATE_STATE_MASK) == 2841 PHYSLINK_UPDATE_STATE) { 2842 2843 if (vswp->smode & VSW_LAYER2) { 2844 /* is a net-dev assigned to us ? */ 2845 attr_pkt->physlink_update = 2846 PHYSLINK_UPDATE_STATE_ACK; 2847 ldcp->pls_negotiated = B_TRUE; 2848 } else { 2849 /* not in L2 mode */ 2850 attr_pkt->physlink_update = 2851 PHYSLINK_UPDATE_STATE_NACK; 2852 ldcp->pls_negotiated = B_FALSE; 2853 } 2854 2855 } else { 2856 attr_pkt->physlink_update = 2857 PHYSLINK_UPDATE_NONE; 2858 ldcp->pls_negotiated = B_FALSE; 2859 } 2860 2861 } else { 2862 /* 2863 * physlink_update bits are ignored 2864 * if set by clients < v1.5 protocol. 2865 */ 2866 attr_pkt->physlink_update = PHYSLINK_UPDATE_NONE; 2867 ldcp->pls_negotiated = B_FALSE; 2868 } 2869 2870 if (VSW_VER_GTEQ(ldcp, 1, 4)) { 2871 /* save the MIN mtu in the msg to be replied */ 2872 attr_pkt->mtu = mtu; 2873 } 2874 2875 macaddr = lane_in->addr; 2876 for (i = ETHERADDRL - 1; i >= 0; i--) { 2877 port->p_macaddr.ether_addr_octet[i] = macaddr & 0xFF; 2878 macaddr >>= 8; 2879 } 2880 2881 /* create the fdb entry for this port/mac address */ 2882 vsw_fdbe_add(vswp, port); 2883 2884 /* add the port to the specified vlans */ 2885 vsw_vlan_add_ids(port, VSW_VNETPORT); 2886 2887 /* setup device specifc xmit routines */ 2888 mutex_enter(&port->tx_lock); 2889 if ((VSW_VER_GTEQ(ldcp, 1, 2) && 2890 (lane_in->xfer_mode & VIO_DRING_MODE_V1_2)) || 2891 (VSW_VER_LT(ldcp, 1, 2) && 2892 (lane_in->xfer_mode == VIO_DRING_MODE_V1_0))) { 2893 D2(vswp, "%s: mode = VIO_DRING_MODE", __func__); 2894 port->transmit = vsw_dringsend; 2895 } else if (lane_in->xfer_mode == VIO_DESC_MODE) { 2896 D2(vswp, "%s: mode = VIO_DESC_MODE", __func__); 2897 vsw_create_privring(ldcp); 2898 port->transmit = vsw_descrsend; 2899 lane_out->xfer_mode = VIO_DESC_MODE; 2900 } 2901 2902 /* 2903 * HybridIO is supported only vnet, not by OBP. 2904 * So, set hio_capable to true only when in DRING mode. 
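* (An OBP peer uses in-band descriptor mode, so a peer operating in VIO_DESC_MODE, or one that negotiated a version below 1.3, is marked as not HybridIO capable.)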
2905 */ 2906 if (VSW_VER_GTEQ(ldcp, 1, 3) && 2907 (lane_in->xfer_mode != VIO_DESC_MODE)) { 2908 (void) atomic_swap_32(&port->p_hio_capable, B_TRUE); 2909 } else { 2910 (void) atomic_swap_32(&port->p_hio_capable, B_FALSE); 2911 } 2912 2913 mutex_exit(&port->tx_lock); 2914 2915 attr_pkt->tag.vio_sid = ldcp->local_session; 2916 attr_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 2917 2918 DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt); 2919 2920 lane_in->lstate |= VSW_ATTR_ACK_SENT; 2921 2922 (void) vsw_send_msg(ldcp, (void *)attr_pkt, 2923 sizeof (vnet_attr_msg_t), B_TRUE); 2924 2925 vsw_next_milestone(ldcp); 2926 break; 2927 2928 case VIO_SUBTYPE_ACK: 2929 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 2930 2931 if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_ACK_RECV)) 2932 return; 2933 2934 if (VSW_VER_GTEQ(ldcp, 1, 4)) { 2935 /* 2936 * Versions >= 1.4: 2937 * The ack msg sent by the peer contains the minimum of 2938 * our mtu (that we had sent in our attr info) and the 2939 * peer's mtu. 2940 * 2941 * If we have sent an ack for the attr info msg from 2942 * the peer, check if the mtu that was computed then 2943 * (saved in lane_out params) matches the mtu that the 2944 * peer has ack'd. If they don't match, we fail the 2945 * handshake. 2946 */ 2947 if (lane_in->lstate & VSW_ATTR_ACK_SENT) { 2948 if (lane_out->mtu != attr_pkt->mtu) { 2949 return; 2950 } 2951 } else { 2952 /* 2953 * If the mtu ack'd by the peer is > our mtu 2954 * fail handshake. Otherwise, save the mtu, so 2955 * we can validate it when we receive attr info 2956 * from our peer. 2957 */ 2958 if (attr_pkt->mtu > lane_out->mtu) { 2959 return; 2960 } 2961 if (attr_pkt->mtu <= lane_out->mtu) { 2962 lane_out->mtu = attr_pkt->mtu; 2963 } 2964 } 2965 } 2966 2967 lane_out->lstate |= VSW_ATTR_ACK_RECV; 2968 vsw_next_milestone(ldcp); 2969 break; 2970 2971 case VIO_SUBTYPE_NACK: 2972 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 2973 2974 if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_NACK_RECV)) 2975 return; 2976 2977 lane_out->lstate |= VSW_ATTR_NACK_RECV; 2978 vsw_next_milestone(ldcp); 2979 break; 2980 2981 default: 2982 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 2983 attr_pkt->tag.vio_subtype); 2984 } 2985 2986 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 2987 } 2988 2989 /* 2990 * Process a dring info packet. We can end up here either because our peer 2991 * has ACK/NACK'ed back to an earlier DRING msg we had sent it, or our 2992 * peer has sent us a dring INFO message. 2993 * 2994 * If we get a valid/acceptable INFO packet (and we have already negotiated 2995 * a version) we ACK back and update the lane state, otherwise we NACK back. 2996 * 2997 * FUTURE: nothing to stop client from sending us info on multiple dring's 2998 * but for the moment we will just use the first one we are given. 2999 * 3000 */ 3001 void 3002 vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *ldcp, void *pkt) 3003 { 3004 vio_dring_reg_msg_t *dring_pkt; 3005 vsw_t *vswp = ldcp->ldc_vswp; 3006 ldc_mem_info_t minfo; 3007 dring_info_t *dp, *dbp; 3008 int dring_found = 0; 3009 3010 /* 3011 * We know this is a ctrl/dring packet so 3012 * cast it into the correct structure. 3013 */ 3014 dring_pkt = (vio_dring_reg_msg_t *)pkt; 3015 3016 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 3017 3018 switch (dring_pkt->tag.vio_subtype) { 3019 case VIO_SUBTYPE_INFO: 3020 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3021 3022 if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV)) 3023 return; 3024 3025 /* 3026 * If the dring params are unacceptable then we NACK back. 
3027 */ 3028 if (vsw_check_dring_info(dring_pkt)) { 3029 3030 DERR(vswp, "%s (%lld): invalid dring info", 3031 __func__, ldcp->ldc_id); 3032 3033 vsw_free_lane_resources(ldcp, INBOUND); 3034 3035 dring_pkt->tag.vio_sid = ldcp->local_session; 3036 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 3037 3038 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 3039 3040 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 3041 3042 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 3043 sizeof (vio_dring_reg_msg_t), B_TRUE); 3044 3045 vsw_next_milestone(ldcp); 3046 return; 3047 } 3048 3049 /* 3050 * Otherwise, attempt to map in the dring using the 3051 * cookie. If that succeeds we send back a unique dring 3052 * identifier that the sending side will use in future 3053 * to refer to this descriptor ring. 3054 */ 3055 dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 3056 3057 dp->num_descriptors = dring_pkt->num_descriptors; 3058 dp->descriptor_size = dring_pkt->descriptor_size; 3059 dp->options = dring_pkt->options; 3060 dp->ncookies = dring_pkt->ncookies; 3061 3062 /* 3063 * Note: should only get one cookie. Enforced in 3064 * the ldc layer. 3065 */ 3066 bcopy(&dring_pkt->cookie[0], &dp->cookie[0], 3067 sizeof (ldc_mem_cookie_t)); 3068 3069 D2(vswp, "%s: num_desc %ld : desc_size %ld", __func__, 3070 dp->num_descriptors, dp->descriptor_size); 3071 D2(vswp, "%s: options 0x%lx: ncookies %ld", __func__, 3072 dp->options, dp->ncookies); 3073 3074 if ((ldc_mem_dring_map(ldcp->ldc_handle, &dp->cookie[0], 3075 dp->ncookies, dp->num_descriptors, dp->descriptor_size, 3076 LDC_DIRECT_MAP, &(dp->handle))) != 0) { 3077 3078 DERR(vswp, "%s: dring_map failed\n", __func__); 3079 3080 kmem_free(dp, sizeof (dring_info_t)); 3081 vsw_free_lane_resources(ldcp, INBOUND); 3082 3083 dring_pkt->tag.vio_sid = ldcp->local_session; 3084 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 3085 3086 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 3087 3088 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 3089 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 3090 sizeof (vio_dring_reg_msg_t), B_TRUE); 3091 3092 vsw_next_milestone(ldcp); 3093 return; 3094 } 3095 3096 if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) { 3097 3098 DERR(vswp, "%s: dring_addr failed\n", __func__); 3099 3100 kmem_free(dp, sizeof (dring_info_t)); 3101 vsw_free_lane_resources(ldcp, INBOUND); 3102 3103 dring_pkt->tag.vio_sid = ldcp->local_session; 3104 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 3105 3106 DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 3107 3108 ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 3109 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 3110 sizeof (vio_dring_reg_msg_t), B_TRUE); 3111 3112 vsw_next_milestone(ldcp); 3113 return; 3114 } else { 3115 /* store the address of the pub part of ring */ 3116 dp->pub_addr = minfo.vaddr; 3117 3118 /* cache the dring mtype */ 3119 dp->dring_mtype = minfo.mtype; 3120 } 3121 3122 /* no private section as we are importing */ 3123 dp->priv_addr = NULL; 3124 3125 /* 3126 * Using simple mono increasing int for ident at 3127 * the moment. 3128 */ 3129 dp->ident = ldcp->next_ident; 3130 ldcp->next_ident++; 3131 3132 dp->end_idx = 0; 3133 dp->next = NULL; 3134 3135 /* 3136 * Link it onto the end of the list of drings 3137 * for this lane. 
3138 */ 3139 if (ldcp->lane_in.dringp == NULL) { 3140 D2(vswp, "%s: adding first INBOUND dring", __func__); 3141 ldcp->lane_in.dringp = dp; 3142 } else { 3143 dbp = ldcp->lane_in.dringp; 3144 3145 while (dbp->next != NULL) 3146 dbp = dbp->next; 3147 3148 dbp->next = dp; 3149 } 3150 3151 /* acknowledge it */ 3152 dring_pkt->tag.vio_sid = ldcp->local_session; 3153 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3154 dring_pkt->dring_ident = dp->ident; 3155 3156 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 3157 sizeof (vio_dring_reg_msg_t), B_TRUE); 3158 3159 ldcp->lane_in.lstate |= VSW_DRING_ACK_SENT; 3160 vsw_next_milestone(ldcp); 3161 break; 3162 3163 case VIO_SUBTYPE_ACK: 3164 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3165 3166 if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_ACK_RECV)) 3167 return; 3168 3169 /* 3170 * Peer is acknowledging our dring info and will have 3171 * sent us a dring identifier which we will use to 3172 * refer to this ring w.r.t. our peer. 3173 */ 3174 dp = ldcp->lane_out.dringp; 3175 if (dp != NULL) { 3176 /* 3177 * Find the ring this ident should be associated 3178 * with. 3179 */ 3180 if (vsw_dring_match(dp, dring_pkt)) { 3181 dring_found = 1; 3182 3183 } else while (dp != NULL) { 3184 if (vsw_dring_match(dp, dring_pkt)) { 3185 dring_found = 1; 3186 break; 3187 } 3188 dp = dp->next; 3189 } 3190 3191 if (dring_found == 0) { 3192 DERR(NULL, "%s: unrecognised ring cookie", 3193 __func__); 3194 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3195 return; 3196 } 3197 3198 } else { 3199 DERR(vswp, "%s: DRING ACK received but no drings " 3200 "allocated", __func__); 3201 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3202 return; 3203 } 3204 3205 /* store ident */ 3206 dp->ident = dring_pkt->dring_ident; 3207 ldcp->lane_out.lstate |= VSW_DRING_ACK_RECV; 3208 vsw_next_milestone(ldcp); 3209 break; 3210 3211 case VIO_SUBTYPE_NACK: 3212 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3213 3214 if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_NACK_RECV)) 3215 return; 3216 3217 ldcp->lane_out.lstate |= VSW_DRING_NACK_RECV; 3218 vsw_next_milestone(ldcp); 3219 break; 3220 3221 default: 3222 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 3223 dring_pkt->tag.vio_subtype); 3224 } 3225 3226 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 3227 } 3228 3229 /* 3230 * Process a request from peer to unregister a dring. 3231 * 3232 * For the moment we just restart the handshake if our 3233 * peer endpoint attempts to unregister a dring. 3234 */ 3235 void 3236 vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *ldcp, void *pkt) 3237 { 3238 vsw_t *vswp = ldcp->ldc_vswp; 3239 vio_dring_unreg_msg_t *dring_pkt; 3240 3241 /* 3242 * We know this is a ctrl/dring packet so 3243 * cast it into the correct structure. 
3244 */ 3245 dring_pkt = (vio_dring_unreg_msg_t *)pkt; 3246 3247 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3248 3249 switch (dring_pkt->tag.vio_subtype) { 3250 case VIO_SUBTYPE_INFO: 3251 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3252 3253 DWARN(vswp, "%s: restarting handshake..", __func__); 3254 break; 3255 3256 case VIO_SUBTYPE_ACK: 3257 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3258 3259 DWARN(vswp, "%s: restarting handshake..", __func__); 3260 break; 3261 3262 case VIO_SUBTYPE_NACK: 3263 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3264 3265 DWARN(vswp, "%s: restarting handshake..", __func__); 3266 break; 3267 3268 default: 3269 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 3270 dring_pkt->tag.vio_subtype); 3271 } 3272 3273 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3274 3275 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3276 } 3277 3278 #define SND_MCST_NACK(ldcp, pkt) \ 3279 pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \ 3280 pkt->tag.vio_sid = ldcp->local_session; \ 3281 (void) vsw_send_msg(ldcp, (void *)pkt, \ 3282 sizeof (vnet_mcast_msg_t), B_TRUE); 3283 3284 /* 3285 * Process a multicast request from a vnet. 3286 * 3287 * Vnet's specify a multicast address that they are interested in. This 3288 * address is used as a key into the hash table which forms the multicast 3289 * forwarding database (mFDB). 3290 * 3291 * The table keys are the multicast addresses, while the table entries 3292 * are pointers to lists of ports which wish to receive packets for the 3293 * specified multicast address. 3294 * 3295 * When a multicast packet is being switched we use the address as a key 3296 * into the hash table, and then walk the appropriate port list forwarding 3297 * the pkt to each port in turn. 3298 * 3299 * If a vnet is no longer interested in a particular multicast grouping 3300 * we simply find the correct location in the hash table and then delete 3301 * the relevant port from the port list. 3302 * 3303 * To deal with the case whereby a port is being deleted without first 3304 * removing itself from the lists in the hash table, we maintain a list 3305 * of multicast addresses the port has registered an interest in, within 3306 * the port structure itself. We then simply walk that list of addresses 3307 * using them as keys into the hash table and remove the port from the 3308 * appropriate lists. 3309 */ 3310 static void 3311 vsw_process_ctrl_mcst_pkt(vsw_ldc_t *ldcp, void *pkt) 3312 { 3313 vnet_mcast_msg_t *mcst_pkt; 3314 vsw_port_t *port = ldcp->ldc_port; 3315 vsw_t *vswp = ldcp->ldc_vswp; 3316 int i; 3317 3318 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3319 3320 /* 3321 * We know this is a ctrl/mcast packet so 3322 * cast it into the correct structure. 3323 */ 3324 mcst_pkt = (vnet_mcast_msg_t *)pkt; 3325 3326 switch (mcst_pkt->tag.vio_subtype) { 3327 case VIO_SUBTYPE_INFO: 3328 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3329 3330 /* 3331 * Check if in correct state to receive a multicast 3332 * message (i.e. handshake complete). If not reset 3333 * the handshake. 3334 */ 3335 if (vsw_check_flag(ldcp, INBOUND, VSW_MCST_INFO_RECV)) 3336 return; 3337 3338 /* 3339 * Before attempting to add or remove address check 3340 * that they are valid multicast addresses. 3341 * If not, then NACK back. 
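* A valid Ethernet multicast address has the group bit, the least significant bit of the first octet, set; that is exactly what the test below checks, so any address with an even first octet (i.e. a unicast address) causes the whole request to be NACK'd.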
3342 */ 3343 for (i = 0; i < mcst_pkt->count; i++) { 3344 if ((mcst_pkt->mca[i].ether_addr_octet[0] & 01) != 1) { 3345 DERR(vswp, "%s: invalid multicast address", 3346 __func__); 3347 SND_MCST_NACK(ldcp, mcst_pkt); 3348 return; 3349 } 3350 } 3351 3352 /* 3353 * Now add/remove the addresses. If this fails we 3354 * NACK back. 3355 */ 3356 if (vsw_add_rem_mcst(mcst_pkt, port) != 0) { 3357 SND_MCST_NACK(ldcp, mcst_pkt); 3358 return; 3359 } 3360 3361 mcst_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3362 mcst_pkt->tag.vio_sid = ldcp->local_session; 3363 3364 DUMP_TAG_PTR((vio_msg_tag_t *)mcst_pkt); 3365 3366 (void) vsw_send_msg(ldcp, (void *)mcst_pkt, 3367 sizeof (vnet_mcast_msg_t), B_TRUE); 3368 break; 3369 3370 case VIO_SUBTYPE_ACK: 3371 DWARN(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3372 3373 /* 3374 * We shouldn't ever get a multicast ACK message as 3375 * at the moment we never request multicast addresses 3376 * to be set on some other device. This may change in 3377 * the future if we have cascading switches. 3378 */ 3379 if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_ACK_RECV)) 3380 return; 3381 3382 /* Do nothing */ 3383 break; 3384 3385 case VIO_SUBTYPE_NACK: 3386 DWARN(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3387 3388 /* 3389 * We shouldn't get a multicast NACK packet for the 3390 * same reasons as we shouldn't get a ACK packet. 3391 */ 3392 if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_NACK_RECV)) 3393 return; 3394 3395 /* Do nothing */ 3396 break; 3397 3398 default: 3399 DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 3400 mcst_pkt->tag.vio_subtype); 3401 } 3402 3403 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3404 } 3405 3406 static void 3407 vsw_process_ctrl_rdx_pkt(vsw_ldc_t *ldcp, void *pkt) 3408 { 3409 vio_rdx_msg_t *rdx_pkt; 3410 vsw_t *vswp = ldcp->ldc_vswp; 3411 3412 /* 3413 * We know this is a ctrl/rdx packet so 3414 * cast it into the correct structure. 3415 */ 3416 rdx_pkt = (vio_rdx_msg_t *)pkt; 3417 3418 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 3419 3420 switch (rdx_pkt->tag.vio_subtype) { 3421 case VIO_SUBTYPE_INFO: 3422 D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 3423 3424 if (vsw_check_flag(ldcp, OUTBOUND, VSW_RDX_INFO_RECV)) 3425 return; 3426 3427 rdx_pkt->tag.vio_sid = ldcp->local_session; 3428 rdx_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3429 3430 DUMP_TAG_PTR((vio_msg_tag_t *)rdx_pkt); 3431 3432 ldcp->lane_out.lstate |= VSW_RDX_ACK_SENT; 3433 3434 (void) vsw_send_msg(ldcp, (void *)rdx_pkt, 3435 sizeof (vio_rdx_msg_t), B_TRUE); 3436 3437 vsw_next_milestone(ldcp); 3438 break; 3439 3440 case VIO_SUBTYPE_ACK: 3441 /* 3442 * Should be handled in-band by callback handler. 
3443 */ 3444 DERR(vswp, "%s: Unexpected VIO_SUBTYPE_ACK", __func__); 3445 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3446 break; 3447 3448 case VIO_SUBTYPE_NACK: 3449 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3450 3451 if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_NACK_RECV)) 3452 return; 3453 3454 ldcp->lane_in.lstate |= VSW_RDX_NACK_RECV; 3455 vsw_next_milestone(ldcp); 3456 break; 3457 3458 default: 3459 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 3460 rdx_pkt->tag.vio_subtype); 3461 } 3462 3463 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3464 } 3465 3466 static void 3467 vsw_process_physlink_msg(vsw_ldc_t *ldcp, void *pkt) 3468 { 3469 vnet_physlink_msg_t *msgp; 3470 vsw_t *vswp = ldcp->ldc_vswp; 3471 3472 msgp = (vnet_physlink_msg_t *)pkt; 3473 3474 D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 3475 3476 switch (msgp->tag.vio_subtype) { 3477 case VIO_SUBTYPE_INFO: 3478 3479 /* vsw shouldn't recv physlink info */ 3480 DWARN(vswp, "%s: Unexpected VIO_SUBTYPE_INFO", __func__); 3481 break; 3482 3483 case VIO_SUBTYPE_ACK: 3484 3485 D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 3486 break; 3487 3488 case VIO_SUBTYPE_NACK: 3489 3490 D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 3491 break; 3492 3493 default: 3494 DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 3495 msgp->tag.vio_subtype); 3496 } 3497 3498 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3499 } 3500 3501 static void 3502 vsw_process_data_pkt(vsw_ldc_t *ldcp, void *dpkt, vio_msg_tag_t *tagp, 3503 uint32_t msglen) 3504 { 3505 uint16_t env = tagp->vio_subtype_env; 3506 vsw_t *vswp = ldcp->ldc_vswp; 3507 3508 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3509 3510 /* session id check */ 3511 if (ldcp->session_status & VSW_PEER_SESSION) { 3512 if (ldcp->peer_session != tagp->vio_sid) { 3513 DERR(vswp, "%s (chan %d): invalid session id (%llx)", 3514 __func__, ldcp->ldc_id, tagp->vio_sid); 3515 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3516 return; 3517 } 3518 } 3519 3520 /* 3521 * It is an error for us to be getting data packets 3522 * before the handshake has completed. 3523 */ 3524 if (ldcp->hphase != VSW_MILESTONE4) { 3525 DERR(vswp, "%s: got data packet before handshake complete " 3526 "hphase %d (%x: %x)", __func__, ldcp->hphase, 3527 ldcp->lane_in.lstate, ldcp->lane_out.lstate); 3528 DUMP_FLAGS(ldcp->lane_in.lstate); 3529 DUMP_FLAGS(ldcp->lane_out.lstate); 3530 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3531 return; 3532 } 3533 3534 /* 3535 * To reduce the locking contention, release the 3536 * ldc_cblock here and re-acquire it once we are done 3537 * receiving packets. 3538 */ 3539 mutex_exit(&ldcp->ldc_cblock); 3540 mutex_enter(&ldcp->ldc_rxlock); 3541 3542 /* 3543 * Switch on vio_subtype envelope, then let lower routines 3544 * decide if its an INFO, ACK or NACK packet. 
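* VIO_DRING_DATA is handled by the dring receive path, VIO_PKT_DATA by the version-dependent rx_pktdata handler (raw priority-frame processing when priority ethertypes are configured, a no-op stub otherwise), and VIO_DESC_DATA by the in-band descriptor handler (most likely OBP).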
3545 */ 3546 if (env == VIO_DRING_DATA) { 3547 vsw_process_data_dring_pkt(ldcp, dpkt); 3548 } else if (env == VIO_PKT_DATA) { 3549 ldcp->rx_pktdata(ldcp, dpkt, msglen); 3550 } else if (env == VIO_DESC_DATA) { 3551 vsw_process_data_ibnd_pkt(ldcp, dpkt); 3552 } else { 3553 DERR(vswp, "%s: unknown vio_subtype_env (%x)\n", __func__, env); 3554 } 3555 3556 mutex_exit(&ldcp->ldc_rxlock); 3557 mutex_enter(&ldcp->ldc_cblock); 3558 3559 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 3560 } 3561 3562 #define SND_DRING_NACK(ldcp, pkt) \ 3563 pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \ 3564 pkt->tag.vio_sid = ldcp->local_session; \ 3565 (void) vsw_send_msg(ldcp, (void *)pkt, \ 3566 sizeof (vio_dring_msg_t), B_TRUE); 3567 3568 static void 3569 vsw_process_data_dring_pkt(vsw_ldc_t *ldcp, void *dpkt) 3570 { 3571 vio_dring_msg_t *dring_pkt; 3572 vnet_public_desc_t desc, *pub_addr = NULL; 3573 vsw_private_desc_t *priv_addr = NULL; 3574 dring_info_t *dp = NULL; 3575 vsw_t *vswp = ldcp->ldc_vswp; 3576 mblk_t *mp = NULL; 3577 mblk_t *bp = NULL; 3578 mblk_t *bpt = NULL; 3579 size_t nbytes = 0; 3580 uint64_t chain = 0; 3581 uint64_t len; 3582 uint32_t pos, start; 3583 uint32_t range_start, range_end; 3584 int32_t end, num, cnt = 0; 3585 int i, rv, rng_rv = 0, msg_rv = 0; 3586 boolean_t prev_desc_ack = B_FALSE; 3587 int read_attempts = 0; 3588 struct ether_header *ehp; 3589 lane_t *lp = &ldcp->lane_out; 3590 3591 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 3592 3593 /* 3594 * We know this is a data/dring packet so 3595 * cast it into the correct structure. 3596 */ 3597 dring_pkt = (vio_dring_msg_t *)dpkt; 3598 3599 /* 3600 * Switch on the vio_subtype. If its INFO then we need to 3601 * process the data. If its an ACK we need to make sure 3602 * it makes sense (i.e did we send an earlier data/info), 3603 * and if its a NACK then we maybe attempt a retry. 3604 */ 3605 switch (dring_pkt->tag.vio_subtype) { 3606 case VIO_SUBTYPE_INFO: 3607 D2(vswp, "%s(%lld): VIO_SUBTYPE_INFO", __func__, ldcp->ldc_id); 3608 3609 READ_ENTER(&ldcp->lane_in.dlistrw); 3610 if ((dp = vsw_ident2dring(&ldcp->lane_in, 3611 dring_pkt->dring_ident)) == NULL) { 3612 RW_EXIT(&ldcp->lane_in.dlistrw); 3613 3614 DERR(vswp, "%s(%lld): unable to find dring from " 3615 "ident 0x%llx", __func__, ldcp->ldc_id, 3616 dring_pkt->dring_ident); 3617 3618 SND_DRING_NACK(ldcp, dring_pkt); 3619 return; 3620 } 3621 3622 start = pos = dring_pkt->start_idx; 3623 end = dring_pkt->end_idx; 3624 len = dp->num_descriptors; 3625 3626 range_start = range_end = pos; 3627 3628 D2(vswp, "%s(%lld): start index %ld : end %ld\n", 3629 __func__, ldcp->ldc_id, start, end); 3630 3631 if (end == -1) { 3632 num = -1; 3633 } else if (end >= 0) { 3634 num = end >= pos ? 
end - pos + 1: (len - pos + 1) + end; 3635 3636 /* basic sanity check */ 3637 if (end > len) { 3638 RW_EXIT(&ldcp->lane_in.dlistrw); 3639 DERR(vswp, "%s(%lld): endpoint %lld outside " 3640 "ring length %lld", __func__, 3641 ldcp->ldc_id, end, len); 3642 3643 SND_DRING_NACK(ldcp, dring_pkt); 3644 return; 3645 } 3646 } else { 3647 RW_EXIT(&ldcp->lane_in.dlistrw); 3648 DERR(vswp, "%s(%lld): invalid endpoint %lld", 3649 __func__, ldcp->ldc_id, end); 3650 SND_DRING_NACK(ldcp, dring_pkt); 3651 return; 3652 } 3653 3654 while (cnt != num) { 3655 vsw_recheck_desc: 3656 pub_addr = (vnet_public_desc_t *)dp->pub_addr + pos; 3657 3658 if ((rng_rv = vnet_dring_entry_copy(pub_addr, 3659 &desc, dp->dring_mtype, dp->handle, 3660 pos, pos)) != 0) { 3661 DERR(vswp, "%s(%lld): unable to copy " 3662 "descriptor at pos %d: err %d", 3663 __func__, pos, ldcp->ldc_id, rng_rv); 3664 ldcp->ldc_stats.ierrors++; 3665 break; 3666 } 3667 3668 /* 3669 * When given a bounded range of descriptors 3670 * to process, its an error to hit a descriptor 3671 * which is not ready. In the non-bounded case 3672 * (end_idx == -1) this simply indicates we have 3673 * reached the end of the current active range. 3674 */ 3675 if (desc.hdr.dstate != VIO_DESC_READY) { 3676 /* unbound - no error */ 3677 if (end == -1) { 3678 if (read_attempts == vsw_read_attempts) 3679 break; 3680 3681 delay(drv_usectohz(vsw_desc_delay)); 3682 read_attempts++; 3683 goto vsw_recheck_desc; 3684 } 3685 3686 /* bounded - error - so NACK back */ 3687 RW_EXIT(&ldcp->lane_in.dlistrw); 3688 DERR(vswp, "%s(%lld): descriptor not READY " 3689 "(%d)", __func__, ldcp->ldc_id, 3690 desc.hdr.dstate); 3691 SND_DRING_NACK(ldcp, dring_pkt); 3692 return; 3693 } 3694 3695 DTRACE_PROBE1(read_attempts, int, read_attempts); 3696 3697 range_end = pos; 3698 3699 /* 3700 * If we ACK'd the previous descriptor then now 3701 * record the new range start position for later 3702 * ACK's. 3703 */ 3704 if (prev_desc_ack) { 3705 range_start = pos; 3706 3707 D2(vswp, "%s(%lld): updating range start to be " 3708 "%d", __func__, ldcp->ldc_id, range_start); 3709 3710 prev_desc_ack = B_FALSE; 3711 } 3712 3713 D2(vswp, "%s(%lld): processing desc %lld at pos" 3714 " 0x%llx : dstate 0x%lx : datalen 0x%lx", 3715 __func__, ldcp->ldc_id, pos, &desc, 3716 desc.hdr.dstate, desc.nbytes); 3717 3718 if ((desc.nbytes < ETHERMIN) || 3719 (desc.nbytes > lp->mtu)) { 3720 /* invalid size; drop the packet */ 3721 ldcp->ldc_stats.ierrors++; 3722 goto vsw_process_desc_done; 3723 } 3724 3725 /* 3726 * Ensure that we ask ldc for an aligned 3727 * number of bytes. Data is padded to align on 8 3728 * byte boundary, desc.nbytes is actual data length, 3729 * i.e. minus that padding. 3730 */ 3731 nbytes = (desc.nbytes + VNET_IPALIGN + 7) & ~7; 3732 if (nbytes > ldcp->max_rxpool_size) { 3733 mp = allocb(desc.nbytes + VNET_IPALIGN + 8, 3734 BPRI_MED); 3735 } else { 3736 mp = vio_multipool_allocb(&ldcp->vmp, nbytes); 3737 if (mp == NULL) { 3738 ldcp->ldc_stats.rx_vio_allocb_fail++; 3739 /* 3740 * No free receive buffers available, 3741 * so fallback onto allocb(9F). Make 3742 * sure that we get a data buffer which 3743 * is a multiple of 8 as this is 3744 * required by ldc_mem_copy. 
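* (nbytes was rounded up earlier: (desc.nbytes + VNET_IPALIGN + 7) & ~7 rounds the data length plus alignment pad to the next multiple of 8; for instance, 60 data bytes with a 6 byte pad (a pad value used here purely for illustration) become a 72 byte copy. The allocb() calls request desc.nbytes + VNET_IPALIGN + 8, so the buffer can never be smaller than that rounded copy length.)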
3745 */ 3746 DTRACE_PROBE(allocb); 3747 mp = allocb(desc.nbytes + 3748 VNET_IPALIGN + 8, BPRI_MED); 3749 } 3750 } 3751 if (mp == NULL) { 3752 DERR(vswp, "%s(%ld): allocb failed", 3753 __func__, ldcp->ldc_id); 3754 rng_rv = vnet_dring_entry_set_dstate(pub_addr, 3755 dp->dring_mtype, dp->handle, pos, pos, 3756 VIO_DESC_DONE); 3757 ldcp->ldc_stats.ierrors++; 3758 ldcp->ldc_stats.rx_allocb_fail++; 3759 break; 3760 } 3761 3762 rv = ldc_mem_copy(ldcp->ldc_handle, 3763 (caddr_t)mp->b_rptr, 0, &nbytes, 3764 desc.memcookie, desc.ncookies, LDC_COPY_IN); 3765 if (rv != 0) { 3766 DERR(vswp, "%s(%d): unable to copy in data " 3767 "from %d cookies in desc %d (rv %d)", 3768 __func__, ldcp->ldc_id, desc.ncookies, 3769 pos, rv); 3770 freemsg(mp); 3771 3772 rng_rv = vnet_dring_entry_set_dstate(pub_addr, 3773 dp->dring_mtype, dp->handle, pos, pos, 3774 VIO_DESC_DONE); 3775 ldcp->ldc_stats.ierrors++; 3776 break; 3777 } else { 3778 D2(vswp, "%s(%d): copied in %ld bytes" 3779 " using %d cookies", __func__, 3780 ldcp->ldc_id, nbytes, desc.ncookies); 3781 } 3782 3783 /* adjust the read pointer to skip over the padding */ 3784 mp->b_rptr += VNET_IPALIGN; 3785 3786 /* point to the actual end of data */ 3787 mp->b_wptr = mp->b_rptr + desc.nbytes; 3788 3789 /* update statistics */ 3790 ehp = (struct ether_header *)mp->b_rptr; 3791 if (IS_BROADCAST(ehp)) 3792 ldcp->ldc_stats.brdcstrcv++; 3793 else if (IS_MULTICAST(ehp)) 3794 ldcp->ldc_stats.multircv++; 3795 3796 ldcp->ldc_stats.ipackets++; 3797 ldcp->ldc_stats.rbytes += desc.nbytes; 3798 3799 /* 3800 * IPALIGN space can be used for VLAN_TAG 3801 */ 3802 (void) vsw_vlan_frame_pretag(ldcp->ldc_port, 3803 VSW_VNETPORT, mp); 3804 3805 /* build a chain of received packets */ 3806 if (bp == NULL) { 3807 /* first pkt */ 3808 bp = mp; 3809 bp->b_next = bp->b_prev = NULL; 3810 bpt = bp; 3811 chain = 1; 3812 } else { 3813 mp->b_next = mp->b_prev = NULL; 3814 bpt->b_next = mp; 3815 bpt = mp; 3816 chain++; 3817 } 3818 3819 vsw_process_desc_done: 3820 /* mark we are finished with this descriptor */ 3821 if ((rng_rv = vnet_dring_entry_set_dstate(pub_addr, 3822 dp->dring_mtype, dp->handle, pos, pos, 3823 VIO_DESC_DONE)) != 0) { 3824 DERR(vswp, "%s(%lld): unable to update " 3825 "dstate at pos %d: err %d", 3826 __func__, pos, ldcp->ldc_id, rng_rv); 3827 ldcp->ldc_stats.ierrors++; 3828 break; 3829 } 3830 3831 /* 3832 * Send an ACK back to peer if requested. 3833 */ 3834 if (desc.hdr.ack) { 3835 dring_pkt->start_idx = range_start; 3836 dring_pkt->end_idx = range_end; 3837 3838 DERR(vswp, "%s(%lld): processed %d %d, ACK" 3839 " requested", __func__, ldcp->ldc_id, 3840 dring_pkt->start_idx, dring_pkt->end_idx); 3841 3842 dring_pkt->dring_process_state = VIO_DP_ACTIVE; 3843 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3844 dring_pkt->tag.vio_sid = ldcp->local_session; 3845 3846 msg_rv = vsw_send_msg(ldcp, (void *)dring_pkt, 3847 sizeof (vio_dring_msg_t), B_FALSE); 3848 3849 /* 3850 * Check if ACK was successfully sent. If not 3851 * we break and deal with that below. 3852 */ 3853 if (msg_rv != 0) 3854 break; 3855 3856 prev_desc_ack = B_TRUE; 3857 range_start = pos; 3858 } 3859 3860 /* next descriptor */ 3861 pos = (pos + 1) % len; 3862 cnt++; 3863 3864 /* 3865 * Break out of loop here and stop processing to 3866 * allow some other network device (or disk) to 3867 * get access to the cpu. 
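* Any descriptors left unprocessed are not lost: the STOPPED ACK sent further below reports the range we did handle, and the peer restarts us with a fresh dring-data INFO when it finds more READY descriptors (the mirror of the restart logic in the VIO_SUBTYPE_ACK case of this function).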
3868 */ 3869 if (chain > vsw_chain_len) { 3870 D3(vswp, "%s(%lld): switching chain of %d " 3871 "msgs", __func__, ldcp->ldc_id, chain); 3872 break; 3873 } 3874 } 3875 RW_EXIT(&ldcp->lane_in.dlistrw); 3876 3877 /* send the chain of packets to be switched */ 3878 if (bp != NULL) { 3879 DTRACE_PROBE1(vsw_rcv_msgs, int, chain); 3880 D3(vswp, "%s(%lld): switching chain of %d msgs", 3881 __func__, ldcp->ldc_id, chain); 3882 vswp->vsw_switch_frame(vswp, bp, VSW_VNETPORT, 3883 ldcp->ldc_port, NULL); 3884 } 3885 3886 /* 3887 * If when we encountered an error when attempting to 3888 * access an imported dring, initiate a connection reset. 3889 */ 3890 if (rng_rv != 0) { 3891 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 3892 break; 3893 } 3894 3895 /* 3896 * If when we attempted to send the ACK we found that the 3897 * channel had been reset then now handle this. We deal with 3898 * it here as we cannot reset the channel while holding the 3899 * dlistrw lock, and we don't want to acquire/release it 3900 * continuously in the above loop, as a channel reset should 3901 * be a rare event. 3902 */ 3903 if (msg_rv == ECONNRESET) { 3904 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 3905 break; 3906 } 3907 3908 DTRACE_PROBE1(msg_cnt, int, cnt); 3909 3910 /* 3911 * We are now finished so ACK back with the state 3912 * set to STOPPING so our peer knows we are finished 3913 */ 3914 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 3915 dring_pkt->tag.vio_sid = ldcp->local_session; 3916 3917 dring_pkt->dring_process_state = VIO_DP_STOPPED; 3918 3919 DTRACE_PROBE(stop_process_sent); 3920 3921 /* 3922 * We have not processed any more descriptors beyond 3923 * the last one we ACK'd. 3924 */ 3925 if (prev_desc_ack) 3926 range_start = range_end; 3927 3928 dring_pkt->start_idx = range_start; 3929 dring_pkt->end_idx = range_end; 3930 3931 D2(vswp, "%s(%lld) processed : %d : %d, now stopping", 3932 __func__, ldcp->ldc_id, dring_pkt->start_idx, 3933 dring_pkt->end_idx); 3934 3935 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 3936 sizeof (vio_dring_msg_t), B_TRUE); 3937 break; 3938 3939 case VIO_SUBTYPE_ACK: 3940 D2(vswp, "%s(%lld): VIO_SUBTYPE_ACK", __func__, ldcp->ldc_id); 3941 /* 3942 * Verify that the relevant descriptors are all 3943 * marked as DONE 3944 */ 3945 READ_ENTER(&ldcp->lane_out.dlistrw); 3946 if ((dp = vsw_ident2dring(&ldcp->lane_out, 3947 dring_pkt->dring_ident)) == NULL) { 3948 RW_EXIT(&ldcp->lane_out.dlistrw); 3949 DERR(vswp, "%s: unknown ident in ACK", __func__); 3950 return; 3951 } 3952 3953 start = end = 0; 3954 start = dring_pkt->start_idx; 3955 end = dring_pkt->end_idx; 3956 len = dp->num_descriptors; 3957 3958 3959 mutex_enter(&dp->dlock); 3960 dp->last_ack_recv = end; 3961 ldcp->ldc_stats.dring_data_acks++; 3962 mutex_exit(&dp->dlock); 3963 3964 (void) vsw_reclaim_dring(dp, start); 3965 3966 /* 3967 * If our peer is stopping processing descriptors then 3968 * we check to make sure it has processed all the descriptors 3969 * we have updated. If not then we send it a new message 3970 * to prompt it to restart. 3971 */ 3972 if (dring_pkt->dring_process_state == VIO_DP_STOPPED) { 3973 DTRACE_PROBE(stop_process_recv); 3974 D2(vswp, "%s(%lld): got stopping msg : %d : %d", 3975 __func__, ldcp->ldc_id, dring_pkt->start_idx, 3976 dring_pkt->end_idx); 3977 3978 /* 3979 * Check next descriptor in public section of ring. 3980 * If its marked as READY then we need to prompt our 3981 * peer to start processing the ring again. 
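* The prompt takes the form of a dring-data INFO with start_idx set to (end + 1) % len and end_idx set to -1, i.e. an unbounded request asking the peer to resume at the next slot and continue for as long as it finds READY descriptors.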
3982 */ 3983 i = (end + 1) % len; 3984 pub_addr = (vnet_public_desc_t *)dp->pub_addr + i; 3985 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i; 3986 3987 /* 3988 * Hold the restart lock across all of this to 3989 * make sure that its not possible for us to 3990 * decide that a msg needs to be sent in the future 3991 * but the sending code having already checked is 3992 * about to exit. 3993 */ 3994 mutex_enter(&dp->restart_lock); 3995 ldcp->ldc_stats.dring_stopped_acks++; 3996 mutex_enter(&priv_addr->dstate_lock); 3997 if (pub_addr->hdr.dstate == VIO_DESC_READY) { 3998 3999 mutex_exit(&priv_addr->dstate_lock); 4000 4001 dring_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO; 4002 dring_pkt->tag.vio_sid = ldcp->local_session; 4003 4004 dring_pkt->start_idx = (end + 1) % len; 4005 dring_pkt->end_idx = -1; 4006 4007 D2(vswp, "%s(%lld) : sending restart msg:" 4008 " %d : %d", __func__, ldcp->ldc_id, 4009 dring_pkt->start_idx, dring_pkt->end_idx); 4010 4011 msg_rv = vsw_send_msg(ldcp, (void *)dring_pkt, 4012 sizeof (vio_dring_msg_t), B_FALSE); 4013 ldcp->ldc_stats.dring_data_msgs++; 4014 4015 } else { 4016 mutex_exit(&priv_addr->dstate_lock); 4017 dp->restart_reqd = B_TRUE; 4018 } 4019 mutex_exit(&dp->restart_lock); 4020 } 4021 RW_EXIT(&ldcp->lane_out.dlistrw); 4022 4023 /* only do channel reset after dropping dlistrw lock */ 4024 if (msg_rv == ECONNRESET) 4025 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 4026 4027 break; 4028 4029 case VIO_SUBTYPE_NACK: 4030 DWARN(vswp, "%s(%lld): VIO_SUBTYPE_NACK", 4031 __func__, ldcp->ldc_id); 4032 /* 4033 * Something is badly wrong if we are getting NACK's 4034 * for our data pkts. So reset the channel. 4035 */ 4036 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 4037 4038 break; 4039 4040 default: 4041 DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__, 4042 ldcp->ldc_id, dring_pkt->tag.vio_subtype); 4043 } 4044 4045 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 4046 } 4047 4048 /* 4049 * dummy pkt data handler function for vnet protocol version 1.0 4050 */ 4051 static void 4052 vsw_process_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen) 4053 { 4054 _NOTE(ARGUNUSED(arg1, arg2, msglen)) 4055 } 4056 4057 /* 4058 * This function handles raw pkt data messages received over the channel. 4059 * Currently, only priority-eth-type frames are received through this mechanism. 4060 * In this case, the frame(data) is present within the message itself which 4061 * is copied into an mblk before switching it. 
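* The usable frame length is bounded by the LDC message length minus VIO_PKT_DATA_HDRSIZE and is validated against ETHERMIN and the lane MTU below.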
4062 */ 4063 static void 4064 vsw_process_pkt_data(void *arg1, void *arg2, uint32_t msglen) 4065 { 4066 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg1; 4067 vio_raw_data_msg_t *dpkt = (vio_raw_data_msg_t *)arg2; 4068 uint32_t size; 4069 mblk_t *mp; 4070 vsw_t *vswp = ldcp->ldc_vswp; 4071 vgen_stats_t *statsp = &ldcp->ldc_stats; 4072 lane_t *lp = &ldcp->lane_out; 4073 4074 size = msglen - VIO_PKT_DATA_HDRSIZE; 4075 if (size < ETHERMIN || size > lp->mtu) { 4076 (void) atomic_inc_32(&statsp->rx_pri_fail); 4077 DWARN(vswp, "%s(%lld) invalid size(%d)\n", __func__, 4078 ldcp->ldc_id, size); 4079 return; 4080 } 4081 4082 mp = vio_multipool_allocb(&ldcp->vmp, size + VLAN_TAGSZ); 4083 if (mp == NULL) { 4084 mp = allocb(size + VLAN_TAGSZ, BPRI_MED); 4085 if (mp == NULL) { 4086 (void) atomic_inc_32(&statsp->rx_pri_fail); 4087 DWARN(vswp, "%s(%lld) allocb failure, " 4088 "unable to process priority frame\n", __func__, 4089 ldcp->ldc_id); 4090 return; 4091 } 4092 } 4093 4094 /* skip over the extra space for vlan tag */ 4095 mp->b_rptr += VLAN_TAGSZ; 4096 4097 /* copy the frame from the payload of raw data msg into the mblk */ 4098 bcopy(dpkt->data, mp->b_rptr, size); 4099 mp->b_wptr = mp->b_rptr + size; 4100 4101 /* update stats */ 4102 (void) atomic_inc_64(&statsp->rx_pri_packets); 4103 (void) atomic_add_64(&statsp->rx_pri_bytes, size); 4104 4105 /* 4106 * VLAN_TAGSZ of extra space has been pre-alloc'd if tag is needed. 4107 */ 4108 (void) vsw_vlan_frame_pretag(ldcp->ldc_port, VSW_VNETPORT, mp); 4109 4110 /* switch the frame to destination */ 4111 vswp->vsw_switch_frame(vswp, mp, VSW_VNETPORT, ldcp->ldc_port, NULL); 4112 } 4113 4114 /* 4115 * Process an in-band descriptor message (most likely from 4116 * OBP). 4117 */ 4118 static void 4119 vsw_process_data_ibnd_pkt(vsw_ldc_t *ldcp, void *pkt) 4120 { 4121 vnet_ibnd_desc_t *ibnd_desc; 4122 dring_info_t *dp = NULL; 4123 vsw_private_desc_t *priv_addr = NULL; 4124 vsw_t *vswp = ldcp->ldc_vswp; 4125 mblk_t *mp = NULL; 4126 size_t nbytes = 0; 4127 size_t off = 0; 4128 uint64_t idx = 0; 4129 uint32_t num = 1, len, datalen = 0; 4130 uint64_t ncookies = 0; 4131 int i, rv; 4132 int j = 0; 4133 4134 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 4135 4136 ibnd_desc = (vnet_ibnd_desc_t *)pkt; 4137 4138 switch (ibnd_desc->hdr.tag.vio_subtype) { 4139 case VIO_SUBTYPE_INFO: 4140 D1(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 4141 4142 if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV)) 4143 return; 4144 4145 /* 4146 * Data is padded to align on a 8 byte boundary, 4147 * nbytes is actual data length, i.e. minus that 4148 * padding. 4149 */ 4150 datalen = ibnd_desc->nbytes; 4151 4152 D2(vswp, "%s(%lld): processing inband desc : " 4153 ": datalen 0x%lx", __func__, ldcp->ldc_id, datalen); 4154 4155 ncookies = ibnd_desc->ncookies; 4156 4157 /* 4158 * allocb(9F) returns an aligned data block. We 4159 * need to ensure that we ask ldc for an aligned 4160 * number of bytes also. 
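* For example, a 60 byte frame is rounded up to 64 bytes for the ldc_mem_copy() below; b_wptr is still set from the original datalen, so the pad bytes are never passed up the stack.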
4161 */ 4162 nbytes = datalen; 4163 if (nbytes & 0x7) { 4164 off = 8 - (nbytes & 0x7); 4165 nbytes += off; 4166 } 4167 4168 /* alloc extra space for VLAN_TAG */ 4169 mp = allocb(datalen + 8, BPRI_MED); 4170 if (mp == NULL) { 4171 DERR(vswp, "%s(%lld): allocb failed", 4172 __func__, ldcp->ldc_id); 4173 ldcp->ldc_stats.rx_allocb_fail++; 4174 return; 4175 } 4176 4177 /* skip over the extra space for VLAN_TAG */ 4178 mp->b_rptr += 8; 4179 4180 rv = ldc_mem_copy(ldcp->ldc_handle, (caddr_t)mp->b_rptr, 4181 0, &nbytes, ibnd_desc->memcookie, (uint64_t)ncookies, 4182 LDC_COPY_IN); 4183 4184 if (rv != 0) { 4185 DERR(vswp, "%s(%d): unable to copy in data from " 4186 "%d cookie(s)", __func__, ldcp->ldc_id, ncookies); 4187 freemsg(mp); 4188 ldcp->ldc_stats.ierrors++; 4189 return; 4190 } 4191 4192 D2(vswp, "%s(%d): copied in %ld bytes using %d cookies", 4193 __func__, ldcp->ldc_id, nbytes, ncookies); 4194 4195 /* point to the actual end of data */ 4196 mp->b_wptr = mp->b_rptr + datalen; 4197 ldcp->ldc_stats.ipackets++; 4198 ldcp->ldc_stats.rbytes += datalen; 4199 4200 /* 4201 * We ACK back every in-band descriptor message we process 4202 */ 4203 ibnd_desc->hdr.tag.vio_subtype = VIO_SUBTYPE_ACK; 4204 ibnd_desc->hdr.tag.vio_sid = ldcp->local_session; 4205 (void) vsw_send_msg(ldcp, (void *)ibnd_desc, 4206 sizeof (vnet_ibnd_desc_t), B_TRUE); 4207 4208 /* 4209 * there is extra space alloc'd for VLAN_TAG 4210 */ 4211 (void) vsw_vlan_frame_pretag(ldcp->ldc_port, VSW_VNETPORT, mp); 4212 4213 /* send the packet to be switched */ 4214 vswp->vsw_switch_frame(vswp, mp, VSW_VNETPORT, 4215 ldcp->ldc_port, NULL); 4216 4217 break; 4218 4219 case VIO_SUBTYPE_ACK: 4220 D1(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 4221 4222 /* Verify the ACK is valid */ 4223 idx = ibnd_desc->hdr.desc_handle; 4224 4225 if (idx >= vsw_ntxds) { 4226 cmn_err(CE_WARN, "!vsw%d: corrupted ACK received " 4227 "(idx %ld)", vswp->instance, idx); 4228 return; 4229 } 4230 4231 if ((dp = ldcp->lane_out.dringp) == NULL) { 4232 DERR(vswp, "%s: no dring found", __func__); 4233 return; 4234 } 4235 4236 len = dp->num_descriptors; 4237 /* 4238 * If the descriptor we are being ACK'ed for is not the 4239 * one we expected, then pkts were lost somwhere, either 4240 * when we tried to send a msg, or a previous ACK msg from 4241 * our peer. In either case we now reclaim the descriptors 4242 * in the range from the last ACK we received up to the 4243 * current ACK. 4244 */ 4245 if (idx != dp->last_ack_recv) { 4246 DWARN(vswp, "%s: dropped pkts detected, (%ld, %ld)", 4247 __func__, dp->last_ack_recv, idx); 4248 num = idx >= dp->last_ack_recv ? 4249 idx - dp->last_ack_recv + 1: 4250 (len - dp->last_ack_recv + 1) + idx; 4251 } 4252 4253 /* 4254 * When we sent the in-band message to our peer we 4255 * marked the copy in our private ring as READY. We now 4256 * check that the descriptor we are being ACK'ed for is in 4257 * fact READY, i.e. it is one we have shared with our peer. 4258 * 4259 * If its not we flag an error, but still reset the descr 4260 * back to FREE. 
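* The loop below walks forward from last_ack_recv, freeing each descriptor up to and including the one being ACK'd.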
4261 */ 4262 for (i = dp->last_ack_recv; j < num; i = (i + 1) % len, j++) { 4263 priv_addr = (vsw_private_desc_t *)dp->priv_addr + i; 4264 mutex_enter(&priv_addr->dstate_lock); 4265 if (priv_addr->dstate != VIO_DESC_READY) { 4266 DERR(vswp, "%s: (%ld) desc at index %ld not " 4267 "READY (0x%lx)", __func__, 4268 ldcp->ldc_id, idx, priv_addr->dstate); 4269 DERR(vswp, "%s: bound %d: ncookies %ld : " 4270 "datalen %ld", __func__, 4271 priv_addr->bound, priv_addr->ncookies, 4272 priv_addr->datalen); 4273 } 4274 D2(vswp, "%s: (%lld) freeing descp at %lld", __func__, 4275 ldcp->ldc_id, idx); 4276 /* release resources associated with sent msg */ 4277 priv_addr->datalen = 0; 4278 priv_addr->dstate = VIO_DESC_FREE; 4279 mutex_exit(&priv_addr->dstate_lock); 4280 } 4281 /* update to next expected value */ 4282 dp->last_ack_recv = (idx + 1) % dp->num_descriptors; 4283 4284 break; 4285 4286 case VIO_SUBTYPE_NACK: 4287 DERR(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 4288 4289 /* 4290 * We should only get a NACK if our peer doesn't like 4291 * something about a message we have sent it. If this 4292 * happens we just release the resources associated with 4293 * the message. (We are relying on higher layers to decide 4294 * whether or not to resend. 4295 */ 4296 4297 /* limit check */ 4298 idx = ibnd_desc->hdr.desc_handle; 4299 4300 if (idx >= vsw_ntxds) { 4301 DERR(vswp, "%s: corrupted NACK received (idx %lld)", 4302 __func__, idx); 4303 return; 4304 } 4305 4306 if ((dp = ldcp->lane_out.dringp) == NULL) { 4307 DERR(vswp, "%s: no dring found", __func__); 4308 return; 4309 } 4310 4311 priv_addr = (vsw_private_desc_t *)dp->priv_addr; 4312 4313 /* move to correct location in ring */ 4314 priv_addr += idx; 4315 4316 /* release resources associated with sent msg */ 4317 mutex_enter(&priv_addr->dstate_lock); 4318 priv_addr->datalen = 0; 4319 priv_addr->dstate = VIO_DESC_FREE; 4320 mutex_exit(&priv_addr->dstate_lock); 4321 4322 break; 4323 4324 default: 4325 DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__, 4326 ldcp->ldc_id, ibnd_desc->hdr.tag.vio_subtype); 4327 } 4328 4329 D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 4330 } 4331 4332 static void 4333 vsw_process_err_pkt(vsw_ldc_t *ldcp, void *epkt, vio_msg_tag_t *tagp) 4334 { 4335 _NOTE(ARGUNUSED(epkt)) 4336 4337 vsw_t *vswp = ldcp->ldc_vswp; 4338 uint16_t env = tagp->vio_subtype_env; 4339 4340 D1(vswp, "%s (%lld): enter\n", __func__, ldcp->ldc_id); 4341 4342 /* 4343 * Error vio_subtypes have yet to be defined. So for 4344 * the moment we can't do anything. 4345 */ 4346 D2(vswp, "%s: (%x) vio_subtype env", __func__, env); 4347 4348 D1(vswp, "%s (%lld): exit\n", __func__, ldcp->ldc_id); 4349 } 4350 4351 /* transmit the packet over the given port */ 4352 int 4353 vsw_portsend(vsw_port_t *port, mblk_t *mp) 4354 { 4355 vsw_ldc_list_t *ldcl = &port->p_ldclist; 4356 vsw_ldc_t *ldcp; 4357 mblk_t *mpt; 4358 int count; 4359 int status = 0; 4360 4361 READ_ENTER(&ldcl->lockrw); 4362 /* 4363 * Note for now, we have a single channel. 4364 */ 4365 ldcp = ldcl->head; 4366 if (ldcp == NULL) { 4367 DERR(port->p_vswp, "vsw_portsend: no ldc: dropping packet\n"); 4368 freemsgchain(mp); 4369 RW_EXIT(&ldcl->lockrw); 4370 return (1); 4371 } 4372 4373 count = vsw_vlan_frame_untag(port, VSW_VNETPORT, &mp, &mpt); 4374 4375 if (count != 0) { 4376 status = ldcp->tx(ldcp, mp, mpt, count); 4377 } 4378 4379 RW_EXIT(&ldcl->lockrw); 4380 return (status); 4381 } 4382 4383 /* 4384 * Break up frames into 2 seperate chains: normal and 4385 * priority, based on the frame type. 
The number of 4386 * priority frames is also counted and returned. 4387 * 4388 * Params: 4389 * vswp: pointer to the instance of vsw 4390 * np: head of packet chain to be broken 4391 * npt: tail of packet chain to be broken 4392 * 4393 * Returns: 4394 * np: head of normal data packets 4395 * npt: tail of normal data packets 4396 * hp: head of high priority packets 4397 * hpt: tail of high priority packets 4398 */ 4399 static uint32_t 4400 vsw_get_pri_packets(vsw_t *vswp, mblk_t **np, mblk_t **npt, 4401 mblk_t **hp, mblk_t **hpt) 4402 { 4403 mblk_t *tmp = NULL; 4404 mblk_t *smp = NULL; 4405 mblk_t *hmp = NULL; /* high prio pkts head */ 4406 mblk_t *hmpt = NULL; /* high prio pkts tail */ 4407 mblk_t *nmp = NULL; /* normal pkts head */ 4408 mblk_t *nmpt = NULL; /* normal pkts tail */ 4409 uint32_t count = 0; 4410 int i; 4411 struct ether_header *ehp; 4412 uint32_t num_types; 4413 uint16_t *types; 4414 4415 tmp = *np; 4416 while (tmp != NULL) { 4417 4418 smp = tmp; 4419 tmp = tmp->b_next; 4420 smp->b_next = NULL; 4421 smp->b_prev = NULL; 4422 4423 ehp = (struct ether_header *)smp->b_rptr; 4424 num_types = vswp->pri_num_types; 4425 types = vswp->pri_types; 4426 for (i = 0; i < num_types; i++) { 4427 if (ehp->ether_type == types[i]) { 4428 /* high priority frame */ 4429 4430 if (hmp != NULL) { 4431 hmpt->b_next = smp; 4432 hmpt = smp; 4433 } else { 4434 hmp = hmpt = smp; 4435 } 4436 count++; 4437 break; 4438 } 4439 } 4440 if (i == num_types) { 4441 /* normal data frame */ 4442 4443 if (nmp != NULL) { 4444 nmpt->b_next = smp; 4445 nmpt = smp; 4446 } else { 4447 nmp = nmpt = smp; 4448 } 4449 } 4450 } 4451 4452 *hp = hmp; 4453 *hpt = hmpt; 4454 *np = nmp; 4455 *npt = nmpt; 4456 4457 return (count); 4458 } 4459 4460 /* 4461 * Wrapper function to transmit normal and/or priority frames over the channel. 4462 */ 4463 static int 4464 vsw_ldctx_pri(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count) 4465 { 4466 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 4467 mblk_t *tmp; 4468 mblk_t *smp; 4469 mblk_t *hmp; /* high prio pkts head */ 4470 mblk_t *hmpt; /* high prio pkts tail */ 4471 mblk_t *nmp; /* normal pkts head */ 4472 mblk_t *nmpt; /* normal pkts tail */ 4473 uint32_t n = 0; 4474 vsw_t *vswp = ldcp->ldc_vswp; 4475 4476 ASSERT(VSW_PRI_ETH_DEFINED(vswp)); 4477 ASSERT(count != 0); 4478 4479 nmp = mp; 4480 nmpt = mpt; 4481 4482 /* gather any priority frames from the chain of packets */ 4483 n = vsw_get_pri_packets(vswp, &nmp, &nmpt, &hmp, &hmpt); 4484 4485 /* transmit priority frames */ 4486 tmp = hmp; 4487 while (tmp != NULL) { 4488 smp = tmp; 4489 tmp = tmp->b_next; 4490 smp->b_next = NULL; 4491 vsw_ldcsend_pkt(ldcp, smp); 4492 } 4493 4494 count -= n; 4495 4496 if (count == 0) { 4497 /* no normal data frames to process */ 4498 return (0); 4499 } 4500 4501 return (vsw_ldctx(ldcp, nmp, nmpt, count)); 4502 } 4503 4504 /* 4505 * Wrapper function to transmit normal frames over the channel. 4506 */ 4507 static int 4508 vsw_ldctx(void *arg, mblk_t *mp, mblk_t *mpt, uint32_t count) 4509 { 4510 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 4511 mblk_t *tmp = NULL; 4512 4513 ASSERT(count != 0); 4514 /* 4515 * If the TX thread is enabled, then queue the 4516 * ordinary frames and signal the tx thread. 4517 */ 4518 if (ldcp->tx_thread != NULL) { 4519 4520 mutex_enter(&ldcp->tx_thr_lock); 4521 4522 if ((ldcp->tx_cnt + count) >= vsw_max_tx_qcount) { 4523 /* 4524 * If we reached queue limit, 4525 * do not queue new packets, 4526 * drop them. 
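* vsw_max_tx_qcount bounds the number of mblks the tx thread may have queued; dropped packets are accounted for in tx_qfull below.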
4527 */ 4528 ldcp->ldc_stats.tx_qfull += count; 4529 mutex_exit(&ldcp->tx_thr_lock); 4530 freemsgchain(mp); 4531 goto exit; 4532 } 4533 if (ldcp->tx_mhead == NULL) { 4534 ldcp->tx_mhead = mp; 4535 ldcp->tx_mtail = mpt; 4536 cv_signal(&ldcp->tx_thr_cv); 4537 } else { 4538 ldcp->tx_mtail->b_next = mp; 4539 ldcp->tx_mtail = mpt; 4540 } 4541 ldcp->tx_cnt += count; 4542 mutex_exit(&ldcp->tx_thr_lock); 4543 } else { 4544 while (mp != NULL) { 4545 tmp = mp->b_next; 4546 mp->b_next = mp->b_prev = NULL; 4547 (void) vsw_ldcsend(ldcp, mp, 1); 4548 mp = tmp; 4549 } 4550 } 4551 4552 exit: 4553 return (0); 4554 } 4555 4556 /* 4557 * This function transmits the frame in the payload of a raw data 4558 * (VIO_PKT_DATA) message. Thus, it provides an Out-Of-Band path to 4559 * send special frames with high priorities, without going through 4560 * the normal data path which uses the descriptor ring mechanism. 4561 */ 4562 static void 4563 vsw_ldcsend_pkt(vsw_ldc_t *ldcp, mblk_t *mp) 4564 { 4565 vio_raw_data_msg_t *pkt; 4566 mblk_t *bp; 4567 mblk_t *nmp = NULL; 4568 caddr_t dst; 4569 uint32_t mblksz; 4570 uint32_t size; 4571 uint32_t nbytes; 4572 int rv; 4573 vsw_t *vswp = ldcp->ldc_vswp; 4574 vgen_stats_t *statsp = &ldcp->ldc_stats; 4575 4576 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 4577 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 4578 (void) atomic_inc_32(&statsp->tx_pri_fail); 4579 DWARN(vswp, "%s(%lld) status(%d) lstate(0x%llx), dropping " 4580 "packet\n", __func__, ldcp->ldc_id, ldcp->ldc_status, 4581 ldcp->lane_out.lstate); 4582 goto send_pkt_exit; 4583 } 4584 4585 size = msgsize(mp); 4586 4587 /* frame size bigger than available payload len of raw data msg ? */ 4588 if (size > (size_t)(ldcp->msglen - VIO_PKT_DATA_HDRSIZE)) { 4589 (void) atomic_inc_32(&statsp->tx_pri_fail); 4590 DWARN(vswp, "%s(%lld) invalid size(%d)\n", __func__, 4591 ldcp->ldc_id, size); 4592 goto send_pkt_exit; 4593 } 4594 4595 if (size < ETHERMIN) 4596 size = ETHERMIN; 4597 4598 /* alloc space for a raw data message */ 4599 nmp = vio_allocb(vswp->pri_tx_vmp); 4600 if (nmp == NULL) { 4601 (void) atomic_inc_32(&statsp->tx_pri_fail); 4602 DWARN(vswp, "vio_allocb failed\n"); 4603 goto send_pkt_exit; 4604 } 4605 pkt = (vio_raw_data_msg_t *)nmp->b_rptr; 4606 4607 /* copy frame into the payload of raw data message */ 4608 dst = (caddr_t)pkt->data; 4609 for (bp = mp; bp != NULL; bp = bp->b_cont) { 4610 mblksz = MBLKL(bp); 4611 bcopy(bp->b_rptr, dst, mblksz); 4612 dst += mblksz; 4613 } 4614 4615 /* setup the raw data msg */ 4616 pkt->tag.vio_msgtype = VIO_TYPE_DATA; 4617 pkt->tag.vio_subtype = VIO_SUBTYPE_INFO; 4618 pkt->tag.vio_subtype_env = VIO_PKT_DATA; 4619 pkt->tag.vio_sid = ldcp->local_session; 4620 nbytes = VIO_PKT_DATA_HDRSIZE + size; 4621 4622 /* send the msg over ldc */ 4623 rv = vsw_send_msg(ldcp, (void *)pkt, nbytes, B_TRUE); 4624 if (rv != 0) { 4625 (void) atomic_inc_32(&statsp->tx_pri_fail); 4626 DWARN(vswp, "%s(%lld) Error sending priority frame\n", __func__, 4627 ldcp->ldc_id); 4628 goto send_pkt_exit; 4629 } 4630 4631 /* update stats */ 4632 (void) atomic_inc_64(&statsp->tx_pri_packets); 4633 (void) atomic_add_64(&statsp->tx_pri_bytes, size); 4634 4635 send_pkt_exit: 4636 if (nmp != NULL) 4637 freemsg(nmp); 4638 freemsg(mp); 4639 } 4640 4641 /* 4642 * Transmit the packet over the given LDC channel. 4643 * 4644 * The 'retries' argument indicates how many times a packet 4645 * is retried before it is dropped.
Note, the retry is done 4646 * only for a resource related failure, for all other failures 4647 * the packet is dropped immediately. 4648 */ 4649 static int 4650 vsw_ldcsend(vsw_ldc_t *ldcp, mblk_t *mp, uint32_t retries) 4651 { 4652 int i; 4653 int rc; 4654 int status = 0; 4655 vsw_port_t *port = ldcp->ldc_port; 4656 dring_info_t *dp = NULL; 4657 4658 4659 for (i = 0; i < retries; ) { 4660 /* 4661 * Send the message out using the appropriate 4662 * transmit function which will free mblock when it 4663 * is finished with it. 4664 */ 4665 mutex_enter(&port->tx_lock); 4666 if (port->transmit != NULL) { 4667 status = (*port->transmit)(ldcp, mp); 4668 } 4669 if (status == LDC_TX_SUCCESS) { 4670 mutex_exit(&port->tx_lock); 4671 break; 4672 } 4673 i++; /* increment the counter here */ 4674 4675 /* If its the last retry, then update the oerror */ 4676 if ((i == retries) && (status == LDC_TX_NORESOURCES)) { 4677 ldcp->ldc_stats.oerrors++; 4678 } 4679 mutex_exit(&port->tx_lock); 4680 4681 if (status != LDC_TX_NORESOURCES) { 4682 /* 4683 * No retrying required for errors un-related 4684 * to resources. 4685 */ 4686 break; 4687 } 4688 READ_ENTER(&ldcp->lane_out.dlistrw); 4689 if (((dp = ldcp->lane_out.dringp) != NULL) && 4690 ((VSW_VER_GTEQ(ldcp, 1, 2) && 4691 (ldcp->lane_out.xfer_mode & VIO_DRING_MODE_V1_2)) || 4692 ((VSW_VER_LT(ldcp, 1, 2) && 4693 (ldcp->lane_out.xfer_mode == VIO_DRING_MODE_V1_0))))) { 4694 rc = vsw_reclaim_dring(dp, dp->end_idx); 4695 } else { 4696 /* 4697 * If there is no dring or the xfer_mode is 4698 * set to DESC_MODE(ie., OBP), then simply break here. 4699 */ 4700 RW_EXIT(&ldcp->lane_out.dlistrw); 4701 break; 4702 } 4703 RW_EXIT(&ldcp->lane_out.dlistrw); 4704 4705 /* 4706 * Delay only if none were reclaimed 4707 * and its not the last retry. 4708 */ 4709 if ((rc == 0) && (i < retries)) { 4710 delay(drv_usectohz(vsw_ldc_tx_delay)); 4711 } 4712 } 4713 freemsg(mp); 4714 return (status); 4715 } 4716 4717 /* 4718 * Send packet out via descriptor ring to a logical device. 4719 */ 4720 static int 4721 vsw_dringsend(vsw_ldc_t *ldcp, mblk_t *mp) 4722 { 4723 vio_dring_msg_t dring_pkt; 4724 dring_info_t *dp = NULL; 4725 vsw_private_desc_t *priv_desc = NULL; 4726 vnet_public_desc_t *pub = NULL; 4727 vsw_t *vswp = ldcp->ldc_vswp; 4728 mblk_t *bp; 4729 size_t n, size; 4730 caddr_t bufp; 4731 int idx; 4732 int status = LDC_TX_SUCCESS; 4733 struct ether_header *ehp = (struct ether_header *)mp->b_rptr; 4734 lane_t *lp = &ldcp->lane_out; 4735 4736 D1(vswp, "%s(%lld): enter\n", __func__, ldcp->ldc_id); 4737 4738 /* TODO: make test a macro */ 4739 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 4740 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 4741 DWARN(vswp, "%s(%lld) status(%d) lstate(0x%llx), dropping " 4742 "packet\n", __func__, ldcp->ldc_id, ldcp->ldc_status, 4743 ldcp->lane_out.lstate); 4744 ldcp->ldc_stats.oerrors++; 4745 return (LDC_TX_FAILURE); 4746 } 4747 4748 /* 4749 * Note - using first ring only, this may change 4750 * in the future. 
4751 */ 4752 READ_ENTER(&ldcp->lane_out.dlistrw); 4753 if ((dp = ldcp->lane_out.dringp) == NULL) { 4754 RW_EXIT(&ldcp->lane_out.dlistrw); 4755 DERR(vswp, "%s(%lld): no dring for outbound lane on" 4756 " channel %d", __func__, ldcp->ldc_id, ldcp->ldc_id); 4757 ldcp->ldc_stats.oerrors++; 4758 return (LDC_TX_FAILURE); 4759 } 4760 4761 size = msgsize(mp); 4762 if (size > (size_t)lp->mtu) { 4763 RW_EXIT(&ldcp->lane_out.dlistrw); 4764 DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__, 4765 ldcp->ldc_id, size); 4766 ldcp->ldc_stats.oerrors++; 4767 return (LDC_TX_FAILURE); 4768 } 4769 4770 /* 4771 * Find a free descriptor 4772 * 4773 * Note: for the moment we are assuming that we will only 4774 * have one dring going from the switch to each of its 4775 * peers. This may change in the future. 4776 */ 4777 if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) { 4778 D2(vswp, "%s(%lld): no descriptor available for ring " 4779 "at 0x%llx", __func__, ldcp->ldc_id, dp); 4780 4781 /* nothing more we can do */ 4782 status = LDC_TX_NORESOURCES; 4783 ldcp->ldc_stats.tx_no_desc++; 4784 goto vsw_dringsend_free_exit; 4785 } else { 4786 D2(vswp, "%s(%lld): free private descriptor found at pos %ld " 4787 "addr 0x%llx\n", __func__, ldcp->ldc_id, idx, priv_desc); 4788 } 4789 4790 /* copy data into the descriptor */ 4791 bufp = priv_desc->datap; 4792 bufp += VNET_IPALIGN; 4793 for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) { 4794 n = MBLKL(bp); 4795 bcopy(bp->b_rptr, bufp, n); 4796 bufp += n; 4797 } 4798 4799 priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size; 4800 4801 pub = priv_desc->descp; 4802 pub->nbytes = priv_desc->datalen; 4803 4804 /* update statistics */ 4805 if (IS_BROADCAST(ehp)) 4806 ldcp->ldc_stats.brdcstxmt++; 4807 else if (IS_MULTICAST(ehp)) 4808 ldcp->ldc_stats.multixmt++; 4809 ldcp->ldc_stats.opackets++; 4810 ldcp->ldc_stats.obytes += priv_desc->datalen; 4811 4812 mutex_enter(&priv_desc->dstate_lock); 4813 pub->hdr.dstate = VIO_DESC_READY; 4814 mutex_exit(&priv_desc->dstate_lock); 4815 4816 /* 4817 * Determine whether or not we need to send a message to our 4818 * peer prompting them to read our newly updated descriptor(s). 4819 */ 4820 mutex_enter(&dp->restart_lock); 4821 if (dp->restart_reqd) { 4822 dp->restart_reqd = B_FALSE; 4823 ldcp->ldc_stats.dring_data_msgs++; 4824 mutex_exit(&dp->restart_lock); 4825 4826 /* 4827 * Send a vio_dring_msg to peer to prompt them to read 4828 * the updated descriptor ring. 4829 */ 4830 dring_pkt.tag.vio_msgtype = VIO_TYPE_DATA; 4831 dring_pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 4832 dring_pkt.tag.vio_subtype_env = VIO_DRING_DATA; 4833 dring_pkt.tag.vio_sid = ldcp->local_session; 4834 4835 /* Note - for now using first ring */ 4836 dring_pkt.dring_ident = dp->ident; 4837 4838 /* 4839 * If last_ack_recv is -1 then we know we've not 4840 * received any ack's yet, so this must be the first 4841 * msg sent, so set the start to the begining of the ring. 
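* Otherwise start just after the last descriptor the peer has acknowledged.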
4842 */ 4843 mutex_enter(&dp->dlock); 4844 if (dp->last_ack_recv == -1) { 4845 dring_pkt.start_idx = 0; 4846 } else { 4847 dring_pkt.start_idx = 4848 (dp->last_ack_recv + 1) % dp->num_descriptors; 4849 } 4850 dring_pkt.end_idx = -1; 4851 mutex_exit(&dp->dlock); 4852 4853 D3(vswp, "%s(%lld): dring 0x%llx : ident 0x%llx\n", __func__, 4854 ldcp->ldc_id, dp, dring_pkt.dring_ident); 4855 D3(vswp, "%s(%lld): start %lld : end %lld :\n", 4856 __func__, ldcp->ldc_id, dring_pkt.start_idx, 4857 dring_pkt.end_idx); 4858 4859 RW_EXIT(&ldcp->lane_out.dlistrw); 4860 4861 (void) vsw_send_msg(ldcp, (void *)&dring_pkt, 4862 sizeof (vio_dring_msg_t), B_TRUE); 4863 4864 return (status); 4865 4866 } else { 4867 mutex_exit(&dp->restart_lock); 4868 D2(vswp, "%s(%lld): updating descp %d", __func__, 4869 ldcp->ldc_id, idx); 4870 } 4871 4872 vsw_dringsend_free_exit: 4873 4874 RW_EXIT(&ldcp->lane_out.dlistrw); 4875 4876 D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id); 4877 return (status); 4878 } 4879 4880 /* 4881 * Send an in-band descriptor message over ldc. 4882 */ 4883 static int 4884 vsw_descrsend(vsw_ldc_t *ldcp, mblk_t *mp) 4885 { 4886 vsw_t *vswp = ldcp->ldc_vswp; 4887 vnet_ibnd_desc_t ibnd_msg; 4888 vsw_private_desc_t *priv_desc = NULL; 4889 dring_info_t *dp = NULL; 4890 size_t n, size = 0; 4891 caddr_t bufp; 4892 mblk_t *bp; 4893 int idx, i; 4894 int status = LDC_TX_SUCCESS; 4895 static int warn_msg = 1; 4896 lane_t *lp = &ldcp->lane_out; 4897 4898 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 4899 4900 ASSERT(mp != NULL); 4901 4902 if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) || 4903 (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) { 4904 DERR(vswp, "%s(%lld) status(%d) state (0x%llx), dropping pkt", 4905 __func__, ldcp->ldc_id, ldcp->ldc_status, 4906 ldcp->lane_out.lstate); 4907 ldcp->ldc_stats.oerrors++; 4908 return (LDC_TX_FAILURE); 4909 } 4910 4911 /* 4912 * only expect single dring to exist, which we use 4913 * as an internal buffer, rather than a transfer channel. 
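* In this in-band (descriptor) mode the frame data stays in the private ring buffers; only the memory cookies describing it are passed to the peer in the vnet_ibnd_desc_t built below.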
4914 */ 4915 READ_ENTER(&ldcp->lane_out.dlistrw); 4916 if ((dp = ldcp->lane_out.dringp) == NULL) { 4917 DERR(vswp, "%s(%lld): no dring for outbound lane", 4918 __func__, ldcp->ldc_id); 4919 DERR(vswp, "%s(%lld) status(%d) state (0x%llx)", __func__, 4920 ldcp->ldc_id, ldcp->ldc_status, ldcp->lane_out.lstate); 4921 RW_EXIT(&ldcp->lane_out.dlistrw); 4922 ldcp->ldc_stats.oerrors++; 4923 return (LDC_TX_FAILURE); 4924 } 4925 4926 size = msgsize(mp); 4927 if (size > (size_t)lp->mtu) { 4928 RW_EXIT(&ldcp->lane_out.dlistrw); 4929 DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__, 4930 ldcp->ldc_id, size); 4931 ldcp->ldc_stats.oerrors++; 4932 return (LDC_TX_FAILURE); 4933 } 4934 4935 /* 4936 * Find a free descriptor in our buffer ring 4937 */ 4938 if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) { 4939 RW_EXIT(&ldcp->lane_out.dlistrw); 4940 if (warn_msg) { 4941 DERR(vswp, "%s(%lld): no descriptor available for ring " 4942 "at 0x%llx", __func__, ldcp->ldc_id, dp); 4943 warn_msg = 0; 4944 } 4945 4946 /* nothing more we can do */ 4947 status = LDC_TX_NORESOURCES; 4948 goto vsw_descrsend_free_exit; 4949 } else { 4950 D2(vswp, "%s(%lld): free private descriptor found at pos " 4951 "%ld addr 0x%x\n", __func__, ldcp->ldc_id, idx, priv_desc); 4952 warn_msg = 1; 4953 } 4954 4955 /* copy data into the descriptor */ 4956 bufp = priv_desc->datap; 4957 for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) { 4958 n = MBLKL(bp); 4959 bcopy(bp->b_rptr, bufp, n); 4960 bufp += n; 4961 } 4962 4963 priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size; 4964 4965 /* create and send the in-band descp msg */ 4966 ibnd_msg.hdr.tag.vio_msgtype = VIO_TYPE_DATA; 4967 ibnd_msg.hdr.tag.vio_subtype = VIO_SUBTYPE_INFO; 4968 ibnd_msg.hdr.tag.vio_subtype_env = VIO_DESC_DATA; 4969 ibnd_msg.hdr.tag.vio_sid = ldcp->local_session; 4970 4971 /* 4972 * Copy the mem cookies describing the data from the 4973 * private region of the descriptor ring into the inband 4974 * descriptor. 
4975 */ 4976 for (i = 0; i < priv_desc->ncookies; i++) { 4977 bcopy(&priv_desc->memcookie[i], &ibnd_msg.memcookie[i], 4978 sizeof (ldc_mem_cookie_t)); 4979 } 4980 4981 ibnd_msg.hdr.desc_handle = idx; 4982 ibnd_msg.ncookies = priv_desc->ncookies; 4983 ibnd_msg.nbytes = size; 4984 4985 ldcp->ldc_stats.opackets++; 4986 ldcp->ldc_stats.obytes += size; 4987 4988 RW_EXIT(&ldcp->lane_out.dlistrw); 4989 4990 (void) vsw_send_msg(ldcp, (void *)&ibnd_msg, 4991 sizeof (vnet_ibnd_desc_t), B_TRUE); 4992 4993 vsw_descrsend_free_exit: 4994 4995 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 4996 return (status); 4997 } 4998 4999 static void 5000 vsw_send_ver(void *arg) 5001 { 5002 vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 5003 vsw_t *vswp = ldcp->ldc_vswp; 5004 lane_t *lp = &ldcp->lane_out; 5005 vio_ver_msg_t ver_msg; 5006 5007 D1(vswp, "%s enter", __func__); 5008 5009 ver_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 5010 ver_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 5011 ver_msg.tag.vio_subtype_env = VIO_VER_INFO; 5012 ver_msg.tag.vio_sid = ldcp->local_session; 5013 5014 if (vsw_obp_ver_proto_workaround == B_FALSE) { 5015 ver_msg.ver_major = vsw_versions[0].ver_major; 5016 ver_msg.ver_minor = vsw_versions[0].ver_minor; 5017 } else { 5018 /* use the major,minor that we've ack'd */ 5019 lane_t *lpi = &ldcp->lane_in; 5020 ver_msg.ver_major = lpi->ver_major; 5021 ver_msg.ver_minor = lpi->ver_minor; 5022 } 5023 ver_msg.dev_class = VDEV_NETWORK_SWITCH; 5024 5025 lp->lstate |= VSW_VER_INFO_SENT; 5026 lp->ver_major = ver_msg.ver_major; 5027 lp->ver_minor = ver_msg.ver_minor; 5028 5029 DUMP_TAG(ver_msg.tag); 5030 5031 (void) vsw_send_msg(ldcp, &ver_msg, sizeof (vio_ver_msg_t), B_TRUE); 5032 5033 D1(vswp, "%s (%d): exit", __func__, ldcp->ldc_id); 5034 } 5035 5036 static void 5037 vsw_send_attr(vsw_ldc_t *ldcp) 5038 { 5039 vsw_t *vswp = ldcp->ldc_vswp; 5040 lane_t *lp = &ldcp->lane_out; 5041 vnet_attr_msg_t attr_msg; 5042 5043 D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id); 5044 5045 /* 5046 * Subtype is set to INFO by default 5047 */ 5048 attr_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 5049 attr_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 5050 attr_msg.tag.vio_subtype_env = VIO_ATTR_INFO; 5051 attr_msg.tag.vio_sid = ldcp->local_session; 5052 5053 /* payload copied from default settings for lane */ 5054 attr_msg.mtu = lp->mtu; 5055 attr_msg.addr_type = lp->addr_type; 5056 attr_msg.xfer_mode = lp->xfer_mode; 5057 attr_msg.ack_freq = lp->xfer_mode; 5058 5059 READ_ENTER(&vswp->if_lockrw); 5060 attr_msg.addr = vnet_macaddr_strtoul((vswp->if_addr).ether_addr_octet); 5061 RW_EXIT(&vswp->if_lockrw); 5062 5063 ldcp->lane_out.lstate |= VSW_ATTR_INFO_SENT; 5064 5065 DUMP_TAG(attr_msg.tag); 5066 5067 (void) vsw_send_msg(ldcp, &attr_msg, sizeof (vnet_attr_msg_t), B_TRUE); 5068 5069 D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id); 5070 } 5071 5072 /* 5073 * Create dring info msg (which also results in the creation of 5074 * a dring). 5075 */ 5076 static vio_dring_reg_msg_t * 5077 vsw_create_dring_info_pkt(vsw_ldc_t *ldcp) 5078 { 5079 vio_dring_reg_msg_t *mp; 5080 dring_info_t *dp; 5081 vsw_t *vswp = ldcp->ldc_vswp; 5082 int rv; 5083 5084 D1(vswp, "vsw_create_dring_info_pkt enter\n"); 5085 5086 /* 5087 * If we can't create a dring, obviously no point sending 5088 * a message. 
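* The message returned here is kmem_zalloc'd; the caller (vsw_send_dring_info) frees it once it has been sent.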
5089 */ 5090 if ((dp = vsw_create_dring(ldcp)) == NULL) 5091 return (NULL); 5092 5093 /* Allocate pools of receive mblks */ 5094 rv = vsw_init_multipools(ldcp, vswp); 5095 if (rv) { 5096 /* 5097 * We do not return failure if receive mblk pools can't be 5098 * allocated, instead allocb(9F) will be used to dynamically 5099 * allocate buffers during receive. 5100 */ 5101 DWARN(vswp, "%s: unable to create free mblk pools for" 5102 " channel %ld (rv %d)", __func__, ldcp->ldc_id, rv); 5103 } 5104 5105 mp = kmem_zalloc(sizeof (vio_dring_reg_msg_t), KM_SLEEP); 5106 5107 mp->tag.vio_msgtype = VIO_TYPE_CTRL; 5108 mp->tag.vio_subtype = VIO_SUBTYPE_INFO; 5109 mp->tag.vio_subtype_env = VIO_DRING_REG; 5110 mp->tag.vio_sid = ldcp->local_session; 5111 5112 /* payload */ 5113 mp->num_descriptors = dp->num_descriptors; 5114 mp->descriptor_size = dp->descriptor_size; 5115 mp->options = dp->options; 5116 mp->ncookies = dp->ncookies; 5117 bcopy(&dp->cookie[0], &mp->cookie[0], sizeof (ldc_mem_cookie_t)); 5118 5119 mp->dring_ident = 0; 5120 5121 D1(vswp, "vsw_create_dring_info_pkt exit\n"); 5122 5123 return (mp); 5124 } 5125 5126 static void 5127 vsw_send_dring_info(vsw_ldc_t *ldcp) 5128 { 5129 vio_dring_reg_msg_t *dring_msg; 5130 vsw_t *vswp = ldcp->ldc_vswp; 5131 5132 D1(vswp, "%s: (%ld) enter", __func__, ldcp->ldc_id); 5133 5134 dring_msg = vsw_create_dring_info_pkt(ldcp); 5135 if (dring_msg == NULL) { 5136 cmn_err(CE_WARN, "!vsw%d: %s: error creating msg", 5137 vswp->instance, __func__); 5138 return; 5139 } 5140 5141 ldcp->lane_out.lstate |= VSW_DRING_INFO_SENT; 5142 5143 DUMP_TAG_PTR((vio_msg_tag_t *)dring_msg); 5144 5145 (void) vsw_send_msg(ldcp, dring_msg, 5146 sizeof (vio_dring_reg_msg_t), B_TRUE); 5147 5148 kmem_free(dring_msg, sizeof (vio_dring_reg_msg_t)); 5149 5150 D1(vswp, "%s: (%ld) exit", __func__, ldcp->ldc_id); 5151 } 5152 5153 static void 5154 vsw_send_rdx(vsw_ldc_t *ldcp) 5155 { 5156 vsw_t *vswp = ldcp->ldc_vswp; 5157 vio_rdx_msg_t rdx_msg; 5158 5159 D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id); 5160 5161 rdx_msg.tag.vio_msgtype = VIO_TYPE_CTRL; 5162 rdx_msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 5163 rdx_msg.tag.vio_subtype_env = VIO_RDX; 5164 rdx_msg.tag.vio_sid = ldcp->local_session; 5165 5166 ldcp->lane_in.lstate |= VSW_RDX_INFO_SENT; 5167 5168 DUMP_TAG(rdx_msg.tag); 5169 5170 (void) vsw_send_msg(ldcp, &rdx_msg, sizeof (vio_rdx_msg_t), B_TRUE); 5171 5172 D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id); 5173 } 5174 5175 /* 5176 * Generic routine to send message out over ldc channel. 5177 * 5178 * It is possible that when we attempt to write over the ldc channel 5179 * that we get notified that it has been reset. Depending on the value 5180 * of the handle_reset flag we either handle that event here or simply 5181 * notify the caller that the channel was reset. 
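* Returns the value from ldc_write(); ECONNRESET indicates the channel was reset (and, when handle_reset is B_TRUE, that the reset has already been handled here).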
5182 */ 5183 int 5184 vsw_send_msg(vsw_ldc_t *ldcp, void *msgp, int size, boolean_t handle_reset) 5185 { 5186 int rv; 5187 size_t msglen = size; 5188 vio_msg_tag_t *tag = (vio_msg_tag_t *)msgp; 5189 vsw_t *vswp = ldcp->ldc_vswp; 5190 vio_dring_msg_t *dmsg; 5191 vio_raw_data_msg_t *rmsg; 5192 vnet_ibnd_desc_t *imsg; 5193 boolean_t data_msg = B_FALSE; 5194 5195 D1(vswp, "vsw_send_msg (%lld) enter : sending %d bytes", 5196 ldcp->ldc_id, size); 5197 5198 D2(vswp, "send_msg: type 0x%llx", tag->vio_msgtype); 5199 D2(vswp, "send_msg: stype 0x%llx", tag->vio_subtype); 5200 D2(vswp, "send_msg: senv 0x%llx", tag->vio_subtype_env); 5201 5202 mutex_enter(&ldcp->ldc_txlock); 5203 5204 if (tag->vio_subtype == VIO_SUBTYPE_INFO) { 5205 if (tag->vio_subtype_env == VIO_DRING_DATA) { 5206 dmsg = (vio_dring_msg_t *)tag; 5207 dmsg->seq_num = ldcp->lane_out.seq_num; 5208 data_msg = B_TRUE; 5209 } else if (tag->vio_subtype_env == VIO_PKT_DATA) { 5210 rmsg = (vio_raw_data_msg_t *)tag; 5211 rmsg->seq_num = ldcp->lane_out.seq_num; 5212 data_msg = B_TRUE; 5213 } else if (tag->vio_subtype_env == VIO_DESC_DATA) { 5214 imsg = (vnet_ibnd_desc_t *)tag; 5215 imsg->hdr.seq_num = ldcp->lane_out.seq_num; 5216 data_msg = B_TRUE; 5217 } 5218 } 5219 5220 do { 5221 msglen = size; 5222 rv = ldc_write(ldcp->ldc_handle, (caddr_t)msgp, &msglen); 5223 } while (rv == EWOULDBLOCK && --vsw_wretries > 0); 5224 5225 if (rv == 0 && data_msg == B_TRUE) { 5226 ldcp->lane_out.seq_num++; 5227 } 5228 5229 if ((rv != 0) || (msglen != size)) { 5230 DERR(vswp, "vsw_send_msg:ldc_write failed: chan(%lld) rv(%d) " 5231 "size (%d) msglen(%d)\n", ldcp->ldc_id, rv, size, msglen); 5232 ldcp->ldc_stats.oerrors++; 5233 } 5234 5235 mutex_exit(&ldcp->ldc_txlock); 5236 5237 /* 5238 * If channel has been reset we either handle it here or 5239 * simply report back that it has been reset and let caller 5240 * decide what to do. 5241 */ 5242 if (rv == ECONNRESET) { 5243 DWARN(vswp, "%s (%lld) channel reset", __func__, ldcp->ldc_id); 5244 5245 /* 5246 * N.B - must never be holding the dlistrw lock when 5247 * we do a reset of the channel. 5248 */ 5249 if (handle_reset) { 5250 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 5251 } 5252 } 5253 5254 return (rv); 5255 } 5256 5257 /* 5258 * Remove the specified address from the list of address maintained 5259 * in this port node. 
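* Returns a pointer to the entry that was unlinked, or NULL if the address was not found; the entry itself is not freed here.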
5260 */ 5261 mcst_addr_t * 5262 vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr) 5263 { 5264 vsw_t *vswp = NULL; 5265 vsw_port_t *port = NULL; 5266 mcst_addr_t *prev_p = NULL; 5267 mcst_addr_t *curr_p = NULL; 5268 5269 D1(NULL, "%s: enter : devtype %d : addr 0x%llx", 5270 __func__, devtype, addr); 5271 5272 if (devtype == VSW_VNETPORT) { 5273 port = (vsw_port_t *)arg; 5274 mutex_enter(&port->mca_lock); 5275 prev_p = curr_p = port->mcap; 5276 } else { 5277 vswp = (vsw_t *)arg; 5278 mutex_enter(&vswp->mca_lock); 5279 prev_p = curr_p = vswp->mcap; 5280 } 5281 5282 while (curr_p != NULL) { 5283 if (curr_p->addr == addr) { 5284 D2(NULL, "%s: address found", __func__); 5285 /* match found */ 5286 if (prev_p == curr_p) { 5287 /* list head */ 5288 if (devtype == VSW_VNETPORT) 5289 port->mcap = curr_p->nextp; 5290 else 5291 vswp->mcap = curr_p->nextp; 5292 } else { 5293 prev_p->nextp = curr_p->nextp; 5294 } 5295 break; 5296 } else { 5297 prev_p = curr_p; 5298 curr_p = curr_p->nextp; 5299 } 5300 } 5301 5302 if (devtype == VSW_VNETPORT) 5303 mutex_exit(&port->mca_lock); 5304 else 5305 mutex_exit(&vswp->mca_lock); 5306 5307 D1(NULL, "%s: exit", __func__); 5308 5309 return (curr_p); 5310 } 5311 5312 /* 5313 * Creates a descriptor ring (dring) and links it into the 5314 * link of outbound drings for this channel. 5315 * 5316 * Returns NULL if creation failed. 5317 */ 5318 static dring_info_t * 5319 vsw_create_dring(vsw_ldc_t *ldcp) 5320 { 5321 vsw_private_desc_t *priv_addr = NULL; 5322 vsw_t *vswp = ldcp->ldc_vswp; 5323 ldc_mem_info_t minfo; 5324 dring_info_t *dp, *tp; 5325 int i; 5326 5327 dp = (dring_info_t *)kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 5328 5329 mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL); 5330 5331 /* create public section of ring */ 5332 if ((ldc_mem_dring_create(vsw_ntxds, 5333 VSW_PUB_SIZE, &dp->handle)) != 0) { 5334 5335 DERR(vswp, "vsw_create_dring(%lld): ldc dring create " 5336 "failed", ldcp->ldc_id); 5337 goto create_fail_exit; 5338 } 5339 5340 ASSERT(dp->handle != NULL); 5341 5342 /* 5343 * Get the base address of the public section of the ring. 5344 */ 5345 if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) { 5346 DERR(vswp, "vsw_create_dring(%lld): dring info failed\n", 5347 ldcp->ldc_id); 5348 goto dring_fail_exit; 5349 } else { 5350 ASSERT(minfo.vaddr != 0); 5351 dp->pub_addr = minfo.vaddr; 5352 } 5353 5354 dp->num_descriptors = vsw_ntxds; 5355 dp->descriptor_size = VSW_PUB_SIZE; 5356 dp->options = VIO_TX_DRING; 5357 dp->ncookies = 1; /* guaranteed by ldc */ 5358 5359 /* 5360 * create private portion of ring 5361 */ 5362 dp->priv_addr = (vsw_private_desc_t *)kmem_zalloc( 5363 (sizeof (vsw_private_desc_t) * vsw_ntxds), KM_SLEEP); 5364 5365 if (vsw_setup_ring(ldcp, dp)) { 5366 DERR(vswp, "%s: unable to setup ring", __func__); 5367 goto dring_fail_exit; 5368 } 5369 5370 /* haven't used any descriptors yet */ 5371 dp->end_idx = 0; 5372 dp->last_ack_recv = -1; 5373 5374 /* bind dring to the channel */ 5375 if ((ldc_mem_dring_bind(ldcp->ldc_handle, dp->handle, 5376 LDC_DIRECT_MAP | LDC_SHADOW_MAP, LDC_MEM_RW, 5377 &dp->cookie[0], &dp->ncookies)) != 0) { 5378 DERR(vswp, "vsw_create_dring: unable to bind to channel " 5379 "%lld", ldcp->ldc_id); 5380 goto dring_fail_exit; 5381 } 5382 5383 mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL); 5384 dp->restart_reqd = B_TRUE; 5385 5386 /* 5387 * Only ever create rings for outgoing lane. Link it onto 5388 * end of list. 
5389 */ 5390 WRITE_ENTER(&ldcp->lane_out.dlistrw); 5391 if (ldcp->lane_out.dringp == NULL) { 5392 D2(vswp, "vsw_create_dring: adding first outbound ring"); 5393 ldcp->lane_out.dringp = dp; 5394 } else { 5395 tp = ldcp->lane_out.dringp; 5396 while (tp->next != NULL) 5397 tp = tp->next; 5398 5399 tp->next = dp; 5400 } 5401 RW_EXIT(&ldcp->lane_out.dlistrw); 5402 5403 return (dp); 5404 5405 dring_fail_exit: 5406 (void) ldc_mem_dring_destroy(dp->handle); 5407 5408 create_fail_exit: 5409 if (dp->priv_addr != NULL) { 5410 priv_addr = dp->priv_addr; 5411 for (i = 0; i < vsw_ntxds; i++) { 5412 if (priv_addr->memhandle != NULL) 5413 (void) ldc_mem_free_handle( 5414 priv_addr->memhandle); 5415 priv_addr++; 5416 } 5417 kmem_free(dp->priv_addr, 5418 (sizeof (vsw_private_desc_t) * vsw_ntxds)); 5419 } 5420 mutex_destroy(&dp->dlock); 5421 5422 kmem_free(dp, sizeof (dring_info_t)); 5423 return (NULL); 5424 } 5425 5426 /* 5427 * Create a ring consisting of just a private portion and link 5428 * it into the list of rings for the outbound lane. 5429 * 5430 * These type of rings are used primarily for temporary data 5431 * storage (i.e. as data buffers). 5432 */ 5433 void 5434 vsw_create_privring(vsw_ldc_t *ldcp) 5435 { 5436 dring_info_t *dp, *tp; 5437 vsw_t *vswp = ldcp->ldc_vswp; 5438 5439 D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 5440 5441 dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 5442 5443 mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL); 5444 5445 /* no public section */ 5446 dp->pub_addr = NULL; 5447 5448 dp->priv_addr = kmem_zalloc( 5449 (sizeof (vsw_private_desc_t) * vsw_ntxds), KM_SLEEP); 5450 5451 dp->num_descriptors = vsw_ntxds; 5452 5453 if (vsw_setup_ring(ldcp, dp)) { 5454 DERR(vswp, "%s: setup of ring failed", __func__); 5455 kmem_free(dp->priv_addr, 5456 (sizeof (vsw_private_desc_t) * vsw_ntxds)); 5457 mutex_destroy(&dp->dlock); 5458 kmem_free(dp, sizeof (dring_info_t)); 5459 return; 5460 } 5461 5462 /* haven't used any descriptors yet */ 5463 dp->end_idx = 0; 5464 5465 mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL); 5466 dp->restart_reqd = B_TRUE; 5467 5468 /* 5469 * Only ever create rings for outgoing lane. Link it onto 5470 * end of list. 5471 */ 5472 WRITE_ENTER(&ldcp->lane_out.dlistrw); 5473 if (ldcp->lane_out.dringp == NULL) { 5474 D2(vswp, "%s: adding first outbound privring", __func__); 5475 ldcp->lane_out.dringp = dp; 5476 } else { 5477 tp = ldcp->lane_out.dringp; 5478 while (tp->next != NULL) 5479 tp = tp->next; 5480 5481 tp->next = dp; 5482 } 5483 RW_EXIT(&ldcp->lane_out.dlistrw); 5484 5485 D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 5486 } 5487 5488 /* 5489 * Setup the descriptors in the dring. Returns 0 on success, 1 on 5490 * failure. 5491 */ 5492 int 5493 vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp) 5494 { 5495 vnet_public_desc_t *pub_addr = NULL; 5496 vsw_private_desc_t *priv_addr = NULL; 5497 vsw_t *vswp = ldcp->ldc_vswp; 5498 uint64_t *tmpp; 5499 uint64_t offset = 0; 5500 uint32_t ncookies = 0; 5501 static char *name = "vsw_setup_ring"; 5502 int i, j, nc, rv; 5503 size_t data_sz; 5504 void *data_addr; 5505 5506 priv_addr = dp->priv_addr; 5507 pub_addr = dp->pub_addr; 5508 5509 /* public section may be null but private should never be */ 5510 ASSERT(priv_addr != NULL); 5511 5512 /* 5513 * Allocate the region of memory which will be used to hold 5514 * the data the descriptors will refer to. 
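* The sizing and alignment scheme described below keeps each per-descriptor buffer within at most two 8K pages, and hence within the two LDC cookies allowed per descriptor; e.g. a typical ~1.5K frame size rounds up to a 2K buffer, which always lies within a single 8K page.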
5515 */ 5516 data_sz = vswp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN; 5517 5518 /* 5519 * In order to ensure that the number of ldc cookies per descriptor is 5520 * limited to be within the default MAX_COOKIES (2), we take the steps 5521 * outlined below: 5522 * 5523 * Align the entire data buffer area to 8K and carve out per descriptor 5524 * data buffers starting from this 8K aligned base address. 5525 * 5526 * We round up the mtu specified to be a multiple of 2K or 4K. 5527 * For sizes up to 12K we round up the size to the next 2K. 5528 * For sizes > 12K we round up to the next 4K (otherwise sizes such as 5529 * 14K could end up needing 3 cookies, with the buffer spread across 5530 * 3 8K pages: 8K+6K, 2K+8K+2K, 6K+8K, ...). 5531 */ 5532 if (data_sz <= VNET_12K) { 5533 data_sz = VNET_ROUNDUP_2K(data_sz); 5534 } else { 5535 data_sz = VNET_ROUNDUP_4K(data_sz); 5536 } 5537 5538 dp->desc_data_sz = data_sz; 5539 5540 /* allocate extra 8K bytes for alignment */ 5541 dp->data_sz = (vsw_ntxds * data_sz) + VNET_8K; 5542 data_addr = kmem_alloc(dp->data_sz, KM_SLEEP); 5543 dp->data_addr = data_addr; 5544 5545 D2(vswp, "%s: allocated %lld bytes at 0x%llx\n", name, 5546 dp->data_sz, dp->data_addr); 5547 5548 /* align the starting address of the data area to 8K */ 5549 data_addr = (void *)VNET_ROUNDUP_8K((uintptr_t)data_addr); 5550 5551 tmpp = (uint64_t *)data_addr; 5552 offset = dp->desc_data_sz/sizeof (tmpp); 5553 5554 /* 5555 * Initialise some of the private and public (if they exist) 5556 * descriptor fields. 5557 */ 5558 for (i = 0; i < vsw_ntxds; i++) { 5559 mutex_init(&priv_addr->dstate_lock, NULL, MUTEX_DRIVER, NULL); 5560 5561 if ((ldc_mem_alloc_handle(ldcp->ldc_handle, 5562 &priv_addr->memhandle)) != 0) { 5563 DERR(vswp, "%s: alloc mem handle failed", name); 5564 goto setup_ring_cleanup; 5565 } 5566 5567 priv_addr->datap = (void *)tmpp; 5568 5569 rv = ldc_mem_bind_handle(priv_addr->memhandle, 5570 (caddr_t)priv_addr->datap, dp->desc_data_sz, 5571 LDC_SHADOW_MAP, LDC_MEM_R|LDC_MEM_W, 5572 &(priv_addr->memcookie[0]), &ncookies); 5573 if (rv != 0) { 5574 DERR(vswp, "%s(%lld): ldc_mem_bind_handle failed " 5575 "(rv %d)", name, ldcp->ldc_id, rv); 5576 goto setup_ring_cleanup; 5577 } 5578 priv_addr->bound = 1; 5579 5580 D2(vswp, "%s: %d: memcookie 0 : addr 0x%llx : size 0x%llx", 5581 name, i, priv_addr->memcookie[0].addr, 5582 priv_addr->memcookie[0].size); 5583 5584 if (ncookies >= (uint32_t)(VSW_MAX_COOKIES + 1)) { 5585 DERR(vswp, "%s(%lld) ldc_mem_bind_handle returned " 5586 "invalid num of cookies (%d) for size 0x%llx", 5587 name, ldcp->ldc_id, ncookies, VSW_RING_EL_DATA_SZ); 5588 5589 goto setup_ring_cleanup; 5590 } else { 5591 for (j = 1; j < ncookies; j++) { 5592 rv = ldc_mem_nextcookie(priv_addr->memhandle, 5593 &(priv_addr->memcookie[j])); 5594 if (rv != 0) { 5595 DERR(vswp, "%s: ldc_mem_nextcookie " 5596 "failed rv (%d)", name, rv); 5597 goto setup_ring_cleanup; 5598 } 5599 D3(vswp, "%s: memcookie %d : addr 0x%llx : " 5600 "size 0x%llx", name, j, 5601 priv_addr->memcookie[j].addr, 5602 priv_addr->memcookie[j].size); 5603 } 5604 5605 } 5606 priv_addr->ncookies = ncookies; 5607 priv_addr->dstate = VIO_DESC_FREE; 5608 5609 if (pub_addr != NULL) { 5610 5611 /* link pub and private sides */ 5612 priv_addr->descp = pub_addr; 5613 5614 pub_addr->ncookies = priv_addr->ncookies; 5615 5616 for (nc = 0; nc < pub_addr->ncookies; nc++) { 5617 bcopy(&priv_addr->memcookie[nc], 5618 &pub_addr->memcookie[nc], 5619 sizeof (ldc_mem_cookie_t)); 5620 } 5621 5622 pub_addr->hdr.dstate = VIO_DESC_FREE; 
5623 pub_addr++; 5624 } 5625 5626 /* 5627 * move to next element in the dring and the next 5628 * position in the data buffer. 5629 */ 5630 priv_addr++; 5631 tmpp += offset; 5632 } 5633 5634 return (0); 5635 5636 setup_ring_cleanup: 5637 priv_addr = dp->priv_addr; 5638 5639 for (j = 0; j < i; j++) { 5640 (void) ldc_mem_unbind_handle(priv_addr->memhandle); 5641 (void) ldc_mem_free_handle(priv_addr->memhandle); 5642 5643 mutex_destroy(&priv_addr->dstate_lock); 5644 5645 priv_addr++; 5646 } 5647 kmem_free(dp->data_addr, dp->data_sz); 5648 5649 return (1); 5650 } 5651 5652 /* 5653 * Searches the private section of a ring for a free descriptor, 5654 * starting at the location of the last free descriptor found 5655 * previously. 5656 * 5657 * Returns 0 if free descriptor is available, and updates state 5658 * of private descriptor to VIO_DESC_READY, otherwise returns 1. 5659 * 5660 * FUTURE: might need to return contiguous range of descriptors 5661 * as dring info msg assumes all will be contiguous. 5662 */ 5663 static int 5664 vsw_dring_find_free_desc(dring_info_t *dringp, 5665 vsw_private_desc_t **priv_p, int *idx) 5666 { 5667 vsw_private_desc_t *addr = NULL; 5668 int num = vsw_ntxds; 5669 int ret = 1; 5670 5671 D1(NULL, "%s enter\n", __func__); 5672 5673 ASSERT(dringp->priv_addr != NULL); 5674 5675 D2(NULL, "%s: searching ring, dringp 0x%llx : start pos %lld", 5676 __func__, dringp, dringp->end_idx); 5677 5678 addr = (vsw_private_desc_t *)dringp->priv_addr + dringp->end_idx; 5679 5680 mutex_enter(&addr->dstate_lock); 5681 if (addr->dstate == VIO_DESC_FREE) { 5682 addr->dstate = VIO_DESC_READY; 5683 *priv_p = addr; 5684 *idx = dringp->end_idx; 5685 dringp->end_idx = (dringp->end_idx + 1) % num; 5686 ret = 0; 5687 5688 } 5689 mutex_exit(&addr->dstate_lock); 5690 5691 /* ring full */ 5692 if (ret == 1) { 5693 D2(NULL, "%s: no desp free: started at %d", __func__, 5694 dringp->end_idx); 5695 } 5696 5697 D1(NULL, "%s: exit\n", __func__); 5698 5699 return (ret); 5700 } 5701 5702 /* 5703 * Map from a dring identifier to the ring itself. Returns 5704 * pointer to ring or NULL if no match found. 5705 * 5706 * Should be called with dlistrw rwlock held as reader. 5707 */ 5708 static dring_info_t * 5709 vsw_ident2dring(lane_t *lane, uint64_t ident) 5710 { 5711 dring_info_t *dp = NULL; 5712 5713 if ((dp = lane->dringp) == NULL) { 5714 return (NULL); 5715 } else { 5716 if (dp->ident == ident) 5717 return (dp); 5718 5719 while (dp != NULL) { 5720 if (dp->ident == ident) 5721 break; 5722 dp = dp->next; 5723 } 5724 } 5725 5726 return (dp); 5727 } 5728 5729 /* 5730 * Set the default lane attributes. These are copied into 5731 * the attr msg we send to our peer. If they are not acceptable 5732 * then (currently) the handshake ends. 5733 */ 5734 static void 5735 vsw_set_lane_attr(vsw_t *vswp, lane_t *lp) 5736 { 5737 bzero(lp, sizeof (lane_t)); 5738 5739 READ_ENTER(&vswp->if_lockrw); 5740 ether_copy(&(vswp->if_addr), &(lp->addr)); 5741 RW_EXIT(&vswp->if_lockrw); 5742 5743 lp->mtu = vswp->max_frame_size; 5744 lp->addr_type = ADDR_TYPE_MAC; 5745 lp->xfer_mode = VIO_DRING_MODE_V1_0; 5746 lp->ack_freq = 0; /* for shared mode */ 5747 lp->seq_num = VNET_ISS; 5748 } 5749 5750 /* 5751 * Verify that the attributes are acceptable. 5752 * 5753 * FUTURE: If some attributes are not acceptable, change them 5754 * our desired values. 
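* Returns 0 if the attributes are acceptable, 1 otherwise.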
5755 */ 5756 static int 5757 vsw_check_attr(vnet_attr_msg_t *pkt, vsw_ldc_t *ldcp) 5758 { 5759 int ret = 0; 5760 struct ether_addr ea; 5761 vsw_port_t *port = ldcp->ldc_port; 5762 lane_t *lp = &ldcp->lane_out; 5763 5764 D1(NULL, "vsw_check_attr enter\n"); 5765 5766 if ((pkt->xfer_mode != VIO_DESC_MODE) && 5767 (pkt->xfer_mode != lp->xfer_mode)) { 5768 D2(NULL, "vsw_check_attr: unknown mode %x\n", pkt->xfer_mode); 5769 ret = 1; 5770 } 5771 5772 /* Only support MAC addresses at the moment. */ 5773 if ((pkt->addr_type != ADDR_TYPE_MAC) || (pkt->addr == 0)) { 5774 D2(NULL, "vsw_check_attr: invalid addr_type %x, " 5775 "or address 0x%llx\n", pkt->addr_type, pkt->addr); 5776 ret = 1; 5777 } 5778 5779 /* 5780 * MAC address supplied by device should match that stored 5781 * in the vsw-port OBP node. Need to decide what to do if they 5782 * don't match, for the moment just warn but don't fail. 5783 */ 5784 vnet_macaddr_ultostr(pkt->addr, ea.ether_addr_octet); 5785 if (ether_cmp(&ea, &port->p_macaddr) != 0) { 5786 DERR(NULL, "vsw_check_attr: device supplied address " 5787 "0x%llx doesn't match node address 0x%llx\n", 5788 pkt->addr, port->p_macaddr); 5789 } 5790 5791 /* 5792 * Ack freq only makes sense in pkt mode; in shared 5793 * mode the ring descriptors say whether or not to 5794 * send back an ACK. 5795 */ 5796 if ((VSW_VER_GTEQ(ldcp, 1, 2) && 5797 (pkt->xfer_mode & VIO_DRING_MODE_V1_2)) || 5798 (VSW_VER_LT(ldcp, 1, 2) && 5799 (pkt->xfer_mode == VIO_DRING_MODE_V1_0))) { 5800 if (pkt->ack_freq > 0) { 5801 D2(NULL, "vsw_check_attr: non zero ack freq " 5802 " in SHM mode\n"); 5803 ret = 1; 5804 } 5805 } 5806 5807 if (VSW_VER_LT(ldcp, 1, 4)) { 5808 /* versions < 1.4, mtu must match */ 5809 if (pkt->mtu != lp->mtu) { 5810 D2(NULL, "vsw_check_attr: invalid MTU (0x%llx)\n", 5811 pkt->mtu); 5812 ret = 1; 5813 } 5814 } else { 5815 /* Ver >= 1.4, validate mtu of the peer is at least ETHERMAX */ 5816 if (pkt->mtu < ETHERMAX) { 5817 ret = 1; 5818 } 5819 } 5820 5821 D1(NULL, "vsw_check_attr exit\n"); 5822 5823 return (ret); 5824 } 5825 5826 /* 5827 * Returns 1 if there is a problem, 0 otherwise. 5828 */ 5829 static int 5830 vsw_check_dring_info(vio_dring_reg_msg_t *pkt) 5831 { 5832 5833 5834 int ret = 0; 5835 5836 D1(NULL, "vsw_check_dring_info enter\n"); 5837 5838 if ((pkt->num_descriptors == 0) || 5839 (pkt->descriptor_size == 0) || 5840 (pkt->ncookies != 1)) { 5841 DERR(NULL, "vsw_check_dring_info: invalid dring msg"); 5842 ret = 1; 5843 } 5844 5845 D1(NULL, "vsw_check_dring_info exit\n"); 5846 5847 return (ret); 5848 } 5849 5850 /* 5851 * Returns 1 if two memory cookies match. Otherwise returns 0. 5852 */ 5853 static int 5854 vsw_mem_cookie_match(ldc_mem_cookie_t *m1, ldc_mem_cookie_t *m2) 5855 { 5856 if ((m1->addr != m2->addr) || 5857 (m1->size != m2->size)) { 5858 return (0); 5859 } else { 5860 return (1); 5861 } 5862 } 5863 5864 /* 5865 * Returns 1 if ring described in reg message matches that 5866 * described by dring_info structure. Otherwise returns 0. 5867 */ 5868 static int 5869 vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg) 5870 { 5871 if ((msg->descriptor_size != dp->descriptor_size) || 5872 (msg->num_descriptors != dp->num_descriptors) || 5873 (msg->ncookies != dp->ncookies) || 5874 !(vsw_mem_cookie_match(&msg->cookie[0], &dp->cookie[0]))) { 5875 return (0); 5876 } else { 5877 return (1); 5878 } 5879 5880 } 5881 5882 /* 5883 * Reset and free all the resources associated with 5884 * the channel.
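* The dir argument selects which lane to tear down: for INBOUND the imported dring is simply unmapped and freed, while for OUTBOUND the exported dring is unbound and destroyed via vsw_free_ring().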
5885 */ 5886 static void 5887 vsw_free_lane_resources(vsw_ldc_t *ldcp, uint64_t dir) 5888 { 5889 dring_info_t *dp, *dpp; 5890 lane_t *lp = NULL; 5891 5892 ASSERT(ldcp != NULL); 5893 5894 D1(ldcp->ldc_vswp, "%s (%lld): enter", __func__, ldcp->ldc_id); 5895 5896 if (dir == INBOUND) { 5897 D2(ldcp->ldc_vswp, "%s: freeing INBOUND lane" 5898 " of channel %lld", __func__, ldcp->ldc_id); 5899 lp = &ldcp->lane_in; 5900 } else { 5901 D2(ldcp->ldc_vswp, "%s: freeing OUTBOUND lane" 5902 " of channel %lld", __func__, ldcp->ldc_id); 5903 lp = &ldcp->lane_out; 5904 } 5905 5906 lp->lstate = VSW_LANE_INACTIV; 5907 lp->seq_num = VNET_ISS; 5908 5909 if (lp->dringp) { 5910 if (dir == INBOUND) { 5911 WRITE_ENTER(&lp->dlistrw); 5912 dp = lp->dringp; 5913 while (dp != NULL) { 5914 dpp = dp->next; 5915 if (dp->handle != NULL) 5916 (void) ldc_mem_dring_unmap(dp->handle); 5917 kmem_free(dp, sizeof (dring_info_t)); 5918 dp = dpp; 5919 } 5920 RW_EXIT(&lp->dlistrw); 5921 } else { 5922 /* 5923 * unbind, destroy exported dring, free dring struct 5924 */ 5925 WRITE_ENTER(&lp->dlistrw); 5926 dp = lp->dringp; 5927 vsw_free_ring(dp); 5928 RW_EXIT(&lp->dlistrw); 5929 } 5930 lp->dringp = NULL; 5931 } 5932 5933 D1(ldcp->ldc_vswp, "%s (%lld): exit", __func__, ldcp->ldc_id); 5934 } 5935 5936 /* 5937 * Free ring and all associated resources. 5938 * 5939 * Should be called with dlistrw rwlock held as writer. 5940 */ 5941 static void 5942 vsw_free_ring(dring_info_t *dp) 5943 { 5944 vsw_private_desc_t *paddr = NULL; 5945 dring_info_t *dpp; 5946 int i; 5947 5948 while (dp != NULL) { 5949 mutex_enter(&dp->dlock); 5950 dpp = dp->next; 5951 if (dp->priv_addr != NULL) { 5952 /* 5953 * First unbind and free the memory handles 5954 * stored in each descriptor within the ring. 5955 */ 5956 for (i = 0; i < vsw_ntxds; i++) { 5957 paddr = (vsw_private_desc_t *) 5958 dp->priv_addr + i; 5959 if (paddr->memhandle != NULL) { 5960 if (paddr->bound == 1) { 5961 if (ldc_mem_unbind_handle( 5962 paddr->memhandle) != 0) { 5963 DERR(NULL, "error " 5964 "unbinding handle for " 5965 "ring 0x%llx at pos %d", 5966 dp, i); 5967 continue; 5968 } 5969 paddr->bound = 0; 5970 } 5971 5972 if (ldc_mem_free_handle( 5973 paddr->memhandle) != 0) { 5974 DERR(NULL, "error freeing " 5975 "handle for ring 0x%llx " 5976 "at pos %d", dp, i); 5977 continue; 5978 } 5979 paddr->memhandle = NULL; 5980 } 5981 mutex_destroy(&paddr->dstate_lock); 5982 } 5983 kmem_free(dp->priv_addr, 5984 (sizeof (vsw_private_desc_t) * vsw_ntxds)); 5985 } 5986 5987 /* 5988 * Now unbind and destroy the ring itself. 5989 */ 5990 if (dp->handle != NULL) { 5991 (void) ldc_mem_dring_unbind(dp->handle); 5992 (void) ldc_mem_dring_destroy(dp->handle); 5993 } 5994 5995 if (dp->data_addr != NULL) { 5996 kmem_free(dp->data_addr, dp->data_sz); 5997 } 5998 5999 mutex_exit(&dp->dlock); 6000 mutex_destroy(&dp->dlock); 6001 mutex_destroy(&dp->restart_lock); 6002 kmem_free(dp, sizeof (dring_info_t)); 6003 6004 dp = dpp; 6005 } 6006 } 6007 6008 /* 6009 * vsw_ldc_rx_worker -- A per LDC worker thread to receive data. 6010 * This thread is woken up by the LDC interrupt handler to process 6011 * LDC packets and receive data. 
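* The thread runs until VSW_WTHR_STOP is set by vsw_stop_rx_thread(), which then waits for its exit via thread_join().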
/*
 * vsw_ldc_rx_worker -- A per LDC worker thread to receive data.
 * This thread is woken up by the LDC interrupt handler to process
 * LDC packets and receive data.
 */
static void
vsw_ldc_rx_worker(void *arg)
{
        callb_cpr_t     cprinfo;
        vsw_ldc_t       *ldcp = (vsw_ldc_t *)arg;
        vsw_t           *vswp = ldcp->ldc_vswp;

        D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id);
        CALLB_CPR_INIT(&cprinfo, &ldcp->rx_thr_lock, callb_generic_cpr,
            "vsw_rx_thread");
        mutex_enter(&ldcp->rx_thr_lock);
        while (!(ldcp->rx_thr_flags & VSW_WTHR_STOP)) {

                CALLB_CPR_SAFE_BEGIN(&cprinfo);
                /*
                 * Wait until the data is received or a stop
                 * request is received.
                 */
                while (!(ldcp->rx_thr_flags &
                    (VSW_WTHR_DATARCVD | VSW_WTHR_STOP))) {
                        cv_wait(&ldcp->rx_thr_cv, &ldcp->rx_thr_lock);
                }
                CALLB_CPR_SAFE_END(&cprinfo, &ldcp->rx_thr_lock)

                /*
                 * First process the stop request.
                 */
                if (ldcp->rx_thr_flags & VSW_WTHR_STOP) {
                        D2(vswp, "%s(%lld):Rx thread stopped\n",
                            __func__, ldcp->ldc_id);
                        break;
                }
                ldcp->rx_thr_flags &= ~VSW_WTHR_DATARCVD;
                mutex_exit(&ldcp->rx_thr_lock);
                D1(vswp, "%s(%lld):calling vsw_process_pkt\n",
                    __func__, ldcp->ldc_id);
                mutex_enter(&ldcp->ldc_cblock);
                vsw_process_pkt(ldcp);
                mutex_exit(&ldcp->ldc_cblock);
                mutex_enter(&ldcp->rx_thr_lock);
        }

        /*
         * Update the run status and wake up the thread that
         * has sent the stop request.
         */
        ldcp->rx_thr_flags &= ~VSW_WTHR_STOP;
        ldcp->rx_thread = NULL;
        CALLB_CPR_EXIT(&cprinfo);
        D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id);
        thread_exit();
}

/* vsw_stop_rx_thread -- Co-ordinate with the receive thread to stop it */
static void
vsw_stop_rx_thread(vsw_ldc_t *ldcp)
{
        kt_did_t        tid = 0;
        vsw_t           *vswp = ldcp->ldc_vswp;

        D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id);
        /*
         * Send a stop request by setting the stop flag and
         * wait until the receive thread stops.
         */
        mutex_enter(&ldcp->rx_thr_lock);
        if (ldcp->rx_thread != NULL) {
                tid = ldcp->rx_thread->t_did;
                ldcp->rx_thr_flags |= VSW_WTHR_STOP;
                cv_signal(&ldcp->rx_thr_cv);
        }
        mutex_exit(&ldcp->rx_thr_lock);

        if (tid != 0) {
                thread_join(tid);
        }
        D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id);
}
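/*
 * The rx thread is stopped with a simple flag/condvar protocol: the
 * stopper sets VSW_WTHR_STOP and signals rx_thr_cv while holding
 * rx_thr_lock, then waits for the worker via thread_join() on the
 * thread id captured before dropping the lock.  The worker clears the
 * flag and NULLs ldcp->rx_thread on its way out, so a caller that sees
 * rx_thread == NULL knows no join is needed.  The same pattern is used
 * for the transmit thread below.
 */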
/*
 * vsw_ldc_tx_worker -- A per LDC worker thread to transmit data.
 * This thread is woken up by vsw_portsend to transmit
 * packets.
 */
static void
vsw_ldc_tx_worker(void *arg)
{
        callb_cpr_t     cprinfo;
        vsw_ldc_t       *ldcp = (vsw_ldc_t *)arg;
        vsw_t           *vswp = ldcp->ldc_vswp;
        mblk_t          *mp;
        mblk_t          *tmp;

        D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id);
        CALLB_CPR_INIT(&cprinfo, &ldcp->tx_thr_lock, callb_generic_cpr,
            "vnet_tx_thread");
        mutex_enter(&ldcp->tx_thr_lock);
        while (!(ldcp->tx_thr_flags & VSW_WTHR_STOP)) {

                CALLB_CPR_SAFE_BEGIN(&cprinfo);
                /*
                 * Wait until there is data to transmit or a stop
                 * request is received.
                 */
                while (!(ldcp->tx_thr_flags & VSW_WTHR_STOP) &&
                    (ldcp->tx_mhead == NULL)) {
                        cv_wait(&ldcp->tx_thr_cv, &ldcp->tx_thr_lock);
                }
                CALLB_CPR_SAFE_END(&cprinfo, &ldcp->tx_thr_lock)

                /*
                 * First process the stop request.
                 */
                if (ldcp->tx_thr_flags & VSW_WTHR_STOP) {
                        D2(vswp, "%s(%lld):tx thread stopped\n",
                            __func__, ldcp->ldc_id);
                        break;
                }
                mp = ldcp->tx_mhead;
                ldcp->tx_mhead = ldcp->tx_mtail = NULL;
                ldcp->tx_cnt = 0;
                mutex_exit(&ldcp->tx_thr_lock);
                D2(vswp, "%s(%lld):calling vsw_ldcsend\n",
                    __func__, ldcp->ldc_id);
                while (mp != NULL) {
                        tmp = mp->b_next;
                        mp->b_next = mp->b_prev = NULL;
                        (void) vsw_ldcsend(ldcp, mp, vsw_ldc_tx_retries);
                        mp = tmp;
                }
                mutex_enter(&ldcp->tx_thr_lock);
        }

        /*
         * Update the run status and wake up the thread that
         * has sent the stop request.
         */
        ldcp->tx_thr_flags &= ~VSW_WTHR_STOP;
        ldcp->tx_thread = NULL;
        CALLB_CPR_EXIT(&cprinfo);
        D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id);
        thread_exit();
}

/* vsw_stop_tx_thread -- Co-ordinate with the transmit thread to stop it */
static void
vsw_stop_tx_thread(vsw_ldc_t *ldcp)
{
        kt_did_t        tid = 0;
        vsw_t           *vswp = ldcp->ldc_vswp;

        D1(vswp, "%s(%lld):enter\n", __func__, ldcp->ldc_id);
        /*
         * Send a stop request by setting the stop flag and
         * wait until the transmit thread stops.
         */
        mutex_enter(&ldcp->tx_thr_lock);
        if (ldcp->tx_thread != NULL) {
                tid = ldcp->tx_thread->t_did;
                ldcp->tx_thr_flags |= VSW_WTHR_STOP;
                cv_signal(&ldcp->tx_thr_cv);
        }
        mutex_exit(&ldcp->tx_thr_lock);

        if (tid != 0) {
                thread_join(tid);
        }

        D1(vswp, "%s(%lld):exit\n", __func__, ldcp->ldc_id);
}

/* vsw_reclaim_dring -- reclaim descriptors */
static int
vsw_reclaim_dring(dring_info_t *dp, int start)
{
        int                     i, j, len;
        vsw_private_desc_t      *priv_addr;
        vnet_public_desc_t      *pub_addr;

        pub_addr = (vnet_public_desc_t *)dp->pub_addr;
        priv_addr = (vsw_private_desc_t *)dp->priv_addr;
        len = dp->num_descriptors;

        D2(NULL, "%s: start index %d\n", __func__, start);

        j = 0;
        for (i = start; j < len; i = (i + 1) % len, j++) {
                pub_addr = (vnet_public_desc_t *)dp->pub_addr + i;
                priv_addr = (vsw_private_desc_t *)dp->priv_addr + i;

                mutex_enter(&priv_addr->dstate_lock);
                if (pub_addr->hdr.dstate != VIO_DESC_DONE) {
                        mutex_exit(&priv_addr->dstate_lock);
                        break;
                }
                pub_addr->hdr.dstate = VIO_DESC_FREE;
                priv_addr->dstate = VIO_DESC_FREE;
                /* clear all the fields */
                priv_addr->datalen = 0;
                pub_addr->hdr.ack = 0;
                mutex_exit(&priv_addr->dstate_lock);

                D3(NULL, "claiming descp:%d pub state:0x%llx priv state 0x%llx",
                    i, pub_addr->hdr.dstate, priv_addr->dstate);
        }
        return (j);
}
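/*
 * Worked example of the reclaim loop above: with num_descriptors = 4
 * and start = 2, descriptors are visited in the order 2, 3, 0, 1 (the
 * index wraps modulo the ring size).  The walk stops at the first
 * descriptor whose public state is not VIO_DESC_DONE, and the return
 * value is the number of descriptors that were moved back to
 * VIO_DESC_FREE.
 */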
/*
 * Debugging routines
 */
static void
display_state(void)
{
        vsw_t           *vswp;
        vsw_port_list_t *plist;
        vsw_port_t      *port;
        vsw_ldc_list_t  *ldcl;
        vsw_ldc_t       *ldcp;
        extern vsw_t    *vsw_head;

        cmn_err(CE_NOTE, "***** system state *****");

        for (vswp = vsw_head; vswp; vswp = vswp->next) {
                plist = &vswp->plist;
                READ_ENTER(&plist->lockrw);
                cmn_err(CE_CONT, "vsw instance %d has %d ports attached\n",
                    vswp->instance, plist->num_ports);

                for (port = plist->head; port != NULL; port = port->p_next) {
                        ldcl = &port->p_ldclist;
                        cmn_err(CE_CONT, "port %d : %d ldcs attached\n",
                            port->p_instance, port->num_ldcs);
                        READ_ENTER(&ldcl->lockrw);
                        ldcp = ldcl->head;
                        for (; ldcp != NULL; ldcp = ldcp->ldc_next) {
                                cmn_err(CE_CONT, "chan %lu : dev %d : "
                                    "status %d : phase %u\n",
                                    ldcp->ldc_id, ldcp->dev_class,
                                    ldcp->ldc_status, ldcp->hphase);
                                cmn_err(CE_CONT, "chan %lu : lsession %lu : "
                                    "psession %lu\n", ldcp->ldc_id,
                                    ldcp->local_session, ldcp->peer_session);

                                cmn_err(CE_CONT, "Inbound lane:\n");
                                display_lane(&ldcp->lane_in);
                                cmn_err(CE_CONT, "Outbound lane:\n");
                                display_lane(&ldcp->lane_out);
                        }
                        RW_EXIT(&ldcl->lockrw);
                }
                RW_EXIT(&plist->lockrw);
        }
        cmn_err(CE_NOTE, "***** system state *****");
}

static void
display_lane(lane_t *lp)
{
        dring_info_t    *drp;

        cmn_err(CE_CONT, "ver 0x%x:0x%x : state %lx : mtu 0x%lx\n",
            lp->ver_major, lp->ver_minor, lp->lstate, lp->mtu);
        cmn_err(CE_CONT, "addr_type %d : addr 0x%lx : xmode %d\n",
            lp->addr_type, lp->addr, lp->xfer_mode);
        cmn_err(CE_CONT, "dringp 0x%lx\n", (uint64_t)lp->dringp);

        cmn_err(CE_CONT, "Dring info:\n");
        for (drp = lp->dringp; drp != NULL; drp = drp->next) {
                cmn_err(CE_CONT, "\tnum_desc %u : dsize %u\n",
                    drp->num_descriptors, drp->descriptor_size);
                cmn_err(CE_CONT, "\thandle 0x%lx\n", drp->handle);
                cmn_err(CE_CONT, "\tpub_addr 0x%lx : priv_addr 0x%lx\n",
                    (uint64_t)drp->pub_addr, (uint64_t)drp->priv_addr);
                cmn_err(CE_CONT, "\tident 0x%lx : end_idx %lu\n",
                    drp->ident, drp->end_idx);
                display_ring(drp);
        }
}

static void
display_ring(dring_info_t *dringp)
{
        uint64_t                i;
        uint64_t                priv_count = 0;
        uint64_t                pub_count = 0;
        vnet_public_desc_t      *pub_addr = NULL;
        vsw_private_desc_t      *priv_addr = NULL;

        for (i = 0; i < vsw_ntxds; i++) {
                if (dringp->pub_addr != NULL) {
                        pub_addr = (vnet_public_desc_t *)dringp->pub_addr + i;

                        if (pub_addr->hdr.dstate == VIO_DESC_FREE)
                                pub_count++;
                }

                if (dringp->priv_addr != NULL) {
                        priv_addr =
                            (vsw_private_desc_t *)dringp->priv_addr + i;

                        if (priv_addr->dstate == VIO_DESC_FREE)
                                priv_count++;
                }
        }
        cmn_err(CE_CONT, "\t%lu elements: %lu priv free: %lu pub free\n",
            i, priv_count, pub_count);
}
static void
dump_flags(uint64_t state)
{
        int     i;

        typedef struct flag_name {
                int     flag_val;
                char    *flag_name;
        } flag_name_t;

        flag_name_t     flags[] = {
                VSW_VER_INFO_SENT, "VSW_VER_INFO_SENT",
                VSW_VER_INFO_RECV, "VSW_VER_INFO_RECV",
                VSW_VER_ACK_RECV, "VSW_VER_ACK_RECV",
                VSW_VER_ACK_SENT, "VSW_VER_ACK_SENT",
                VSW_VER_NACK_RECV, "VSW_VER_NACK_RECV",
                VSW_VER_NACK_SENT, "VSW_VER_NACK_SENT",
                VSW_ATTR_INFO_SENT, "VSW_ATTR_INFO_SENT",
                VSW_ATTR_INFO_RECV, "VSW_ATTR_INFO_RECV",
                VSW_ATTR_ACK_SENT, "VSW_ATTR_ACK_SENT",
                VSW_ATTR_ACK_RECV, "VSW_ATTR_ACK_RECV",
                VSW_ATTR_NACK_SENT, "VSW_ATTR_NACK_SENT",
                VSW_ATTR_NACK_RECV, "VSW_ATTR_NACK_RECV",
                VSW_DRING_INFO_SENT, "VSW_DRING_INFO_SENT",
                VSW_DRING_INFO_RECV, "VSW_DRING_INFO_RECV",
                VSW_DRING_ACK_SENT, "VSW_DRING_ACK_SENT",
                VSW_DRING_ACK_RECV, "VSW_DRING_ACK_RECV",
                VSW_DRING_NACK_SENT, "VSW_DRING_NACK_SENT",
                VSW_DRING_NACK_RECV, "VSW_DRING_NACK_RECV",
                VSW_RDX_INFO_SENT, "VSW_RDX_INFO_SENT",
                VSW_RDX_INFO_RECV, "VSW_RDX_INFO_RECV",
                VSW_RDX_ACK_SENT, "VSW_RDX_ACK_SENT",
                VSW_RDX_ACK_RECV, "VSW_RDX_ACK_RECV",
                VSW_RDX_NACK_SENT, "VSW_RDX_NACK_SENT",
                VSW_RDX_NACK_RECV, "VSW_RDX_NACK_RECV",
                VSW_MCST_INFO_SENT, "VSW_MCST_INFO_SENT",
                VSW_MCST_INFO_RECV, "VSW_MCST_INFO_RECV",
                VSW_MCST_ACK_SENT, "VSW_MCST_ACK_SENT",
                VSW_MCST_ACK_RECV, "VSW_MCST_ACK_RECV",
                VSW_MCST_NACK_SENT, "VSW_MCST_NACK_SENT",
                VSW_MCST_NACK_RECV, "VSW_MCST_NACK_RECV",
                VSW_LANE_ACTIVE, "VSW_LANE_ACTIVE"};

        DERR(NULL, "DUMP_FLAGS: %llx\n", state);
        for (i = 0; i < sizeof (flags)/sizeof (flag_name_t); i++) {
                if (state & flags[i].flag_val)
                        DERR(NULL, "DUMP_FLAGS %s", flags[i].flag_name);
        }
}