1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/errno.h> 29 #include <sys/sysmacros.h> 30 #include <sys/param.h> 31 #include <sys/stream.h> 32 #include <sys/strsubr.h> 33 #include <sys/kmem.h> 34 #include <sys/conf.h> 35 #include <sys/devops.h> 36 #include <sys/ksynch.h> 37 #include <sys/stat.h> 38 #include <sys/modctl.h> 39 #include <sys/debug.h> 40 #include <sys/ethernet.h> 41 #include <sys/ddi.h> 42 #include <sys/sunddi.h> 43 #include <sys/strsun.h> 44 #include <sys/note.h> 45 #include <sys/mac_provider.h> 46 #include <sys/mac_ether.h> 47 #include <sys/ldc.h> 48 #include <sys/mach_descrip.h> 49 #include <sys/mdeg.h> 50 #include <net/if.h> 51 #include <sys/vnet.h> 52 #include <sys/vio_mailbox.h> 53 #include <sys/vio_common.h> 54 #include <sys/vnet_common.h> 55 #include <sys/vnet_mailbox.h> 56 #include <sys/vio_util.h> 57 #include <sys/vnet_gen.h> 58 #include <sys/atomic.h> 59 #include <sys/callb.h> 60 #include <sys/sdt.h> 61 #include <sys/intr.h> 62 #include <sys/pattr.h> 63 #include <sys/vlan.h> 64 65 /* 66 * Implementation of the mac functionality for vnet using the 67 * generic(default) transport layer of sun4v Logical Domain Channels(LDC). 68 */ 69 70 /* 71 * Function prototypes. 72 */ 73 /* vgen proxy entry points */ 74 int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip, 75 const uint8_t *macaddr, void **vgenhdl); 76 void vgen_uninit(void *arg); 77 int vgen_dds_tx(void *arg, void *dmsg); 78 void vgen_mod_init(void); 79 int vgen_mod_cleanup(void); 80 void vgen_mod_fini(void); 81 static int vgen_start(void *arg); 82 static void vgen_stop(void *arg); 83 static mblk_t *vgen_tx(void *arg, mblk_t *mp); 84 static int vgen_multicst(void *arg, boolean_t add, 85 const uint8_t *mca); 86 static int vgen_promisc(void *arg, boolean_t on); 87 static int vgen_unicst(void *arg, const uint8_t *mca); 88 static int vgen_stat(void *arg, uint_t stat, uint64_t *val); 89 static void vgen_ioctl(void *arg, queue_t *q, mblk_t *mp); 90 #ifdef VNET_IOC_DEBUG 91 static int vgen_force_link_state(vgen_port_t *portp, int link_state); 92 #endif 93 94 /* vgen internal functions */ 95 static int vgen_read_mdprops(vgen_t *vgenp); 96 static void vgen_update_md_prop(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex); 97 static void vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp, 98 mde_cookie_t node); 99 static void vgen_mtu_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node, 100 uint32_t *mtu); 101 static void vgen_linkprop_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node, 102 boolean_t *pls); 103 static void vgen_detach_ports(vgen_t *vgenp); 104 static void vgen_port_detach(vgen_port_t *portp); 105 static void vgen_port_list_insert(vgen_port_t *portp); 106 static void vgen_port_list_remove(vgen_port_t *portp); 107 static vgen_port_t *vgen_port_lookup(vgen_portlist_t *plistp, 108 int port_num); 109 static int vgen_mdeg_reg(vgen_t *vgenp); 110 static void vgen_mdeg_unreg(vgen_t *vgenp); 111 static int vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp); 112 static int vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp); 113 static int vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex); 114 static int vgen_port_read_props(vgen_port_t *portp, vgen_t *vgenp, md_t *mdp, 115 mde_cookie_t mdex); 116 static int vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex); 117 static int vgen_port_attach(vgen_port_t *portp); 118 static void vgen_port_detach_mdeg(vgen_port_t *portp); 119 static void vgen_port_detach_mdeg(vgen_port_t *portp); 120 static int vgen_update_port(vgen_t *vgenp, md_t *curr_mdp, 121 mde_cookie_t curr_mdex, md_t *prev_mdp, mde_cookie_t prev_mdex); 122 static uint64_t vgen_port_stat(vgen_port_t *portp, uint_t stat); 123 static void vgen_port_reset(vgen_port_t *portp); 124 static void vgen_reset_vsw_port(vgen_t *vgenp); 125 static void vgen_ldc_reset(vgen_ldc_t *ldcp); 126 static int vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id); 127 static void vgen_ldc_detach(vgen_ldc_t *ldcp); 128 static int vgen_alloc_tx_ring(vgen_ldc_t *ldcp); 129 static void vgen_free_tx_ring(vgen_ldc_t *ldcp); 130 static void vgen_init_ports(vgen_t *vgenp); 131 static void vgen_port_init(vgen_port_t *portp); 132 static void vgen_uninit_ports(vgen_t *vgenp); 133 static void vgen_port_uninit(vgen_port_t *portp); 134 static void vgen_init_ldcs(vgen_port_t *portp); 135 static void vgen_uninit_ldcs(vgen_port_t *portp); 136 static int vgen_ldc_init(vgen_ldc_t *ldcp); 137 static void vgen_ldc_uninit(vgen_ldc_t *ldcp); 138 static int vgen_init_tbufs(vgen_ldc_t *ldcp); 139 static void vgen_uninit_tbufs(vgen_ldc_t *ldcp); 140 static void vgen_clobber_tbufs(vgen_ldc_t *ldcp); 141 static void vgen_clobber_rxds(vgen_ldc_t *ldcp); 142 static uint64_t vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat); 143 static uint_t vgen_ldc_cb(uint64_t event, caddr_t arg); 144 static int vgen_portsend(vgen_port_t *portp, mblk_t *mp); 145 static int vgen_ldcsend(void *arg, mblk_t *mp); 146 static void vgen_ldcsend_pkt(void *arg, mblk_t *mp); 147 static int vgen_ldcsend_dring(void *arg, mblk_t *mp); 148 static void vgen_reclaim(vgen_ldc_t *ldcp); 149 static void vgen_reclaim_dring(vgen_ldc_t *ldcp); 150 static int vgen_num_txpending(vgen_ldc_t *ldcp); 151 static int vgen_tx_dring_full(vgen_ldc_t *ldcp); 152 static int vgen_ldc_txtimeout(vgen_ldc_t *ldcp); 153 static void vgen_ldc_watchdog(void *arg); 154 155 /* vgen handshake functions */ 156 static vgen_ldc_t *vh_nextphase(vgen_ldc_t *ldcp); 157 static int vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg, size_t msglen, 158 boolean_t caller_holds_lock); 159 static int vgen_send_version_negotiate(vgen_ldc_t *ldcp); 160 static int vgen_send_attr_info(vgen_ldc_t *ldcp); 161 static int vgen_send_dring_reg(vgen_ldc_t *ldcp); 162 static int vgen_send_rdx_info(vgen_ldc_t *ldcp); 163 static int vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, int32_t end); 164 static int vgen_send_mcast_info(vgen_ldc_t *ldcp); 165 static int vgen_handshake_phase2(vgen_ldc_t *ldcp); 166 static void vgen_handshake_reset(vgen_ldc_t *ldcp); 167 static void vgen_reset_hphase(vgen_ldc_t *ldcp); 168 static void vgen_handshake(vgen_ldc_t *ldcp); 169 static int vgen_handshake_done(vgen_ldc_t *ldcp); 170 static void vgen_handshake_retry(vgen_ldc_t *ldcp); 171 static int vgen_handle_version_negotiate(vgen_ldc_t *ldcp, 172 vio_msg_tag_t *tagp); 173 static int vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp); 174 static int vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp); 175 static int vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp); 176 static int vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp); 177 static int vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp); 178 static void vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen); 179 static void vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen); 180 static int vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp); 181 static int vgen_handle_dring_data_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp); 182 static int vgen_process_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp); 183 static int vgen_handle_dring_data_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp); 184 static int vgen_handle_dring_data_nack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp); 185 static int vgen_send_dring_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, 186 uint32_t start, int32_t end, uint8_t pstate); 187 static int vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, 188 uint32_t msglen); 189 static void vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp); 190 static void vgen_handle_evt_up(vgen_ldc_t *ldcp); 191 static void vgen_handle_evt_reset(vgen_ldc_t *ldcp); 192 static int vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp); 193 static int vgen_check_datamsg_seq(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp); 194 static caddr_t vgen_print_ethaddr(uint8_t *a, char *ebuf); 195 static void vgen_hwatchdog(void *arg); 196 static void vgen_print_attr_info(vgen_ldc_t *ldcp, int endpoint); 197 static void vgen_print_hparams(vgen_hparams_t *hp); 198 static void vgen_print_ldcinfo(vgen_ldc_t *ldcp); 199 static void vgen_stop_rcv_thread(vgen_ldc_t *ldcp); 200 static void vgen_drain_rcv_thread(vgen_ldc_t *ldcp); 201 static void vgen_ldc_rcv_worker(void *arg); 202 static void vgen_handle_evt_read(vgen_ldc_t *ldcp); 203 static void vgen_rx(vgen_ldc_t *ldcp, mblk_t *bp); 204 static void vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp); 205 static void vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp); 206 static void vgen_link_update(vgen_t *vgenp, link_state_t link_state); 207 208 /* VLAN routines */ 209 static void vgen_vlan_read_ids(void *arg, int type, md_t *mdp, 210 mde_cookie_t node, uint16_t *pvidp, uint16_t **vidspp, 211 uint16_t *nvidsp, uint16_t *default_idp); 212 static void vgen_vlan_create_hash(vgen_port_t *portp); 213 static void vgen_vlan_destroy_hash(vgen_port_t *portp); 214 static void vgen_vlan_add_ids(vgen_port_t *portp); 215 static void vgen_vlan_remove_ids(vgen_port_t *portp); 216 static boolean_t vgen_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid); 217 static boolean_t vgen_frame_lookup_vid(vnet_t *vnetp, struct ether_header *ehp, 218 uint16_t *vidp); 219 static mblk_t *vgen_vlan_frame_fixtag(vgen_port_t *portp, mblk_t *mp, 220 boolean_t is_tagged, uint16_t vid); 221 static void vgen_vlan_unaware_port_reset(vgen_port_t *portp); 222 static void vgen_reset_vlan_unaware_ports(vgen_t *vgenp); 223 static int vgen_dds_rx(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp); 224 225 /* externs */ 226 extern void vnet_dds_rx(void *arg, void *dmsg); 227 extern void vnet_dds_cleanup_hio(vnet_t *vnetp); 228 extern int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu); 229 extern void vnet_link_update(vnet_t *vnetp, link_state_t link_state); 230 231 /* 232 * The handshake process consists of 5 phases defined below, with VH_PHASE0 233 * being the pre-handshake phase and VH_DONE is the phase to indicate 234 * successful completion of all phases. 235 * Each phase may have one to several handshake states which are required 236 * to complete successfully to move to the next phase. 237 * Refer to the functions vgen_handshake() and vgen_handshake_done() for 238 * more details. 239 */ 240 /* handshake phases */ 241 enum { VH_PHASE0, VH_PHASE1, VH_PHASE2, VH_PHASE3, VH_DONE = 0x80 }; 242 243 /* handshake states */ 244 enum { 245 246 VER_INFO_SENT = 0x1, 247 VER_ACK_RCVD = 0x2, 248 VER_INFO_RCVD = 0x4, 249 VER_ACK_SENT = 0x8, 250 VER_NEGOTIATED = (VER_ACK_RCVD | VER_ACK_SENT), 251 252 ATTR_INFO_SENT = 0x10, 253 ATTR_ACK_RCVD = 0x20, 254 ATTR_INFO_RCVD = 0x40, 255 ATTR_ACK_SENT = 0x80, 256 ATTR_INFO_EXCHANGED = (ATTR_ACK_RCVD | ATTR_ACK_SENT), 257 258 DRING_INFO_SENT = 0x100, 259 DRING_ACK_RCVD = 0x200, 260 DRING_INFO_RCVD = 0x400, 261 DRING_ACK_SENT = 0x800, 262 DRING_INFO_EXCHANGED = (DRING_ACK_RCVD | DRING_ACK_SENT), 263 264 RDX_INFO_SENT = 0x1000, 265 RDX_ACK_RCVD = 0x2000, 266 RDX_INFO_RCVD = 0x4000, 267 RDX_ACK_SENT = 0x8000, 268 RDX_EXCHANGED = (RDX_ACK_RCVD | RDX_ACK_SENT) 269 270 }; 271 272 #define VGEN_PRI_ETH_DEFINED(vgenp) ((vgenp)->pri_num_types != 0) 273 274 #define LDC_LOCK(ldcp) \ 275 mutex_enter(&((ldcp)->cblock));\ 276 mutex_enter(&((ldcp)->rxlock));\ 277 mutex_enter(&((ldcp)->wrlock));\ 278 mutex_enter(&((ldcp)->txlock));\ 279 mutex_enter(&((ldcp)->tclock)); 280 #define LDC_UNLOCK(ldcp) \ 281 mutex_exit(&((ldcp)->tclock));\ 282 mutex_exit(&((ldcp)->txlock));\ 283 mutex_exit(&((ldcp)->wrlock));\ 284 mutex_exit(&((ldcp)->rxlock));\ 285 mutex_exit(&((ldcp)->cblock)); 286 287 #define VGEN_VER_EQ(ldcp, major, minor) \ 288 ((ldcp)->local_hparams.ver_major == (major) && \ 289 (ldcp)->local_hparams.ver_minor == (minor)) 290 291 #define VGEN_VER_LT(ldcp, major, minor) \ 292 (((ldcp)->local_hparams.ver_major < (major)) || \ 293 ((ldcp)->local_hparams.ver_major == (major) && \ 294 (ldcp)->local_hparams.ver_minor < (minor))) 295 296 #define VGEN_VER_GTEQ(ldcp, major, minor) \ 297 (((ldcp)->local_hparams.ver_major > (major)) || \ 298 ((ldcp)->local_hparams.ver_major == (major) && \ 299 (ldcp)->local_hparams.ver_minor >= (minor))) 300 301 static struct ether_addr etherbroadcastaddr = { 302 0xff, 0xff, 0xff, 0xff, 0xff, 0xff 303 }; 304 /* 305 * MIB II broadcast/multicast packets 306 */ 307 #define IS_BROADCAST(ehp) \ 308 (ether_cmp(&ehp->ether_dhost, ðerbroadcastaddr) == 0) 309 #define IS_MULTICAST(ehp) \ 310 ((ehp->ether_dhost.ether_addr_octet[0] & 01) == 1) 311 312 /* 313 * Property names 314 */ 315 static char macaddr_propname[] = "mac-address"; 316 static char rmacaddr_propname[] = "remote-mac-address"; 317 static char channel_propname[] = "channel-endpoint"; 318 static char reg_propname[] = "reg"; 319 static char port_propname[] = "port"; 320 static char swport_propname[] = "switch-port"; 321 static char id_propname[] = "id"; 322 static char vdev_propname[] = "virtual-device"; 323 static char vnet_propname[] = "network"; 324 static char pri_types_propname[] = "priority-ether-types"; 325 static char vgen_pvid_propname[] = "port-vlan-id"; 326 static char vgen_vid_propname[] = "vlan-id"; 327 static char vgen_dvid_propname[] = "default-vlan-id"; 328 static char port_pvid_propname[] = "remote-port-vlan-id"; 329 static char port_vid_propname[] = "remote-vlan-id"; 330 static char vgen_mtu_propname[] = "mtu"; 331 static char vgen_linkprop_propname[] = "linkprop"; 332 333 /* 334 * VIO Protocol Version Info: 335 * 336 * The version specified below represents the version of protocol currently 337 * supported in the driver. It means the driver can negotiate with peers with 338 * versions <= this version. Here is a summary of the feature(s) that are 339 * supported at each version of the protocol: 340 * 341 * 1.0 Basic VIO protocol. 342 * 1.1 vDisk protocol update (no virtual network update). 343 * 1.2 Support for priority frames (priority-ether-types). 344 * 1.3 VLAN and HybridIO support. 345 * 1.4 Jumbo Frame support. 346 * 1.5 Link State Notification support with optional support 347 * for Physical Link information. 348 */ 349 static vgen_ver_t vgen_versions[VGEN_NUM_VER] = { {1, 5} }; 350 351 /* Tunables */ 352 uint32_t vgen_hwd_interval = 5; /* handshake watchdog freq in sec */ 353 uint32_t vgen_max_hretries = VNET_NUM_HANDSHAKES; /* # of handshake retries */ 354 uint32_t vgen_ldcwr_retries = 10; /* max # of ldc_write() retries */ 355 uint32_t vgen_ldcup_retries = 5; /* max # of ldc_up() retries */ 356 uint32_t vgen_ldccl_retries = 5; /* max # of ldc_close() retries */ 357 uint32_t vgen_recv_delay = 1; /* delay when rx descr not ready */ 358 uint32_t vgen_recv_retries = 10; /* retry when rx descr not ready */ 359 uint32_t vgen_tx_retries = 0x4; /* retry when tx descr not available */ 360 uint32_t vgen_tx_delay = 0x30; /* delay when tx descr not available */ 361 362 int vgen_rcv_thread_enabled = 1; /* Enable Recieve thread */ 363 364 static vio_mblk_pool_t *vgen_rx_poolp = NULL; 365 static krwlock_t vgen_rw; 366 367 /* 368 * max # of packets accumulated prior to sending them up. It is best 369 * to keep this at 60% of the number of recieve buffers. 370 */ 371 uint32_t vgen_chain_len = (VGEN_NRBUFS * 0.6); 372 373 /* 374 * Internal tunables for receive buffer pools, that is, the size and number of 375 * mblks for each pool. At least 3 sizes must be specified if these are used. 376 * The sizes must be specified in increasing order. Non-zero value of the first 377 * size will be used as a hint to use these values instead of the algorithm 378 * that determines the sizes based on MTU. 379 */ 380 uint32_t vgen_rbufsz1 = 0; 381 uint32_t vgen_rbufsz2 = 0; 382 uint32_t vgen_rbufsz3 = 0; 383 uint32_t vgen_rbufsz4 = 0; 384 385 uint32_t vgen_nrbufs1 = VGEN_NRBUFS; 386 uint32_t vgen_nrbufs2 = VGEN_NRBUFS; 387 uint32_t vgen_nrbufs3 = VGEN_NRBUFS; 388 uint32_t vgen_nrbufs4 = VGEN_NRBUFS; 389 390 /* 391 * In the absence of "priority-ether-types" property in MD, the following 392 * internal tunable can be set to specify a single priority ethertype. 393 */ 394 uint64_t vgen_pri_eth_type = 0; 395 396 /* 397 * Number of transmit priority buffers that are preallocated per device. 398 * This number is chosen to be a small value to throttle transmission 399 * of priority packets. Note: Must be a power of 2 for vio_create_mblks(). 400 */ 401 uint32_t vgen_pri_tx_nmblks = 64; 402 403 uint32_t vgen_vlan_nchains = 4; /* # of chains in vlan id hash table */ 404 405 #ifdef DEBUG 406 /* flags to simulate error conditions for debugging */ 407 int vgen_trigger_txtimeout = 0; 408 int vgen_trigger_rxlost = 0; 409 #endif 410 411 /* 412 * Matching criteria passed to the MDEG to register interest 413 * in changes to 'virtual-device' nodes (i.e. vnet nodes) identified 414 * by their 'name' and 'cfg-handle' properties. 415 */ 416 static md_prop_match_t vdev_prop_match[] = { 417 { MDET_PROP_STR, "name" }, 418 { MDET_PROP_VAL, "cfg-handle" }, 419 { MDET_LIST_END, NULL } 420 }; 421 422 static mdeg_node_match_t vdev_match = { "virtual-device", 423 vdev_prop_match }; 424 425 /* MD update matching structure */ 426 static md_prop_match_t vport_prop_match[] = { 427 { MDET_PROP_VAL, "id" }, 428 { MDET_LIST_END, NULL } 429 }; 430 431 static mdeg_node_match_t vport_match = { "virtual-device-port", 432 vport_prop_match }; 433 434 /* template for matching a particular vnet instance */ 435 static mdeg_prop_spec_t vgen_prop_template[] = { 436 { MDET_PROP_STR, "name", "network" }, 437 { MDET_PROP_VAL, "cfg-handle", NULL }, 438 { MDET_LIST_END, NULL, NULL } 439 }; 440 441 #define VGEN_SET_MDEG_PROP_INST(specp, val) (specp)[1].ps_val = (val) 442 443 static int vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp); 444 445 #ifdef VNET_IOC_DEBUG 446 #define VGEN_M_CALLBACK_FLAGS (MC_IOCTL) 447 #else 448 #define VGEN_M_CALLBACK_FLAGS (0) 449 #endif 450 451 static mac_callbacks_t vgen_m_callbacks = { 452 VGEN_M_CALLBACK_FLAGS, 453 vgen_stat, 454 vgen_start, 455 vgen_stop, 456 vgen_promisc, 457 vgen_multicst, 458 vgen_unicst, 459 vgen_tx, 460 vgen_ioctl, 461 NULL, 462 NULL 463 }; 464 465 /* externs */ 466 extern pri_t maxclsyspri; 467 extern proc_t p0; 468 extern uint32_t vnet_ntxds; 469 extern uint32_t vnet_ldcwd_interval; 470 extern uint32_t vnet_ldcwd_txtimeout; 471 extern uint32_t vnet_ldc_mtu; 472 extern uint32_t vnet_nrbufs; 473 extern uint32_t vnet_ethermtu; 474 extern uint16_t vnet_default_vlan_id; 475 extern boolean_t vnet_jumbo_rxpools; 476 477 #ifdef DEBUG 478 479 extern int vnet_dbglevel; 480 static void debug_printf(const char *fname, vgen_t *vgenp, 481 vgen_ldc_t *ldcp, const char *fmt, ...); 482 483 /* -1 for all LDCs info, or ldc_id for a specific LDC info */ 484 int vgendbg_ldcid = -1; 485 486 /* simulate handshake error conditions for debug */ 487 uint32_t vgen_hdbg; 488 #define HDBG_VERSION 0x1 489 #define HDBG_TIMEOUT 0x2 490 #define HDBG_BAD_SID 0x4 491 #define HDBG_OUT_STATE 0x8 492 493 #endif 494 495 /* 496 * vgen_init() is called by an instance of vnet driver to initialize the 497 * corresponding generic proxy transport layer. The arguments passed by vnet 498 * are - an opaque pointer to the vnet instance, pointers to dev_info_t and 499 * the mac address of the vnet device, and a pointer to vgen_t is passed 500 * back as a handle to vnet. 501 */ 502 int 503 vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip, 504 const uint8_t *macaddr, void **vgenhdl) 505 { 506 vgen_t *vgenp; 507 int instance; 508 int rv; 509 510 if ((vnetp == NULL) || (vnetdip == NULL)) 511 return (DDI_FAILURE); 512 513 instance = ddi_get_instance(vnetdip); 514 515 DBG1(NULL, NULL, "vnet(%d): enter\n", instance); 516 517 vgenp = kmem_zalloc(sizeof (vgen_t), KM_SLEEP); 518 519 vgenp->vnetp = vnetp; 520 vgenp->instance = instance; 521 vgenp->regprop = regprop; 522 vgenp->vnetdip = vnetdip; 523 bcopy(macaddr, &(vgenp->macaddr), ETHERADDRL); 524 vgenp->phys_link_state = LINK_STATE_UNKNOWN; 525 526 /* allocate multicast table */ 527 vgenp->mctab = kmem_zalloc(VGEN_INIT_MCTAB_SIZE * 528 sizeof (struct ether_addr), KM_SLEEP); 529 vgenp->mccount = 0; 530 vgenp->mcsize = VGEN_INIT_MCTAB_SIZE; 531 532 mutex_init(&vgenp->lock, NULL, MUTEX_DRIVER, NULL); 533 rw_init(&vgenp->vgenports.rwlock, NULL, RW_DRIVER, NULL); 534 535 rv = vgen_read_mdprops(vgenp); 536 if (rv != 0) { 537 goto vgen_init_fail; 538 } 539 540 /* register with MD event generator */ 541 rv = vgen_mdeg_reg(vgenp); 542 if (rv != DDI_SUCCESS) { 543 goto vgen_init_fail; 544 } 545 546 *vgenhdl = (void *)vgenp; 547 548 DBG1(NULL, NULL, "vnet(%d): exit\n", instance); 549 return (DDI_SUCCESS); 550 551 vgen_init_fail: 552 rw_destroy(&vgenp->vgenports.rwlock); 553 mutex_destroy(&vgenp->lock); 554 kmem_free(vgenp->mctab, VGEN_INIT_MCTAB_SIZE * 555 sizeof (struct ether_addr)); 556 if (VGEN_PRI_ETH_DEFINED(vgenp)) { 557 kmem_free(vgenp->pri_types, 558 sizeof (uint16_t) * vgenp->pri_num_types); 559 (void) vio_destroy_mblks(vgenp->pri_tx_vmp); 560 } 561 KMEM_FREE(vgenp); 562 return (DDI_FAILURE); 563 } 564 565 /* 566 * Called by vnet to undo the initializations done by vgen_init(). 567 * The handle provided by generic transport during vgen_init() is the argument. 568 */ 569 void 570 vgen_uninit(void *arg) 571 { 572 vgen_t *vgenp = (vgen_t *)arg; 573 vio_mblk_pool_t *rp; 574 vio_mblk_pool_t *nrp; 575 576 if (vgenp == NULL) { 577 return; 578 } 579 580 DBG1(vgenp, NULL, "enter\n"); 581 582 /* unregister with MD event generator */ 583 vgen_mdeg_unreg(vgenp); 584 585 mutex_enter(&vgenp->lock); 586 587 /* detach all ports from the device */ 588 vgen_detach_ports(vgenp); 589 590 /* 591 * free any pending rx mblk pools, 592 * that couldn't be freed previously during channel detach. 593 */ 594 rp = vgenp->rmp; 595 while (rp != NULL) { 596 nrp = vgenp->rmp = rp->nextp; 597 if (vio_destroy_mblks(rp)) { 598 WRITE_ENTER(&vgen_rw); 599 rp->nextp = vgen_rx_poolp; 600 vgen_rx_poolp = rp; 601 RW_EXIT(&vgen_rw); 602 } 603 rp = nrp; 604 } 605 606 /* free multicast table */ 607 kmem_free(vgenp->mctab, vgenp->mcsize * sizeof (struct ether_addr)); 608 609 /* free pri_types table */ 610 if (VGEN_PRI_ETH_DEFINED(vgenp)) { 611 kmem_free(vgenp->pri_types, 612 sizeof (uint16_t) * vgenp->pri_num_types); 613 (void) vio_destroy_mblks(vgenp->pri_tx_vmp); 614 } 615 616 mutex_exit(&vgenp->lock); 617 618 rw_destroy(&vgenp->vgenports.rwlock); 619 mutex_destroy(&vgenp->lock); 620 621 DBG1(vgenp, NULL, "exit\n"); 622 KMEM_FREE(vgenp); 623 } 624 625 /* 626 * module specific initialization common to all instances of vnet/vgen. 627 */ 628 void 629 vgen_mod_init(void) 630 { 631 rw_init(&vgen_rw, NULL, RW_DRIVER, NULL); 632 } 633 634 /* 635 * module specific cleanup common to all instances of vnet/vgen. 636 */ 637 int 638 vgen_mod_cleanup(void) 639 { 640 vio_mblk_pool_t *poolp, *npoolp; 641 642 /* 643 * If any rx mblk pools are still in use, return 644 * error and stop the module from unloading. 645 */ 646 WRITE_ENTER(&vgen_rw); 647 poolp = vgen_rx_poolp; 648 while (poolp != NULL) { 649 npoolp = vgen_rx_poolp = poolp->nextp; 650 if (vio_destroy_mblks(poolp) != 0) { 651 vgen_rx_poolp = poolp; 652 RW_EXIT(&vgen_rw); 653 return (EBUSY); 654 } 655 poolp = npoolp; 656 } 657 RW_EXIT(&vgen_rw); 658 659 return (0); 660 } 661 662 /* 663 * module specific uninitialization common to all instances of vnet/vgen. 664 */ 665 void 666 vgen_mod_fini(void) 667 { 668 rw_destroy(&vgen_rw); 669 } 670 671 /* enable transmit/receive for the device */ 672 int 673 vgen_start(void *arg) 674 { 675 vgen_port_t *portp = (vgen_port_t *)arg; 676 vgen_t *vgenp = portp->vgenp; 677 678 DBG1(vgenp, NULL, "enter\n"); 679 mutex_enter(&portp->lock); 680 vgen_port_init(portp); 681 portp->flags |= VGEN_STARTED; 682 mutex_exit(&portp->lock); 683 DBG1(vgenp, NULL, "exit\n"); 684 685 return (DDI_SUCCESS); 686 } 687 688 /* stop transmit/receive */ 689 void 690 vgen_stop(void *arg) 691 { 692 vgen_port_t *portp = (vgen_port_t *)arg; 693 vgen_t *vgenp = portp->vgenp; 694 695 DBG1(vgenp, NULL, "enter\n"); 696 697 mutex_enter(&portp->lock); 698 vgen_port_uninit(portp); 699 portp->flags &= ~(VGEN_STARTED); 700 mutex_exit(&portp->lock); 701 DBG1(vgenp, NULL, "exit\n"); 702 703 } 704 705 /* vgen transmit function */ 706 static mblk_t * 707 vgen_tx(void *arg, mblk_t *mp) 708 { 709 int i; 710 vgen_port_t *portp; 711 int status = VGEN_FAILURE; 712 713 portp = (vgen_port_t *)arg; 714 /* 715 * Retry so that we avoid reporting a failure 716 * to the upper layer. Returning a failure may cause the 717 * upper layer to go into single threaded mode there by 718 * causing performance degradation, especially for a large 719 * number of connections. 720 */ 721 for (i = 0; i < vgen_tx_retries; ) { 722 status = vgen_portsend(portp, mp); 723 if (status == VGEN_SUCCESS) { 724 break; 725 } 726 if (++i < vgen_tx_retries) 727 delay(drv_usectohz(vgen_tx_delay)); 728 } 729 if (status != VGEN_SUCCESS) { 730 /* failure */ 731 return (mp); 732 } 733 /* success */ 734 return (NULL); 735 } 736 737 /* 738 * This function provides any necessary tagging/untagging of the frames 739 * that are being transmitted over the port. It first verifies the vlan 740 * membership of the destination(port) and drops the packet if the 741 * destination doesn't belong to the given vlan. 742 * 743 * Arguments: 744 * portp: port over which the frames should be transmitted 745 * mp: frame to be transmitted 746 * is_tagged: 747 * B_TRUE: indicates frame header contains the vlan tag already. 748 * B_FALSE: indicates frame is untagged. 749 * vid: vlan in which the frame should be transmitted. 750 * 751 * Returns: 752 * Sucess: frame(mblk_t *) after doing the necessary tag/untag. 753 * Failure: NULL 754 */ 755 static mblk_t * 756 vgen_vlan_frame_fixtag(vgen_port_t *portp, mblk_t *mp, boolean_t is_tagged, 757 uint16_t vid) 758 { 759 vgen_t *vgenp; 760 boolean_t dst_tagged; 761 int rv; 762 763 vgenp = portp->vgenp; 764 765 /* 766 * If the packet is going to a vnet: 767 * Check if the destination vnet is in the same vlan. 768 * Check the frame header if tag or untag is needed. 769 * 770 * We do not check the above conditions if the packet is going to vsw: 771 * vsw must be present implicitly in all the vlans that a vnet device 772 * is configured into; even if vsw itself is not assigned to those 773 * vlans as an interface. For instance, the packet might be destined 774 * to another vnet(indirectly through vsw) or to an external host 775 * which is in the same vlan as this vnet and vsw itself may not be 776 * present in that vlan. Similarly packets going to vsw must be 777 * always tagged(unless in the default-vlan) if not already tagged, 778 * as we do not know the final destination. This is needed because 779 * vsw must always invoke its switching function only after tagging 780 * the packet; otherwise after switching function determines the 781 * destination we cannot figure out if the destination belongs to the 782 * the same vlan that the frame originated from and if it needs tag/ 783 * untag. Note that vsw will tag the packet itself when it receives 784 * it over the channel from a client if needed. However, that is 785 * needed only in the case of vlan unaware clients such as obp or 786 * earlier versions of vnet. 787 * 788 */ 789 if (portp != vgenp->vsw_portp) { 790 /* 791 * Packet going to a vnet. Check if the destination vnet is in 792 * the same vlan. Then check the frame header if tag/untag is 793 * needed. 794 */ 795 rv = vgen_vlan_lookup(portp->vlan_hashp, vid); 796 if (rv == B_FALSE) { 797 /* drop the packet */ 798 freemsg(mp); 799 return (NULL); 800 } 801 802 /* is the destination tagged or untagged in this vlan? */ 803 (vid == portp->pvid) ? (dst_tagged = B_FALSE) : 804 (dst_tagged = B_TRUE); 805 806 if (is_tagged == dst_tagged) { 807 /* no tagging/untagging needed */ 808 return (mp); 809 } 810 811 if (is_tagged == B_TRUE) { 812 /* frame is tagged; destination needs untagged */ 813 mp = vnet_vlan_remove_tag(mp); 814 return (mp); 815 } 816 817 /* (is_tagged == B_FALSE): fallthru to tag tx packet: */ 818 } 819 820 /* 821 * Packet going to a vnet needs tagging. 822 * OR 823 * If the packet is going to vsw, then it must be tagged in all cases: 824 * unknown unicast, broadcast/multicast or to vsw interface. 825 */ 826 827 if (is_tagged == B_FALSE) { 828 mp = vnet_vlan_insert_tag(mp, vid); 829 } 830 831 return (mp); 832 } 833 834 /* transmit packets over the given port */ 835 static int 836 vgen_portsend(vgen_port_t *portp, mblk_t *mp) 837 { 838 vgen_ldclist_t *ldclp; 839 vgen_ldc_t *ldcp; 840 int status; 841 int rv = VGEN_SUCCESS; 842 vgen_t *vgenp = portp->vgenp; 843 vnet_t *vnetp = vgenp->vnetp; 844 boolean_t is_tagged; 845 boolean_t dec_refcnt = B_FALSE; 846 uint16_t vlan_id; 847 struct ether_header *ehp; 848 849 if (portp->use_vsw_port) { 850 (void) atomic_inc_32(&vgenp->vsw_port_refcnt); 851 portp = portp->vgenp->vsw_portp; 852 dec_refcnt = B_TRUE; 853 } 854 if (portp == NULL) { 855 return (VGEN_FAILURE); 856 } 857 858 /* 859 * Determine the vlan id that the frame belongs to. 860 */ 861 ehp = (struct ether_header *)mp->b_rptr; 862 is_tagged = vgen_frame_lookup_vid(vnetp, ehp, &vlan_id); 863 864 if (vlan_id == vnetp->default_vlan_id) { 865 866 /* Frames in default vlan must be untagged */ 867 ASSERT(is_tagged == B_FALSE); 868 869 /* 870 * If the destination is a vnet-port verify it belongs to the 871 * default vlan; otherwise drop the packet. We do not need 872 * this check for vsw-port, as it should implicitly belong to 873 * this vlan; see comments in vgen_vlan_frame_fixtag(). 874 */ 875 if (portp != vgenp->vsw_portp && 876 portp->pvid != vnetp->default_vlan_id) { 877 freemsg(mp); 878 goto portsend_ret; 879 } 880 881 } else { /* frame not in default-vlan */ 882 883 mp = vgen_vlan_frame_fixtag(portp, mp, is_tagged, vlan_id); 884 if (mp == NULL) { 885 goto portsend_ret; 886 } 887 888 } 889 890 ldclp = &portp->ldclist; 891 READ_ENTER(&ldclp->rwlock); 892 /* 893 * NOTE: for now, we will assume we have a single channel. 894 */ 895 if (ldclp->headp == NULL) { 896 RW_EXIT(&ldclp->rwlock); 897 rv = VGEN_FAILURE; 898 goto portsend_ret; 899 } 900 ldcp = ldclp->headp; 901 902 status = ldcp->tx(ldcp, mp); 903 904 RW_EXIT(&ldclp->rwlock); 905 906 if (status != VGEN_TX_SUCCESS) { 907 rv = VGEN_FAILURE; 908 } 909 910 portsend_ret: 911 if (dec_refcnt == B_TRUE) { 912 (void) atomic_dec_32(&vgenp->vsw_port_refcnt); 913 } 914 return (rv); 915 } 916 917 /* 918 * Wrapper function to transmit normal and/or priority frames over the channel. 919 */ 920 static int 921 vgen_ldcsend(void *arg, mblk_t *mp) 922 { 923 vgen_ldc_t *ldcp = (vgen_ldc_t *)arg; 924 int status; 925 struct ether_header *ehp; 926 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 927 uint32_t num_types; 928 uint16_t *types; 929 int i; 930 931 ASSERT(VGEN_PRI_ETH_DEFINED(vgenp)); 932 933 num_types = vgenp->pri_num_types; 934 types = vgenp->pri_types; 935 ehp = (struct ether_header *)mp->b_rptr; 936 937 for (i = 0; i < num_types; i++) { 938 939 if (ehp->ether_type == types[i]) { 940 /* priority frame, use pri tx function */ 941 vgen_ldcsend_pkt(ldcp, mp); 942 return (VGEN_SUCCESS); 943 } 944 945 } 946 947 status = vgen_ldcsend_dring(ldcp, mp); 948 949 return (status); 950 } 951 952 /* 953 * This functions handles ldc channel reset while in the context 954 * of transmit routines: vgen_ldcsend_pkt() or vgen_ldcsend_dring(). 955 */ 956 static void 957 vgen_ldcsend_process_reset(vgen_ldc_t *ldcp) 958 { 959 ldc_status_t istatus; 960 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 961 962 if (mutex_tryenter(&ldcp->cblock)) { 963 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) { 964 DWARN(vgenp, ldcp, "ldc_status() error\n"); 965 } else { 966 ldcp->ldc_status = istatus; 967 } 968 if (ldcp->ldc_status != LDC_UP) { 969 vgen_handle_evt_reset(ldcp); 970 } 971 mutex_exit(&ldcp->cblock); 972 } 973 } 974 975 /* 976 * This function transmits the frame in the payload of a raw data 977 * (VIO_PKT_DATA) message. Thus, it provides an Out-Of-Band path to 978 * send special frames with high priorities, without going through 979 * the normal data path which uses descriptor ring mechanism. 980 */ 981 static void 982 vgen_ldcsend_pkt(void *arg, mblk_t *mp) 983 { 984 vgen_ldc_t *ldcp = (vgen_ldc_t *)arg; 985 vio_raw_data_msg_t *pkt; 986 mblk_t *bp; 987 mblk_t *nmp = NULL; 988 caddr_t dst; 989 uint32_t mblksz; 990 uint32_t size; 991 uint32_t nbytes; 992 int rv; 993 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 994 vgen_stats_t *statsp = &ldcp->stats; 995 996 /* drop the packet if ldc is not up or handshake is not done */ 997 if (ldcp->ldc_status != LDC_UP) { 998 (void) atomic_inc_32(&statsp->tx_pri_fail); 999 DWARN(vgenp, ldcp, "status(%d), dropping packet\n", 1000 ldcp->ldc_status); 1001 goto send_pkt_exit; 1002 } 1003 1004 if (ldcp->hphase != VH_DONE) { 1005 (void) atomic_inc_32(&statsp->tx_pri_fail); 1006 DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n", 1007 ldcp->hphase); 1008 goto send_pkt_exit; 1009 } 1010 1011 size = msgsize(mp); 1012 1013 /* frame size bigger than available payload len of raw data msg ? */ 1014 if (size > (size_t)(ldcp->msglen - VIO_PKT_DATA_HDRSIZE)) { 1015 (void) atomic_inc_32(&statsp->tx_pri_fail); 1016 DWARN(vgenp, ldcp, "invalid size(%d)\n", size); 1017 goto send_pkt_exit; 1018 } 1019 1020 if (size < ETHERMIN) 1021 size = ETHERMIN; 1022 1023 /* alloc space for a raw data message */ 1024 nmp = vio_allocb(vgenp->pri_tx_vmp); 1025 if (nmp == NULL) { 1026 (void) atomic_inc_32(&statsp->tx_pri_fail); 1027 DWARN(vgenp, ldcp, "vio_allocb failed\n"); 1028 goto send_pkt_exit; 1029 } 1030 pkt = (vio_raw_data_msg_t *)nmp->b_rptr; 1031 1032 /* copy frame into the payload of raw data message */ 1033 dst = (caddr_t)pkt->data; 1034 for (bp = mp; bp != NULL; bp = bp->b_cont) { 1035 mblksz = MBLKL(bp); 1036 bcopy(bp->b_rptr, dst, mblksz); 1037 dst += mblksz; 1038 } 1039 1040 /* setup the raw data msg */ 1041 pkt->tag.vio_msgtype = VIO_TYPE_DATA; 1042 pkt->tag.vio_subtype = VIO_SUBTYPE_INFO; 1043 pkt->tag.vio_subtype_env = VIO_PKT_DATA; 1044 pkt->tag.vio_sid = ldcp->local_sid; 1045 nbytes = VIO_PKT_DATA_HDRSIZE + size; 1046 1047 /* send the msg over ldc */ 1048 rv = vgen_sendmsg(ldcp, (caddr_t)pkt, nbytes, B_FALSE); 1049 if (rv != VGEN_SUCCESS) { 1050 (void) atomic_inc_32(&statsp->tx_pri_fail); 1051 DWARN(vgenp, ldcp, "Error sending priority frame\n"); 1052 if (rv == ECONNRESET) { 1053 vgen_ldcsend_process_reset(ldcp); 1054 } 1055 goto send_pkt_exit; 1056 } 1057 1058 /* update stats */ 1059 (void) atomic_inc_64(&statsp->tx_pri_packets); 1060 (void) atomic_add_64(&statsp->tx_pri_bytes, size); 1061 1062 send_pkt_exit: 1063 if (nmp != NULL) 1064 freemsg(nmp); 1065 freemsg(mp); 1066 } 1067 1068 /* 1069 * This function transmits normal (non-priority) data frames over 1070 * the channel. It queues the frame into the transmit descriptor ring 1071 * and sends a VIO_DRING_DATA message if needed, to wake up the 1072 * peer to (re)start processing. 1073 */ 1074 static int 1075 vgen_ldcsend_dring(void *arg, mblk_t *mp) 1076 { 1077 vgen_ldc_t *ldcp = (vgen_ldc_t *)arg; 1078 vgen_private_desc_t *tbufp; 1079 vgen_private_desc_t *rtbufp; 1080 vnet_public_desc_t *rtxdp; 1081 vgen_private_desc_t *ntbufp; 1082 vnet_public_desc_t *txdp; 1083 vio_dring_entry_hdr_t *hdrp; 1084 vgen_stats_t *statsp; 1085 struct ether_header *ehp; 1086 boolean_t is_bcast = B_FALSE; 1087 boolean_t is_mcast = B_FALSE; 1088 size_t mblksz; 1089 caddr_t dst; 1090 mblk_t *bp; 1091 size_t size; 1092 int rv = 0; 1093 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 1094 vgen_hparams_t *lp = &ldcp->local_hparams; 1095 1096 statsp = &ldcp->stats; 1097 size = msgsize(mp); 1098 1099 DBG1(vgenp, ldcp, "enter\n"); 1100 1101 if (ldcp->ldc_status != LDC_UP) { 1102 DWARN(vgenp, ldcp, "status(%d), dropping packet\n", 1103 ldcp->ldc_status); 1104 /* retry ldc_up() if needed */ 1105 #ifdef VNET_IOC_DEBUG 1106 if (ldcp->flags & CHANNEL_STARTED && !ldcp->link_down_forced) { 1107 #else 1108 if (ldcp->flags & CHANNEL_STARTED) { 1109 #endif 1110 (void) ldc_up(ldcp->ldc_handle); 1111 } 1112 goto send_dring_exit; 1113 } 1114 1115 /* drop the packet if ldc is not up or handshake is not done */ 1116 if (ldcp->hphase != VH_DONE) { 1117 DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n", 1118 ldcp->hphase); 1119 goto send_dring_exit; 1120 } 1121 1122 if (size > (size_t)lp->mtu) { 1123 DWARN(vgenp, ldcp, "invalid size(%d)\n", size); 1124 goto send_dring_exit; 1125 } 1126 if (size < ETHERMIN) 1127 size = ETHERMIN; 1128 1129 ehp = (struct ether_header *)mp->b_rptr; 1130 is_bcast = IS_BROADCAST(ehp); 1131 is_mcast = IS_MULTICAST(ehp); 1132 1133 mutex_enter(&ldcp->txlock); 1134 /* 1135 * allocate a descriptor 1136 */ 1137 tbufp = ldcp->next_tbufp; 1138 ntbufp = NEXTTBUF(ldcp, tbufp); 1139 if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */ 1140 1141 mutex_enter(&ldcp->tclock); 1142 /* Try reclaiming now */ 1143 vgen_reclaim_dring(ldcp); 1144 ldcp->reclaim_lbolt = ddi_get_lbolt(); 1145 1146 if (ntbufp == ldcp->cur_tbufp) { 1147 /* Now we are really out of tbuf/txds */ 1148 ldcp->need_resched = B_TRUE; 1149 mutex_exit(&ldcp->tclock); 1150 1151 statsp->tx_no_desc++; 1152 mutex_exit(&ldcp->txlock); 1153 1154 return (VGEN_TX_NORESOURCES); 1155 } 1156 mutex_exit(&ldcp->tclock); 1157 } 1158 /* update next available tbuf in the ring and update tx index */ 1159 ldcp->next_tbufp = ntbufp; 1160 INCR_TXI(ldcp->next_txi, ldcp); 1161 1162 /* Mark the buffer busy before releasing the lock */ 1163 tbufp->flags = VGEN_PRIV_DESC_BUSY; 1164 mutex_exit(&ldcp->txlock); 1165 1166 /* copy data into pre-allocated transmit buffer */ 1167 dst = tbufp->datap + VNET_IPALIGN; 1168 for (bp = mp; bp != NULL; bp = bp->b_cont) { 1169 mblksz = MBLKL(bp); 1170 bcopy(bp->b_rptr, dst, mblksz); 1171 dst += mblksz; 1172 } 1173 1174 tbufp->datalen = size; 1175 1176 /* initialize the corresponding public descriptor (txd) */ 1177 txdp = tbufp->descp; 1178 hdrp = &txdp->hdr; 1179 txdp->nbytes = size; 1180 txdp->ncookies = tbufp->ncookies; 1181 bcopy((tbufp->memcookie), (txdp->memcookie), 1182 tbufp->ncookies * sizeof (ldc_mem_cookie_t)); 1183 1184 mutex_enter(&ldcp->wrlock); 1185 /* 1186 * If the flags not set to BUSY, it implies that the clobber 1187 * was done while we were copying the data. In such case, 1188 * discard the packet and return. 1189 */ 1190 if (tbufp->flags != VGEN_PRIV_DESC_BUSY) { 1191 statsp->oerrors++; 1192 mutex_exit(&ldcp->wrlock); 1193 goto send_dring_exit; 1194 } 1195 hdrp->dstate = VIO_DESC_READY; 1196 1197 /* update stats */ 1198 statsp->opackets++; 1199 statsp->obytes += size; 1200 if (is_bcast) 1201 statsp->brdcstxmt++; 1202 else if (is_mcast) 1203 statsp->multixmt++; 1204 1205 /* send dring datamsg to the peer */ 1206 if (ldcp->resched_peer) { 1207 1208 rtbufp = &ldcp->tbufp[ldcp->resched_peer_txi]; 1209 rtxdp = rtbufp->descp; 1210 1211 if (rtxdp->hdr.dstate == VIO_DESC_READY) { 1212 1213 rv = vgen_send_dring_data(ldcp, 1214 (uint32_t)ldcp->resched_peer_txi, -1); 1215 if (rv != 0) { 1216 /* error: drop the packet */ 1217 DWARN(vgenp, ldcp, "vgen_send_dring_data " 1218 "failed: rv(%d) len(%d)\n", 1219 ldcp->ldc_id, rv, size); 1220 statsp->oerrors++; 1221 } else { 1222 ldcp->resched_peer = B_FALSE; 1223 } 1224 1225 } 1226 1227 } 1228 1229 mutex_exit(&ldcp->wrlock); 1230 1231 send_dring_exit: 1232 if (rv == ECONNRESET) { 1233 vgen_ldcsend_process_reset(ldcp); 1234 } 1235 freemsg(mp); 1236 DBG1(vgenp, ldcp, "exit\n"); 1237 return (VGEN_TX_SUCCESS); 1238 } 1239 1240 /* enable/disable a multicast address */ 1241 int 1242 vgen_multicst(void *arg, boolean_t add, const uint8_t *mca) 1243 { 1244 vgen_t *vgenp; 1245 vnet_mcast_msg_t mcastmsg; 1246 vio_msg_tag_t *tagp; 1247 vgen_port_t *portp; 1248 vgen_portlist_t *plistp; 1249 vgen_ldc_t *ldcp; 1250 vgen_ldclist_t *ldclp; 1251 struct ether_addr *addrp; 1252 int rv = DDI_FAILURE; 1253 uint32_t i; 1254 1255 portp = (vgen_port_t *)arg; 1256 vgenp = portp->vgenp; 1257 1258 if (portp != vgenp->vsw_portp) { 1259 return (DDI_SUCCESS); 1260 } 1261 1262 addrp = (struct ether_addr *)mca; 1263 tagp = &mcastmsg.tag; 1264 bzero(&mcastmsg, sizeof (mcastmsg)); 1265 1266 mutex_enter(&vgenp->lock); 1267 1268 plistp = &(vgenp->vgenports); 1269 1270 READ_ENTER(&plistp->rwlock); 1271 1272 portp = vgenp->vsw_portp; 1273 if (portp == NULL) { 1274 RW_EXIT(&plistp->rwlock); 1275 mutex_exit(&vgenp->lock); 1276 return (rv); 1277 } 1278 ldclp = &portp->ldclist; 1279 1280 READ_ENTER(&ldclp->rwlock); 1281 1282 ldcp = ldclp->headp; 1283 if (ldcp == NULL) 1284 goto vgen_mcast_exit; 1285 1286 mutex_enter(&ldcp->cblock); 1287 1288 if (ldcp->hphase == VH_DONE) { 1289 /* 1290 * If handshake is done, send a msg to vsw to add/remove 1291 * the multicast address. Otherwise, we just update this 1292 * mcast address in our table and the table will be sync'd 1293 * with vsw when handshake completes. 1294 */ 1295 tagp->vio_msgtype = VIO_TYPE_CTRL; 1296 tagp->vio_subtype = VIO_SUBTYPE_INFO; 1297 tagp->vio_subtype_env = VNET_MCAST_INFO; 1298 tagp->vio_sid = ldcp->local_sid; 1299 bcopy(mca, &(mcastmsg.mca), ETHERADDRL); 1300 mcastmsg.set = add; 1301 mcastmsg.count = 1; 1302 if (vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (mcastmsg), 1303 B_FALSE) != VGEN_SUCCESS) { 1304 DWARN(vgenp, ldcp, "vgen_sendmsg failed\n"); 1305 mutex_exit(&ldcp->cblock); 1306 goto vgen_mcast_exit; 1307 } 1308 } 1309 1310 mutex_exit(&ldcp->cblock); 1311 1312 if (add) { 1313 1314 /* expand multicast table if necessary */ 1315 if (vgenp->mccount >= vgenp->mcsize) { 1316 struct ether_addr *newtab; 1317 uint32_t newsize; 1318 1319 1320 newsize = vgenp->mcsize * 2; 1321 1322 newtab = kmem_zalloc(newsize * 1323 sizeof (struct ether_addr), KM_NOSLEEP); 1324 if (newtab == NULL) 1325 goto vgen_mcast_exit; 1326 bcopy(vgenp->mctab, newtab, vgenp->mcsize * 1327 sizeof (struct ether_addr)); 1328 kmem_free(vgenp->mctab, 1329 vgenp->mcsize * sizeof (struct ether_addr)); 1330 1331 vgenp->mctab = newtab; 1332 vgenp->mcsize = newsize; 1333 } 1334 1335 /* add address to the table */ 1336 vgenp->mctab[vgenp->mccount++] = *addrp; 1337 1338 } else { 1339 1340 /* delete address from the table */ 1341 for (i = 0; i < vgenp->mccount; i++) { 1342 if (ether_cmp(addrp, &(vgenp->mctab[i])) == 0) { 1343 1344 /* 1345 * If there's more than one address in this 1346 * table, delete the unwanted one by moving 1347 * the last one in the list over top of it; 1348 * otherwise, just remove it. 1349 */ 1350 if (vgenp->mccount > 1) { 1351 vgenp->mctab[i] = 1352 vgenp->mctab[vgenp->mccount-1]; 1353 } 1354 vgenp->mccount--; 1355 break; 1356 } 1357 } 1358 } 1359 1360 rv = DDI_SUCCESS; 1361 1362 vgen_mcast_exit: 1363 RW_EXIT(&ldclp->rwlock); 1364 RW_EXIT(&plistp->rwlock); 1365 1366 mutex_exit(&vgenp->lock); 1367 return (rv); 1368 } 1369 1370 /* set or clear promiscuous mode on the device */ 1371 static int 1372 vgen_promisc(void *arg, boolean_t on) 1373 { 1374 _NOTE(ARGUNUSED(arg, on)) 1375 return (DDI_SUCCESS); 1376 } 1377 1378 /* set the unicast mac address of the device */ 1379 static int 1380 vgen_unicst(void *arg, const uint8_t *mca) 1381 { 1382 _NOTE(ARGUNUSED(arg, mca)) 1383 return (DDI_SUCCESS); 1384 } 1385 1386 /* get device statistics */ 1387 int 1388 vgen_stat(void *arg, uint_t stat, uint64_t *val) 1389 { 1390 vgen_port_t *portp = (vgen_port_t *)arg; 1391 1392 *val = vgen_port_stat(portp, stat); 1393 1394 return (0); 1395 } 1396 1397 /* vgen internal functions */ 1398 /* detach all ports from the device */ 1399 static void 1400 vgen_detach_ports(vgen_t *vgenp) 1401 { 1402 vgen_port_t *portp; 1403 vgen_portlist_t *plistp; 1404 1405 plistp = &(vgenp->vgenports); 1406 WRITE_ENTER(&plistp->rwlock); 1407 while ((portp = plistp->headp) != NULL) { 1408 vgen_port_detach(portp); 1409 } 1410 RW_EXIT(&plistp->rwlock); 1411 } 1412 1413 /* 1414 * detach the given port. 1415 */ 1416 static void 1417 vgen_port_detach(vgen_port_t *portp) 1418 { 1419 vgen_t *vgenp; 1420 vgen_ldclist_t *ldclp; 1421 int port_num; 1422 1423 vgenp = portp->vgenp; 1424 port_num = portp->port_num; 1425 1426 DBG1(vgenp, NULL, "port(%d):enter\n", port_num); 1427 1428 /* 1429 * If this port is connected to the vswitch, then 1430 * potentially there could be ports that may be using 1431 * this port to transmit packets. To address this do 1432 * the following: 1433 * - First set vgenp->vsw_portp to NULL, so that 1434 * its not used after that. 1435 * - Then wait for the refcnt to go down to 0. 1436 * - Now we can safely detach this port. 1437 */ 1438 if (vgenp->vsw_portp == portp) { 1439 vgenp->vsw_portp = NULL; 1440 while (vgenp->vsw_port_refcnt > 0) { 1441 delay(drv_usectohz(vgen_tx_delay)); 1442 } 1443 (void) atomic_swap_32(&vgenp->vsw_port_refcnt, 0); 1444 } 1445 1446 if (portp->vhp != NULL) { 1447 vio_net_resource_unreg(portp->vhp); 1448 portp->vhp = NULL; 1449 } 1450 1451 vgen_vlan_destroy_hash(portp); 1452 1453 /* remove it from port list */ 1454 vgen_port_list_remove(portp); 1455 1456 /* detach channels from this port */ 1457 ldclp = &portp->ldclist; 1458 WRITE_ENTER(&ldclp->rwlock); 1459 while (ldclp->headp) { 1460 vgen_ldc_detach(ldclp->headp); 1461 } 1462 RW_EXIT(&ldclp->rwlock); 1463 rw_destroy(&ldclp->rwlock); 1464 1465 if (portp->num_ldcs != 0) { 1466 kmem_free(portp->ldc_ids, portp->num_ldcs * sizeof (uint64_t)); 1467 portp->num_ldcs = 0; 1468 } 1469 1470 mutex_destroy(&portp->lock); 1471 KMEM_FREE(portp); 1472 1473 DBG1(vgenp, NULL, "port(%d):exit\n", port_num); 1474 } 1475 1476 /* add a port to port list */ 1477 static void 1478 vgen_port_list_insert(vgen_port_t *portp) 1479 { 1480 vgen_portlist_t *plistp; 1481 vgen_t *vgenp; 1482 1483 vgenp = portp->vgenp; 1484 plistp = &(vgenp->vgenports); 1485 1486 if (plistp->headp == NULL) { 1487 plistp->headp = portp; 1488 } else { 1489 plistp->tailp->nextp = portp; 1490 } 1491 plistp->tailp = portp; 1492 portp->nextp = NULL; 1493 } 1494 1495 /* remove a port from port list */ 1496 static void 1497 vgen_port_list_remove(vgen_port_t *portp) 1498 { 1499 vgen_port_t *prevp; 1500 vgen_port_t *nextp; 1501 vgen_portlist_t *plistp; 1502 vgen_t *vgenp; 1503 1504 vgenp = portp->vgenp; 1505 1506 plistp = &(vgenp->vgenports); 1507 1508 if (plistp->headp == NULL) 1509 return; 1510 1511 if (portp == plistp->headp) { 1512 plistp->headp = portp->nextp; 1513 if (portp == plistp->tailp) 1514 plistp->tailp = plistp->headp; 1515 } else { 1516 for (prevp = plistp->headp; 1517 ((nextp = prevp->nextp) != NULL) && (nextp != portp); 1518 prevp = nextp) 1519 ; 1520 if (nextp == portp) { 1521 prevp->nextp = portp->nextp; 1522 } 1523 if (portp == plistp->tailp) 1524 plistp->tailp = prevp; 1525 } 1526 } 1527 1528 /* lookup a port in the list based on port_num */ 1529 static vgen_port_t * 1530 vgen_port_lookup(vgen_portlist_t *plistp, int port_num) 1531 { 1532 vgen_port_t *portp = NULL; 1533 1534 for (portp = plistp->headp; portp != NULL; portp = portp->nextp) { 1535 if (portp->port_num == port_num) { 1536 break; 1537 } 1538 } 1539 1540 return (portp); 1541 } 1542 1543 /* enable ports for transmit/receive */ 1544 static void 1545 vgen_init_ports(vgen_t *vgenp) 1546 { 1547 vgen_port_t *portp; 1548 vgen_portlist_t *plistp; 1549 1550 plistp = &(vgenp->vgenports); 1551 READ_ENTER(&plistp->rwlock); 1552 1553 for (portp = plistp->headp; portp != NULL; portp = portp->nextp) { 1554 vgen_port_init(portp); 1555 } 1556 1557 RW_EXIT(&plistp->rwlock); 1558 } 1559 1560 static void 1561 vgen_port_init(vgen_port_t *portp) 1562 { 1563 /* Add the port to the specified vlans */ 1564 vgen_vlan_add_ids(portp); 1565 1566 /* Bring up the channels of this port */ 1567 vgen_init_ldcs(portp); 1568 } 1569 1570 /* disable transmit/receive on ports */ 1571 static void 1572 vgen_uninit_ports(vgen_t *vgenp) 1573 { 1574 vgen_port_t *portp; 1575 vgen_portlist_t *plistp; 1576 1577 plistp = &(vgenp->vgenports); 1578 READ_ENTER(&plistp->rwlock); 1579 1580 for (portp = plistp->headp; portp != NULL; portp = portp->nextp) { 1581 vgen_port_uninit(portp); 1582 } 1583 1584 RW_EXIT(&plistp->rwlock); 1585 } 1586 1587 static void 1588 vgen_port_uninit(vgen_port_t *portp) 1589 { 1590 vgen_uninit_ldcs(portp); 1591 1592 /* remove the port from vlans it has been assigned to */ 1593 vgen_vlan_remove_ids(portp); 1594 } 1595 1596 /* 1597 * Scan the machine description for this instance of vnet 1598 * and read its properties. Called only from vgen_init(). 1599 * Returns: 0 on success, 1 on failure. 1600 */ 1601 static int 1602 vgen_read_mdprops(vgen_t *vgenp) 1603 { 1604 vnet_t *vnetp = vgenp->vnetp; 1605 md_t *mdp = NULL; 1606 mde_cookie_t rootnode; 1607 mde_cookie_t *listp = NULL; 1608 uint64_t cfgh; 1609 char *name; 1610 int rv = 1; 1611 int num_nodes = 0; 1612 int num_devs = 0; 1613 int listsz = 0; 1614 int i; 1615 1616 if ((mdp = md_get_handle()) == NULL) { 1617 return (rv); 1618 } 1619 1620 num_nodes = md_node_count(mdp); 1621 ASSERT(num_nodes > 0); 1622 1623 listsz = num_nodes * sizeof (mde_cookie_t); 1624 listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP); 1625 1626 rootnode = md_root_node(mdp); 1627 1628 /* search for all "virtual_device" nodes */ 1629 num_devs = md_scan_dag(mdp, rootnode, 1630 md_find_name(mdp, vdev_propname), 1631 md_find_name(mdp, "fwd"), listp); 1632 if (num_devs <= 0) { 1633 goto vgen_readmd_exit; 1634 } 1635 1636 /* 1637 * Now loop through the list of virtual-devices looking for 1638 * devices with name "network" and for each such device compare 1639 * its instance with what we have from the 'reg' property to 1640 * find the right node in MD and then read all its properties. 1641 */ 1642 for (i = 0; i < num_devs; i++) { 1643 1644 if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) { 1645 goto vgen_readmd_exit; 1646 } 1647 1648 /* is this a "network" device? */ 1649 if (strcmp(name, vnet_propname) != 0) 1650 continue; 1651 1652 if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) { 1653 goto vgen_readmd_exit; 1654 } 1655 1656 /* is this the required instance of vnet? */ 1657 if (vgenp->regprop != cfgh) 1658 continue; 1659 1660 /* 1661 * Read the 'linkprop' property to know if this vnet 1662 * device should get physical link updates from vswitch. 1663 */ 1664 vgen_linkprop_read(vgenp, mdp, listp[i], 1665 &vnetp->pls_update); 1666 1667 /* 1668 * Read the mtu. Note that we set the mtu of vnet device within 1669 * this routine itself, after validating the range. 1670 */ 1671 vgen_mtu_read(vgenp, mdp, listp[i], &vnetp->mtu); 1672 if (vnetp->mtu < ETHERMTU || vnetp->mtu > VNET_MAX_MTU) { 1673 vnetp->mtu = ETHERMTU; 1674 } 1675 vgenp->max_frame_size = vnetp->mtu + 1676 sizeof (struct ether_header) + VLAN_TAGSZ; 1677 1678 /* read priority ether types */ 1679 vgen_read_pri_eth_types(vgenp, mdp, listp[i]); 1680 1681 /* read vlan id properties of this vnet instance */ 1682 vgen_vlan_read_ids(vgenp, VGEN_LOCAL, mdp, listp[i], 1683 &vnetp->pvid, &vnetp->vids, &vnetp->nvids, 1684 &vnetp->default_vlan_id); 1685 1686 rv = 0; 1687 break; 1688 } 1689 1690 vgen_readmd_exit: 1691 1692 kmem_free(listp, listsz); 1693 (void) md_fini_handle(mdp); 1694 return (rv); 1695 } 1696 1697 /* 1698 * Read vlan id properties of the given MD node. 1699 * Arguments: 1700 * arg: device argument(vnet device or a port) 1701 * type: type of arg; VGEN_LOCAL(vnet device) or VGEN_PEER(port) 1702 * mdp: machine description 1703 * node: md node cookie 1704 * 1705 * Returns: 1706 * pvidp: port-vlan-id of the node 1707 * vidspp: list of vlan-ids of the node 1708 * nvidsp: # of vlan-ids in the list 1709 * default_idp: default-vlan-id of the node(if node is vnet device) 1710 */ 1711 static void 1712 vgen_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node, 1713 uint16_t *pvidp, uint16_t **vidspp, uint16_t *nvidsp, 1714 uint16_t *default_idp) 1715 { 1716 vgen_t *vgenp; 1717 vnet_t *vnetp; 1718 vgen_port_t *portp; 1719 char *pvid_propname; 1720 char *vid_propname; 1721 uint_t nvids; 1722 uint32_t vids_size; 1723 int rv; 1724 int i; 1725 uint64_t *data; 1726 uint64_t val; 1727 int size; 1728 int inst; 1729 1730 if (type == VGEN_LOCAL) { 1731 1732 vgenp = (vgen_t *)arg; 1733 vnetp = vgenp->vnetp; 1734 pvid_propname = vgen_pvid_propname; 1735 vid_propname = vgen_vid_propname; 1736 inst = vnetp->instance; 1737 1738 } else if (type == VGEN_PEER) { 1739 1740 portp = (vgen_port_t *)arg; 1741 vgenp = portp->vgenp; 1742 vnetp = vgenp->vnetp; 1743 pvid_propname = port_pvid_propname; 1744 vid_propname = port_vid_propname; 1745 inst = portp->port_num; 1746 1747 } else { 1748 return; 1749 } 1750 1751 if (type == VGEN_LOCAL && default_idp != NULL) { 1752 rv = md_get_prop_val(mdp, node, vgen_dvid_propname, &val); 1753 if (rv != 0) { 1754 DWARN(vgenp, NULL, "prop(%s) not found", 1755 vgen_dvid_propname); 1756 1757 *default_idp = vnet_default_vlan_id; 1758 } else { 1759 *default_idp = val & 0xFFF; 1760 DBG2(vgenp, NULL, "%s(%d): (%d)\n", vgen_dvid_propname, 1761 inst, *default_idp); 1762 } 1763 } 1764 1765 rv = md_get_prop_val(mdp, node, pvid_propname, &val); 1766 if (rv != 0) { 1767 DWARN(vgenp, NULL, "prop(%s) not found", pvid_propname); 1768 *pvidp = vnet_default_vlan_id; 1769 } else { 1770 1771 *pvidp = val & 0xFFF; 1772 DBG2(vgenp, NULL, "%s(%d): (%d)\n", 1773 pvid_propname, inst, *pvidp); 1774 } 1775 1776 rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data, 1777 &size); 1778 if (rv != 0) { 1779 DBG2(vgenp, NULL, "prop(%s) not found", vid_propname); 1780 size = 0; 1781 } else { 1782 size /= sizeof (uint64_t); 1783 } 1784 nvids = size; 1785 1786 if (nvids != 0) { 1787 DBG2(vgenp, NULL, "%s(%d): ", vid_propname, inst); 1788 vids_size = sizeof (uint16_t) * nvids; 1789 *vidspp = kmem_zalloc(vids_size, KM_SLEEP); 1790 for (i = 0; i < nvids; i++) { 1791 (*vidspp)[i] = data[i] & 0xFFFF; 1792 DBG2(vgenp, NULL, " %d ", (*vidspp)[i]); 1793 } 1794 DBG2(vgenp, NULL, "\n"); 1795 } 1796 1797 *nvidsp = nvids; 1798 } 1799 1800 /* 1801 * Create a vlan id hash table for the given port. 1802 */ 1803 static void 1804 vgen_vlan_create_hash(vgen_port_t *portp) 1805 { 1806 char hashname[MAXNAMELEN]; 1807 1808 (void) snprintf(hashname, MAXNAMELEN, "port%d-vlan-hash", 1809 portp->port_num); 1810 1811 portp->vlan_nchains = vgen_vlan_nchains; 1812 portp->vlan_hashp = mod_hash_create_idhash(hashname, 1813 portp->vlan_nchains, mod_hash_null_valdtor); 1814 } 1815 1816 /* 1817 * Destroy the vlan id hash table in the given port. 1818 */ 1819 static void 1820 vgen_vlan_destroy_hash(vgen_port_t *portp) 1821 { 1822 if (portp->vlan_hashp != NULL) { 1823 mod_hash_destroy_hash(portp->vlan_hashp); 1824 portp->vlan_hashp = NULL; 1825 portp->vlan_nchains = 0; 1826 } 1827 } 1828 1829 /* 1830 * Add a port to the vlans specified in its port properites. 1831 */ 1832 static void 1833 vgen_vlan_add_ids(vgen_port_t *portp) 1834 { 1835 int rv; 1836 int i; 1837 1838 rv = mod_hash_insert(portp->vlan_hashp, 1839 (mod_hash_key_t)VLAN_ID_KEY(portp->pvid), 1840 (mod_hash_val_t)B_TRUE); 1841 ASSERT(rv == 0); 1842 1843 for (i = 0; i < portp->nvids; i++) { 1844 rv = mod_hash_insert(portp->vlan_hashp, 1845 (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]), 1846 (mod_hash_val_t)B_TRUE); 1847 ASSERT(rv == 0); 1848 } 1849 } 1850 1851 /* 1852 * Remove a port from the vlans it has been assigned to. 1853 */ 1854 static void 1855 vgen_vlan_remove_ids(vgen_port_t *portp) 1856 { 1857 int rv; 1858 int i; 1859 mod_hash_val_t vp; 1860 1861 rv = mod_hash_remove(portp->vlan_hashp, 1862 (mod_hash_key_t)VLAN_ID_KEY(portp->pvid), 1863 (mod_hash_val_t *)&vp); 1864 ASSERT(rv == 0); 1865 1866 for (i = 0; i < portp->nvids; i++) { 1867 rv = mod_hash_remove(portp->vlan_hashp, 1868 (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]), 1869 (mod_hash_val_t *)&vp); 1870 ASSERT(rv == 0); 1871 } 1872 } 1873 1874 /* 1875 * Lookup the vlan id of the given tx frame. If it is a vlan-tagged frame, 1876 * then the vlan-id is available in the tag; otherwise, its vlan id is 1877 * implicitly obtained from the port-vlan-id of the vnet device. 1878 * The vlan id determined is returned in vidp. 1879 * Returns: B_TRUE if it is a tagged frame; B_FALSE if it is untagged. 1880 */ 1881 static boolean_t 1882 vgen_frame_lookup_vid(vnet_t *vnetp, struct ether_header *ehp, uint16_t *vidp) 1883 { 1884 struct ether_vlan_header *evhp; 1885 1886 /* If it's a tagged frame, get the vlan id from vlan header */ 1887 if (ehp->ether_type == ETHERTYPE_VLAN) { 1888 1889 evhp = (struct ether_vlan_header *)ehp; 1890 *vidp = VLAN_ID(ntohs(evhp->ether_tci)); 1891 return (B_TRUE); 1892 } 1893 1894 /* Untagged frame, vlan-id is the pvid of vnet device */ 1895 *vidp = vnetp->pvid; 1896 return (B_FALSE); 1897 } 1898 1899 /* 1900 * Find the given vlan id in the hash table. 1901 * Return: B_TRUE if the id is found; B_FALSE if not found. 1902 */ 1903 static boolean_t 1904 vgen_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid) 1905 { 1906 int rv; 1907 mod_hash_val_t vp; 1908 1909 rv = mod_hash_find(vlan_hashp, VLAN_ID_KEY(vid), (mod_hash_val_t *)&vp); 1910 1911 if (rv != 0) 1912 return (B_FALSE); 1913 1914 return (B_TRUE); 1915 } 1916 1917 /* 1918 * This function reads "priority-ether-types" property from md. This property 1919 * is used to enable support for priority frames. Applications which need 1920 * guaranteed and timely delivery of certain high priority frames to/from 1921 * a vnet or vsw within ldoms, should configure this property by providing 1922 * the ether type(s) for which the priority facility is needed. 1923 * Normal data frames are delivered over a ldc channel using the descriptor 1924 * ring mechanism which is constrained by factors such as descriptor ring size, 1925 * the rate at which the ring is processed at the peer ldc end point, etc. 1926 * The priority mechanism provides an Out-Of-Band path to send/receive frames 1927 * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the 1928 * descriptor ring path and enables a more reliable and timely delivery of 1929 * frames to the peer. 1930 */ 1931 static void 1932 vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp, mde_cookie_t node) 1933 { 1934 int rv; 1935 uint16_t *types; 1936 uint64_t *data; 1937 int size; 1938 int i; 1939 size_t mblk_sz; 1940 1941 rv = md_get_prop_data(mdp, node, pri_types_propname, 1942 (uint8_t **)&data, &size); 1943 if (rv != 0) { 1944 /* 1945 * Property may not exist if we are running pre-ldoms1.1 f/w. 1946 * Check if 'vgen_pri_eth_type' has been set in that case. 1947 */ 1948 if (vgen_pri_eth_type != 0) { 1949 size = sizeof (vgen_pri_eth_type); 1950 data = &vgen_pri_eth_type; 1951 } else { 1952 DBG2(vgenp, NULL, 1953 "prop(%s) not found", pri_types_propname); 1954 size = 0; 1955 } 1956 } 1957 1958 if (size == 0) { 1959 vgenp->pri_num_types = 0; 1960 return; 1961 } 1962 1963 /* 1964 * we have some priority-ether-types defined; 1965 * allocate a table of these types and also 1966 * allocate a pool of mblks to transmit these 1967 * priority packets. 1968 */ 1969 size /= sizeof (uint64_t); 1970 vgenp->pri_num_types = size; 1971 vgenp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP); 1972 for (i = 0, types = vgenp->pri_types; i < size; i++) { 1973 types[i] = data[i] & 0xFFFF; 1974 } 1975 mblk_sz = (VIO_PKT_DATA_HDRSIZE + vgenp->max_frame_size + 7) & ~7; 1976 (void) vio_create_mblks(vgen_pri_tx_nmblks, mblk_sz, 1977 &vgenp->pri_tx_vmp); 1978 } 1979 1980 static void 1981 vgen_mtu_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node, uint32_t *mtu) 1982 { 1983 int rv; 1984 uint64_t val; 1985 char *mtu_propname; 1986 1987 mtu_propname = vgen_mtu_propname; 1988 1989 rv = md_get_prop_val(mdp, node, mtu_propname, &val); 1990 if (rv != 0) { 1991 DWARN(vgenp, NULL, "prop(%s) not found", mtu_propname); 1992 *mtu = vnet_ethermtu; 1993 } else { 1994 1995 *mtu = val & 0xFFFF; 1996 DBG2(vgenp, NULL, "%s(%d): (%d)\n", mtu_propname, 1997 vgenp->instance, *mtu); 1998 } 1999 } 2000 2001 static void 2002 vgen_linkprop_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node, 2003 boolean_t *pls) 2004 { 2005 int rv; 2006 uint64_t val; 2007 char *linkpropname; 2008 2009 linkpropname = vgen_linkprop_propname; 2010 2011 rv = md_get_prop_val(mdp, node, linkpropname, &val); 2012 if (rv != 0) { 2013 DWARN(vgenp, NULL, "prop(%s) not found", linkpropname); 2014 *pls = B_FALSE; 2015 } else { 2016 2017 *pls = (val & 0x1) ? B_TRUE : B_FALSE; 2018 DBG2(vgenp, NULL, "%s(%d): (%d)\n", linkpropname, 2019 vgenp->instance, *pls); 2020 } 2021 } 2022 2023 /* register with MD event generator */ 2024 static int 2025 vgen_mdeg_reg(vgen_t *vgenp) 2026 { 2027 mdeg_prop_spec_t *pspecp; 2028 mdeg_node_spec_t *parentp; 2029 uint_t templatesz; 2030 int rv; 2031 mdeg_handle_t dev_hdl = NULL; 2032 mdeg_handle_t port_hdl = NULL; 2033 2034 templatesz = sizeof (vgen_prop_template); 2035 pspecp = kmem_zalloc(templatesz, KM_NOSLEEP); 2036 if (pspecp == NULL) { 2037 return (DDI_FAILURE); 2038 } 2039 parentp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_NOSLEEP); 2040 if (parentp == NULL) { 2041 kmem_free(pspecp, templatesz); 2042 return (DDI_FAILURE); 2043 } 2044 2045 bcopy(vgen_prop_template, pspecp, templatesz); 2046 2047 /* 2048 * NOTE: The instance here refers to the value of "reg" property and 2049 * not the dev_info instance (ddi_get_instance()) of vnet. 2050 */ 2051 VGEN_SET_MDEG_PROP_INST(pspecp, vgenp->regprop); 2052 2053 parentp->namep = "virtual-device"; 2054 parentp->specp = pspecp; 2055 2056 /* save parentp in vgen_t */ 2057 vgenp->mdeg_parentp = parentp; 2058 2059 /* 2060 * Register an interest in 'virtual-device' nodes with a 2061 * 'name' property of 'network' 2062 */ 2063 rv = mdeg_register(parentp, &vdev_match, vgen_mdeg_cb, vgenp, &dev_hdl); 2064 if (rv != MDEG_SUCCESS) { 2065 DERR(vgenp, NULL, "mdeg_register failed\n"); 2066 goto mdeg_reg_fail; 2067 } 2068 2069 /* Register an interest in 'port' nodes */ 2070 rv = mdeg_register(parentp, &vport_match, vgen_mdeg_port_cb, vgenp, 2071 &port_hdl); 2072 if (rv != MDEG_SUCCESS) { 2073 DERR(vgenp, NULL, "mdeg_register failed\n"); 2074 goto mdeg_reg_fail; 2075 } 2076 2077 /* save mdeg handle in vgen_t */ 2078 vgenp->mdeg_dev_hdl = dev_hdl; 2079 vgenp->mdeg_port_hdl = port_hdl; 2080 2081 return (DDI_SUCCESS); 2082 2083 mdeg_reg_fail: 2084 if (dev_hdl != NULL) { 2085 (void) mdeg_unregister(dev_hdl); 2086 } 2087 KMEM_FREE(parentp); 2088 kmem_free(pspecp, templatesz); 2089 vgenp->mdeg_parentp = NULL; 2090 return (DDI_FAILURE); 2091 } 2092 2093 /* unregister with MD event generator */ 2094 static void 2095 vgen_mdeg_unreg(vgen_t *vgenp) 2096 { 2097 (void) mdeg_unregister(vgenp->mdeg_dev_hdl); 2098 (void) mdeg_unregister(vgenp->mdeg_port_hdl); 2099 kmem_free(vgenp->mdeg_parentp->specp, sizeof (vgen_prop_template)); 2100 KMEM_FREE(vgenp->mdeg_parentp); 2101 vgenp->mdeg_parentp = NULL; 2102 vgenp->mdeg_dev_hdl = NULL; 2103 vgenp->mdeg_port_hdl = NULL; 2104 } 2105 2106 /* mdeg callback function for the port node */ 2107 static int 2108 vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp) 2109 { 2110 int idx; 2111 int vsw_idx = -1; 2112 uint64_t val; 2113 vgen_t *vgenp; 2114 2115 if ((resp == NULL) || (cb_argp == NULL)) { 2116 return (MDEG_FAILURE); 2117 } 2118 2119 vgenp = (vgen_t *)cb_argp; 2120 DBG1(vgenp, NULL, "enter\n"); 2121 2122 mutex_enter(&vgenp->lock); 2123 2124 DBG1(vgenp, NULL, "ports: removed(%x), " 2125 "added(%x), updated(%x)\n", resp->removed.nelem, 2126 resp->added.nelem, resp->match_curr.nelem); 2127 2128 for (idx = 0; idx < resp->removed.nelem; idx++) { 2129 (void) vgen_remove_port(vgenp, resp->removed.mdp, 2130 resp->removed.mdep[idx]); 2131 } 2132 2133 if (vgenp->vsw_portp == NULL) { 2134 /* 2135 * find vsw_port and add it first, because other ports need 2136 * this when adding fdb entry (see vgen_port_init()). 2137 */ 2138 for (idx = 0; idx < resp->added.nelem; idx++) { 2139 if (!(md_get_prop_val(resp->added.mdp, 2140 resp->added.mdep[idx], swport_propname, &val))) { 2141 if (val == 0) { 2142 /* 2143 * This port is connected to the 2144 * vsw on service domain. 2145 */ 2146 vsw_idx = idx; 2147 if (vgen_add_port(vgenp, 2148 resp->added.mdp, 2149 resp->added.mdep[idx]) != 2150 DDI_SUCCESS) { 2151 cmn_err(CE_NOTE, "vnet%d Could " 2152 "not initialize virtual " 2153 "switch port.", 2154 vgenp->instance); 2155 mutex_exit(&vgenp->lock); 2156 return (MDEG_FAILURE); 2157 } 2158 break; 2159 } 2160 } 2161 } 2162 if (vsw_idx == -1) { 2163 DWARN(vgenp, NULL, "can't find vsw_port\n"); 2164 mutex_exit(&vgenp->lock); 2165 return (MDEG_FAILURE); 2166 } 2167 } 2168 2169 for (idx = 0; idx < resp->added.nelem; idx++) { 2170 if ((vsw_idx != -1) && (vsw_idx == idx)) /* skip vsw_port */ 2171 continue; 2172 2173 /* If this port can't be added just skip it. */ 2174 (void) vgen_add_port(vgenp, resp->added.mdp, 2175 resp->added.mdep[idx]); 2176 } 2177 2178 for (idx = 0; idx < resp->match_curr.nelem; idx++) { 2179 (void) vgen_update_port(vgenp, resp->match_curr.mdp, 2180 resp->match_curr.mdep[idx], 2181 resp->match_prev.mdp, 2182 resp->match_prev.mdep[idx]); 2183 } 2184 2185 mutex_exit(&vgenp->lock); 2186 DBG1(vgenp, NULL, "exit\n"); 2187 return (MDEG_SUCCESS); 2188 } 2189 2190 /* mdeg callback function for the vnet node */ 2191 static int 2192 vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp) 2193 { 2194 vgen_t *vgenp; 2195 vnet_t *vnetp; 2196 md_t *mdp; 2197 mde_cookie_t node; 2198 uint64_t inst; 2199 char *node_name = NULL; 2200 2201 if ((resp == NULL) || (cb_argp == NULL)) { 2202 return (MDEG_FAILURE); 2203 } 2204 2205 vgenp = (vgen_t *)cb_argp; 2206 vnetp = vgenp->vnetp; 2207 2208 DBG1(vgenp, NULL, "added %d : removed %d : curr matched %d" 2209 " : prev matched %d", resp->added.nelem, resp->removed.nelem, 2210 resp->match_curr.nelem, resp->match_prev.nelem); 2211 2212 mutex_enter(&vgenp->lock); 2213 2214 /* 2215 * We get an initial callback for this node as 'added' after 2216 * registering with mdeg. Note that we would have already gathered 2217 * information about this vnet node by walking MD earlier during attach 2218 * (in vgen_read_mdprops()). So, there is a window where the properties 2219 * of this node might have changed when we get this initial 'added' 2220 * callback. We handle this as if an update occured and invoke the same 2221 * function which handles updates to the properties of this vnet-node 2222 * if any. A non-zero 'match' value indicates that the MD has been 2223 * updated and that a 'network' node is present which may or may not 2224 * have been updated. It is up to the clients to examine their own 2225 * nodes and determine if they have changed. 2226 */ 2227 if (resp->added.nelem != 0) { 2228 2229 if (resp->added.nelem != 1) { 2230 cmn_err(CE_NOTE, "!vnet%d: number of nodes added " 2231 "invalid: %d\n", vnetp->instance, 2232 resp->added.nelem); 2233 goto vgen_mdeg_cb_err; 2234 } 2235 2236 mdp = resp->added.mdp; 2237 node = resp->added.mdep[0]; 2238 2239 } else if (resp->match_curr.nelem != 0) { 2240 2241 if (resp->match_curr.nelem != 1) { 2242 cmn_err(CE_NOTE, "!vnet%d: number of nodes updated " 2243 "invalid: %d\n", vnetp->instance, 2244 resp->match_curr.nelem); 2245 goto vgen_mdeg_cb_err; 2246 } 2247 2248 mdp = resp->match_curr.mdp; 2249 node = resp->match_curr.mdep[0]; 2250 2251 } else { 2252 goto vgen_mdeg_cb_err; 2253 } 2254 2255 /* Validate name and instance */ 2256 if (md_get_prop_str(mdp, node, "name", &node_name) != 0) { 2257 DERR(vgenp, NULL, "unable to get node name\n"); 2258 goto vgen_mdeg_cb_err; 2259 } 2260 2261 /* is this a virtual-network device? */ 2262 if (strcmp(node_name, vnet_propname) != 0) { 2263 DERR(vgenp, NULL, "%s: Invalid node name: %s\n", node_name); 2264 goto vgen_mdeg_cb_err; 2265 } 2266 2267 if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) { 2268 DERR(vgenp, NULL, "prop(cfg-handle) not found\n"); 2269 goto vgen_mdeg_cb_err; 2270 } 2271 2272 /* is this the right instance of vnet? */ 2273 if (inst != vgenp->regprop) { 2274 DERR(vgenp, NULL, "Invalid cfg-handle: %lx\n", inst); 2275 goto vgen_mdeg_cb_err; 2276 } 2277 2278 vgen_update_md_prop(vgenp, mdp, node); 2279 2280 mutex_exit(&vgenp->lock); 2281 return (MDEG_SUCCESS); 2282 2283 vgen_mdeg_cb_err: 2284 mutex_exit(&vgenp->lock); 2285 return (MDEG_FAILURE); 2286 } 2287 2288 /* 2289 * Check to see if the relevant properties in the specified node have 2290 * changed, and if so take the appropriate action. 2291 */ 2292 static void 2293 vgen_update_md_prop(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex) 2294 { 2295 uint16_t pvid; 2296 uint16_t *vids; 2297 uint16_t nvids; 2298 vnet_t *vnetp = vgenp->vnetp; 2299 uint32_t mtu; 2300 boolean_t pls_update; 2301 enum { MD_init = 0x1, 2302 MD_vlans = 0x2, 2303 MD_mtu = 0x4, 2304 MD_pls = 0x8 } updated; 2305 int rv; 2306 2307 updated = MD_init; 2308 2309 /* Read the vlan ids */ 2310 vgen_vlan_read_ids(vgenp, VGEN_LOCAL, mdp, mdex, &pvid, &vids, 2311 &nvids, NULL); 2312 2313 /* Determine if there are any vlan id updates */ 2314 if ((pvid != vnetp->pvid) || /* pvid changed? */ 2315 (nvids != vnetp->nvids) || /* # of vids changed? */ 2316 ((nvids != 0) && (vnetp->nvids != 0) && /* vids changed? */ 2317 bcmp(vids, vnetp->vids, sizeof (uint16_t) * nvids))) { 2318 updated |= MD_vlans; 2319 } 2320 2321 /* Read mtu */ 2322 vgen_mtu_read(vgenp, mdp, mdex, &mtu); 2323 if (mtu != vnetp->mtu) { 2324 if (mtu >= ETHERMTU && mtu <= VNET_MAX_MTU) { 2325 updated |= MD_mtu; 2326 } else { 2327 cmn_err(CE_NOTE, "!vnet%d: Unable to process mtu update" 2328 " as the specified value:%d is invalid\n", 2329 vnetp->instance, mtu); 2330 } 2331 } 2332 2333 /* 2334 * Read the 'linkprop' property. 2335 */ 2336 vgen_linkprop_read(vgenp, mdp, mdex, &pls_update); 2337 if (pls_update != vnetp->pls_update) { 2338 updated |= MD_pls; 2339 } 2340 2341 /* Now process the updated props */ 2342 2343 if (updated & MD_vlans) { 2344 2345 /* save the new vlan ids */ 2346 vnetp->pvid = pvid; 2347 if (vnetp->nvids != 0) { 2348 kmem_free(vnetp->vids, 2349 sizeof (uint16_t) * vnetp->nvids); 2350 vnetp->nvids = 0; 2351 } 2352 if (nvids != 0) { 2353 vnetp->nvids = nvids; 2354 vnetp->vids = vids; 2355 } 2356 2357 /* reset vlan-unaware peers (ver < 1.3) and restart handshake */ 2358 vgen_reset_vlan_unaware_ports(vgenp); 2359 2360 } else { 2361 2362 if (nvids != 0) { 2363 kmem_free(vids, sizeof (uint16_t) * nvids); 2364 } 2365 } 2366 2367 if (updated & MD_mtu) { 2368 2369 DBG2(vgenp, NULL, "curr_mtu(%d) new_mtu(%d)\n", 2370 vnetp->mtu, mtu); 2371 2372 rv = vnet_mtu_update(vnetp, mtu); 2373 if (rv == 0) { 2374 vgenp->max_frame_size = mtu + 2375 sizeof (struct ether_header) + VLAN_TAGSZ; 2376 } 2377 } 2378 2379 if (updated & MD_pls) { 2380 /* enable/disable physical link state updates */ 2381 vnetp->pls_update = pls_update; 2382 mutex_exit(&vgenp->lock); 2383 2384 /* reset vsw-port to re-negotiate with the updated prop. */ 2385 vgen_reset_vsw_port(vgenp); 2386 2387 mutex_enter(&vgenp->lock); 2388 } 2389 } 2390 2391 /* add a new port to the device */ 2392 static int 2393 vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex) 2394 { 2395 vgen_port_t *portp; 2396 int rv; 2397 2398 portp = kmem_zalloc(sizeof (vgen_port_t), KM_SLEEP); 2399 2400 rv = vgen_port_read_props(portp, vgenp, mdp, mdex); 2401 if (rv != DDI_SUCCESS) { 2402 KMEM_FREE(portp); 2403 return (DDI_FAILURE); 2404 } 2405 2406 rv = vgen_port_attach(portp); 2407 if (rv != DDI_SUCCESS) { 2408 return (DDI_FAILURE); 2409 } 2410 2411 return (DDI_SUCCESS); 2412 } 2413 2414 /* read properties of the port from its md node */ 2415 static int 2416 vgen_port_read_props(vgen_port_t *portp, vgen_t *vgenp, md_t *mdp, 2417 mde_cookie_t mdex) 2418 { 2419 uint64_t port_num; 2420 uint64_t *ldc_ids; 2421 uint64_t macaddr; 2422 uint64_t val; 2423 int num_ldcs; 2424 int i; 2425 int addrsz; 2426 int num_nodes = 0; 2427 int listsz = 0; 2428 mde_cookie_t *listp = NULL; 2429 uint8_t *addrp; 2430 struct ether_addr ea; 2431 2432 /* read "id" property to get the port number */ 2433 if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) { 2434 DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname); 2435 return (DDI_FAILURE); 2436 } 2437 2438 /* 2439 * Find the channel endpoint node(s) under this port node. 2440 */ 2441 if ((num_nodes = md_node_count(mdp)) <= 0) { 2442 DWARN(vgenp, NULL, "invalid number of nodes found (%d)", 2443 num_nodes); 2444 return (DDI_FAILURE); 2445 } 2446 2447 /* allocate space for node list */ 2448 listsz = num_nodes * sizeof (mde_cookie_t); 2449 listp = kmem_zalloc(listsz, KM_NOSLEEP); 2450 if (listp == NULL) 2451 return (DDI_FAILURE); 2452 2453 num_ldcs = md_scan_dag(mdp, mdex, 2454 md_find_name(mdp, channel_propname), 2455 md_find_name(mdp, "fwd"), listp); 2456 2457 if (num_ldcs <= 0) { 2458 DWARN(vgenp, NULL, "can't find %s nodes", channel_propname); 2459 kmem_free(listp, listsz); 2460 return (DDI_FAILURE); 2461 } 2462 2463 DBG2(vgenp, NULL, "num_ldcs %d", num_ldcs); 2464 2465 ldc_ids = kmem_zalloc(num_ldcs * sizeof (uint64_t), KM_NOSLEEP); 2466 if (ldc_ids == NULL) { 2467 kmem_free(listp, listsz); 2468 return (DDI_FAILURE); 2469 } 2470 2471 for (i = 0; i < num_ldcs; i++) { 2472 /* read channel ids */ 2473 if (md_get_prop_val(mdp, listp[i], id_propname, &ldc_ids[i])) { 2474 DWARN(vgenp, NULL, "prop(%s) not found\n", 2475 id_propname); 2476 kmem_free(listp, listsz); 2477 kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t)); 2478 return (DDI_FAILURE); 2479 } 2480 DBG2(vgenp, NULL, "ldc_id 0x%llx", ldc_ids[i]); 2481 } 2482 2483 kmem_free(listp, listsz); 2484 2485 if (md_get_prop_data(mdp, mdex, rmacaddr_propname, &addrp, 2486 &addrsz)) { 2487 DWARN(vgenp, NULL, "prop(%s) not found\n", rmacaddr_propname); 2488 kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t)); 2489 return (DDI_FAILURE); 2490 } 2491 2492 if (addrsz < ETHERADDRL) { 2493 DWARN(vgenp, NULL, "invalid address size (%d)\n", addrsz); 2494 kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t)); 2495 return (DDI_FAILURE); 2496 } 2497 2498 macaddr = *((uint64_t *)addrp); 2499 2500 DBG2(vgenp, NULL, "remote mac address 0x%llx\n", macaddr); 2501 2502 for (i = ETHERADDRL - 1; i >= 0; i--) { 2503 ea.ether_addr_octet[i] = macaddr & 0xFF; 2504 macaddr >>= 8; 2505 } 2506 2507 if (!(md_get_prop_val(mdp, mdex, swport_propname, &val))) { 2508 if (val == 0) { 2509 /* This port is connected to the vswitch */ 2510 portp->is_vsw_port = B_TRUE; 2511 } else { 2512 portp->is_vsw_port = B_FALSE; 2513 } 2514 } 2515 2516 /* now update all properties into the port */ 2517 portp->vgenp = vgenp; 2518 portp->port_num = port_num; 2519 ether_copy(&ea, &portp->macaddr); 2520 portp->ldc_ids = kmem_zalloc(sizeof (uint64_t) * num_ldcs, KM_SLEEP); 2521 bcopy(ldc_ids, portp->ldc_ids, sizeof (uint64_t) * num_ldcs); 2522 portp->num_ldcs = num_ldcs; 2523 2524 /* read vlan id properties of this port node */ 2525 vgen_vlan_read_ids(portp, VGEN_PEER, mdp, mdex, &portp->pvid, 2526 &portp->vids, &portp->nvids, NULL); 2527 2528 kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t)); 2529 2530 return (DDI_SUCCESS); 2531 } 2532 2533 /* remove a port from the device */ 2534 static int 2535 vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex) 2536 { 2537 uint64_t port_num; 2538 vgen_port_t *portp; 2539 vgen_portlist_t *plistp; 2540 2541 /* read "id" property to get the port number */ 2542 if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) { 2543 DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname); 2544 return (DDI_FAILURE); 2545 } 2546 2547 plistp = &(vgenp->vgenports); 2548 2549 WRITE_ENTER(&plistp->rwlock); 2550 portp = vgen_port_lookup(plistp, (int)port_num); 2551 if (portp == NULL) { 2552 DWARN(vgenp, NULL, "can't find port(%lx)\n", port_num); 2553 RW_EXIT(&plistp->rwlock); 2554 return (DDI_FAILURE); 2555 } 2556 2557 vgen_port_detach_mdeg(portp); 2558 RW_EXIT(&plistp->rwlock); 2559 2560 return (DDI_SUCCESS); 2561 } 2562 2563 /* attach a port to the device based on mdeg data */ 2564 static int 2565 vgen_port_attach(vgen_port_t *portp) 2566 { 2567 int i; 2568 vgen_portlist_t *plistp; 2569 vgen_t *vgenp; 2570 uint64_t *ldcids; 2571 uint32_t num_ldcs; 2572 mac_register_t *macp; 2573 vio_net_res_type_t type; 2574 int rv; 2575 2576 ASSERT(portp != NULL); 2577 2578 vgenp = portp->vgenp; 2579 ldcids = portp->ldc_ids; 2580 num_ldcs = portp->num_ldcs; 2581 2582 DBG1(vgenp, NULL, "port_num(%d)\n", portp->port_num); 2583 2584 mutex_init(&portp->lock, NULL, MUTEX_DRIVER, NULL); 2585 rw_init(&portp->ldclist.rwlock, NULL, RW_DRIVER, NULL); 2586 portp->ldclist.headp = NULL; 2587 2588 for (i = 0; i < num_ldcs; i++) { 2589 DBG2(vgenp, NULL, "ldcid (%lx)\n", ldcids[i]); 2590 if (vgen_ldc_attach(portp, ldcids[i]) == DDI_FAILURE) { 2591 vgen_port_detach(portp); 2592 return (DDI_FAILURE); 2593 } 2594 } 2595 2596 /* create vlan id hash table */ 2597 vgen_vlan_create_hash(portp); 2598 2599 if (portp->is_vsw_port == B_TRUE) { 2600 /* This port is connected to the switch port */ 2601 (void) atomic_swap_32(&portp->use_vsw_port, B_FALSE); 2602 type = VIO_NET_RES_LDC_SERVICE; 2603 } else { 2604 (void) atomic_swap_32(&portp->use_vsw_port, B_TRUE); 2605 type = VIO_NET_RES_LDC_GUEST; 2606 } 2607 2608 if ((macp = mac_alloc(MAC_VERSION)) == NULL) { 2609 vgen_port_detach(portp); 2610 return (DDI_FAILURE); 2611 } 2612 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 2613 macp->m_driver = portp; 2614 macp->m_dip = vgenp->vnetdip; 2615 macp->m_src_addr = (uint8_t *)&(vgenp->macaddr); 2616 macp->m_callbacks = &vgen_m_callbacks; 2617 macp->m_min_sdu = 0; 2618 macp->m_max_sdu = ETHERMTU; 2619 2620 mutex_enter(&portp->lock); 2621 rv = vio_net_resource_reg(macp, type, vgenp->macaddr, 2622 portp->macaddr, &portp->vhp, &portp->vcb); 2623 mutex_exit(&portp->lock); 2624 mac_free(macp); 2625 2626 if (rv == 0) { 2627 /* link it into the list of ports */ 2628 plistp = &(vgenp->vgenports); 2629 WRITE_ENTER(&plistp->rwlock); 2630 vgen_port_list_insert(portp); 2631 RW_EXIT(&plistp->rwlock); 2632 2633 if (portp->is_vsw_port == B_TRUE) { 2634 /* We now have the vswitch port attached */ 2635 vgenp->vsw_portp = portp; 2636 (void) atomic_swap_32(&vgenp->vsw_port_refcnt, 0); 2637 } 2638 } else { 2639 DERR(vgenp, NULL, "vio_net_resource_reg failed for portp=0x%p", 2640 portp); 2641 vgen_port_detach(portp); 2642 } 2643 2644 DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num); 2645 return (DDI_SUCCESS); 2646 } 2647 2648 /* detach a port from the device based on mdeg data */ 2649 static void 2650 vgen_port_detach_mdeg(vgen_port_t *portp) 2651 { 2652 vgen_t *vgenp = portp->vgenp; 2653 2654 DBG1(vgenp, NULL, "enter: port_num(%d)\n", portp->port_num); 2655 2656 mutex_enter(&portp->lock); 2657 2658 /* stop the port if needed */ 2659 if (portp->flags & VGEN_STARTED) { 2660 vgen_port_uninit(portp); 2661 } 2662 2663 mutex_exit(&portp->lock); 2664 vgen_port_detach(portp); 2665 2666 DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num); 2667 } 2668 2669 static int 2670 vgen_update_port(vgen_t *vgenp, md_t *curr_mdp, mde_cookie_t curr_mdex, 2671 md_t *prev_mdp, mde_cookie_t prev_mdex) 2672 { 2673 uint64_t cport_num; 2674 uint64_t pport_num; 2675 vgen_portlist_t *plistp; 2676 vgen_port_t *portp; 2677 boolean_t updated_vlans = B_FALSE; 2678 uint16_t pvid; 2679 uint16_t *vids; 2680 uint16_t nvids; 2681 2682 /* 2683 * For now, we get port updates only if vlan ids changed. 2684 * We read the port num and do some sanity check. 2685 */ 2686 if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) { 2687 DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname); 2688 return (DDI_FAILURE); 2689 } 2690 2691 if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) { 2692 DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname); 2693 return (DDI_FAILURE); 2694 } 2695 if (cport_num != pport_num) 2696 return (DDI_FAILURE); 2697 2698 plistp = &(vgenp->vgenports); 2699 2700 READ_ENTER(&plistp->rwlock); 2701 2702 portp = vgen_port_lookup(plistp, (int)cport_num); 2703 if (portp == NULL) { 2704 DWARN(vgenp, NULL, "can't find port(%lx)\n", cport_num); 2705 RW_EXIT(&plistp->rwlock); 2706 return (DDI_FAILURE); 2707 } 2708 2709 /* Read the vlan ids */ 2710 vgen_vlan_read_ids(portp, VGEN_PEER, curr_mdp, curr_mdex, &pvid, &vids, 2711 &nvids, NULL); 2712 2713 /* Determine if there are any vlan id updates */ 2714 if ((pvid != portp->pvid) || /* pvid changed? */ 2715 (nvids != portp->nvids) || /* # of vids changed? */ 2716 ((nvids != 0) && (portp->nvids != 0) && /* vids changed? */ 2717 bcmp(vids, portp->vids, sizeof (uint16_t) * nvids))) { 2718 updated_vlans = B_TRUE; 2719 } 2720 2721 if (updated_vlans == B_FALSE) { 2722 RW_EXIT(&plistp->rwlock); 2723 return (DDI_FAILURE); 2724 } 2725 2726 /* remove the port from vlans it has been assigned to */ 2727 vgen_vlan_remove_ids(portp); 2728 2729 /* save the new vlan ids */ 2730 portp->pvid = pvid; 2731 if (portp->nvids != 0) { 2732 kmem_free(portp->vids, sizeof (uint16_t) * portp->nvids); 2733 portp->nvids = 0; 2734 } 2735 if (nvids != 0) { 2736 portp->vids = kmem_zalloc(sizeof (uint16_t) * nvids, KM_SLEEP); 2737 bcopy(vids, portp->vids, sizeof (uint16_t) * nvids); 2738 portp->nvids = nvids; 2739 kmem_free(vids, sizeof (uint16_t) * nvids); 2740 } 2741 2742 /* add port to the new vlans */ 2743 vgen_vlan_add_ids(portp); 2744 2745 /* reset the port if it is vlan unaware (ver < 1.3) */ 2746 vgen_vlan_unaware_port_reset(portp); 2747 2748 RW_EXIT(&plistp->rwlock); 2749 2750 return (DDI_SUCCESS); 2751 } 2752 2753 static uint64_t 2754 vgen_port_stat(vgen_port_t *portp, uint_t stat) 2755 { 2756 vgen_ldclist_t *ldclp; 2757 vgen_ldc_t *ldcp; 2758 uint64_t val; 2759 2760 val = 0; 2761 ldclp = &portp->ldclist; 2762 2763 READ_ENTER(&ldclp->rwlock); 2764 for (ldcp = ldclp->headp; ldcp != NULL; ldcp = ldcp->nextp) { 2765 val += vgen_ldc_stat(ldcp, stat); 2766 } 2767 RW_EXIT(&ldclp->rwlock); 2768 2769 return (val); 2770 } 2771 2772 /* allocate receive resources */ 2773 static int 2774 vgen_init_multipools(vgen_ldc_t *ldcp) 2775 { 2776 size_t data_sz; 2777 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 2778 int status; 2779 uint32_t sz1 = 0; 2780 uint32_t sz2 = 0; 2781 uint32_t sz3 = 0; 2782 uint32_t sz4 = 0; 2783 2784 /* 2785 * We round up the mtu specified to be a multiple of 2K. 2786 * We then create rx pools based on the rounded up size. 2787 */ 2788 data_sz = vgenp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN; 2789 data_sz = VNET_ROUNDUP_2K(data_sz); 2790 2791 /* 2792 * If pool sizes are specified, use them. Note that the presence of 2793 * the first tunable will be used as a hint. 2794 */ 2795 if (vgen_rbufsz1 != 0) { 2796 2797 sz1 = vgen_rbufsz1; 2798 sz2 = vgen_rbufsz2; 2799 sz3 = vgen_rbufsz3; 2800 sz4 = vgen_rbufsz4; 2801 2802 if (sz4 == 0) { /* need 3 pools */ 2803 2804 ldcp->max_rxpool_size = sz3; 2805 status = vio_init_multipools(&ldcp->vmp, 2806 VGEN_NUM_VMPOOLS, sz1, sz2, sz3, vgen_nrbufs1, 2807 vgen_nrbufs2, vgen_nrbufs3); 2808 2809 } else { 2810 2811 ldcp->max_rxpool_size = sz4; 2812 status = vio_init_multipools(&ldcp->vmp, 2813 VGEN_NUM_VMPOOLS + 1, sz1, sz2, sz3, sz4, 2814 vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3, 2815 vgen_nrbufs4); 2816 } 2817 return (status); 2818 } 2819 2820 /* 2821 * Pool sizes are not specified. We select the pool sizes based on the 2822 * mtu if vnet_jumbo_rxpools is enabled. 2823 */ 2824 if (vnet_jumbo_rxpools == B_FALSE || data_sz == VNET_2K) { 2825 /* 2826 * Receive buffer pool allocation based on mtu is disabled. 2827 * Use the default mechanism of standard size pool allocation. 2828 */ 2829 sz1 = VGEN_DBLK_SZ_128; 2830 sz2 = VGEN_DBLK_SZ_256; 2831 sz3 = VGEN_DBLK_SZ_2048; 2832 ldcp->max_rxpool_size = sz3; 2833 2834 status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS, 2835 sz1, sz2, sz3, 2836 vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3); 2837 2838 return (status); 2839 } 2840 2841 switch (data_sz) { 2842 2843 case VNET_4K: 2844 2845 sz1 = VGEN_DBLK_SZ_128; 2846 sz2 = VGEN_DBLK_SZ_256; 2847 sz3 = VGEN_DBLK_SZ_2048; 2848 sz4 = sz3 << 1; /* 4K */ 2849 ldcp->max_rxpool_size = sz4; 2850 2851 status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS + 1, 2852 sz1, sz2, sz3, sz4, 2853 vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3, vgen_nrbufs4); 2854 break; 2855 2856 default: /* data_sz: 4K+ to 16K */ 2857 2858 sz1 = VGEN_DBLK_SZ_256; 2859 sz2 = VGEN_DBLK_SZ_2048; 2860 sz3 = data_sz >> 1; /* Jumbo-size/2 */ 2861 sz4 = data_sz; /* Jumbo-size */ 2862 ldcp->max_rxpool_size = sz4; 2863 2864 status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS + 1, 2865 sz1, sz2, sz3, sz4, 2866 vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3, vgen_nrbufs4); 2867 break; 2868 2869 } 2870 2871 return (status); 2872 } 2873 2874 /* attach the channel corresponding to the given ldc_id to the port */ 2875 static int 2876 vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id) 2877 { 2878 vgen_t *vgenp; 2879 vgen_ldclist_t *ldclp; 2880 vgen_ldc_t *ldcp, **prev_ldcp; 2881 ldc_attr_t attr; 2882 int status; 2883 ldc_status_t istatus; 2884 char kname[MAXNAMELEN]; 2885 int instance; 2886 enum {AST_init = 0x0, AST_ldc_alloc = 0x1, 2887 AST_mutex_init = 0x2, AST_ldc_init = 0x4, 2888 AST_ldc_reg_cb = 0x8, AST_alloc_tx_ring = 0x10, 2889 AST_create_rxmblks = 0x20, 2890 AST_create_rcv_thread = 0x40} attach_state; 2891 2892 attach_state = AST_init; 2893 vgenp = portp->vgenp; 2894 ldclp = &portp->ldclist; 2895 2896 ldcp = kmem_zalloc(sizeof (vgen_ldc_t), KM_NOSLEEP); 2897 if (ldcp == NULL) { 2898 goto ldc_attach_failed; 2899 } 2900 ldcp->ldc_id = ldc_id; 2901 ldcp->portp = portp; 2902 2903 attach_state |= AST_ldc_alloc; 2904 2905 mutex_init(&ldcp->txlock, NULL, MUTEX_DRIVER, NULL); 2906 mutex_init(&ldcp->cblock, NULL, MUTEX_DRIVER, NULL); 2907 mutex_init(&ldcp->tclock, NULL, MUTEX_DRIVER, NULL); 2908 mutex_init(&ldcp->wrlock, NULL, MUTEX_DRIVER, NULL); 2909 mutex_init(&ldcp->rxlock, NULL, MUTEX_DRIVER, NULL); 2910 2911 attach_state |= AST_mutex_init; 2912 2913 attr.devclass = LDC_DEV_NT; 2914 attr.instance = vgenp->instance; 2915 attr.mode = LDC_MODE_UNRELIABLE; 2916 attr.mtu = vnet_ldc_mtu; 2917 status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle); 2918 if (status != 0) { 2919 DWARN(vgenp, ldcp, "ldc_init failed,rv (%d)\n", status); 2920 goto ldc_attach_failed; 2921 } 2922 attach_state |= AST_ldc_init; 2923 2924 if (vgen_rcv_thread_enabled) { 2925 ldcp->rcv_thr_flags = 0; 2926 2927 mutex_init(&ldcp->rcv_thr_lock, NULL, MUTEX_DRIVER, NULL); 2928 cv_init(&ldcp->rcv_thr_cv, NULL, CV_DRIVER, NULL); 2929 ldcp->rcv_thread = thread_create(NULL, 2 * DEFAULTSTKSZ, 2930 vgen_ldc_rcv_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri); 2931 2932 attach_state |= AST_create_rcv_thread; 2933 if (ldcp->rcv_thread == NULL) { 2934 DWARN(vgenp, ldcp, "Failed to create worker thread"); 2935 goto ldc_attach_failed; 2936 } 2937 } 2938 2939 status = ldc_reg_callback(ldcp->ldc_handle, vgen_ldc_cb, (caddr_t)ldcp); 2940 if (status != 0) { 2941 DWARN(vgenp, ldcp, "ldc_reg_callback failed, rv (%d)\n", 2942 status); 2943 goto ldc_attach_failed; 2944 } 2945 /* 2946 * allocate a message for ldc_read()s, big enough to hold ctrl and 2947 * data msgs, including raw data msgs used to recv priority frames. 2948 */ 2949 ldcp->msglen = VIO_PKT_DATA_HDRSIZE + vgenp->max_frame_size; 2950 ldcp->ldcmsg = kmem_alloc(ldcp->msglen, KM_SLEEP); 2951 attach_state |= AST_ldc_reg_cb; 2952 2953 (void) ldc_status(ldcp->ldc_handle, &istatus); 2954 ASSERT(istatus == LDC_INIT); 2955 ldcp->ldc_status = istatus; 2956 2957 /* allocate transmit resources */ 2958 status = vgen_alloc_tx_ring(ldcp); 2959 if (status != 0) { 2960 goto ldc_attach_failed; 2961 } 2962 attach_state |= AST_alloc_tx_ring; 2963 2964 /* allocate receive resources */ 2965 status = vgen_init_multipools(ldcp); 2966 if (status != 0) { 2967 /* 2968 * We do not return failure if receive mblk pools can't be 2969 * allocated; instead allocb(9F) will be used to dynamically 2970 * allocate buffers during receive. 2971 */ 2972 DWARN(vgenp, ldcp, 2973 "vnet%d: status(%d), failed to allocate rx mblk pools for " 2974 "channel(0x%lx)\n", 2975 vgenp->instance, status, ldcp->ldc_id); 2976 } else { 2977 attach_state |= AST_create_rxmblks; 2978 } 2979 2980 /* Setup kstats for the channel */ 2981 instance = vgenp->instance; 2982 (void) sprintf(kname, "vnetldc0x%lx", ldcp->ldc_id); 2983 ldcp->ksp = vgen_setup_kstats("vnet", instance, kname, &ldcp->stats); 2984 if (ldcp->ksp == NULL) { 2985 goto ldc_attach_failed; 2986 } 2987 2988 /* initialize vgen_versions supported */ 2989 bcopy(vgen_versions, ldcp->vgen_versions, sizeof (ldcp->vgen_versions)); 2990 vgen_reset_vnet_proto_ops(ldcp); 2991 2992 /* link it into the list of channels for this port */ 2993 WRITE_ENTER(&ldclp->rwlock); 2994 prev_ldcp = (vgen_ldc_t **)(&ldclp->headp); 2995 ldcp->nextp = *prev_ldcp; 2996 *prev_ldcp = ldcp; 2997 RW_EXIT(&ldclp->rwlock); 2998 2999 ldcp->link_state = LINK_STATE_UNKNOWN; 3000 #ifdef VNET_IOC_DEBUG 3001 ldcp->link_down_forced = B_FALSE; 3002 #endif 3003 ldcp->flags |= CHANNEL_ATTACHED; 3004 return (DDI_SUCCESS); 3005 3006 ldc_attach_failed: 3007 if (attach_state & AST_ldc_reg_cb) { 3008 (void) ldc_unreg_callback(ldcp->ldc_handle); 3009 kmem_free(ldcp->ldcmsg, ldcp->msglen); 3010 } 3011 if (attach_state & AST_create_rcv_thread) { 3012 if (ldcp->rcv_thread != NULL) { 3013 vgen_stop_rcv_thread(ldcp); 3014 } 3015 mutex_destroy(&ldcp->rcv_thr_lock); 3016 cv_destroy(&ldcp->rcv_thr_cv); 3017 } 3018 if (attach_state & AST_create_rxmblks) { 3019 vio_mblk_pool_t *fvmp = NULL; 3020 vio_destroy_multipools(&ldcp->vmp, &fvmp); 3021 ASSERT(fvmp == NULL); 3022 } 3023 if (attach_state & AST_alloc_tx_ring) { 3024 vgen_free_tx_ring(ldcp); 3025 } 3026 if (attach_state & AST_ldc_init) { 3027 (void) ldc_fini(ldcp->ldc_handle); 3028 } 3029 if (attach_state & AST_mutex_init) { 3030 mutex_destroy(&ldcp->tclock); 3031 mutex_destroy(&ldcp->txlock); 3032 mutex_destroy(&ldcp->cblock); 3033 mutex_destroy(&ldcp->wrlock); 3034 mutex_destroy(&ldcp->rxlock); 3035 } 3036 if (attach_state & AST_ldc_alloc) { 3037 KMEM_FREE(ldcp); 3038 } 3039 return (DDI_FAILURE); 3040 } 3041 3042 /* detach a channel from the port */ 3043 static void 3044 vgen_ldc_detach(vgen_ldc_t *ldcp) 3045 { 3046 vgen_port_t *portp; 3047 vgen_t *vgenp; 3048 vgen_ldc_t *pldcp; 3049 vgen_ldc_t **prev_ldcp; 3050 vgen_ldclist_t *ldclp; 3051 3052 portp = ldcp->portp; 3053 vgenp = portp->vgenp; 3054 ldclp = &portp->ldclist; 3055 3056 prev_ldcp = (vgen_ldc_t **)&ldclp->headp; 3057 for (; (pldcp = *prev_ldcp) != NULL; prev_ldcp = &pldcp->nextp) { 3058 if (pldcp == ldcp) { 3059 break; 3060 } 3061 } 3062 3063 if (pldcp == NULL) { 3064 /* invalid ldcp? */ 3065 return; 3066 } 3067 3068 if (ldcp->ldc_status != LDC_INIT) { 3069 DWARN(vgenp, ldcp, "ldc_status is not INIT\n"); 3070 } 3071 3072 if (ldcp->flags & CHANNEL_ATTACHED) { 3073 ldcp->flags &= ~(CHANNEL_ATTACHED); 3074 3075 (void) ldc_unreg_callback(ldcp->ldc_handle); 3076 if (ldcp->rcv_thread != NULL) { 3077 /* First stop the receive thread */ 3078 vgen_stop_rcv_thread(ldcp); 3079 mutex_destroy(&ldcp->rcv_thr_lock); 3080 cv_destroy(&ldcp->rcv_thr_cv); 3081 } 3082 kmem_free(ldcp->ldcmsg, ldcp->msglen); 3083 3084 vgen_destroy_kstats(ldcp->ksp); 3085 ldcp->ksp = NULL; 3086 3087 /* 3088 * if we cannot reclaim all mblks, put this 3089 * on the list of pools(vgenp->rmp) to be reclaimed when the 3090 * device gets detached (see vgen_uninit()). 3091 */ 3092 vio_destroy_multipools(&ldcp->vmp, &vgenp->rmp); 3093 3094 /* free transmit resources */ 3095 vgen_free_tx_ring(ldcp); 3096 3097 (void) ldc_fini(ldcp->ldc_handle); 3098 mutex_destroy(&ldcp->tclock); 3099 mutex_destroy(&ldcp->txlock); 3100 mutex_destroy(&ldcp->cblock); 3101 mutex_destroy(&ldcp->wrlock); 3102 mutex_destroy(&ldcp->rxlock); 3103 3104 /* unlink it from the list */ 3105 *prev_ldcp = ldcp->nextp; 3106 KMEM_FREE(ldcp); 3107 } 3108 } 3109 3110 /* 3111 * This function allocates transmit resources for the channel. 3112 * The resources consist of a transmit descriptor ring and an associated 3113 * transmit buffer ring. 3114 */ 3115 static int 3116 vgen_alloc_tx_ring(vgen_ldc_t *ldcp) 3117 { 3118 void *tbufp; 3119 ldc_mem_info_t minfo; 3120 uint32_t txdsize; 3121 uint32_t tbufsize; 3122 int status; 3123 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 3124 3125 ldcp->num_txds = vnet_ntxds; 3126 txdsize = sizeof (vnet_public_desc_t); 3127 tbufsize = sizeof (vgen_private_desc_t); 3128 3129 /* allocate transmit buffer ring */ 3130 tbufp = kmem_zalloc(ldcp->num_txds * tbufsize, KM_NOSLEEP); 3131 if (tbufp == NULL) { 3132 return (DDI_FAILURE); 3133 } 3134 3135 /* create transmit descriptor ring */ 3136 status = ldc_mem_dring_create(ldcp->num_txds, txdsize, 3137 &ldcp->tx_dhandle); 3138 if (status) { 3139 DWARN(vgenp, ldcp, "ldc_mem_dring_create() failed\n"); 3140 kmem_free(tbufp, ldcp->num_txds * tbufsize); 3141 return (DDI_FAILURE); 3142 } 3143 3144 /* get the addr of descripror ring */ 3145 status = ldc_mem_dring_info(ldcp->tx_dhandle, &minfo); 3146 if (status) { 3147 DWARN(vgenp, ldcp, "ldc_mem_dring_info() failed\n"); 3148 kmem_free(tbufp, ldcp->num_txds * tbufsize); 3149 (void) ldc_mem_dring_destroy(ldcp->tx_dhandle); 3150 ldcp->tbufp = NULL; 3151 return (DDI_FAILURE); 3152 } 3153 ldcp->txdp = (vnet_public_desc_t *)(minfo.vaddr); 3154 ldcp->tbufp = tbufp; 3155 3156 ldcp->txdendp = &((ldcp->txdp)[ldcp->num_txds]); 3157 ldcp->tbufendp = &((ldcp->tbufp)[ldcp->num_txds]); 3158 3159 return (DDI_SUCCESS); 3160 } 3161 3162 /* Free transmit resources for the channel */ 3163 static void 3164 vgen_free_tx_ring(vgen_ldc_t *ldcp) 3165 { 3166 int tbufsize = sizeof (vgen_private_desc_t); 3167 3168 /* free transmit descriptor ring */ 3169 (void) ldc_mem_dring_destroy(ldcp->tx_dhandle); 3170 3171 /* free transmit buffer ring */ 3172 kmem_free(ldcp->tbufp, ldcp->num_txds * tbufsize); 3173 ldcp->txdp = ldcp->txdendp = NULL; 3174 ldcp->tbufp = ldcp->tbufendp = NULL; 3175 } 3176 3177 /* enable transmit/receive on the channels for the port */ 3178 static void 3179 vgen_init_ldcs(vgen_port_t *portp) 3180 { 3181 vgen_ldclist_t *ldclp = &portp->ldclist; 3182 vgen_ldc_t *ldcp; 3183 3184 READ_ENTER(&ldclp->rwlock); 3185 ldcp = ldclp->headp; 3186 for (; ldcp != NULL; ldcp = ldcp->nextp) { 3187 (void) vgen_ldc_init(ldcp); 3188 } 3189 RW_EXIT(&ldclp->rwlock); 3190 } 3191 3192 /* stop transmit/receive on the channels for the port */ 3193 static void 3194 vgen_uninit_ldcs(vgen_port_t *portp) 3195 { 3196 vgen_ldclist_t *ldclp = &portp->ldclist; 3197 vgen_ldc_t *ldcp; 3198 3199 READ_ENTER(&ldclp->rwlock); 3200 ldcp = ldclp->headp; 3201 for (; ldcp != NULL; ldcp = ldcp->nextp) { 3202 vgen_ldc_uninit(ldcp); 3203 } 3204 RW_EXIT(&ldclp->rwlock); 3205 } 3206 3207 /* enable transmit/receive on the channel */ 3208 static int 3209 vgen_ldc_init(vgen_ldc_t *ldcp) 3210 { 3211 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 3212 ldc_status_t istatus; 3213 int rv; 3214 uint32_t retries = 0; 3215 enum { ST_init = 0x0, ST_ldc_open = 0x1, 3216 ST_init_tbufs = 0x2, ST_cb_enable = 0x4} init_state; 3217 init_state = ST_init; 3218 3219 DBG1(vgenp, ldcp, "enter\n"); 3220 LDC_LOCK(ldcp); 3221 3222 rv = ldc_open(ldcp->ldc_handle); 3223 if (rv != 0) { 3224 DWARN(vgenp, ldcp, "ldc_open failed: rv(%d)\n", rv); 3225 goto ldcinit_failed; 3226 } 3227 init_state |= ST_ldc_open; 3228 3229 (void) ldc_status(ldcp->ldc_handle, &istatus); 3230 if (istatus != LDC_OPEN && istatus != LDC_READY) { 3231 DWARN(vgenp, ldcp, "status(%d) is not OPEN/READY\n", istatus); 3232 goto ldcinit_failed; 3233 } 3234 ldcp->ldc_status = istatus; 3235 3236 rv = vgen_init_tbufs(ldcp); 3237 if (rv != 0) { 3238 DWARN(vgenp, ldcp, "vgen_init_tbufs() failed\n"); 3239 goto ldcinit_failed; 3240 } 3241 init_state |= ST_init_tbufs; 3242 3243 rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_ENABLE); 3244 if (rv != 0) { 3245 DWARN(vgenp, ldcp, "ldc_set_cb_mode failed: rv(%d)\n", rv); 3246 goto ldcinit_failed; 3247 } 3248 3249 init_state |= ST_cb_enable; 3250 3251 do { 3252 rv = ldc_up(ldcp->ldc_handle); 3253 if ((rv != 0) && (rv == EWOULDBLOCK)) { 3254 DBG2(vgenp, ldcp, "ldc_up err rv(%d)\n", rv); 3255 drv_usecwait(VGEN_LDC_UP_DELAY); 3256 } 3257 if (retries++ >= vgen_ldcup_retries) 3258 break; 3259 } while (rv == EWOULDBLOCK); 3260 3261 (void) ldc_status(ldcp->ldc_handle, &istatus); 3262 if (istatus == LDC_UP) { 3263 DWARN(vgenp, ldcp, "status(%d) is UP\n", istatus); 3264 } 3265 3266 ldcp->ldc_status = istatus; 3267 3268 /* initialize transmit watchdog timeout */ 3269 ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp, 3270 drv_usectohz(vnet_ldcwd_interval * 1000)); 3271 3272 ldcp->hphase = -1; 3273 ldcp->flags |= CHANNEL_STARTED; 3274 3275 /* if channel is already UP - start handshake */ 3276 if (istatus == LDC_UP) { 3277 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 3278 if (ldcp->portp != vgenp->vsw_portp) { 3279 /* 3280 * As the channel is up, use this port from now on. 3281 */ 3282 (void) atomic_swap_32( 3283 &ldcp->portp->use_vsw_port, B_FALSE); 3284 } 3285 3286 /* Initialize local session id */ 3287 ldcp->local_sid = ddi_get_lbolt(); 3288 3289 /* clear peer session id */ 3290 ldcp->peer_sid = 0; 3291 ldcp->hretries = 0; 3292 3293 /* Initiate Handshake process with peer ldc endpoint */ 3294 vgen_reset_hphase(ldcp); 3295 3296 mutex_exit(&ldcp->tclock); 3297 mutex_exit(&ldcp->txlock); 3298 mutex_exit(&ldcp->wrlock); 3299 mutex_exit(&ldcp->rxlock); 3300 vgen_handshake(vh_nextphase(ldcp)); 3301 mutex_exit(&ldcp->cblock); 3302 } else { 3303 LDC_UNLOCK(ldcp); 3304 } 3305 3306 return (DDI_SUCCESS); 3307 3308 ldcinit_failed: 3309 if (init_state & ST_cb_enable) { 3310 (void) ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE); 3311 } 3312 if (init_state & ST_init_tbufs) { 3313 vgen_uninit_tbufs(ldcp); 3314 } 3315 if (init_state & ST_ldc_open) { 3316 (void) ldc_close(ldcp->ldc_handle); 3317 } 3318 LDC_UNLOCK(ldcp); 3319 DBG1(vgenp, ldcp, "exit\n"); 3320 return (DDI_FAILURE); 3321 } 3322 3323 /* stop transmit/receive on the channel */ 3324 static void 3325 vgen_ldc_uninit(vgen_ldc_t *ldcp) 3326 { 3327 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 3328 int rv; 3329 uint_t retries = 0; 3330 3331 DBG1(vgenp, ldcp, "enter\n"); 3332 LDC_LOCK(ldcp); 3333 3334 if ((ldcp->flags & CHANNEL_STARTED) == 0) { 3335 LDC_UNLOCK(ldcp); 3336 DWARN(vgenp, ldcp, "CHANNEL_STARTED flag is not set\n"); 3337 return; 3338 } 3339 3340 /* disable further callbacks */ 3341 rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE); 3342 if (rv != 0) { 3343 DWARN(vgenp, ldcp, "ldc_set_cb_mode failed\n"); 3344 } 3345 3346 /* 3347 * clear handshake done bit and wait for pending tx and cb to finish. 3348 * release locks before untimeout(9F) is invoked to cancel timeouts. 3349 */ 3350 ldcp->hphase &= ~(VH_DONE); 3351 LDC_UNLOCK(ldcp); 3352 3353 if (vgenp->vsw_portp == ldcp->portp) { 3354 vio_net_report_err_t rep_err = 3355 ldcp->portp->vcb.vio_net_report_err; 3356 rep_err(ldcp->portp->vhp, VIO_NET_RES_DOWN); 3357 } 3358 3359 /* cancel handshake watchdog timeout */ 3360 if (ldcp->htid) { 3361 (void) untimeout(ldcp->htid); 3362 ldcp->htid = 0; 3363 } 3364 3365 if (ldcp->cancel_htid) { 3366 (void) untimeout(ldcp->cancel_htid); 3367 ldcp->cancel_htid = 0; 3368 } 3369 3370 /* cancel transmit watchdog timeout */ 3371 if (ldcp->wd_tid) { 3372 (void) untimeout(ldcp->wd_tid); 3373 ldcp->wd_tid = 0; 3374 } 3375 3376 drv_usecwait(1000); 3377 3378 if (ldcp->rcv_thread != NULL) { 3379 /* 3380 * Note that callbacks have been disabled already(above). The 3381 * drain function takes care of the condition when an already 3382 * executing callback signals the worker to start processing or 3383 * the worker has already been signalled and is in the middle of 3384 * processing. 3385 */ 3386 vgen_drain_rcv_thread(ldcp); 3387 } 3388 3389 /* acquire locks again; any pending transmits and callbacks are done */ 3390 LDC_LOCK(ldcp); 3391 3392 vgen_reset_hphase(ldcp); 3393 3394 vgen_uninit_tbufs(ldcp); 3395 3396 /* close the channel - retry on EAGAIN */ 3397 while ((rv = ldc_close(ldcp->ldc_handle)) == EAGAIN) { 3398 if (++retries > vgen_ldccl_retries) { 3399 break; 3400 } 3401 drv_usecwait(VGEN_LDC_CLOSE_DELAY); 3402 } 3403 if (rv != 0) { 3404 cmn_err(CE_NOTE, 3405 "!vnet%d: Error(%d) closing the channel(0x%lx)\n", 3406 vgenp->instance, rv, ldcp->ldc_id); 3407 } 3408 3409 ldcp->ldc_status = LDC_INIT; 3410 ldcp->flags &= ~(CHANNEL_STARTED); 3411 3412 LDC_UNLOCK(ldcp); 3413 3414 DBG1(vgenp, ldcp, "exit\n"); 3415 } 3416 3417 /* Initialize the transmit buffer ring for the channel */ 3418 static int 3419 vgen_init_tbufs(vgen_ldc_t *ldcp) 3420 { 3421 vgen_private_desc_t *tbufp; 3422 vnet_public_desc_t *txdp; 3423 vio_dring_entry_hdr_t *hdrp; 3424 int i; 3425 int rv; 3426 caddr_t datap = NULL; 3427 int ci; 3428 uint32_t ncookies; 3429 size_t data_sz; 3430 vgen_t *vgenp; 3431 3432 vgenp = LDC_TO_VGEN(ldcp); 3433 3434 bzero(ldcp->tbufp, sizeof (*tbufp) * (ldcp->num_txds)); 3435 bzero(ldcp->txdp, sizeof (*txdp) * (ldcp->num_txds)); 3436 3437 /* 3438 * In order to ensure that the number of ldc cookies per descriptor is 3439 * limited to be within the default MAX_COOKIES (2), we take the steps 3440 * outlined below: 3441 * 3442 * Align the entire data buffer area to 8K and carve out per descriptor 3443 * data buffers starting from this 8K aligned base address. 3444 * 3445 * We round up the mtu specified to be a multiple of 2K or 4K. 3446 * For sizes up to 12K we round up the size to the next 2K. 3447 * For sizes > 12K we round up to the next 4K (otherwise sizes such as 3448 * 14K could end up needing 3 cookies, with the buffer spread across 3449 * 3 8K pages: 8K+6K, 2K+8K+2K, 6K+8K, ...). 3450 */ 3451 data_sz = vgenp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN; 3452 if (data_sz <= VNET_12K) { 3453 data_sz = VNET_ROUNDUP_2K(data_sz); 3454 } else { 3455 data_sz = VNET_ROUNDUP_4K(data_sz); 3456 } 3457 3458 /* allocate extra 8K bytes for alignment */ 3459 ldcp->tx_data_sz = (data_sz * ldcp->num_txds) + VNET_8K; 3460 datap = kmem_zalloc(ldcp->tx_data_sz, KM_SLEEP); 3461 ldcp->tx_datap = datap; 3462 3463 3464 /* align the starting address of the data area to 8K */ 3465 datap = (caddr_t)VNET_ROUNDUP_8K((uintptr_t)datap); 3466 3467 /* 3468 * for each private descriptor, allocate a ldc mem_handle which is 3469 * required to map the data during transmit, set the flags 3470 * to free (available for use by transmit routine). 3471 */ 3472 3473 for (i = 0; i < ldcp->num_txds; i++) { 3474 3475 tbufp = &(ldcp->tbufp[i]); 3476 rv = ldc_mem_alloc_handle(ldcp->ldc_handle, 3477 &(tbufp->memhandle)); 3478 if (rv) { 3479 tbufp->memhandle = 0; 3480 goto init_tbufs_failed; 3481 } 3482 3483 /* 3484 * bind ldc memhandle to the corresponding transmit buffer. 3485 */ 3486 ci = ncookies = 0; 3487 rv = ldc_mem_bind_handle(tbufp->memhandle, 3488 (caddr_t)datap, data_sz, LDC_SHADOW_MAP, 3489 LDC_MEM_R, &(tbufp->memcookie[ci]), &ncookies); 3490 if (rv != 0) { 3491 goto init_tbufs_failed; 3492 } 3493 3494 /* 3495 * successful in binding the handle to tx data buffer. 3496 * set datap in the private descr to this buffer. 3497 */ 3498 tbufp->datap = datap; 3499 3500 if ((ncookies == 0) || 3501 (ncookies > MAX_COOKIES)) { 3502 goto init_tbufs_failed; 3503 } 3504 3505 for (ci = 1; ci < ncookies; ci++) { 3506 rv = ldc_mem_nextcookie(tbufp->memhandle, 3507 &(tbufp->memcookie[ci])); 3508 if (rv != 0) { 3509 goto init_tbufs_failed; 3510 } 3511 } 3512 3513 tbufp->ncookies = ncookies; 3514 datap += data_sz; 3515 3516 tbufp->flags = VGEN_PRIV_DESC_FREE; 3517 txdp = &(ldcp->txdp[i]); 3518 hdrp = &txdp->hdr; 3519 hdrp->dstate = VIO_DESC_FREE; 3520 hdrp->ack = B_FALSE; 3521 tbufp->descp = txdp; 3522 3523 } 3524 3525 /* reset tbuf walking pointers */ 3526 ldcp->next_tbufp = ldcp->tbufp; 3527 ldcp->cur_tbufp = ldcp->tbufp; 3528 3529 /* initialize tx seqnum and index */ 3530 ldcp->next_txseq = VNET_ISS; 3531 ldcp->next_txi = 0; 3532 3533 ldcp->resched_peer = B_TRUE; 3534 ldcp->resched_peer_txi = 0; 3535 3536 return (DDI_SUCCESS); 3537 3538 init_tbufs_failed:; 3539 vgen_uninit_tbufs(ldcp); 3540 return (DDI_FAILURE); 3541 } 3542 3543 /* Uninitialize transmit buffer ring for the channel */ 3544 static void 3545 vgen_uninit_tbufs(vgen_ldc_t *ldcp) 3546 { 3547 vgen_private_desc_t *tbufp = ldcp->tbufp; 3548 int i; 3549 3550 /* for each tbuf (priv_desc), free ldc mem_handle */ 3551 for (i = 0; i < ldcp->num_txds; i++) { 3552 3553 tbufp = &(ldcp->tbufp[i]); 3554 3555 if (tbufp->datap) { /* if bound to a ldc memhandle */ 3556 (void) ldc_mem_unbind_handle(tbufp->memhandle); 3557 tbufp->datap = NULL; 3558 } 3559 if (tbufp->memhandle) { 3560 (void) ldc_mem_free_handle(tbufp->memhandle); 3561 tbufp->memhandle = 0; 3562 } 3563 } 3564 3565 if (ldcp->tx_datap) { 3566 /* prealloc'd tx data buffer */ 3567 kmem_free(ldcp->tx_datap, ldcp->tx_data_sz); 3568 ldcp->tx_datap = NULL; 3569 ldcp->tx_data_sz = 0; 3570 } 3571 3572 bzero(ldcp->tbufp, sizeof (vgen_private_desc_t) * (ldcp->num_txds)); 3573 bzero(ldcp->txdp, sizeof (vnet_public_desc_t) * (ldcp->num_txds)); 3574 } 3575 3576 /* clobber tx descriptor ring */ 3577 static void 3578 vgen_clobber_tbufs(vgen_ldc_t *ldcp) 3579 { 3580 vnet_public_desc_t *txdp; 3581 vgen_private_desc_t *tbufp; 3582 vio_dring_entry_hdr_t *hdrp; 3583 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 3584 int i; 3585 #ifdef DEBUG 3586 int ndone = 0; 3587 #endif 3588 3589 for (i = 0; i < ldcp->num_txds; i++) { 3590 3591 tbufp = &(ldcp->tbufp[i]); 3592 txdp = tbufp->descp; 3593 hdrp = &txdp->hdr; 3594 3595 if (tbufp->flags & VGEN_PRIV_DESC_BUSY) { 3596 tbufp->flags = VGEN_PRIV_DESC_FREE; 3597 #ifdef DEBUG 3598 if (hdrp->dstate == VIO_DESC_DONE) 3599 ndone++; 3600 #endif 3601 hdrp->dstate = VIO_DESC_FREE; 3602 hdrp->ack = B_FALSE; 3603 } 3604 } 3605 /* reset tbuf walking pointers */ 3606 ldcp->next_tbufp = ldcp->tbufp; 3607 ldcp->cur_tbufp = ldcp->tbufp; 3608 3609 /* reset tx seqnum and index */ 3610 ldcp->next_txseq = VNET_ISS; 3611 ldcp->next_txi = 0; 3612 3613 ldcp->resched_peer = B_TRUE; 3614 ldcp->resched_peer_txi = 0; 3615 3616 DBG2(vgenp, ldcp, "num descrs done (%d)\n", ndone); 3617 } 3618 3619 /* clobber receive descriptor ring */ 3620 static void 3621 vgen_clobber_rxds(vgen_ldc_t *ldcp) 3622 { 3623 ldcp->rx_dhandle = 0; 3624 bzero(&ldcp->rx_dcookie, sizeof (ldcp->rx_dcookie)); 3625 ldcp->rxdp = NULL; 3626 ldcp->next_rxi = 0; 3627 ldcp->num_rxds = 0; 3628 ldcp->next_rxseq = VNET_ISS; 3629 } 3630 3631 /* initialize receive descriptor ring */ 3632 static int 3633 vgen_init_rxds(vgen_ldc_t *ldcp, uint32_t num_desc, uint32_t desc_size, 3634 ldc_mem_cookie_t *dcookie, uint32_t ncookies) 3635 { 3636 int rv; 3637 ldc_mem_info_t minfo; 3638 3639 rv = ldc_mem_dring_map(ldcp->ldc_handle, dcookie, ncookies, num_desc, 3640 desc_size, LDC_DIRECT_MAP, &(ldcp->rx_dhandle)); 3641 if (rv != 0) { 3642 return (DDI_FAILURE); 3643 } 3644 3645 /* 3646 * sucessfully mapped, now try to 3647 * get info about the mapped dring 3648 */ 3649 rv = ldc_mem_dring_info(ldcp->rx_dhandle, &minfo); 3650 if (rv != 0) { 3651 (void) ldc_mem_dring_unmap(ldcp->rx_dhandle); 3652 return (DDI_FAILURE); 3653 } 3654 3655 /* 3656 * save ring address, number of descriptors. 3657 */ 3658 ldcp->rxdp = (vnet_public_desc_t *)(minfo.vaddr); 3659 bcopy(dcookie, &(ldcp->rx_dcookie), sizeof (*dcookie)); 3660 ldcp->num_rxdcookies = ncookies; 3661 ldcp->num_rxds = num_desc; 3662 ldcp->next_rxi = 0; 3663 ldcp->next_rxseq = VNET_ISS; 3664 ldcp->dring_mtype = minfo.mtype; 3665 3666 return (DDI_SUCCESS); 3667 } 3668 3669 /* get channel statistics */ 3670 static uint64_t 3671 vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat) 3672 { 3673 vgen_stats_t *statsp; 3674 uint64_t val; 3675 3676 val = 0; 3677 statsp = &ldcp->stats; 3678 switch (stat) { 3679 3680 case MAC_STAT_MULTIRCV: 3681 val = statsp->multircv; 3682 break; 3683 3684 case MAC_STAT_BRDCSTRCV: 3685 val = statsp->brdcstrcv; 3686 break; 3687 3688 case MAC_STAT_MULTIXMT: 3689 val = statsp->multixmt; 3690 break; 3691 3692 case MAC_STAT_BRDCSTXMT: 3693 val = statsp->brdcstxmt; 3694 break; 3695 3696 case MAC_STAT_NORCVBUF: 3697 val = statsp->norcvbuf; 3698 break; 3699 3700 case MAC_STAT_IERRORS: 3701 val = statsp->ierrors; 3702 break; 3703 3704 case MAC_STAT_NOXMTBUF: 3705 val = statsp->noxmtbuf; 3706 break; 3707 3708 case MAC_STAT_OERRORS: 3709 val = statsp->oerrors; 3710 break; 3711 3712 case MAC_STAT_COLLISIONS: 3713 break; 3714 3715 case MAC_STAT_RBYTES: 3716 val = statsp->rbytes; 3717 break; 3718 3719 case MAC_STAT_IPACKETS: 3720 val = statsp->ipackets; 3721 break; 3722 3723 case MAC_STAT_OBYTES: 3724 val = statsp->obytes; 3725 break; 3726 3727 case MAC_STAT_OPACKETS: 3728 val = statsp->opackets; 3729 break; 3730 3731 /* stats not relevant to ldc, return 0 */ 3732 case MAC_STAT_IFSPEED: 3733 case ETHER_STAT_ALIGN_ERRORS: 3734 case ETHER_STAT_FCS_ERRORS: 3735 case ETHER_STAT_FIRST_COLLISIONS: 3736 case ETHER_STAT_MULTI_COLLISIONS: 3737 case ETHER_STAT_DEFER_XMTS: 3738 case ETHER_STAT_TX_LATE_COLLISIONS: 3739 case ETHER_STAT_EX_COLLISIONS: 3740 case ETHER_STAT_MACXMT_ERRORS: 3741 case ETHER_STAT_CARRIER_ERRORS: 3742 case ETHER_STAT_TOOLONG_ERRORS: 3743 case ETHER_STAT_XCVR_ADDR: 3744 case ETHER_STAT_XCVR_ID: 3745 case ETHER_STAT_XCVR_INUSE: 3746 case ETHER_STAT_CAP_1000FDX: 3747 case ETHER_STAT_CAP_1000HDX: 3748 case ETHER_STAT_CAP_100FDX: 3749 case ETHER_STAT_CAP_100HDX: 3750 case ETHER_STAT_CAP_10FDX: 3751 case ETHER_STAT_CAP_10HDX: 3752 case ETHER_STAT_CAP_ASMPAUSE: 3753 case ETHER_STAT_CAP_PAUSE: 3754 case ETHER_STAT_CAP_AUTONEG: 3755 case ETHER_STAT_ADV_CAP_1000FDX: 3756 case ETHER_STAT_ADV_CAP_1000HDX: 3757 case ETHER_STAT_ADV_CAP_100FDX: 3758 case ETHER_STAT_ADV_CAP_100HDX: 3759 case ETHER_STAT_ADV_CAP_10FDX: 3760 case ETHER_STAT_ADV_CAP_10HDX: 3761 case ETHER_STAT_ADV_CAP_ASMPAUSE: 3762 case ETHER_STAT_ADV_CAP_PAUSE: 3763 case ETHER_STAT_ADV_CAP_AUTONEG: 3764 case ETHER_STAT_LP_CAP_1000FDX: 3765 case ETHER_STAT_LP_CAP_1000HDX: 3766 case ETHER_STAT_LP_CAP_100FDX: 3767 case ETHER_STAT_LP_CAP_100HDX: 3768 case ETHER_STAT_LP_CAP_10FDX: 3769 case ETHER_STAT_LP_CAP_10HDX: 3770 case ETHER_STAT_LP_CAP_ASMPAUSE: 3771 case ETHER_STAT_LP_CAP_PAUSE: 3772 case ETHER_STAT_LP_CAP_AUTONEG: 3773 case ETHER_STAT_LINK_ASMPAUSE: 3774 case ETHER_STAT_LINK_PAUSE: 3775 case ETHER_STAT_LINK_AUTONEG: 3776 case ETHER_STAT_LINK_DUPLEX: 3777 default: 3778 val = 0; 3779 break; 3780 3781 } 3782 return (val); 3783 } 3784 3785 /* 3786 * LDC channel is UP, start handshake process with peer. 3787 */ 3788 static void 3789 vgen_handle_evt_up(vgen_ldc_t *ldcp) 3790 { 3791 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 3792 3793 DBG1(vgenp, ldcp, "enter\n"); 3794 3795 ASSERT(MUTEX_HELD(&ldcp->cblock)); 3796 3797 if (ldcp->portp != vgenp->vsw_portp) { 3798 /* 3799 * As the channel is up, use this port from now on. 3800 */ 3801 (void) atomic_swap_32(&ldcp->portp->use_vsw_port, B_FALSE); 3802 } 3803 3804 /* Initialize local session id */ 3805 ldcp->local_sid = ddi_get_lbolt(); 3806 3807 /* clear peer session id */ 3808 ldcp->peer_sid = 0; 3809 ldcp->hretries = 0; 3810 3811 if (ldcp->hphase != VH_PHASE0) { 3812 vgen_handshake_reset(ldcp); 3813 } 3814 3815 /* Initiate Handshake process with peer ldc endpoint */ 3816 vgen_handshake(vh_nextphase(ldcp)); 3817 3818 DBG1(vgenp, ldcp, "exit\n"); 3819 } 3820 3821 /* 3822 * LDC channel is Reset, terminate connection with peer and try to 3823 * bring the channel up again. 3824 */ 3825 static void 3826 vgen_handle_evt_reset(vgen_ldc_t *ldcp) 3827 { 3828 ldc_status_t istatus; 3829 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 3830 int rv; 3831 3832 DBG1(vgenp, ldcp, "enter\n"); 3833 3834 ASSERT(MUTEX_HELD(&ldcp->cblock)); 3835 3836 if ((ldcp->portp != vgenp->vsw_portp) && 3837 (vgenp->vsw_portp != NULL)) { 3838 /* 3839 * As the channel is down, use the switch port until 3840 * the channel becomes ready to be used. 3841 */ 3842 (void) atomic_swap_32(&ldcp->portp->use_vsw_port, B_TRUE); 3843 } 3844 3845 if (vgenp->vsw_portp == ldcp->portp) { 3846 vio_net_report_err_t rep_err = 3847 ldcp->portp->vcb.vio_net_report_err; 3848 3849 /* Post a reset message */ 3850 rep_err(ldcp->portp->vhp, VIO_NET_RES_DOWN); 3851 } 3852 3853 if (ldcp->hphase != VH_PHASE0) { 3854 vgen_handshake_reset(ldcp); 3855 } 3856 3857 /* try to bring the channel up */ 3858 #ifdef VNET_IOC_DEBUG 3859 if (ldcp->link_down_forced == B_FALSE) { 3860 rv = ldc_up(ldcp->ldc_handle); 3861 if (rv != 0) { 3862 DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv); 3863 } 3864 } 3865 #else 3866 rv = ldc_up(ldcp->ldc_handle); 3867 if (rv != 0) { 3868 DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv); 3869 } 3870 #endif 3871 3872 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) { 3873 DWARN(vgenp, ldcp, "ldc_status err\n"); 3874 } else { 3875 ldcp->ldc_status = istatus; 3876 } 3877 3878 /* if channel is already UP - restart handshake */ 3879 if (ldcp->ldc_status == LDC_UP) { 3880 vgen_handle_evt_up(ldcp); 3881 } 3882 3883 DBG1(vgenp, ldcp, "exit\n"); 3884 } 3885 3886 /* Interrupt handler for the channel */ 3887 static uint_t 3888 vgen_ldc_cb(uint64_t event, caddr_t arg) 3889 { 3890 _NOTE(ARGUNUSED(event)) 3891 vgen_ldc_t *ldcp; 3892 vgen_t *vgenp; 3893 ldc_status_t istatus; 3894 vgen_stats_t *statsp; 3895 timeout_id_t cancel_htid = 0; 3896 uint_t ret = LDC_SUCCESS; 3897 3898 ldcp = (vgen_ldc_t *)arg; 3899 vgenp = LDC_TO_VGEN(ldcp); 3900 statsp = &ldcp->stats; 3901 3902 DBG1(vgenp, ldcp, "enter\n"); 3903 3904 mutex_enter(&ldcp->cblock); 3905 statsp->callbacks++; 3906 if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) { 3907 DWARN(vgenp, ldcp, "status(%d) is LDC_INIT\n", 3908 ldcp->ldc_status); 3909 mutex_exit(&ldcp->cblock); 3910 return (LDC_SUCCESS); 3911 } 3912 3913 /* 3914 * cache cancel_htid before the events specific 3915 * code may overwrite it. Do not clear ldcp->cancel_htid 3916 * as it is also used to indicate the timer to quit immediately. 3917 */ 3918 cancel_htid = ldcp->cancel_htid; 3919 3920 /* 3921 * NOTE: not using switch() as event could be triggered by 3922 * a state change and a read request. Also the ordering of the 3923 * check for the event types is deliberate. 3924 */ 3925 if (event & LDC_EVT_UP) { 3926 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) { 3927 DWARN(vgenp, ldcp, "ldc_status err\n"); 3928 /* status couldn't be determined */ 3929 ret = LDC_FAILURE; 3930 goto ldc_cb_ret; 3931 } 3932 ldcp->ldc_status = istatus; 3933 if (ldcp->ldc_status != LDC_UP) { 3934 DWARN(vgenp, ldcp, "LDC_EVT_UP received " 3935 " but ldc status is not UP(0x%x)\n", 3936 ldcp->ldc_status); 3937 /* spurious interrupt, return success */ 3938 goto ldc_cb_ret; 3939 } 3940 DWARN(vgenp, ldcp, "event(%lx) UP, status(%d)\n", 3941 event, ldcp->ldc_status); 3942 3943 vgen_handle_evt_up(ldcp); 3944 3945 ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0); 3946 } 3947 3948 /* Handle RESET/DOWN before READ event */ 3949 if (event & (LDC_EVT_RESET | LDC_EVT_DOWN)) { 3950 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) { 3951 DWARN(vgenp, ldcp, "ldc_status error\n"); 3952 /* status couldn't be determined */ 3953 ret = LDC_FAILURE; 3954 goto ldc_cb_ret; 3955 } 3956 ldcp->ldc_status = istatus; 3957 DWARN(vgenp, ldcp, "event(%lx) RESET/DOWN, status(%d)\n", 3958 event, ldcp->ldc_status); 3959 3960 vgen_handle_evt_reset(ldcp); 3961 3962 /* 3963 * As the channel is down/reset, ignore READ event 3964 * but print a debug warning message. 3965 */ 3966 if (event & LDC_EVT_READ) { 3967 DWARN(vgenp, ldcp, 3968 "LDC_EVT_READ set along with RESET/DOWN\n"); 3969 event &= ~LDC_EVT_READ; 3970 } 3971 } 3972 3973 if (event & LDC_EVT_READ) { 3974 DBG2(vgenp, ldcp, "event(%lx) READ, status(%d)\n", 3975 event, ldcp->ldc_status); 3976 3977 ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0); 3978 3979 if (ldcp->rcv_thread != NULL) { 3980 /* 3981 * If the receive thread is enabled, then 3982 * wakeup the receive thread to process the 3983 * LDC messages. 3984 */ 3985 mutex_exit(&ldcp->cblock); 3986 mutex_enter(&ldcp->rcv_thr_lock); 3987 if (!(ldcp->rcv_thr_flags & VGEN_WTHR_DATARCVD)) { 3988 ldcp->rcv_thr_flags |= VGEN_WTHR_DATARCVD; 3989 cv_signal(&ldcp->rcv_thr_cv); 3990 } 3991 mutex_exit(&ldcp->rcv_thr_lock); 3992 mutex_enter(&ldcp->cblock); 3993 } else { 3994 vgen_handle_evt_read(ldcp); 3995 } 3996 } 3997 3998 ldc_cb_ret: 3999 /* 4000 * Check to see if the status of cancel_htid has 4001 * changed. If another timer needs to be cancelled, 4002 * then let the next callback to clear it. 4003 */ 4004 if (cancel_htid == 0) { 4005 cancel_htid = ldcp->cancel_htid; 4006 } 4007 mutex_exit(&ldcp->cblock); 4008 4009 if (cancel_htid) { 4010 /* 4011 * Cancel handshake timer. 4012 * untimeout(9F) will not return until the pending callback is 4013 * cancelled or has run. No problems will result from calling 4014 * untimeout if the handler has already completed. 4015 * If the timeout handler did run, then it would just 4016 * return as cancel_htid is set. 4017 */ 4018 DBG2(vgenp, ldcp, "cancel_htid =0x%X \n", cancel_htid); 4019 (void) untimeout(cancel_htid); 4020 mutex_enter(&ldcp->cblock); 4021 /* clear it only if its the same as the one we cancelled */ 4022 if (ldcp->cancel_htid == cancel_htid) { 4023 ldcp->cancel_htid = 0; 4024 } 4025 mutex_exit(&ldcp->cblock); 4026 } 4027 DBG1(vgenp, ldcp, "exit\n"); 4028 return (ret); 4029 } 4030 4031 static void 4032 vgen_handle_evt_read(vgen_ldc_t *ldcp) 4033 { 4034 int rv; 4035 uint64_t *ldcmsg; 4036 size_t msglen; 4037 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 4038 vio_msg_tag_t *tagp; 4039 ldc_status_t istatus; 4040 boolean_t has_data; 4041 4042 DBG1(vgenp, ldcp, "enter\n"); 4043 4044 ldcmsg = ldcp->ldcmsg; 4045 /* 4046 * If the receive thread is enabled, then the cblock 4047 * need to be acquired here. If not, the vgen_ldc_cb() 4048 * calls this function with cblock held already. 4049 */ 4050 if (ldcp->rcv_thread != NULL) { 4051 mutex_enter(&ldcp->cblock); 4052 } else { 4053 ASSERT(MUTEX_HELD(&ldcp->cblock)); 4054 } 4055 4056 vgen_evt_read: 4057 do { 4058 msglen = ldcp->msglen; 4059 rv = ldc_read(ldcp->ldc_handle, (caddr_t)ldcmsg, &msglen); 4060 4061 if (rv != 0) { 4062 DWARN(vgenp, ldcp, "err rv(%d) len(%d)\n", 4063 rv, msglen); 4064 if (rv == ECONNRESET) 4065 goto vgen_evtread_error; 4066 break; 4067 } 4068 if (msglen == 0) { 4069 DBG2(vgenp, ldcp, "ldc_read NODATA"); 4070 break; 4071 } 4072 DBG2(vgenp, ldcp, "ldc_read msglen(%d)", msglen); 4073 4074 tagp = (vio_msg_tag_t *)ldcmsg; 4075 4076 if (ldcp->peer_sid) { 4077 /* 4078 * check sid only after we have received peer's sid 4079 * in the version negotiate msg. 4080 */ 4081 #ifdef DEBUG 4082 if (vgen_hdbg & HDBG_BAD_SID) { 4083 /* simulate bad sid condition */ 4084 tagp->vio_sid = 0; 4085 vgen_hdbg &= ~(HDBG_BAD_SID); 4086 } 4087 #endif 4088 rv = vgen_check_sid(ldcp, tagp); 4089 if (rv != VGEN_SUCCESS) { 4090 /* 4091 * If sid mismatch is detected, 4092 * reset the channel. 4093 */ 4094 goto vgen_evtread_error; 4095 } 4096 } 4097 4098 switch (tagp->vio_msgtype) { 4099 case VIO_TYPE_CTRL: 4100 rv = vgen_handle_ctrlmsg(ldcp, tagp); 4101 break; 4102 4103 case VIO_TYPE_DATA: 4104 rv = vgen_handle_datamsg(ldcp, tagp, msglen); 4105 break; 4106 4107 case VIO_TYPE_ERR: 4108 vgen_handle_errmsg(ldcp, tagp); 4109 break; 4110 4111 default: 4112 DWARN(vgenp, ldcp, "Unknown VIO_TYPE(%x)\n", 4113 tagp->vio_msgtype); 4114 break; 4115 } 4116 4117 /* 4118 * If an error is encountered, stop processing and 4119 * handle the error. 4120 */ 4121 if (rv != 0) { 4122 goto vgen_evtread_error; 4123 } 4124 4125 } while (msglen); 4126 4127 /* check once more before exiting */ 4128 rv = ldc_chkq(ldcp->ldc_handle, &has_data); 4129 if ((rv == 0) && (has_data == B_TRUE)) { 4130 DTRACE_PROBE(vgen_chkq); 4131 goto vgen_evt_read; 4132 } 4133 4134 vgen_evtread_error: 4135 if (rv == ECONNRESET) { 4136 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) { 4137 DWARN(vgenp, ldcp, "ldc_status err\n"); 4138 } else { 4139 ldcp->ldc_status = istatus; 4140 } 4141 vgen_handle_evt_reset(ldcp); 4142 } else if (rv) { 4143 vgen_ldc_reset(ldcp); 4144 } 4145 4146 /* 4147 * If the receive thread is enabled, then cancel the 4148 * handshake timeout here. 4149 */ 4150 if (ldcp->rcv_thread != NULL) { 4151 timeout_id_t cancel_htid = ldcp->cancel_htid; 4152 4153 mutex_exit(&ldcp->cblock); 4154 if (cancel_htid) { 4155 /* 4156 * Cancel handshake timer. untimeout(9F) will 4157 * not return until the pending callback is cancelled 4158 * or has run. No problems will result from calling 4159 * untimeout if the handler has already completed. 4160 * If the timeout handler did run, then it would just 4161 * return as cancel_htid is set. 4162 */ 4163 DBG2(vgenp, ldcp, "cancel_htid =0x%X \n", cancel_htid); 4164 (void) untimeout(cancel_htid); 4165 4166 /* 4167 * clear it only if its the same as the one we 4168 * cancelled 4169 */ 4170 mutex_enter(&ldcp->cblock); 4171 if (ldcp->cancel_htid == cancel_htid) { 4172 ldcp->cancel_htid = 0; 4173 } 4174 mutex_exit(&ldcp->cblock); 4175 } 4176 } 4177 4178 DBG1(vgenp, ldcp, "exit\n"); 4179 } 4180 4181 /* vgen handshake functions */ 4182 4183 /* change the hphase for the channel to the next phase */ 4184 static vgen_ldc_t * 4185 vh_nextphase(vgen_ldc_t *ldcp) 4186 { 4187 if (ldcp->hphase == VH_PHASE3) { 4188 ldcp->hphase = VH_DONE; 4189 } else { 4190 ldcp->hphase++; 4191 } 4192 return (ldcp); 4193 } 4194 4195 /* 4196 * wrapper routine to send the given message over ldc using ldc_write(). 4197 */ 4198 static int 4199 vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg, size_t msglen, 4200 boolean_t caller_holds_lock) 4201 { 4202 int rv; 4203 size_t len; 4204 uint32_t retries = 0; 4205 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 4206 vio_msg_tag_t *tagp = (vio_msg_tag_t *)msg; 4207 vio_dring_msg_t *dmsg; 4208 vio_raw_data_msg_t *rmsg; 4209 boolean_t data_msg = B_FALSE; 4210 4211 len = msglen; 4212 if ((len == 0) || (msg == NULL)) 4213 return (VGEN_FAILURE); 4214 4215 if (!caller_holds_lock) { 4216 mutex_enter(&ldcp->wrlock); 4217 } 4218 4219 if (tagp->vio_subtype == VIO_SUBTYPE_INFO) { 4220 if (tagp->vio_subtype_env == VIO_DRING_DATA) { 4221 dmsg = (vio_dring_msg_t *)tagp; 4222 dmsg->seq_num = ldcp->next_txseq; 4223 data_msg = B_TRUE; 4224 } else if (tagp->vio_subtype_env == VIO_PKT_DATA) { 4225 rmsg = (vio_raw_data_msg_t *)tagp; 4226 rmsg->seq_num = ldcp->next_txseq; 4227 data_msg = B_TRUE; 4228 } 4229 } 4230 4231 do { 4232 len = msglen; 4233 rv = ldc_write(ldcp->ldc_handle, (caddr_t)msg, &len); 4234 if (retries++ >= vgen_ldcwr_retries) 4235 break; 4236 } while (rv == EWOULDBLOCK); 4237 4238 if (rv == 0 && data_msg == B_TRUE) { 4239 ldcp->next_txseq++; 4240 } 4241 4242 if (!caller_holds_lock) { 4243 mutex_exit(&ldcp->wrlock); 4244 } 4245 4246 if (rv != 0) { 4247 DWARN(vgenp, ldcp, "ldc_write failed: rv(%d)\n", 4248 rv, msglen); 4249 return (rv); 4250 } 4251 4252 if (len != msglen) { 4253 DWARN(vgenp, ldcp, "ldc_write failed: rv(%d) msglen (%d)\n", 4254 rv, msglen); 4255 return (VGEN_FAILURE); 4256 } 4257 4258 return (VGEN_SUCCESS); 4259 } 4260 4261 /* send version negotiate message to the peer over ldc */ 4262 static int 4263 vgen_send_version_negotiate(vgen_ldc_t *ldcp) 4264 { 4265 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 4266 vio_ver_msg_t vermsg; 4267 vio_msg_tag_t *tagp = &vermsg.tag; 4268 int rv; 4269 4270 bzero(&vermsg, sizeof (vermsg)); 4271 4272 tagp->vio_msgtype = VIO_TYPE_CTRL; 4273 tagp->vio_subtype = VIO_SUBTYPE_INFO; 4274 tagp->vio_subtype_env = VIO_VER_INFO; 4275 tagp->vio_sid = ldcp->local_sid; 4276 4277 /* get version msg payload from ldcp->local */ 4278 vermsg.ver_major = ldcp->local_hparams.ver_major; 4279 vermsg.ver_minor = ldcp->local_hparams.ver_minor; 4280 vermsg.dev_class = ldcp->local_hparams.dev_class; 4281 4282 rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vermsg), B_FALSE); 4283 if (rv != VGEN_SUCCESS) { 4284 DWARN(vgenp, ldcp, "vgen_sendmsg failed\n"); 4285 return (rv); 4286 } 4287 4288 ldcp->hstate |= VER_INFO_SENT; 4289 DBG2(vgenp, ldcp, "VER_INFO_SENT ver(%d,%d)\n", 4290 vermsg.ver_major, vermsg.ver_minor); 4291 4292 return (VGEN_SUCCESS); 4293 } 4294 4295 /* send attr info message to the peer over ldc */ 4296 static int 4297 vgen_send_attr_info(vgen_ldc_t *ldcp) 4298 { 4299 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 4300 vnet_attr_msg_t attrmsg; 4301 vio_msg_tag_t *tagp = &attrmsg.tag; 4302 int rv; 4303 4304 bzero(&attrmsg, sizeof (attrmsg)); 4305 4306 tagp->vio_msgtype = VIO_TYPE_CTRL; 4307 tagp->vio_subtype = VIO_SUBTYPE_INFO; 4308 tagp->vio_subtype_env = VIO_ATTR_INFO; 4309 tagp->vio_sid = ldcp->local_sid; 4310 4311 /* get attr msg payload from ldcp->local */ 4312 attrmsg.mtu = ldcp->local_hparams.mtu; 4313 attrmsg.addr = ldcp->local_hparams.addr; 4314 attrmsg.addr_type = ldcp->local_hparams.addr_type; 4315 attrmsg.xfer_mode = ldcp->local_hparams.xfer_mode; 4316 attrmsg.ack_freq = ldcp->local_hparams.ack_freq; 4317 attrmsg.physlink_update = ldcp->local_hparams.physlink_update; 4318 4319 rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (attrmsg), B_FALSE); 4320 if (rv != VGEN_SUCCESS) { 4321 DWARN(vgenp, ldcp, "vgen_sendmsg failed\n"); 4322 return (rv); 4323 } 4324 4325 ldcp->hstate |= ATTR_INFO_SENT; 4326 DBG2(vgenp, ldcp, "ATTR_INFO_SENT\n"); 4327 4328 return (VGEN_SUCCESS); 4329 } 4330 4331 /* send descriptor ring register message to the peer over ldc */ 4332 static int 4333 vgen_send_dring_reg(vgen_ldc_t *ldcp) 4334 { 4335 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 4336 vio_dring_reg_msg_t msg; 4337 vio_msg_tag_t *tagp = &msg.tag; 4338 int rv; 4339 4340 bzero(&msg, sizeof (msg)); 4341 4342 tagp->vio_msgtype = VIO_TYPE_CTRL; 4343 tagp->vio_subtype = VIO_SUBTYPE_INFO; 4344 tagp->vio_subtype_env = VIO_DRING_REG; 4345 tagp->vio_sid = ldcp->local_sid; 4346 4347 /* get dring info msg payload from ldcp->local */ 4348 bcopy(&(ldcp->local_hparams.dring_cookie), (msg.cookie), 4349 sizeof (ldc_mem_cookie_t)); 4350 msg.ncookies = ldcp->local_hparams.num_dcookies; 4351 msg.num_descriptors = ldcp->local_hparams.num_desc; 4352 msg.descriptor_size = ldcp->local_hparams.desc_size; 4353 4354 /* 4355 * dring_ident is set to 0. After mapping the dring, peer sets this 4356 * value and sends it in the ack, which is saved in 4357 * vgen_handle_dring_reg(). 4358 */ 4359 msg.dring_ident = 0; 4360 4361 rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (msg), B_FALSE); 4362 if (rv != VGEN_SUCCESS) { 4363 DWARN(vgenp, ldcp, "vgen_sendmsg failed\n"); 4364 return (rv); 4365 } 4366 4367 ldcp->hstate |= DRING_INFO_SENT; 4368 DBG2(vgenp, ldcp, "DRING_INFO_SENT \n"); 4369 4370 return (VGEN_SUCCESS); 4371 } 4372 4373 static int 4374 vgen_send_rdx_info(vgen_ldc_t *ldcp) 4375 { 4376 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 4377 vio_rdx_msg_t rdxmsg; 4378 vio_msg_tag_t *tagp = &rdxmsg.tag; 4379 int rv; 4380 4381 bzero(&rdxmsg, sizeof (rdxmsg)); 4382 4383 tagp->vio_msgtype = VIO_TYPE_CTRL; 4384 tagp->vio_subtype = VIO_SUBTYPE_INFO; 4385 tagp->vio_subtype_env = VIO_RDX; 4386 tagp->vio_sid = ldcp->local_sid; 4387 4388 rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (rdxmsg), B_FALSE); 4389 if (rv != VGEN_SUCCESS) { 4390 DWARN(vgenp, ldcp, "vgen_sendmsg failed\n"); 4391 return (rv); 4392 } 4393 4394 ldcp->hstate |= RDX_INFO_SENT; 4395 DBG2(vgenp, ldcp, "RDX_INFO_SENT\n"); 4396 4397 return (VGEN_SUCCESS); 4398 } 4399 4400 /* send descriptor ring data message to the peer over ldc */ 4401 static int 4402 vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, int32_t end) 4403 { 4404 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 4405 vio_dring_msg_t dringmsg, *msgp = &dringmsg; 4406 vio_msg_tag_t *tagp = &msgp->tag; 4407 vgen_stats_t *statsp = &ldcp->stats; 4408 int rv; 4409 4410 bzero(msgp, sizeof (*msgp)); 4411 4412 tagp->vio_msgtype = VIO_TYPE_DATA; 4413 tagp->vio_subtype = VIO_SUBTYPE_INFO; 4414 tagp->vio_subtype_env = VIO_DRING_DATA; 4415 tagp->vio_sid = ldcp->local_sid; 4416 4417 msgp->dring_ident = ldcp->local_hparams.dring_ident; 4418 msgp->start_idx = start; 4419 msgp->end_idx = end; 4420 4421 rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (dringmsg), B_TRUE); 4422 if (rv != VGEN_SUCCESS) { 4423 DWARN(vgenp, ldcp, "vgen_sendmsg failed\n"); 4424 return (rv); 4425 } 4426 4427 statsp->dring_data_msgs++; 4428 4429 DBG2(vgenp, ldcp, "DRING_DATA_SENT \n"); 4430 4431 return (VGEN_SUCCESS); 4432 } 4433 4434 /* send multicast addr info message to vsw */ 4435 static int 4436 vgen_send_mcast_info(vgen_ldc_t *ldcp) 4437 { 4438 vnet_mcast_msg_t mcastmsg; 4439 vnet_mcast_msg_t *msgp; 4440 vio_msg_tag_t *tagp; 4441 vgen_t *vgenp; 4442 struct ether_addr *mca; 4443 int rv; 4444 int i; 4445 uint32_t size; 4446 uint32_t mccount; 4447 uint32_t n; 4448 4449 msgp = &mcastmsg; 4450 tagp = &msgp->tag; 4451 vgenp = LDC_TO_VGEN(ldcp); 4452 4453 mccount = vgenp->mccount; 4454 i = 0; 4455 4456 do { 4457 tagp->vio_msgtype = VIO_TYPE_CTRL; 4458 tagp->vio_subtype = VIO_SUBTYPE_INFO; 4459 tagp->vio_subtype_env = VNET_MCAST_INFO; 4460 tagp->vio_sid = ldcp->local_sid; 4461 4462 n = ((mccount >= VNET_NUM_MCAST) ? VNET_NUM_MCAST : mccount); 4463 size = n * sizeof (struct ether_addr); 4464 4465 mca = &(vgenp->mctab[i]); 4466 bcopy(mca, (msgp->mca), size); 4467 msgp->set = B_TRUE; 4468 msgp->count = n; 4469 4470 rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp), 4471 B_FALSE); 4472 if (rv != VGEN_SUCCESS) { 4473 DWARN(vgenp, ldcp, "vgen_sendmsg err(%d)\n", rv); 4474 return (rv); 4475 } 4476 4477 mccount -= n; 4478 i += n; 4479 4480 } while (mccount); 4481 4482 return (VGEN_SUCCESS); 4483 } 4484 4485 /* Initiate Phase 2 of handshake */ 4486 static int 4487 vgen_handshake_phase2(vgen_ldc_t *ldcp) 4488 { 4489 int rv; 4490 uint32_t ncookies = 0; 4491 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 4492 4493 #ifdef DEBUG 4494 if (vgen_hdbg & HDBG_OUT_STATE) { 4495 /* simulate out of state condition */ 4496 vgen_hdbg &= ~(HDBG_OUT_STATE); 4497 rv = vgen_send_rdx_info(ldcp); 4498 return (rv); 4499 } 4500 if (vgen_hdbg & HDBG_TIMEOUT) { 4501 /* simulate timeout condition */ 4502 vgen_hdbg &= ~(HDBG_TIMEOUT); 4503 return (VGEN_SUCCESS); 4504 } 4505 #endif 4506 rv = vgen_send_attr_info(ldcp); 4507 if (rv != VGEN_SUCCESS) { 4508 return (rv); 4509 } 4510 4511 /* Bind descriptor ring to the channel */ 4512 if (ldcp->num_txdcookies == 0) { 4513 rv = ldc_mem_dring_bind(ldcp->ldc_handle, ldcp->tx_dhandle, 4514 LDC_DIRECT_MAP | LDC_SHADOW_MAP, LDC_MEM_RW, 4515 &ldcp->tx_dcookie, &ncookies); 4516 if (rv != 0) { 4517 DWARN(vgenp, ldcp, "ldc_mem_dring_bind failed " 4518 "rv(%x)\n", rv); 4519 return (rv); 4520 } 4521 ASSERT(ncookies == 1); 4522 ldcp->num_txdcookies = ncookies; 4523 } 4524 4525 /* update local dring_info params */ 4526 bcopy(&(ldcp->tx_dcookie), &(ldcp->local_hparams.dring_cookie), 4527 sizeof (ldc_mem_cookie_t)); 4528 ldcp->local_hparams.num_dcookies = ldcp->num_txdcookies; 4529 ldcp->local_hparams.num_desc = ldcp->num_txds; 4530 ldcp->local_hparams.desc_size = sizeof (vnet_public_desc_t); 4531 4532 rv = vgen_send_dring_reg(ldcp); 4533 if (rv != VGEN_SUCCESS) { 4534 return (rv); 4535 } 4536 4537 return (VGEN_SUCCESS); 4538 } 4539 4540 /* 4541 * Set vnet-protocol-version dependent functions based on version. 4542 */ 4543 static void 4544 vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp) 4545 { 4546 vgen_hparams_t *lp = &ldcp->local_hparams; 4547 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 4548 4549 if (VGEN_VER_GTEQ(ldcp, 1, 5)) { 4550 vgen_port_t *portp = ldcp->portp; 4551 vnet_t *vnetp = vgenp->vnetp; 4552 /* 4553 * If the version negotiated with vswitch is >= 1.5 (link 4554 * status update support), set the required bits in our 4555 * attributes if this vnet device has been configured to get 4556 * physical link state updates. 4557 */ 4558 if (portp == vgenp->vsw_portp && vnetp->pls_update == B_TRUE) { 4559 lp->physlink_update = PHYSLINK_UPDATE_STATE; 4560 } else { 4561 lp->physlink_update = PHYSLINK_UPDATE_NONE; 4562 } 4563 } 4564 4565 if (VGEN_VER_GTEQ(ldcp, 1, 4)) { 4566 /* 4567 * If the version negotiated with peer is >= 1.4(Jumbo Frame 4568 * Support), set the mtu in our attributes to max_frame_size. 4569 */ 4570 lp->mtu = vgenp->max_frame_size; 4571 } else if (VGEN_VER_EQ(ldcp, 1, 3)) { 4572 /* 4573 * If the version negotiated with peer is == 1.3 (Vlan Tag 4574 * Support) set the attr.mtu to ETHERMAX + VLAN_TAGSZ. 4575 */ 4576 lp->mtu = ETHERMAX + VLAN_TAGSZ; 4577 } else { 4578 vgen_port_t *portp = ldcp->portp; 4579 vnet_t *vnetp = vgenp->vnetp; 4580 /* 4581 * Pre-1.3 peers expect max frame size of ETHERMAX. 4582 * We can negotiate that size with those peers provided the 4583 * following conditions are true: 4584 * - Only pvid is defined for our peer and there are no vids. 4585 * - pvids are equal. 4586 * If the above conditions are true, then we can send/recv only 4587 * untagged frames of max size ETHERMAX. 4588 */ 4589 if (portp->nvids == 0 && portp->pvid == vnetp->pvid) { 4590 lp->mtu = ETHERMAX; 4591 } 4592 } 4593 4594 if (VGEN_VER_GTEQ(ldcp, 1, 2)) { 4595 /* Versions >= 1.2 */ 4596 4597 if (VGEN_PRI_ETH_DEFINED(vgenp)) { 4598 /* 4599 * enable priority routines and pkt mode only if 4600 * at least one pri-eth-type is specified in MD. 4601 */ 4602 4603 ldcp->tx = vgen_ldcsend; 4604 ldcp->rx_pktdata = vgen_handle_pkt_data; 4605 4606 /* set xfer mode for vgen_send_attr_info() */ 4607 lp->xfer_mode = VIO_PKT_MODE | VIO_DRING_MODE_V1_2; 4608 4609 } else { 4610 /* no priority eth types defined in MD */ 4611 4612 ldcp->tx = vgen_ldcsend_dring; 4613 ldcp->rx_pktdata = vgen_handle_pkt_data_nop; 4614 4615 /* set xfer mode for vgen_send_attr_info() */ 4616 lp->xfer_mode = VIO_DRING_MODE_V1_2; 4617 4618 } 4619 } else { 4620 /* Versions prior to 1.2 */ 4621 4622 vgen_reset_vnet_proto_ops(ldcp); 4623 } 4624 } 4625 4626 /* 4627 * Reset vnet-protocol-version dependent functions to pre-v1.2. 4628 */ 4629 static void 4630 vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp) 4631 { 4632 vgen_hparams_t *lp = &ldcp->local_hparams; 4633 4634 ldcp->tx = vgen_ldcsend_dring; 4635 ldcp->rx_pktdata = vgen_handle_pkt_data_nop; 4636 4637 /* set xfer mode for vgen_send_attr_info() */ 4638 lp->xfer_mode = VIO_DRING_MODE_V1_0; 4639 } 4640 4641 static void 4642 vgen_vlan_unaware_port_reset(vgen_port_t *portp) 4643 { 4644 vgen_ldclist_t *ldclp; 4645 vgen_ldc_t *ldcp; 4646 vgen_t *vgenp = portp->vgenp; 4647 vnet_t *vnetp = vgenp->vnetp; 4648 4649 ldclp = &portp->ldclist; 4650 4651 READ_ENTER(&ldclp->rwlock); 4652 4653 /* 4654 * NOTE: for now, we will assume we have a single channel. 4655 */ 4656 if (ldclp->headp == NULL) { 4657 RW_EXIT(&ldclp->rwlock); 4658 return; 4659 } 4660 ldcp = ldclp->headp; 4661 4662 mutex_enter(&ldcp->cblock); 4663 4664 /* 4665 * If the peer is vlan_unaware(ver < 1.3), reset channel and terminate 4666 * the connection. See comments in vgen_set_vnet_proto_ops(). 4667 */ 4668 if (ldcp->hphase == VH_DONE && VGEN_VER_LT(ldcp, 1, 3) && 4669 (portp->nvids != 0 || portp->pvid != vnetp->pvid)) { 4670 vgen_ldc_reset(ldcp); 4671 } 4672 4673 mutex_exit(&ldcp->cblock); 4674 4675 RW_EXIT(&ldclp->rwlock); 4676 } 4677 4678 static void 4679 vgen_port_reset(vgen_port_t *portp) 4680 { 4681 vgen_ldclist_t *ldclp; 4682 vgen_ldc_t *ldcp; 4683 4684 ldclp = &portp->ldclist; 4685 4686 READ_ENTER(&ldclp->rwlock); 4687 4688 /* 4689 * NOTE: for now, we will assume we have a single channel. 4690 */ 4691 if (ldclp->headp == NULL) { 4692 RW_EXIT(&ldclp->rwlock); 4693 return; 4694 } 4695 ldcp = ldclp->headp; 4696 4697 mutex_enter(&ldcp->cblock); 4698 4699 vgen_ldc_reset(ldcp); 4700 4701 mutex_exit(&ldcp->cblock); 4702 4703 RW_EXIT(&ldclp->rwlock); 4704 } 4705 4706 static void 4707 vgen_reset_vlan_unaware_ports(vgen_t *vgenp) 4708 { 4709 vgen_port_t *portp; 4710 vgen_portlist_t *plistp; 4711 4712 plistp = &(vgenp->vgenports); 4713 READ_ENTER(&plistp->rwlock); 4714 4715 for (portp = plistp->headp; portp != NULL; portp = portp->nextp) { 4716 4717 vgen_vlan_unaware_port_reset(portp); 4718 4719 } 4720 4721 RW_EXIT(&plistp->rwlock); 4722 } 4723 4724 static void 4725 vgen_reset_vsw_port(vgen_t *vgenp) 4726 { 4727 vgen_port_t *portp; 4728 4729 if ((portp = vgenp->vsw_portp) != NULL) { 4730 vgen_port_reset(portp); 4731 } 4732 } 4733 4734 /* 4735 * This function resets the handshake phase to VH_PHASE0(pre-handshake phase). 4736 * This can happen after a channel comes up (status: LDC_UP) or 4737 * when handshake gets terminated due to various conditions. 4738 */ 4739 static void 4740 vgen_reset_hphase(vgen_ldc_t *ldcp) 4741 { 4742 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 4743 ldc_status_t istatus; 4744 int rv; 4745 4746 DBG1(vgenp, ldcp, "enter\n"); 4747 /* reset hstate and hphase */ 4748 ldcp->hstate = 0; 4749 ldcp->hphase = VH_PHASE0; 4750 4751 vgen_reset_vnet_proto_ops(ldcp); 4752 4753 /* 4754 * Save the id of pending handshake timer in cancel_htid. 4755 * This will be checked in vgen_ldc_cb() and the handshake timer will 4756 * be cancelled after releasing cblock. 4757 */ 4758 if (ldcp->htid) { 4759 ldcp->cancel_htid = ldcp->htid; 4760 ldcp->htid = 0; 4761 } 4762 4763 if (ldcp->local_hparams.dring_ready) { 4764 ldcp->local_hparams.dring_ready = B_FALSE; 4765 } 4766 4767 /* Unbind tx descriptor ring from the channel */ 4768 if (ldcp->num_txdcookies) { 4769 rv = ldc_mem_dring_unbind(ldcp->tx_dhandle); 4770 if (rv != 0) { 4771 DWARN(vgenp, ldcp, "ldc_mem_dring_unbind failed\n"); 4772 } 4773 ldcp->num_txdcookies = 0; 4774 } 4775 4776 if (ldcp->peer_hparams.dring_ready) { 4777 ldcp->peer_hparams.dring_ready = B_FALSE; 4778 /* Unmap peer's dring */ 4779 (void) ldc_mem_dring_unmap(ldcp->rx_dhandle); 4780 vgen_clobber_rxds(ldcp); 4781 } 4782 4783 vgen_clobber_tbufs(ldcp); 4784 4785 /* 4786 * clear local handshake params and initialize. 4787 */ 4788 bzero(&(ldcp->local_hparams), sizeof (ldcp->local_hparams)); 4789 4790 /* set version to the highest version supported */ 4791 ldcp->local_hparams.ver_major = 4792 ldcp->vgen_versions[0].ver_major; 4793 ldcp->local_hparams.ver_minor = 4794 ldcp->vgen_versions[0].ver_minor; 4795 ldcp->local_hparams.dev_class = VDEV_NETWORK; 4796 4797 /* set attr_info params */ 4798 ldcp->local_hparams.mtu = vgenp->max_frame_size; 4799 ldcp->local_hparams.addr = 4800 vnet_macaddr_strtoul(vgenp->macaddr); 4801 ldcp->local_hparams.addr_type = ADDR_TYPE_MAC; 4802 ldcp->local_hparams.xfer_mode = VIO_DRING_MODE_V1_0; 4803 ldcp->local_hparams.ack_freq = 0; /* don't need acks */ 4804 ldcp->local_hparams.physlink_update = PHYSLINK_UPDATE_NONE; 4805 4806 /* 4807 * Note: dring is created, but not bound yet. 4808 * local dring_info params will be updated when we bind the dring in 4809 * vgen_handshake_phase2(). 4810 * dring_ident is set to 0. After mapping the dring, peer sets this 4811 * value and sends it in the ack, which is saved in 4812 * vgen_handle_dring_reg(). 4813 */ 4814 ldcp->local_hparams.dring_ident = 0; 4815 4816 /* clear peer_hparams */ 4817 bzero(&(ldcp->peer_hparams), sizeof (ldcp->peer_hparams)); 4818 4819 /* reset the channel if required */ 4820 #ifdef VNET_IOC_DEBUG 4821 if (ldcp->need_ldc_reset && !ldcp->link_down_forced) { 4822 #else 4823 if (ldcp->need_ldc_reset) { 4824 #endif 4825 DWARN(vgenp, ldcp, "Doing Channel Reset...\n"); 4826 ldcp->need_ldc_reset = B_FALSE; 4827 (void) ldc_down(ldcp->ldc_handle); 4828 (void) ldc_status(ldcp->ldc_handle, &istatus); 4829 DBG2(vgenp, ldcp, "Reset Done,ldc_status(%x)\n", istatus); 4830 ldcp->ldc_status = istatus; 4831 4832 /* clear sids */ 4833 ldcp->local_sid = 0; 4834 ldcp->peer_sid = 0; 4835 4836 /* try to bring the channel up */ 4837 rv = ldc_up(ldcp->ldc_handle); 4838 if (rv != 0) { 4839 DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv); 4840 } 4841 4842 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) { 4843 DWARN(vgenp, ldcp, "ldc_status err\n"); 4844 } else { 4845 ldcp->ldc_status = istatus; 4846 } 4847 } 4848 } 4849 4850 /* wrapper function for vgen_reset_hphase */ 4851 static void 4852 vgen_handshake_reset(vgen_ldc_t *ldcp) 4853 { 4854 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 4855 4856 ASSERT(MUTEX_HELD(&ldcp->cblock)); 4857 mutex_enter(&ldcp->rxlock); 4858 mutex_enter(&ldcp->wrlock); 4859 mutex_enter(&ldcp->txlock); 4860 mutex_enter(&ldcp->tclock); 4861 4862 vgen_reset_hphase(ldcp); 4863 4864 mutex_exit(&ldcp->tclock); 4865 mutex_exit(&ldcp->txlock); 4866 mutex_exit(&ldcp->wrlock); 4867 mutex_exit(&ldcp->rxlock); 4868 4869 /* 4870 * As the connection is now reset, mark the channel 4871 * link_state as 'down' and notify the stack if needed. 4872 */ 4873 if (ldcp->link_state != LINK_STATE_DOWN) { 4874 ldcp->link_state = LINK_STATE_DOWN; 4875 4876 if (ldcp->portp == vgenp->vsw_portp) { /* vswitch port ? */ 4877 /* 4878 * As the channel link is down, mark physical link also 4879 * as down. After the channel comes back up and 4880 * handshake completes, we will get an update on the 4881 * physlink state from vswitch (if this device has been 4882 * configured to get phys link updates). 4883 */ 4884 vgenp->phys_link_state = LINK_STATE_DOWN; 4885 4886 /* Now update the stack */ 4887 mutex_exit(&ldcp->cblock); 4888 vgen_link_update(vgenp, ldcp->link_state); 4889 mutex_enter(&ldcp->cblock); 4890 } 4891 } 4892 } 4893 4894 /* 4895 * Initiate handshake with the peer by sending various messages 4896 * based on the handshake-phase that the channel is currently in. 4897 */ 4898 static void 4899 vgen_handshake(vgen_ldc_t *ldcp) 4900 { 4901 uint32_t hphase = ldcp->hphase; 4902 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 4903 ldc_status_t istatus; 4904 int rv = 0; 4905 4906 switch (hphase) { 4907 4908 case VH_PHASE1: 4909 4910 /* 4911 * start timer, for entire handshake process, turn this timer 4912 * off if all phases of handshake complete successfully and 4913 * hphase goes to VH_DONE(below) or 4914 * vgen_reset_hphase() gets called or 4915 * channel is reset due to errors or 4916 * vgen_ldc_uninit() is invoked(vgen_stop). 4917 */ 4918 ASSERT(ldcp->htid == 0); 4919 ldcp->htid = timeout(vgen_hwatchdog, (caddr_t)ldcp, 4920 drv_usectohz(vgen_hwd_interval * MICROSEC)); 4921 4922 /* Phase 1 involves negotiating the version */ 4923 rv = vgen_send_version_negotiate(ldcp); 4924 break; 4925 4926 case VH_PHASE2: 4927 rv = vgen_handshake_phase2(ldcp); 4928 break; 4929 4930 case VH_PHASE3: 4931 rv = vgen_send_rdx_info(ldcp); 4932 break; 4933 4934 case VH_DONE: 4935 /* 4936 * Save the id of pending handshake timer in cancel_htid. 4937 * This will be checked in vgen_ldc_cb() and the handshake 4938 * timer will be cancelled after releasing cblock. 4939 */ 4940 if (ldcp->htid) { 4941 ldcp->cancel_htid = ldcp->htid; 4942 ldcp->htid = 0; 4943 } 4944 ldcp->hretries = 0; 4945 DBG1(vgenp, ldcp, "Handshake Done\n"); 4946 4947 /* 4948 * The channel is up and handshake is done successfully. Now we 4949 * can mark the channel link_state as 'up'. We also notify the 4950 * stack if the channel is connected to vswitch. 4951 */ 4952 ldcp->link_state = LINK_STATE_UP; 4953 4954 if (ldcp->portp == vgenp->vsw_portp) { 4955 /* 4956 * If this channel(port) is connected to vsw, 4957 * need to sync multicast table with vsw. 4958 */ 4959 mutex_exit(&ldcp->cblock); 4960 4961 mutex_enter(&vgenp->lock); 4962 rv = vgen_send_mcast_info(ldcp); 4963 mutex_exit(&vgenp->lock); 4964 4965 if (vgenp->pls_negotiated == B_FALSE) { 4966 /* 4967 * We haven't negotiated with vswitch to get 4968 * physical link state updates. We can update 4969 * update the stack at this point as the 4970 * channel to vswitch is up and the handshake 4971 * is done successfully. 4972 * 4973 * If we have negotiated to get physical link 4974 * state updates, then we won't notify the 4975 * the stack here; we do that as soon as 4976 * vswitch sends us the initial phys link state 4977 * (see vgen_handle_physlink_info()). 4978 */ 4979 vgen_link_update(vgenp, ldcp->link_state); 4980 } 4981 4982 mutex_enter(&ldcp->cblock); 4983 if (rv != VGEN_SUCCESS) 4984 break; 4985 } 4986 4987 /* 4988 * Check if mac layer should be notified to restart 4989 * transmissions. This can happen if the channel got 4990 * reset and vgen_clobber_tbufs() is called, while 4991 * need_resched is set. 4992 */ 4993 mutex_enter(&ldcp->tclock); 4994 if (ldcp->need_resched) { 4995 vio_net_tx_update_t vtx_update = 4996 ldcp->portp->vcb.vio_net_tx_update; 4997 4998 ldcp->need_resched = B_FALSE; 4999 vtx_update(ldcp->portp->vhp); 5000 } 5001 mutex_exit(&ldcp->tclock); 5002 5003 break; 5004 5005 default: 5006 break; 5007 } 5008 5009 if (rv == ECONNRESET) { 5010 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) { 5011 DWARN(vgenp, ldcp, "ldc_status err\n"); 5012 } else { 5013 ldcp->ldc_status = istatus; 5014 } 5015 vgen_handle_evt_reset(ldcp); 5016 } else if (rv) { 5017 vgen_handshake_reset(ldcp); 5018 } 5019 } 5020 5021 /* 5022 * Check if the current handshake phase has completed successfully and 5023 * return the status. 5024 */ 5025 static int 5026 vgen_handshake_done(vgen_ldc_t *ldcp) 5027 { 5028 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 5029 uint32_t hphase = ldcp->hphase; 5030 int status = 0; 5031 5032 switch (hphase) { 5033 5034 case VH_PHASE1: 5035 /* 5036 * Phase1 is done, if version negotiation 5037 * completed successfully. 5038 */ 5039 status = ((ldcp->hstate & VER_NEGOTIATED) == 5040 VER_NEGOTIATED); 5041 break; 5042 5043 case VH_PHASE2: 5044 /* 5045 * Phase 2 is done, if attr info and dring info 5046 * have been exchanged successfully. 5047 */ 5048 status = (((ldcp->hstate & ATTR_INFO_EXCHANGED) == 5049 ATTR_INFO_EXCHANGED) && 5050 ((ldcp->hstate & DRING_INFO_EXCHANGED) == 5051 DRING_INFO_EXCHANGED)); 5052 break; 5053 5054 case VH_PHASE3: 5055 /* Phase 3 is done, if rdx msg has been exchanged */ 5056 status = ((ldcp->hstate & RDX_EXCHANGED) == 5057 RDX_EXCHANGED); 5058 break; 5059 5060 default: 5061 break; 5062 } 5063 5064 if (status == 0) { 5065 return (VGEN_FAILURE); 5066 } 5067 DBG2(vgenp, ldcp, "PHASE(%d)\n", hphase); 5068 return (VGEN_SUCCESS); 5069 } 5070 5071 /* retry handshake on failure */ 5072 static void 5073 vgen_handshake_retry(vgen_ldc_t *ldcp) 5074 { 5075 /* reset handshake phase */ 5076 vgen_handshake_reset(ldcp); 5077 5078 /* handshake retry is specified and the channel is UP */ 5079 if (vgen_max_hretries && (ldcp->ldc_status == LDC_UP)) { 5080 if (ldcp->hretries++ < vgen_max_hretries) { 5081 ldcp->local_sid = ddi_get_lbolt(); 5082 vgen_handshake(vh_nextphase(ldcp)); 5083 } 5084 } 5085 } 5086 5087 5088 /* 5089 * Link State Update Notes: 5090 * The link state of the channel connected to vswitch is reported as the link 5091 * state of the vnet device, by default. If the channel is down or reset, then 5092 * the link state is marked 'down'. If the channel is 'up' *and* handshake 5093 * between the vnet and vswitch is successful, then the link state is marked 5094 * 'up'. If physical network link state is desired, then the vnet device must 5095 * be configured to get physical link updates and the 'linkprop' property 5096 * in the virtual-device MD node indicates this. As part of attribute exchange 5097 * the vnet device negotiates with the vswitch to obtain physical link state 5098 * updates. If it successfully negotiates, vswitch sends an initial physlink 5099 * msg once the handshake is done and further whenever the physical link state 5100 * changes. Currently we don't have mac layer interfaces to report two distinct 5101 * link states - virtual and physical. Thus, if the vnet has been configured to 5102 * get physical link updates, then the link status will be reported as 'up' 5103 * only when both the virtual and physical links are up. 5104 */ 5105 static void 5106 vgen_link_update(vgen_t *vgenp, link_state_t link_state) 5107 { 5108 vnet_link_update(vgenp->vnetp, link_state); 5109 } 5110 5111 /* 5112 * Handle a version info msg from the peer or an ACK/NACK from the peer 5113 * to a version info msg that we sent. 5114 */ 5115 static int 5116 vgen_handle_version_negotiate(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp) 5117 { 5118 vgen_t *vgenp; 5119 vio_ver_msg_t *vermsg = (vio_ver_msg_t *)tagp; 5120 int ack = 0; 5121 int failed = 0; 5122 int idx; 5123 vgen_ver_t *versions = ldcp->vgen_versions; 5124 int rv = 0; 5125 5126 vgenp = LDC_TO_VGEN(ldcp); 5127 DBG1(vgenp, ldcp, "enter\n"); 5128 switch (tagp->vio_subtype) { 5129 case VIO_SUBTYPE_INFO: 5130 5131 /* Cache sid of peer if this is the first time */ 5132 if (ldcp->peer_sid == 0) { 5133 DBG2(vgenp, ldcp, "Caching peer_sid(%x)\n", 5134 tagp->vio_sid); 5135 ldcp->peer_sid = tagp->vio_sid; 5136 } 5137 5138 if (ldcp->hphase != VH_PHASE1) { 5139 /* 5140 * If we are not already in VH_PHASE1, reset to 5141 * pre-handshake state, and initiate handshake 5142 * to the peer too. 5143 */ 5144 vgen_handshake_reset(ldcp); 5145 vgen_handshake(vh_nextphase(ldcp)); 5146 } 5147 ldcp->hstate |= VER_INFO_RCVD; 5148 5149 /* save peer's requested values */ 5150 ldcp->peer_hparams.ver_major = vermsg->ver_major; 5151 ldcp->peer_hparams.ver_minor = vermsg->ver_minor; 5152 ldcp->peer_hparams.dev_class = vermsg->dev_class; 5153 5154 if ((vermsg->dev_class != VDEV_NETWORK) && 5155 (vermsg->dev_class != VDEV_NETWORK_SWITCH)) { 5156 /* unsupported dev_class, send NACK */ 5157 5158 DWARN(vgenp, ldcp, "Version Negotiation Failed\n"); 5159 5160 tagp->vio_subtype = VIO_SUBTYPE_NACK; 5161 tagp->vio_sid = ldcp->local_sid; 5162 /* send reply msg back to peer */ 5163 rv = vgen_sendmsg(ldcp, (caddr_t)tagp, 5164 sizeof (*vermsg), B_FALSE); 5165 if (rv != VGEN_SUCCESS) { 5166 return (rv); 5167 } 5168 return (VGEN_FAILURE); 5169 } 5170 5171 DBG2(vgenp, ldcp, "VER_INFO_RCVD, ver(%d,%d)\n", 5172 vermsg->ver_major, vermsg->ver_minor); 5173 5174 idx = 0; 5175 5176 for (;;) { 5177 5178 if (vermsg->ver_major > versions[idx].ver_major) { 5179 5180 /* nack with next lower version */ 5181 tagp->vio_subtype = VIO_SUBTYPE_NACK; 5182 vermsg->ver_major = versions[idx].ver_major; 5183 vermsg->ver_minor = versions[idx].ver_minor; 5184 break; 5185 } 5186 5187 if (vermsg->ver_major == versions[idx].ver_major) { 5188 5189 /* major version match - ACK version */ 5190 tagp->vio_subtype = VIO_SUBTYPE_ACK; 5191 ack = 1; 5192 5193 /* 5194 * lower minor version to the one this endpt 5195 * supports, if necessary 5196 */ 5197 if (vermsg->ver_minor > 5198 versions[idx].ver_minor) { 5199 vermsg->ver_minor = 5200 versions[idx].ver_minor; 5201 ldcp->peer_hparams.ver_minor = 5202 versions[idx].ver_minor; 5203 } 5204 break; 5205 } 5206 5207 idx++; 5208 5209 if (idx == VGEN_NUM_VER) { 5210 5211 /* no version match - send NACK */ 5212 tagp->vio_subtype = VIO_SUBTYPE_NACK; 5213 vermsg->ver_major = 0; 5214 vermsg->ver_minor = 0; 5215 failed = 1; 5216 break; 5217 } 5218 5219 } 5220 5221 tagp->vio_sid = ldcp->local_sid; 5222 5223 /* send reply msg back to peer */ 5224 rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*vermsg), 5225 B_FALSE); 5226 if (rv != VGEN_SUCCESS) { 5227 return (rv); 5228 } 5229 5230 if (ack) { 5231 ldcp->hstate |= VER_ACK_SENT; 5232 DBG2(vgenp, ldcp, "VER_ACK_SENT, ver(%d,%d) \n", 5233 vermsg->ver_major, vermsg->ver_minor); 5234 } 5235 if (failed) { 5236 DWARN(vgenp, ldcp, "Negotiation Failed\n"); 5237 return (VGEN_FAILURE); 5238 } 5239 if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) { 5240 5241 /* VER_ACK_SENT and VER_ACK_RCVD */ 5242 5243 /* local and peer versions match? */ 5244 ASSERT((ldcp->local_hparams.ver_major == 5245 ldcp->peer_hparams.ver_major) && 5246 (ldcp->local_hparams.ver_minor == 5247 ldcp->peer_hparams.ver_minor)); 5248 5249 vgen_set_vnet_proto_ops(ldcp); 5250 5251 /* move to the next phase */ 5252 vgen_handshake(vh_nextphase(ldcp)); 5253 } 5254 5255 break; 5256 5257 case VIO_SUBTYPE_ACK: 5258 5259 if (ldcp->hphase != VH_PHASE1) { 5260 /* This should not happen. */ 5261 DWARN(vgenp, ldcp, "Invalid Phase(%u)\n", ldcp->hphase); 5262 return (VGEN_FAILURE); 5263 } 5264 5265 /* SUCCESS - we have agreed on a version */ 5266 ldcp->local_hparams.ver_major = vermsg->ver_major; 5267 ldcp->local_hparams.ver_minor = vermsg->ver_minor; 5268 ldcp->hstate |= VER_ACK_RCVD; 5269 5270 DBG2(vgenp, ldcp, "VER_ACK_RCVD, ver(%d,%d) \n", 5271 vermsg->ver_major, vermsg->ver_minor); 5272 5273 if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) { 5274 5275 /* VER_ACK_SENT and VER_ACK_RCVD */ 5276 5277 /* local and peer versions match? */ 5278 ASSERT((ldcp->local_hparams.ver_major == 5279 ldcp->peer_hparams.ver_major) && 5280 (ldcp->local_hparams.ver_minor == 5281 ldcp->peer_hparams.ver_minor)); 5282 5283 vgen_set_vnet_proto_ops(ldcp); 5284 5285 /* move to the next phase */ 5286 vgen_handshake(vh_nextphase(ldcp)); 5287 } 5288 break; 5289 5290 case VIO_SUBTYPE_NACK: 5291 5292 if (ldcp->hphase != VH_PHASE1) { 5293 /* This should not happen. */ 5294 DWARN(vgenp, ldcp, "VER_NACK_RCVD Invalid " 5295 "Phase(%u)\n", ldcp->hphase); 5296 return (VGEN_FAILURE); 5297 } 5298 5299 DBG2(vgenp, ldcp, "VER_NACK_RCVD next ver(%d,%d)\n", 5300 vermsg->ver_major, vermsg->ver_minor); 5301 5302 /* check if version in NACK is zero */ 5303 if (vermsg->ver_major == 0 && vermsg->ver_minor == 0) { 5304 /* 5305 * Version Negotiation has failed. 5306 */ 5307 DWARN(vgenp, ldcp, "Version Negotiation Failed\n"); 5308 return (VGEN_FAILURE); 5309 } 5310 5311 idx = 0; 5312 5313 for (;;) { 5314 5315 if (vermsg->ver_major > versions[idx].ver_major) { 5316 /* select next lower version */ 5317 5318 ldcp->local_hparams.ver_major = 5319 versions[idx].ver_major; 5320 ldcp->local_hparams.ver_minor = 5321 versions[idx].ver_minor; 5322 break; 5323 } 5324 5325 if (vermsg->ver_major == versions[idx].ver_major) { 5326 /* major version match */ 5327 5328 ldcp->local_hparams.ver_major = 5329 versions[idx].ver_major; 5330 5331 ldcp->local_hparams.ver_minor = 5332 versions[idx].ver_minor; 5333 break; 5334 } 5335 5336 idx++; 5337 5338 if (idx == VGEN_NUM_VER) { 5339 /* 5340 * no version match. 5341 * Version Negotiation has failed. 5342 */ 5343 DWARN(vgenp, ldcp, 5344 "Version Negotiation Failed\n"); 5345 return (VGEN_FAILURE); 5346 } 5347 5348 } 5349 5350 rv = vgen_send_version_negotiate(ldcp); 5351 if (rv != VGEN_SUCCESS) { 5352 return (rv); 5353 } 5354 5355 break; 5356 } 5357 5358 DBG1(vgenp, ldcp, "exit\n"); 5359 return (VGEN_SUCCESS); 5360 } 5361 5362 /* Check if the attributes are supported */ 5363 static int 5364 vgen_check_attr_info(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg) 5365 { 5366 vgen_hparams_t *lp = &ldcp->local_hparams; 5367 5368 if ((msg->addr_type != ADDR_TYPE_MAC) || 5369 (msg->ack_freq > 64) || 5370 (msg->xfer_mode != lp->xfer_mode)) { 5371 return (VGEN_FAILURE); 5372 } 5373 5374 if (VGEN_VER_LT(ldcp, 1, 4)) { 5375 /* versions < 1.4, mtu must match */ 5376 if (msg->mtu != lp->mtu) { 5377 return (VGEN_FAILURE); 5378 } 5379 } else { 5380 /* Ver >= 1.4, validate mtu of the peer is at least ETHERMAX */ 5381 if (msg->mtu < ETHERMAX) { 5382 return (VGEN_FAILURE); 5383 } 5384 } 5385 5386 return (VGEN_SUCCESS); 5387 } 5388 5389 /* 5390 * Handle an attribute info msg from the peer or an ACK/NACK from the peer 5391 * to an attr info msg that we sent. 5392 */ 5393 static int 5394 vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp) 5395 { 5396 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 5397 vnet_attr_msg_t *msg = (vnet_attr_msg_t *)tagp; 5398 vgen_hparams_t *lp = &ldcp->local_hparams; 5399 vgen_hparams_t *rp = &ldcp->peer_hparams; 5400 int ack = 1; 5401 int rv = 0; 5402 uint32_t mtu; 5403 5404 DBG1(vgenp, ldcp, "enter\n"); 5405 if (ldcp->hphase != VH_PHASE2) { 5406 DWARN(vgenp, ldcp, "Rcvd ATTR_INFO subtype(%d)," 5407 " Invalid Phase(%u)\n", 5408 tagp->vio_subtype, ldcp->hphase); 5409 return (VGEN_FAILURE); 5410 } 5411 switch (tagp->vio_subtype) { 5412 case VIO_SUBTYPE_INFO: 5413 5414 DBG2(vgenp, ldcp, "ATTR_INFO_RCVD \n"); 5415 ldcp->hstate |= ATTR_INFO_RCVD; 5416 5417 /* save peer's values */ 5418 rp->mtu = msg->mtu; 5419 rp->addr = msg->addr; 5420 rp->addr_type = msg->addr_type; 5421 rp->xfer_mode = msg->xfer_mode; 5422 rp->ack_freq = msg->ack_freq; 5423 5424 rv = vgen_check_attr_info(ldcp, msg); 5425 if (rv == VGEN_FAILURE) { 5426 /* unsupported attr, send NACK */ 5427 ack = 0; 5428 } else { 5429 5430 if (VGEN_VER_GTEQ(ldcp, 1, 4)) { 5431 5432 /* 5433 * Versions >= 1.4: 5434 * The mtu is negotiated down to the 5435 * minimum of our mtu and peer's mtu. 5436 */ 5437 mtu = MIN(msg->mtu, vgenp->max_frame_size); 5438 5439 /* 5440 * If we have received an ack for the attr info 5441 * that we sent, then check if the mtu computed 5442 * above matches the mtu that the peer had ack'd 5443 * (saved in local hparams). If they don't 5444 * match, we fail the handshake. 5445 */ 5446 if (ldcp->hstate & ATTR_ACK_RCVD) { 5447 if (mtu != lp->mtu) { 5448 /* send NACK */ 5449 ack = 0; 5450 } 5451 } else { 5452 /* 5453 * Save the mtu computed above in our 5454 * attr parameters, so it gets sent in 5455 * the attr info from us to the peer. 5456 */ 5457 lp->mtu = mtu; 5458 } 5459 5460 /* save the MIN mtu in the msg to be replied */ 5461 msg->mtu = mtu; 5462 5463 } 5464 } 5465 5466 5467 if (ack) { 5468 tagp->vio_subtype = VIO_SUBTYPE_ACK; 5469 } else { 5470 tagp->vio_subtype = VIO_SUBTYPE_NACK; 5471 } 5472 tagp->vio_sid = ldcp->local_sid; 5473 5474 /* send reply msg back to peer */ 5475 rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msg), 5476 B_FALSE); 5477 if (rv != VGEN_SUCCESS) { 5478 return (rv); 5479 } 5480 5481 if (ack) { 5482 ldcp->hstate |= ATTR_ACK_SENT; 5483 DBG2(vgenp, ldcp, "ATTR_ACK_SENT \n"); 5484 } else { 5485 /* failed */ 5486 DWARN(vgenp, ldcp, "ATTR_NACK_SENT \n"); 5487 return (VGEN_FAILURE); 5488 } 5489 5490 if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) { 5491 vgen_handshake(vh_nextphase(ldcp)); 5492 } 5493 5494 break; 5495 5496 case VIO_SUBTYPE_ACK: 5497 5498 if (VGEN_VER_GTEQ(ldcp, 1, 5) && 5499 ldcp->portp == vgenp->vsw_portp) { 5500 /* 5501 * Versions >= 1.5: 5502 * If the vnet device has been configured to get 5503 * physical link state updates, check the corresponding 5504 * bits in the ack msg, if the peer is vswitch. 5505 */ 5506 if (((lp->physlink_update & 5507 PHYSLINK_UPDATE_STATE_MASK) == 5508 PHYSLINK_UPDATE_STATE) && 5509 5510 ((msg->physlink_update & 5511 PHYSLINK_UPDATE_STATE_MASK) == 5512 PHYSLINK_UPDATE_STATE_ACK)) { 5513 vgenp->pls_negotiated = B_TRUE; 5514 } else { 5515 vgenp->pls_negotiated = B_FALSE; 5516 } 5517 } 5518 5519 if (VGEN_VER_GTEQ(ldcp, 1, 4)) { 5520 /* 5521 * Versions >= 1.4: 5522 * The ack msg sent by the peer contains the minimum of 5523 * our mtu (that we had sent in our attr info) and the 5524 * peer's mtu. 5525 * 5526 * If we have sent an ack for the attr info msg from 5527 * the peer, check if the mtu that was computed then 5528 * (saved in local hparams) matches the mtu that the 5529 * peer has ack'd. If they don't match, we fail the 5530 * handshake. 5531 */ 5532 if (ldcp->hstate & ATTR_ACK_SENT) { 5533 if (lp->mtu != msg->mtu) { 5534 return (VGEN_FAILURE); 5535 } 5536 } else { 5537 /* 5538 * If the mtu ack'd by the peer is > our mtu 5539 * fail handshake. Otherwise, save the mtu, so 5540 * we can validate it when we receive attr info 5541 * from our peer. 5542 */ 5543 if (msg->mtu > lp->mtu) { 5544 return (VGEN_FAILURE); 5545 } 5546 if (msg->mtu <= lp->mtu) { 5547 lp->mtu = msg->mtu; 5548 } 5549 } 5550 } 5551 5552 ldcp->hstate |= ATTR_ACK_RCVD; 5553 5554 DBG2(vgenp, ldcp, "ATTR_ACK_RCVD \n"); 5555 5556 if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) { 5557 vgen_handshake(vh_nextphase(ldcp)); 5558 } 5559 break; 5560 5561 case VIO_SUBTYPE_NACK: 5562 5563 DBG2(vgenp, ldcp, "ATTR_NACK_RCVD \n"); 5564 return (VGEN_FAILURE); 5565 } 5566 DBG1(vgenp, ldcp, "exit\n"); 5567 return (VGEN_SUCCESS); 5568 } 5569 5570 /* Check if the dring info msg is ok */ 5571 static int 5572 vgen_check_dring_reg(vio_dring_reg_msg_t *msg) 5573 { 5574 /* check if msg contents are ok */ 5575 if ((msg->num_descriptors < 128) || (msg->descriptor_size < 5576 sizeof (vnet_public_desc_t))) { 5577 return (VGEN_FAILURE); 5578 } 5579 return (VGEN_SUCCESS); 5580 } 5581 5582 /* 5583 * Handle a descriptor ring register msg from the peer or an ACK/NACK from 5584 * the peer to a dring register msg that we sent. 5585 */ 5586 static int 5587 vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp) 5588 { 5589 vio_dring_reg_msg_t *msg = (vio_dring_reg_msg_t *)tagp; 5590 ldc_mem_cookie_t dcookie; 5591 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 5592 int ack = 0; 5593 int rv = 0; 5594 5595 DBG1(vgenp, ldcp, "enter\n"); 5596 if (ldcp->hphase < VH_PHASE2) { 5597 /* dring_info can be rcvd in any of the phases after Phase1 */ 5598 DWARN(vgenp, ldcp, 5599 "Rcvd DRING_INFO Subtype (%d), Invalid Phase(%u)\n", 5600 tagp->vio_subtype, ldcp->hphase); 5601 return (VGEN_FAILURE); 5602 } 5603 switch (tagp->vio_subtype) { 5604 case VIO_SUBTYPE_INFO: 5605 5606 DBG2(vgenp, ldcp, "DRING_INFO_RCVD \n"); 5607 ldcp->hstate |= DRING_INFO_RCVD; 5608 bcopy((msg->cookie), &dcookie, sizeof (dcookie)); 5609 5610 ASSERT(msg->ncookies == 1); 5611 5612 if (vgen_check_dring_reg(msg) == VGEN_SUCCESS) { 5613 /* 5614 * verified dring info msg to be ok, 5615 * now try to map the remote dring. 5616 */ 5617 rv = vgen_init_rxds(ldcp, msg->num_descriptors, 5618 msg->descriptor_size, &dcookie, 5619 msg->ncookies); 5620 if (rv == DDI_SUCCESS) { 5621 /* now we can ack the peer */ 5622 ack = 1; 5623 } 5624 } 5625 if (ack == 0) { 5626 /* failed, send NACK */ 5627 tagp->vio_subtype = VIO_SUBTYPE_NACK; 5628 } else { 5629 if (!(ldcp->peer_hparams.dring_ready)) { 5630 5631 /* save peer's dring_info values */ 5632 bcopy(&dcookie, 5633 &(ldcp->peer_hparams.dring_cookie), 5634 sizeof (dcookie)); 5635 ldcp->peer_hparams.num_desc = 5636 msg->num_descriptors; 5637 ldcp->peer_hparams.desc_size = 5638 msg->descriptor_size; 5639 ldcp->peer_hparams.num_dcookies = 5640 msg->ncookies; 5641 5642 /* set dring_ident for the peer */ 5643 ldcp->peer_hparams.dring_ident = 5644 (uint64_t)ldcp->rxdp; 5645 /* return the dring_ident in ack msg */ 5646 msg->dring_ident = 5647 (uint64_t)ldcp->rxdp; 5648 5649 ldcp->peer_hparams.dring_ready = B_TRUE; 5650 } 5651 tagp->vio_subtype = VIO_SUBTYPE_ACK; 5652 } 5653 tagp->vio_sid = ldcp->local_sid; 5654 /* send reply msg back to peer */ 5655 rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msg), 5656 B_FALSE); 5657 if (rv != VGEN_SUCCESS) { 5658 return (rv); 5659 } 5660 5661 if (ack) { 5662 ldcp->hstate |= DRING_ACK_SENT; 5663 DBG2(vgenp, ldcp, "DRING_ACK_SENT"); 5664 } else { 5665 DWARN(vgenp, ldcp, "DRING_NACK_SENT"); 5666 return (VGEN_FAILURE); 5667 } 5668 5669 if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) { 5670 vgen_handshake(vh_nextphase(ldcp)); 5671 } 5672 5673 break; 5674 5675 case VIO_SUBTYPE_ACK: 5676 5677 ldcp->hstate |= DRING_ACK_RCVD; 5678 5679 DBG2(vgenp, ldcp, "DRING_ACK_RCVD"); 5680 5681 if (!(ldcp->local_hparams.dring_ready)) { 5682 /* local dring is now ready */ 5683 ldcp->local_hparams.dring_ready = B_TRUE; 5684 5685 /* save dring_ident acked by peer */ 5686 ldcp->local_hparams.dring_ident = 5687 msg->dring_ident; 5688 } 5689 5690 if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) { 5691 vgen_handshake(vh_nextphase(ldcp)); 5692 } 5693 5694 break; 5695 5696 case VIO_SUBTYPE_NACK: 5697 5698 DBG2(vgenp, ldcp, "DRING_NACK_RCVD"); 5699 return (VGEN_FAILURE); 5700 } 5701 DBG1(vgenp, ldcp, "exit\n"); 5702 return (VGEN_SUCCESS); 5703 } 5704 5705 /* 5706 * Handle a rdx info msg from the peer or an ACK/NACK 5707 * from the peer to a rdx info msg that we sent. 5708 */ 5709 static int 5710 vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp) 5711 { 5712 int rv = 0; 5713 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 5714 5715 DBG1(vgenp, ldcp, "enter\n"); 5716 if (ldcp->hphase != VH_PHASE3) { 5717 DWARN(vgenp, ldcp, 5718 "Rcvd RDX_INFO Subtype (%d), Invalid Phase(%u)\n", 5719 tagp->vio_subtype, ldcp->hphase); 5720 return (VGEN_FAILURE); 5721 } 5722 switch (tagp->vio_subtype) { 5723 case VIO_SUBTYPE_INFO: 5724 5725 DBG2(vgenp, ldcp, "RDX_INFO_RCVD \n"); 5726 ldcp->hstate |= RDX_INFO_RCVD; 5727 5728 tagp->vio_subtype = VIO_SUBTYPE_ACK; 5729 tagp->vio_sid = ldcp->local_sid; 5730 /* send reply msg back to peer */ 5731 rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vio_rdx_msg_t), 5732 B_FALSE); 5733 if (rv != VGEN_SUCCESS) { 5734 return (rv); 5735 } 5736 5737 ldcp->hstate |= RDX_ACK_SENT; 5738 DBG2(vgenp, ldcp, "RDX_ACK_SENT \n"); 5739 5740 if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) { 5741 vgen_handshake(vh_nextphase(ldcp)); 5742 } 5743 5744 break; 5745 5746 case VIO_SUBTYPE_ACK: 5747 5748 ldcp->hstate |= RDX_ACK_RCVD; 5749 5750 DBG2(vgenp, ldcp, "RDX_ACK_RCVD \n"); 5751 5752 if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) { 5753 vgen_handshake(vh_nextphase(ldcp)); 5754 } 5755 break; 5756 5757 case VIO_SUBTYPE_NACK: 5758 5759 DBG2(vgenp, ldcp, "RDX_NACK_RCVD \n"); 5760 return (VGEN_FAILURE); 5761 } 5762 DBG1(vgenp, ldcp, "exit\n"); 5763 return (VGEN_SUCCESS); 5764 } 5765 5766 /* Handle ACK/NACK from vsw to a set multicast msg that we sent */ 5767 static int 5768 vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp) 5769 { 5770 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 5771 vnet_mcast_msg_t *msgp = (vnet_mcast_msg_t *)tagp; 5772 struct ether_addr *addrp; 5773 int count; 5774 int i; 5775 5776 DBG1(vgenp, ldcp, "enter\n"); 5777 switch (tagp->vio_subtype) { 5778 5779 case VIO_SUBTYPE_INFO: 5780 5781 /* vnet shouldn't recv set mcast msg, only vsw handles it */ 5782 DWARN(vgenp, ldcp, "rcvd SET_MCAST_INFO \n"); 5783 break; 5784 5785 case VIO_SUBTYPE_ACK: 5786 5787 /* success adding/removing multicast addr */ 5788 DBG1(vgenp, ldcp, "rcvd SET_MCAST_ACK \n"); 5789 break; 5790 5791 case VIO_SUBTYPE_NACK: 5792 5793 DWARN(vgenp, ldcp, "rcvd SET_MCAST_NACK \n"); 5794 if (!(msgp->set)) { 5795 /* multicast remove request failed */ 5796 break; 5797 } 5798 5799 /* multicast add request failed */ 5800 for (count = 0; count < msgp->count; count++) { 5801 addrp = &(msgp->mca[count]); 5802 5803 /* delete address from the table */ 5804 for (i = 0; i < vgenp->mccount; i++) { 5805 if (ether_cmp(addrp, 5806 &(vgenp->mctab[i])) == 0) { 5807 if (vgenp->mccount > 1) { 5808 int t = vgenp->mccount - 1; 5809 vgenp->mctab[i] = 5810 vgenp->mctab[t]; 5811 } 5812 vgenp->mccount--; 5813 break; 5814 } 5815 } 5816 } 5817 break; 5818 5819 } 5820 DBG1(vgenp, ldcp, "exit\n"); 5821 5822 return (VGEN_SUCCESS); 5823 } 5824 5825 /* 5826 * Physical link information message from the peer. Only vswitch should send 5827 * us this message; if the vnet device has been configured to get physical link 5828 * state updates. Note that we must have already negotiated this with the 5829 * vswitch during attribute exchange phase of handshake. 5830 */ 5831 static int 5832 vgen_handle_physlink_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp) 5833 { 5834 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 5835 vnet_physlink_msg_t *msgp = (vnet_physlink_msg_t *)tagp; 5836 link_state_t link_state; 5837 int rv; 5838 5839 if (ldcp->portp != vgenp->vsw_portp) { 5840 /* 5841 * drop the message and don't process; as we should 5842 * receive physlink_info message from only vswitch. 5843 */ 5844 return (VGEN_SUCCESS); 5845 } 5846 5847 if (vgenp->pls_negotiated == B_FALSE) { 5848 /* 5849 * drop the message and don't process; as we should receive 5850 * physlink_info message only if physlink update is enabled for 5851 * the device and negotiated with vswitch. 5852 */ 5853 return (VGEN_SUCCESS); 5854 } 5855 5856 switch (tagp->vio_subtype) { 5857 5858 case VIO_SUBTYPE_INFO: 5859 5860 if ((msgp->physlink_info & VNET_PHYSLINK_STATE_MASK) == 5861 VNET_PHYSLINK_STATE_UP) { 5862 link_state = LINK_STATE_UP; 5863 } else { 5864 link_state = LINK_STATE_DOWN; 5865 } 5866 5867 if (vgenp->phys_link_state != link_state) { 5868 vgenp->phys_link_state = link_state; 5869 mutex_exit(&ldcp->cblock); 5870 5871 /* Now update the stack */ 5872 vgen_link_update(vgenp, link_state); 5873 5874 mutex_enter(&ldcp->cblock); 5875 } 5876 5877 tagp->vio_subtype = VIO_SUBTYPE_ACK; 5878 tagp->vio_sid = ldcp->local_sid; 5879 5880 /* send reply msg back to peer */ 5881 rv = vgen_sendmsg(ldcp, (caddr_t)tagp, 5882 sizeof (vnet_physlink_msg_t), B_FALSE); 5883 if (rv != VGEN_SUCCESS) { 5884 return (rv); 5885 } 5886 break; 5887 5888 case VIO_SUBTYPE_ACK: 5889 5890 /* vnet shouldn't recv physlink acks */ 5891 DWARN(vgenp, ldcp, "rcvd PHYSLINK_ACK \n"); 5892 break; 5893 5894 case VIO_SUBTYPE_NACK: 5895 5896 /* vnet shouldn't recv physlink nacks */ 5897 DWARN(vgenp, ldcp, "rcvd PHYSLINK_NACK \n"); 5898 break; 5899 5900 } 5901 DBG1(vgenp, ldcp, "exit\n"); 5902 5903 return (VGEN_SUCCESS); 5904 } 5905 5906 /* handler for control messages received from the peer ldc end-point */ 5907 static int 5908 vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp) 5909 { 5910 int rv = 0; 5911 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 5912 5913 DBG1(vgenp, ldcp, "enter\n"); 5914 switch (tagp->vio_subtype_env) { 5915 5916 case VIO_VER_INFO: 5917 rv = vgen_handle_version_negotiate(ldcp, tagp); 5918 break; 5919 5920 case VIO_ATTR_INFO: 5921 rv = vgen_handle_attr_info(ldcp, tagp); 5922 break; 5923 5924 case VIO_DRING_REG: 5925 rv = vgen_handle_dring_reg(ldcp, tagp); 5926 break; 5927 5928 case VIO_RDX: 5929 rv = vgen_handle_rdx_info(ldcp, tagp); 5930 break; 5931 5932 case VNET_MCAST_INFO: 5933 rv = vgen_handle_mcast_info(ldcp, tagp); 5934 break; 5935 5936 case VIO_DDS_INFO: 5937 /* 5938 * If we are in the process of resetting the vswitch channel, 5939 * drop the dds message. A new handshake will be initiated 5940 * when the channel comes back up after the reset and dds 5941 * negotiation can then continue. 5942 */ 5943 if (ldcp->need_ldc_reset == B_TRUE) { 5944 break; 5945 } 5946 rv = vgen_dds_rx(ldcp, tagp); 5947 break; 5948 5949 case VNET_PHYSLINK_INFO: 5950 rv = vgen_handle_physlink_info(ldcp, tagp); 5951 break; 5952 } 5953 5954 DBG1(vgenp, ldcp, "exit rv(%d)\n", rv); 5955 return (rv); 5956 } 5957 5958 /* handler for data messages received from the peer ldc end-point */ 5959 static int 5960 vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t msglen) 5961 { 5962 int rv = 0; 5963 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 5964 5965 DBG1(vgenp, ldcp, "enter\n"); 5966 5967 if (ldcp->hphase != VH_DONE) 5968 return (rv); 5969 5970 if (tagp->vio_subtype == VIO_SUBTYPE_INFO) { 5971 rv = vgen_check_datamsg_seq(ldcp, tagp); 5972 if (rv != 0) { 5973 return (rv); 5974 } 5975 } 5976 5977 switch (tagp->vio_subtype_env) { 5978 case VIO_DRING_DATA: 5979 rv = vgen_handle_dring_data(ldcp, tagp); 5980 break; 5981 5982 case VIO_PKT_DATA: 5983 ldcp->rx_pktdata((void *)ldcp, (void *)tagp, msglen); 5984 break; 5985 default: 5986 break; 5987 } 5988 5989 DBG1(vgenp, ldcp, "exit rv(%d)\n", rv); 5990 return (rv); 5991 } 5992 5993 /* 5994 * dummy pkt data handler function for vnet protocol version 1.0 5995 */ 5996 static void 5997 vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen) 5998 { 5999 _NOTE(ARGUNUSED(arg1, arg2, msglen)) 6000 } 6001 6002 /* 6003 * This function handles raw pkt data messages received over the channel. 6004 * Currently, only priority-eth-type frames are received through this mechanism. 6005 * In this case, the frame(data) is present within the message itself which 6006 * is copied into an mblk before sending it up the stack. 6007 */ 6008 static void 6009 vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen) 6010 { 6011 vgen_ldc_t *ldcp = (vgen_ldc_t *)arg1; 6012 vio_raw_data_msg_t *pkt = (vio_raw_data_msg_t *)arg2; 6013 uint32_t size; 6014 mblk_t *mp; 6015 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 6016 vgen_stats_t *statsp = &ldcp->stats; 6017 vgen_hparams_t *lp = &ldcp->local_hparams; 6018 vio_net_rx_cb_t vrx_cb; 6019 6020 ASSERT(MUTEX_HELD(&ldcp->cblock)); 6021 6022 mutex_exit(&ldcp->cblock); 6023 6024 size = msglen - VIO_PKT_DATA_HDRSIZE; 6025 if (size < ETHERMIN || size > lp->mtu) { 6026 (void) atomic_inc_32(&statsp->rx_pri_fail); 6027 goto exit; 6028 } 6029 6030 mp = vio_multipool_allocb(&ldcp->vmp, size); 6031 if (mp == NULL) { 6032 mp = allocb(size, BPRI_MED); 6033 if (mp == NULL) { 6034 (void) atomic_inc_32(&statsp->rx_pri_fail); 6035 DWARN(vgenp, ldcp, "allocb failure, " 6036 "unable to process priority frame\n"); 6037 goto exit; 6038 } 6039 } 6040 6041 /* copy the frame from the payload of raw data msg into the mblk */ 6042 bcopy(pkt->data, mp->b_rptr, size); 6043 mp->b_wptr = mp->b_rptr + size; 6044 6045 /* update stats */ 6046 (void) atomic_inc_64(&statsp->rx_pri_packets); 6047 (void) atomic_add_64(&statsp->rx_pri_bytes, size); 6048 6049 /* send up; call vrx_cb() as cblock is already released */ 6050 vrx_cb = ldcp->portp->vcb.vio_net_rx_cb; 6051 vrx_cb(ldcp->portp->vhp, mp); 6052 6053 exit: 6054 mutex_enter(&ldcp->cblock); 6055 } 6056 6057 static int 6058 vgen_send_dring_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t start, 6059 int32_t end, uint8_t pstate) 6060 { 6061 int rv = 0; 6062 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 6063 vio_dring_msg_t *msgp = (vio_dring_msg_t *)tagp; 6064 6065 tagp->vio_subtype = VIO_SUBTYPE_ACK; 6066 tagp->vio_sid = ldcp->local_sid; 6067 msgp->start_idx = start; 6068 msgp->end_idx = end; 6069 msgp->dring_process_state = pstate; 6070 rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp), B_FALSE); 6071 if (rv != VGEN_SUCCESS) { 6072 DWARN(vgenp, ldcp, "vgen_sendmsg failed\n"); 6073 } 6074 return (rv); 6075 } 6076 6077 static int 6078 vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp) 6079 { 6080 int rv = 0; 6081 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 6082 6083 6084 DBG1(vgenp, ldcp, "enter\n"); 6085 switch (tagp->vio_subtype) { 6086 6087 case VIO_SUBTYPE_INFO: 6088 /* 6089 * To reduce the locking contention, release the 6090 * cblock here and re-acquire it once we are done 6091 * receiving packets. 6092 */ 6093 mutex_exit(&ldcp->cblock); 6094 mutex_enter(&ldcp->rxlock); 6095 rv = vgen_handle_dring_data_info(ldcp, tagp); 6096 mutex_exit(&ldcp->rxlock); 6097 mutex_enter(&ldcp->cblock); 6098 break; 6099 6100 case VIO_SUBTYPE_ACK: 6101 rv = vgen_handle_dring_data_ack(ldcp, tagp); 6102 break; 6103 6104 case VIO_SUBTYPE_NACK: 6105 rv = vgen_handle_dring_data_nack(ldcp, tagp); 6106 break; 6107 } 6108 DBG1(vgenp, ldcp, "exit rv(%d)\n", rv); 6109 return (rv); 6110 } 6111 6112 static int 6113 vgen_handle_dring_data_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp) 6114 { 6115 uint32_t start; 6116 int32_t end; 6117 int rv = 0; 6118 vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp; 6119 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 6120 #ifdef VGEN_HANDLE_LOST_PKTS 6121 vgen_stats_t *statsp = &ldcp->stats; 6122 uint32_t rxi; 6123 int n; 6124 #endif 6125 6126 DBG1(vgenp, ldcp, "enter\n"); 6127 6128 start = dringmsg->start_idx; 6129 end = dringmsg->end_idx; 6130 /* 6131 * received a data msg, which contains the start and end 6132 * indices of the descriptors within the rx ring holding data, 6133 * the seq_num of data packet corresponding to the start index, 6134 * and the dring_ident. 6135 * We can now read the contents of each of these descriptors 6136 * and gather data from it. 6137 */ 6138 DBG1(vgenp, ldcp, "INFO: start(%d), end(%d)\n", 6139 start, end); 6140 6141 /* validate rx start and end indeces */ 6142 if (!(CHECK_RXI(start, ldcp)) || ((end != -1) && 6143 !(CHECK_RXI(end, ldcp)))) { 6144 DWARN(vgenp, ldcp, "Invalid Rx start(%d) or end(%d)\n", 6145 start, end); 6146 /* drop the message if invalid index */ 6147 return (rv); 6148 } 6149 6150 /* validate dring_ident */ 6151 if (dringmsg->dring_ident != ldcp->peer_hparams.dring_ident) { 6152 DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n", 6153 dringmsg->dring_ident); 6154 /* invalid dring_ident, drop the msg */ 6155 return (rv); 6156 } 6157 #ifdef DEBUG 6158 if (vgen_trigger_rxlost) { 6159 /* drop this msg to simulate lost pkts for debugging */ 6160 vgen_trigger_rxlost = 0; 6161 return (rv); 6162 } 6163 #endif 6164 6165 #ifdef VGEN_HANDLE_LOST_PKTS 6166 6167 /* receive start index doesn't match expected index */ 6168 if (ldcp->next_rxi != start) { 6169 DWARN(vgenp, ldcp, "next_rxi(%d) != start(%d)\n", 6170 ldcp->next_rxi, start); 6171 6172 /* calculate the number of pkts lost */ 6173 if (start >= ldcp->next_rxi) { 6174 n = start - ldcp->next_rxi; 6175 } else { 6176 n = ldcp->num_rxds - (ldcp->next_rxi - start); 6177 } 6178 6179 statsp->rx_lost_pkts += n; 6180 tagp->vio_subtype = VIO_SUBTYPE_NACK; 6181 tagp->vio_sid = ldcp->local_sid; 6182 /* indicate the range of lost descriptors */ 6183 dringmsg->start_idx = ldcp->next_rxi; 6184 rxi = start; 6185 DECR_RXI(rxi, ldcp); 6186 dringmsg->end_idx = rxi; 6187 /* dring ident is left unchanged */ 6188 rv = vgen_sendmsg(ldcp, (caddr_t)tagp, 6189 sizeof (*dringmsg), B_FALSE); 6190 if (rv != VGEN_SUCCESS) { 6191 DWARN(vgenp, ldcp, 6192 "vgen_sendmsg failed, stype:NACK\n"); 6193 return (rv); 6194 } 6195 /* 6196 * treat this range of descrs/pkts as dropped 6197 * and set the new expected value of next_rxi 6198 * and continue(below) to process from the new 6199 * start index. 6200 */ 6201 ldcp->next_rxi = start; 6202 } 6203 6204 #endif /* VGEN_HANDLE_LOST_PKTS */ 6205 6206 /* Now receive messages */ 6207 rv = vgen_process_dring_data(ldcp, tagp); 6208 6209 DBG1(vgenp, ldcp, "exit rv(%d)\n", rv); 6210 return (rv); 6211 } 6212 6213 static int 6214 vgen_process_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp) 6215 { 6216 boolean_t set_ack_start = B_FALSE; 6217 uint32_t start; 6218 uint32_t ack_end; 6219 uint32_t next_rxi; 6220 uint32_t rxi; 6221 int count = 0; 6222 int rv = 0; 6223 uint32_t retries = 0; 6224 vgen_stats_t *statsp; 6225 vnet_public_desc_t rxd; 6226 vio_dring_entry_hdr_t *hdrp; 6227 mblk_t *bp = NULL; 6228 mblk_t *bpt = NULL; 6229 uint32_t ack_start; 6230 boolean_t rxd_err = B_FALSE; 6231 mblk_t *mp = NULL; 6232 size_t nbytes; 6233 boolean_t ack_needed = B_FALSE; 6234 size_t nread; 6235 uint64_t off = 0; 6236 struct ether_header *ehp; 6237 vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp; 6238 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 6239 vgen_hparams_t *lp = &ldcp->local_hparams; 6240 6241 DBG1(vgenp, ldcp, "enter\n"); 6242 6243 statsp = &ldcp->stats; 6244 start = dringmsg->start_idx; 6245 6246 /* 6247 * start processing the descriptors from the specified 6248 * start index, up to the index a descriptor is not ready 6249 * to be processed or we process the entire descriptor ring 6250 * and wrap around upto the start index. 6251 */ 6252 6253 /* need to set the start index of descriptors to be ack'd */ 6254 set_ack_start = B_TRUE; 6255 6256 /* index upto which we have ack'd */ 6257 ack_end = start; 6258 DECR_RXI(ack_end, ldcp); 6259 6260 next_rxi = rxi = start; 6261 do { 6262 vgen_recv_retry: 6263 rv = vnet_dring_entry_copy(&(ldcp->rxdp[rxi]), &rxd, 6264 ldcp->dring_mtype, ldcp->rx_dhandle, rxi, rxi); 6265 if (rv != 0) { 6266 DWARN(vgenp, ldcp, "ldc_mem_dring_acquire() failed" 6267 " rv(%d)\n", rv); 6268 statsp->ierrors++; 6269 return (rv); 6270 } 6271 6272 hdrp = &rxd.hdr; 6273 6274 if (hdrp->dstate != VIO_DESC_READY) { 6275 /* 6276 * Before waiting and retry here, send up 6277 * the packets that are received already 6278 */ 6279 if (bp != NULL) { 6280 DTRACE_PROBE1(vgen_rcv_msgs, int, count); 6281 vgen_rx(ldcp, bp); 6282 count = 0; 6283 bp = bpt = NULL; 6284 } 6285 /* 6286 * descriptor is not ready. 6287 * retry descriptor acquire, stop processing 6288 * after max # retries. 6289 */ 6290 if (retries == vgen_recv_retries) 6291 break; 6292 retries++; 6293 drv_usecwait(vgen_recv_delay); 6294 goto vgen_recv_retry; 6295 } 6296 retries = 0; 6297 6298 if (set_ack_start) { 6299 /* 6300 * initialize the start index of the range 6301 * of descriptors to be ack'd. 6302 */ 6303 ack_start = rxi; 6304 set_ack_start = B_FALSE; 6305 } 6306 6307 if ((rxd.nbytes < ETHERMIN) || 6308 (rxd.nbytes > lp->mtu) || 6309 (rxd.ncookies == 0) || 6310 (rxd.ncookies > MAX_COOKIES)) { 6311 rxd_err = B_TRUE; 6312 } else { 6313 /* 6314 * Try to allocate an mblk from the free pool 6315 * of recv mblks for the channel. 6316 * If this fails, use allocb(). 6317 */ 6318 nbytes = (VNET_IPALIGN + rxd.nbytes + 7) & ~7; 6319 if (nbytes > ldcp->max_rxpool_size) { 6320 mp = allocb(VNET_IPALIGN + rxd.nbytes + 8, 6321 BPRI_MED); 6322 } else { 6323 mp = vio_multipool_allocb(&ldcp->vmp, nbytes); 6324 if (mp == NULL) { 6325 statsp->rx_vio_allocb_fail++; 6326 /* 6327 * Data buffer returned by allocb(9F) 6328 * is 8byte aligned. We allocate extra 6329 * 8 bytes to ensure size is multiple 6330 * of 8 bytes for ldc_mem_copy(). 6331 */ 6332 mp = allocb(VNET_IPALIGN + 6333 rxd.nbytes + 8, BPRI_MED); 6334 } 6335 } 6336 } 6337 if ((rxd_err) || (mp == NULL)) { 6338 /* 6339 * rxd_err or allocb() failure, 6340 * drop this packet, get next. 6341 */ 6342 if (rxd_err) { 6343 statsp->ierrors++; 6344 rxd_err = B_FALSE; 6345 } else { 6346 statsp->rx_allocb_fail++; 6347 } 6348 6349 ack_needed = hdrp->ack; 6350 6351 /* set descriptor done bit */ 6352 rv = vnet_dring_entry_set_dstate(&(ldcp->rxdp[rxi]), 6353 ldcp->dring_mtype, ldcp->rx_dhandle, rxi, rxi, 6354 VIO_DESC_DONE); 6355 if (rv != 0) { 6356 DWARN(vgenp, ldcp, 6357 "vnet_dring_entry_set_dstate err rv(%d)\n", 6358 rv); 6359 return (rv); 6360 } 6361 6362 if (ack_needed) { 6363 ack_needed = B_FALSE; 6364 /* 6365 * sender needs ack for this packet, 6366 * ack pkts upto this index. 6367 */ 6368 ack_end = rxi; 6369 6370 rv = vgen_send_dring_ack(ldcp, tagp, 6371 ack_start, ack_end, 6372 VIO_DP_ACTIVE); 6373 if (rv != VGEN_SUCCESS) { 6374 goto error_ret; 6375 } 6376 6377 /* need to set new ack start index */ 6378 set_ack_start = B_TRUE; 6379 } 6380 goto vgen_next_rxi; 6381 } 6382 6383 nread = nbytes; 6384 rv = ldc_mem_copy(ldcp->ldc_handle, 6385 (caddr_t)mp->b_rptr, off, &nread, 6386 rxd.memcookie, rxd.ncookies, LDC_COPY_IN); 6387 6388 /* if ldc_mem_copy() failed */ 6389 if (rv) { 6390 DWARN(vgenp, ldcp, "ldc_mem_copy err rv(%d)\n", rv); 6391 statsp->ierrors++; 6392 freemsg(mp); 6393 goto error_ret; 6394 } 6395 6396 ack_needed = hdrp->ack; 6397 6398 rv = vnet_dring_entry_set_dstate(&(ldcp->rxdp[rxi]), 6399 ldcp->dring_mtype, ldcp->rx_dhandle, rxi, rxi, 6400 VIO_DESC_DONE); 6401 if (rv != 0) { 6402 DWARN(vgenp, ldcp, 6403 "vnet_dring_entry_set_dstate err rv(%d)\n", rv); 6404 goto error_ret; 6405 } 6406 6407 mp->b_rptr += VNET_IPALIGN; 6408 6409 if (ack_needed) { 6410 ack_needed = B_FALSE; 6411 /* 6412 * sender needs ack for this packet, 6413 * ack pkts upto this index. 6414 */ 6415 ack_end = rxi; 6416 6417 rv = vgen_send_dring_ack(ldcp, tagp, 6418 ack_start, ack_end, VIO_DP_ACTIVE); 6419 if (rv != VGEN_SUCCESS) { 6420 goto error_ret; 6421 } 6422 6423 /* need to set new ack start index */ 6424 set_ack_start = B_TRUE; 6425 } 6426 6427 if (nread != nbytes) { 6428 DWARN(vgenp, ldcp, 6429 "ldc_mem_copy nread(%lx), nbytes(%lx)\n", 6430 nread, nbytes); 6431 statsp->ierrors++; 6432 freemsg(mp); 6433 goto vgen_next_rxi; 6434 } 6435 6436 /* point to the actual end of data */ 6437 mp->b_wptr = mp->b_rptr + rxd.nbytes; 6438 6439 /* update stats */ 6440 statsp->ipackets++; 6441 statsp->rbytes += rxd.nbytes; 6442 ehp = (struct ether_header *)mp->b_rptr; 6443 if (IS_BROADCAST(ehp)) 6444 statsp->brdcstrcv++; 6445 else if (IS_MULTICAST(ehp)) 6446 statsp->multircv++; 6447 6448 /* build a chain of received packets */ 6449 if (bp == NULL) { 6450 /* first pkt */ 6451 bp = mp; 6452 bpt = bp; 6453 bpt->b_next = NULL; 6454 } else { 6455 mp->b_next = NULL; 6456 bpt->b_next = mp; 6457 bpt = mp; 6458 } 6459 6460 if (count++ > vgen_chain_len) { 6461 DTRACE_PROBE1(vgen_rcv_msgs, int, count); 6462 vgen_rx(ldcp, bp); 6463 count = 0; 6464 bp = bpt = NULL; 6465 } 6466 6467 vgen_next_rxi: 6468 /* update end index of range of descrs to be ack'd */ 6469 ack_end = rxi; 6470 6471 /* update the next index to be processed */ 6472 INCR_RXI(next_rxi, ldcp); 6473 if (next_rxi == start) { 6474 /* 6475 * processed the entire descriptor ring upto 6476 * the index at which we started. 6477 */ 6478 break; 6479 } 6480 6481 rxi = next_rxi; 6482 6483 _NOTE(CONSTCOND) 6484 } while (1); 6485 6486 /* 6487 * send an ack message to peer indicating that we have stopped 6488 * processing descriptors. 6489 */ 6490 if (set_ack_start) { 6491 /* 6492 * We have ack'd upto some index and we have not 6493 * processed any descriptors beyond that index. 6494 * Use the last ack'd index as both the start and 6495 * end of range of descrs being ack'd. 6496 * Note: This results in acking the last index twice 6497 * and should be harmless. 6498 */ 6499 ack_start = ack_end; 6500 } 6501 6502 rv = vgen_send_dring_ack(ldcp, tagp, ack_start, ack_end, 6503 VIO_DP_STOPPED); 6504 if (rv != VGEN_SUCCESS) { 6505 goto error_ret; 6506 } 6507 6508 /* save new recv index of next dring msg */ 6509 ldcp->next_rxi = next_rxi; 6510 6511 error_ret: 6512 /* send up packets received so far */ 6513 if (bp != NULL) { 6514 DTRACE_PROBE1(vgen_rcv_msgs, int, count); 6515 vgen_rx(ldcp, bp); 6516 bp = bpt = NULL; 6517 } 6518 DBG1(vgenp, ldcp, "exit rv(%d)\n", rv); 6519 return (rv); 6520 6521 } 6522 6523 static int 6524 vgen_handle_dring_data_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp) 6525 { 6526 int rv = 0; 6527 uint32_t start; 6528 int32_t end; 6529 uint32_t txi; 6530 boolean_t ready_txd = B_FALSE; 6531 vgen_stats_t *statsp; 6532 vgen_private_desc_t *tbufp; 6533 vnet_public_desc_t *txdp; 6534 vio_dring_entry_hdr_t *hdrp; 6535 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 6536 vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp; 6537 6538 DBG1(vgenp, ldcp, "enter\n"); 6539 start = dringmsg->start_idx; 6540 end = dringmsg->end_idx; 6541 statsp = &ldcp->stats; 6542 6543 /* 6544 * received an ack corresponding to a specific descriptor for 6545 * which we had set the ACK bit in the descriptor (during 6546 * transmit). This enables us to reclaim descriptors. 6547 */ 6548 6549 DBG2(vgenp, ldcp, "ACK: start(%d), end(%d)\n", start, end); 6550 6551 /* validate start and end indeces in the tx ack msg */ 6552 if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) { 6553 /* drop the message if invalid index */ 6554 DWARN(vgenp, ldcp, "Invalid Tx ack start(%d) or end(%d)\n", 6555 start, end); 6556 return (rv); 6557 } 6558 /* validate dring_ident */ 6559 if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) { 6560 /* invalid dring_ident, drop the msg */ 6561 DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n", 6562 dringmsg->dring_ident); 6563 return (rv); 6564 } 6565 statsp->dring_data_acks++; 6566 6567 /* reclaim descriptors that are done */ 6568 vgen_reclaim(ldcp); 6569 6570 if (dringmsg->dring_process_state != VIO_DP_STOPPED) { 6571 /* 6572 * receiver continued processing descriptors after 6573 * sending us the ack. 6574 */ 6575 return (rv); 6576 } 6577 6578 statsp->dring_stopped_acks++; 6579 6580 /* receiver stopped processing descriptors */ 6581 mutex_enter(&ldcp->wrlock); 6582 mutex_enter(&ldcp->tclock); 6583 6584 /* 6585 * determine if there are any pending tx descriptors 6586 * ready to be processed by the receiver(peer) and if so, 6587 * send a message to the peer to restart receiving. 6588 */ 6589 ready_txd = B_FALSE; 6590 6591 /* 6592 * using the end index of the descriptor range for which 6593 * we received the ack, check if the next descriptor is 6594 * ready. 6595 */ 6596 txi = end; 6597 INCR_TXI(txi, ldcp); 6598 tbufp = &ldcp->tbufp[txi]; 6599 txdp = tbufp->descp; 6600 hdrp = &txdp->hdr; 6601 if (hdrp->dstate == VIO_DESC_READY) { 6602 ready_txd = B_TRUE; 6603 } else { 6604 /* 6605 * descr next to the end of ack'd descr range is not 6606 * ready. 6607 * starting from the current reclaim index, check 6608 * if any descriptor is ready. 6609 */ 6610 6611 txi = ldcp->cur_tbufp - ldcp->tbufp; 6612 tbufp = &ldcp->tbufp[txi]; 6613 6614 txdp = tbufp->descp; 6615 hdrp = &txdp->hdr; 6616 if (hdrp->dstate == VIO_DESC_READY) { 6617 ready_txd = B_TRUE; 6618 } 6619 6620 } 6621 6622 if (ready_txd) { 6623 /* 6624 * we have tx descriptor(s) ready to be 6625 * processed by the receiver. 6626 * send a message to the peer with the start index 6627 * of ready descriptors. 6628 */ 6629 rv = vgen_send_dring_data(ldcp, txi, -1); 6630 if (rv != VGEN_SUCCESS) { 6631 ldcp->resched_peer = B_TRUE; 6632 ldcp->resched_peer_txi = txi; 6633 mutex_exit(&ldcp->tclock); 6634 mutex_exit(&ldcp->wrlock); 6635 return (rv); 6636 } 6637 } else { 6638 /* 6639 * no ready tx descriptors. set the flag to send a 6640 * message to peer when tx descriptors are ready in 6641 * transmit routine. 6642 */ 6643 ldcp->resched_peer = B_TRUE; 6644 ldcp->resched_peer_txi = ldcp->cur_tbufp - ldcp->tbufp; 6645 } 6646 6647 mutex_exit(&ldcp->tclock); 6648 mutex_exit(&ldcp->wrlock); 6649 DBG1(vgenp, ldcp, "exit rv(%d)\n", rv); 6650 return (rv); 6651 } 6652 6653 static int 6654 vgen_handle_dring_data_nack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp) 6655 { 6656 int rv = 0; 6657 uint32_t start; 6658 int32_t end; 6659 uint32_t txi; 6660 vnet_public_desc_t *txdp; 6661 vio_dring_entry_hdr_t *hdrp; 6662 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 6663 vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp; 6664 6665 DBG1(vgenp, ldcp, "enter\n"); 6666 start = dringmsg->start_idx; 6667 end = dringmsg->end_idx; 6668 6669 /* 6670 * peer sent a NACK msg to indicate lost packets. 6671 * The start and end correspond to the range of descriptors 6672 * for which the peer didn't receive a dring data msg and so 6673 * didn't receive the corresponding data. 6674 */ 6675 DWARN(vgenp, ldcp, "NACK: start(%d), end(%d)\n", start, end); 6676 6677 /* validate start and end indeces in the tx nack msg */ 6678 if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) { 6679 /* drop the message if invalid index */ 6680 DWARN(vgenp, ldcp, "Invalid Tx nack start(%d) or end(%d)\n", 6681 start, end); 6682 return (rv); 6683 } 6684 /* validate dring_ident */ 6685 if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) { 6686 /* invalid dring_ident, drop the msg */ 6687 DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n", 6688 dringmsg->dring_ident); 6689 return (rv); 6690 } 6691 mutex_enter(&ldcp->txlock); 6692 mutex_enter(&ldcp->tclock); 6693 6694 if (ldcp->next_tbufp == ldcp->cur_tbufp) { 6695 /* no busy descriptors, bogus nack ? */ 6696 mutex_exit(&ldcp->tclock); 6697 mutex_exit(&ldcp->txlock); 6698 return (rv); 6699 } 6700 6701 /* we just mark the descrs as done so they can be reclaimed */ 6702 for (txi = start; txi <= end; ) { 6703 txdp = &(ldcp->txdp[txi]); 6704 hdrp = &txdp->hdr; 6705 if (hdrp->dstate == VIO_DESC_READY) 6706 hdrp->dstate = VIO_DESC_DONE; 6707 INCR_TXI(txi, ldcp); 6708 } 6709 mutex_exit(&ldcp->tclock); 6710 mutex_exit(&ldcp->txlock); 6711 DBG1(vgenp, ldcp, "exit rv(%d)\n", rv); 6712 return (rv); 6713 } 6714 6715 static void 6716 vgen_reclaim(vgen_ldc_t *ldcp) 6717 { 6718 mutex_enter(&ldcp->tclock); 6719 6720 vgen_reclaim_dring(ldcp); 6721 ldcp->reclaim_lbolt = ddi_get_lbolt(); 6722 6723 mutex_exit(&ldcp->tclock); 6724 } 6725 6726 /* 6727 * transmit reclaim function. starting from the current reclaim index 6728 * look for descriptors marked DONE and reclaim the descriptor and the 6729 * corresponding buffers (tbuf). 6730 */ 6731 static void 6732 vgen_reclaim_dring(vgen_ldc_t *ldcp) 6733 { 6734 int count = 0; 6735 vnet_public_desc_t *txdp; 6736 vgen_private_desc_t *tbufp; 6737 vio_dring_entry_hdr_t *hdrp; 6738 6739 #ifdef DEBUG 6740 if (vgen_trigger_txtimeout) 6741 return; 6742 #endif 6743 6744 tbufp = ldcp->cur_tbufp; 6745 txdp = tbufp->descp; 6746 hdrp = &txdp->hdr; 6747 6748 while ((hdrp->dstate == VIO_DESC_DONE) && 6749 (tbufp != ldcp->next_tbufp)) { 6750 tbufp->flags = VGEN_PRIV_DESC_FREE; 6751 hdrp->dstate = VIO_DESC_FREE; 6752 hdrp->ack = B_FALSE; 6753 6754 tbufp = NEXTTBUF(ldcp, tbufp); 6755 txdp = tbufp->descp; 6756 hdrp = &txdp->hdr; 6757 count++; 6758 } 6759 6760 ldcp->cur_tbufp = tbufp; 6761 6762 /* 6763 * Check if mac layer should be notified to restart transmissions 6764 */ 6765 if ((ldcp->need_resched) && (count > 0)) { 6766 vio_net_tx_update_t vtx_update = 6767 ldcp->portp->vcb.vio_net_tx_update; 6768 6769 ldcp->need_resched = B_FALSE; 6770 vtx_update(ldcp->portp->vhp); 6771 } 6772 } 6773 6774 /* return the number of pending transmits for the channel */ 6775 static int 6776 vgen_num_txpending(vgen_ldc_t *ldcp) 6777 { 6778 int n; 6779 6780 if (ldcp->next_tbufp >= ldcp->cur_tbufp) { 6781 n = ldcp->next_tbufp - ldcp->cur_tbufp; 6782 } else { 6783 /* cur_tbufp > next_tbufp */ 6784 n = ldcp->num_txds - (ldcp->cur_tbufp - ldcp->next_tbufp); 6785 } 6786 6787 return (n); 6788 } 6789 6790 /* determine if the transmit descriptor ring is full */ 6791 static int 6792 vgen_tx_dring_full(vgen_ldc_t *ldcp) 6793 { 6794 vgen_private_desc_t *tbufp; 6795 vgen_private_desc_t *ntbufp; 6796 6797 tbufp = ldcp->next_tbufp; 6798 ntbufp = NEXTTBUF(ldcp, tbufp); 6799 if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */ 6800 return (VGEN_SUCCESS); 6801 } 6802 return (VGEN_FAILURE); 6803 } 6804 6805 /* determine if timeout condition has occured */ 6806 static int 6807 vgen_ldc_txtimeout(vgen_ldc_t *ldcp) 6808 { 6809 if (((ddi_get_lbolt() - ldcp->reclaim_lbolt) > 6810 drv_usectohz(vnet_ldcwd_txtimeout * 1000)) && 6811 (vnet_ldcwd_txtimeout) && 6812 (vgen_tx_dring_full(ldcp) == VGEN_SUCCESS)) { 6813 return (VGEN_SUCCESS); 6814 } else { 6815 return (VGEN_FAILURE); 6816 } 6817 } 6818 6819 /* transmit watchdog timeout handler */ 6820 static void 6821 vgen_ldc_watchdog(void *arg) 6822 { 6823 vgen_ldc_t *ldcp; 6824 vgen_t *vgenp; 6825 int rv; 6826 6827 ldcp = (vgen_ldc_t *)arg; 6828 vgenp = LDC_TO_VGEN(ldcp); 6829 6830 rv = vgen_ldc_txtimeout(ldcp); 6831 if (rv == VGEN_SUCCESS) { 6832 DWARN(vgenp, ldcp, "transmit timeout\n"); 6833 #ifdef DEBUG 6834 if (vgen_trigger_txtimeout) { 6835 /* tx timeout triggered for debugging */ 6836 vgen_trigger_txtimeout = 0; 6837 } 6838 #endif 6839 mutex_enter(&ldcp->cblock); 6840 vgen_ldc_reset(ldcp); 6841 mutex_exit(&ldcp->cblock); 6842 if (ldcp->need_resched) { 6843 vio_net_tx_update_t vtx_update = 6844 ldcp->portp->vcb.vio_net_tx_update; 6845 6846 ldcp->need_resched = B_FALSE; 6847 vtx_update(ldcp->portp->vhp); 6848 } 6849 } 6850 6851 ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp, 6852 drv_usectohz(vnet_ldcwd_interval * 1000)); 6853 } 6854 6855 /* handler for error messages received from the peer ldc end-point */ 6856 static void 6857 vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp) 6858 { 6859 _NOTE(ARGUNUSED(ldcp, tagp)) 6860 } 6861 6862 static int 6863 vgen_check_datamsg_seq(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp) 6864 { 6865 vio_raw_data_msg_t *rmsg; 6866 vio_dring_msg_t *dmsg; 6867 uint64_t seq_num; 6868 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 6869 6870 if (tagp->vio_subtype_env == VIO_DRING_DATA) { 6871 dmsg = (vio_dring_msg_t *)tagp; 6872 seq_num = dmsg->seq_num; 6873 } else if (tagp->vio_subtype_env == VIO_PKT_DATA) { 6874 rmsg = (vio_raw_data_msg_t *)tagp; 6875 seq_num = rmsg->seq_num; 6876 } else { 6877 return (EINVAL); 6878 } 6879 6880 if (seq_num != ldcp->next_rxseq) { 6881 6882 /* seqnums don't match */ 6883 DWARN(vgenp, ldcp, 6884 "next_rxseq(0x%lx) != seq_num(0x%lx)\n", 6885 ldcp->next_rxseq, seq_num); 6886 6887 return (EINVAL); 6888 6889 } 6890 6891 ldcp->next_rxseq++; 6892 6893 return (0); 6894 } 6895 6896 /* Check if the session id in the received message is valid */ 6897 static int 6898 vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp) 6899 { 6900 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 6901 6902 if (tagp->vio_sid != ldcp->peer_sid) { 6903 DWARN(vgenp, ldcp, "sid mismatch: expected(%x), rcvd(%x)\n", 6904 ldcp->peer_sid, tagp->vio_sid); 6905 return (VGEN_FAILURE); 6906 } 6907 else 6908 return (VGEN_SUCCESS); 6909 } 6910 6911 static caddr_t 6912 vgen_print_ethaddr(uint8_t *a, char *ebuf) 6913 { 6914 (void) sprintf(ebuf, 6915 "%x:%x:%x:%x:%x:%x", a[0], a[1], a[2], a[3], a[4], a[5]); 6916 return (ebuf); 6917 } 6918 6919 /* Handshake watchdog timeout handler */ 6920 static void 6921 vgen_hwatchdog(void *arg) 6922 { 6923 vgen_ldc_t *ldcp = (vgen_ldc_t *)arg; 6924 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 6925 6926 DWARN(vgenp, ldcp, "handshake timeout phase(%x) state(%x)\n", 6927 ldcp->hphase, ldcp->hstate); 6928 6929 mutex_enter(&ldcp->cblock); 6930 if (ldcp->cancel_htid) { 6931 ldcp->cancel_htid = 0; 6932 mutex_exit(&ldcp->cblock); 6933 return; 6934 } 6935 ldcp->htid = 0; 6936 vgen_ldc_reset(ldcp); 6937 mutex_exit(&ldcp->cblock); 6938 } 6939 6940 static void 6941 vgen_print_hparams(vgen_hparams_t *hp) 6942 { 6943 uint8_t addr[6]; 6944 char ea[6]; 6945 ldc_mem_cookie_t *dc; 6946 6947 cmn_err(CE_CONT, "version_info:\n"); 6948 cmn_err(CE_CONT, 6949 "\tver_major: %d, ver_minor: %d, dev_class: %d\n", 6950 hp->ver_major, hp->ver_minor, hp->dev_class); 6951 6952 vnet_macaddr_ultostr(hp->addr, addr); 6953 cmn_err(CE_CONT, "attr_info:\n"); 6954 cmn_err(CE_CONT, "\tMTU: %lx, addr: %s\n", hp->mtu, 6955 vgen_print_ethaddr(addr, ea)); 6956 cmn_err(CE_CONT, 6957 "\taddr_type: %x, xfer_mode: %x, ack_freq: %x\n", 6958 hp->addr_type, hp->xfer_mode, hp->ack_freq); 6959 6960 dc = &hp->dring_cookie; 6961 cmn_err(CE_CONT, "dring_info:\n"); 6962 cmn_err(CE_CONT, 6963 "\tlength: %d, dsize: %d\n", hp->num_desc, hp->desc_size); 6964 cmn_err(CE_CONT, 6965 "\tldc_addr: 0x%lx, ldc_size: %ld\n", 6966 dc->addr, dc->size); 6967 cmn_err(CE_CONT, "\tdring_ident: 0x%lx\n", hp->dring_ident); 6968 } 6969 6970 static void 6971 vgen_print_ldcinfo(vgen_ldc_t *ldcp) 6972 { 6973 vgen_hparams_t *hp; 6974 6975 cmn_err(CE_CONT, "Channel Information:\n"); 6976 cmn_err(CE_CONT, 6977 "\tldc_id: 0x%lx, ldc_status: 0x%x\n", 6978 ldcp->ldc_id, ldcp->ldc_status); 6979 cmn_err(CE_CONT, 6980 "\tlocal_sid: 0x%x, peer_sid: 0x%x\n", 6981 ldcp->local_sid, ldcp->peer_sid); 6982 cmn_err(CE_CONT, 6983 "\thphase: 0x%x, hstate: 0x%x\n", 6984 ldcp->hphase, ldcp->hstate); 6985 6986 cmn_err(CE_CONT, "Local handshake params:\n"); 6987 hp = &ldcp->local_hparams; 6988 vgen_print_hparams(hp); 6989 6990 cmn_err(CE_CONT, "Peer handshake params:\n"); 6991 hp = &ldcp->peer_hparams; 6992 vgen_print_hparams(hp); 6993 } 6994 6995 /* 6996 * Send received packets up the stack. 6997 */ 6998 static void 6999 vgen_rx(vgen_ldc_t *ldcp, mblk_t *bp) 7000 { 7001 vio_net_rx_cb_t vrx_cb = ldcp->portp->vcb.vio_net_rx_cb; 7002 7003 if (ldcp->rcv_thread != NULL) { 7004 ASSERT(MUTEX_HELD(&ldcp->rxlock)); 7005 mutex_exit(&ldcp->rxlock); 7006 } else { 7007 ASSERT(MUTEX_HELD(&ldcp->cblock)); 7008 mutex_exit(&ldcp->cblock); 7009 } 7010 7011 vrx_cb(ldcp->portp->vhp, bp); 7012 7013 if (ldcp->rcv_thread != NULL) { 7014 mutex_enter(&ldcp->rxlock); 7015 } else { 7016 mutex_enter(&ldcp->cblock); 7017 } 7018 } 7019 7020 /* 7021 * vgen_ldc_rcv_worker -- A per LDC worker thread to receive data. 7022 * This thread is woken up by the LDC interrupt handler to process 7023 * LDC packets and receive data. 7024 */ 7025 static void 7026 vgen_ldc_rcv_worker(void *arg) 7027 { 7028 callb_cpr_t cprinfo; 7029 vgen_ldc_t *ldcp = (vgen_ldc_t *)arg; 7030 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 7031 7032 DBG1(vgenp, ldcp, "enter\n"); 7033 CALLB_CPR_INIT(&cprinfo, &ldcp->rcv_thr_lock, callb_generic_cpr, 7034 "vnet_rcv_thread"); 7035 mutex_enter(&ldcp->rcv_thr_lock); 7036 while (!(ldcp->rcv_thr_flags & VGEN_WTHR_STOP)) { 7037 7038 CALLB_CPR_SAFE_BEGIN(&cprinfo); 7039 /* 7040 * Wait until the data is received or a stop 7041 * request is received. 7042 */ 7043 while (!(ldcp->rcv_thr_flags & 7044 (VGEN_WTHR_DATARCVD | VGEN_WTHR_STOP))) { 7045 cv_wait(&ldcp->rcv_thr_cv, &ldcp->rcv_thr_lock); 7046 } 7047 CALLB_CPR_SAFE_END(&cprinfo, &ldcp->rcv_thr_lock) 7048 7049 /* 7050 * First process the stop request. 7051 */ 7052 if (ldcp->rcv_thr_flags & VGEN_WTHR_STOP) { 7053 DBG2(vgenp, ldcp, "stopped\n"); 7054 break; 7055 } 7056 ldcp->rcv_thr_flags &= ~VGEN_WTHR_DATARCVD; 7057 ldcp->rcv_thr_flags |= VGEN_WTHR_PROCESSING; 7058 mutex_exit(&ldcp->rcv_thr_lock); 7059 DBG2(vgenp, ldcp, "calling vgen_handle_evt_read\n"); 7060 vgen_handle_evt_read(ldcp); 7061 mutex_enter(&ldcp->rcv_thr_lock); 7062 ldcp->rcv_thr_flags &= ~VGEN_WTHR_PROCESSING; 7063 } 7064 7065 /* 7066 * Update the run status and wakeup the thread that 7067 * has sent the stop request. 7068 */ 7069 ldcp->rcv_thr_flags &= ~VGEN_WTHR_STOP; 7070 ldcp->rcv_thread = NULL; 7071 CALLB_CPR_EXIT(&cprinfo); 7072 7073 thread_exit(); 7074 DBG1(vgenp, ldcp, "exit\n"); 7075 } 7076 7077 /* vgen_stop_rcv_thread -- Co-ordinate with receive thread to stop it */ 7078 static void 7079 vgen_stop_rcv_thread(vgen_ldc_t *ldcp) 7080 { 7081 kt_did_t tid = 0; 7082 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 7083 7084 DBG1(vgenp, ldcp, "enter\n"); 7085 /* 7086 * Send a stop request by setting the stop flag and 7087 * wait until the receive thread stops. 7088 */ 7089 mutex_enter(&ldcp->rcv_thr_lock); 7090 if (ldcp->rcv_thread != NULL) { 7091 tid = ldcp->rcv_thread->t_did; 7092 ldcp->rcv_thr_flags |= VGEN_WTHR_STOP; 7093 cv_signal(&ldcp->rcv_thr_cv); 7094 } 7095 mutex_exit(&ldcp->rcv_thr_lock); 7096 7097 if (tid != 0) { 7098 thread_join(tid); 7099 } 7100 DBG1(vgenp, ldcp, "exit\n"); 7101 } 7102 7103 /* 7104 * Wait for the channel rx-queue to be drained by allowing the receive 7105 * worker thread to read all messages from the rx-queue of the channel. 7106 * Assumption: further callbacks are disabled at this time. 7107 */ 7108 static void 7109 vgen_drain_rcv_thread(vgen_ldc_t *ldcp) 7110 { 7111 clock_t tm; 7112 clock_t wt; 7113 clock_t rv; 7114 7115 /* 7116 * If there is data in ldc rx queue, wait until the rx 7117 * worker thread runs and drains all msgs in the queue. 7118 */ 7119 wt = drv_usectohz(MILLISEC); 7120 7121 mutex_enter(&ldcp->rcv_thr_lock); 7122 7123 tm = ddi_get_lbolt() + wt; 7124 7125 /* 7126 * We need to check both bits - DATARCVD and PROCESSING, to be cleared. 7127 * If DATARCVD is set, that means the callback has signalled the worker 7128 * thread, but the worker hasn't started processing yet. If PROCESSING 7129 * is set, that means the thread is awake and processing. Note that the 7130 * DATARCVD state can only be seen once, as the assumption is that 7131 * further callbacks have been disabled at this point. 7132 */ 7133 while (ldcp->rcv_thr_flags & 7134 (VGEN_WTHR_DATARCVD | VGEN_WTHR_PROCESSING)) { 7135 rv = cv_timedwait(&ldcp->rcv_thr_cv, &ldcp->rcv_thr_lock, tm); 7136 if (rv == -1) { /* timeout */ 7137 /* 7138 * Note that the only way we return is due to a timeout; 7139 * we set the new time to wait, before we go back and 7140 * check the condition. The other(unlikely) possibility 7141 * is a premature wakeup(see cv_timedwait(9F)) in which 7142 * case we just continue to use the same time to wait. 7143 */ 7144 tm = ddi_get_lbolt() + wt; 7145 } 7146 } 7147 7148 mutex_exit(&ldcp->rcv_thr_lock); 7149 } 7150 7151 /* 7152 * vgen_dds_rx -- post DDS messages to vnet. 7153 */ 7154 static int 7155 vgen_dds_rx(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp) 7156 { 7157 vio_dds_msg_t *dmsg = (vio_dds_msg_t *)tagp; 7158 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 7159 7160 if (dmsg->dds_class != DDS_VNET_NIU) { 7161 DWARN(vgenp, ldcp, "Unknown DDS class, dropping"); 7162 return (EBADMSG); 7163 } 7164 vnet_dds_rx(vgenp->vnetp, dmsg); 7165 return (0); 7166 } 7167 7168 /* 7169 * vgen_dds_tx -- an interface called by vnet to send DDS messages. 7170 */ 7171 int 7172 vgen_dds_tx(void *arg, void *msg) 7173 { 7174 vgen_t *vgenp = arg; 7175 vio_dds_msg_t *dmsg = msg; 7176 vgen_portlist_t *plistp = &vgenp->vgenports; 7177 vgen_ldc_t *ldcp; 7178 vgen_ldclist_t *ldclp; 7179 int rv = EIO; 7180 7181 7182 READ_ENTER(&plistp->rwlock); 7183 ldclp = &(vgenp->vsw_portp->ldclist); 7184 READ_ENTER(&ldclp->rwlock); 7185 ldcp = ldclp->headp; 7186 if ((ldcp == NULL) || (ldcp->hphase != VH_DONE)) { 7187 goto vgen_dsend_exit; 7188 } 7189 7190 dmsg->tag.vio_sid = ldcp->local_sid; 7191 rv = vgen_sendmsg(ldcp, (caddr_t)dmsg, sizeof (vio_dds_msg_t), B_FALSE); 7192 if (rv != VGEN_SUCCESS) { 7193 rv = EIO; 7194 } else { 7195 rv = 0; 7196 } 7197 7198 vgen_dsend_exit: 7199 RW_EXIT(&ldclp->rwlock); 7200 RW_EXIT(&plistp->rwlock); 7201 return (rv); 7202 7203 } 7204 7205 static void 7206 vgen_ldc_reset(vgen_ldc_t *ldcp) 7207 { 7208 vnet_t *vnetp = LDC_TO_VNET(ldcp); 7209 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 7210 7211 ASSERT(MUTEX_HELD(&ldcp->cblock)); 7212 7213 if (ldcp->need_ldc_reset == B_TRUE) { 7214 /* another thread is already in the process of resetting */ 7215 return; 7216 } 7217 7218 /* Set the flag to indicate reset is in progress */ 7219 ldcp->need_ldc_reset = B_TRUE; 7220 7221 if (ldcp->portp == vgenp->vsw_portp) { 7222 mutex_exit(&ldcp->cblock); 7223 /* 7224 * Now cleanup any HIO resources; the above flag also tells 7225 * the code that handles dds messages to drop any new msgs 7226 * that arrive while we are cleaning up and resetting the 7227 * channel. 7228 */ 7229 vnet_dds_cleanup_hio(vnetp); 7230 mutex_enter(&ldcp->cblock); 7231 } 7232 7233 vgen_handshake_retry(ldcp); 7234 } 7235 7236 #if DEBUG 7237 7238 /* 7239 * Print debug messages - set to 0xf to enable all msgs 7240 */ 7241 static void 7242 debug_printf(const char *fname, vgen_t *vgenp, 7243 vgen_ldc_t *ldcp, const char *fmt, ...) 7244 { 7245 char buf[256]; 7246 char *bufp = buf; 7247 va_list ap; 7248 7249 if ((vgenp != NULL) && (vgenp->vnetp != NULL)) { 7250 (void) sprintf(bufp, "vnet%d:", 7251 ((vnet_t *)(vgenp->vnetp))->instance); 7252 bufp += strlen(bufp); 7253 } 7254 if (ldcp != NULL) { 7255 (void) sprintf(bufp, "ldc(%ld):", ldcp->ldc_id); 7256 bufp += strlen(bufp); 7257 } 7258 (void) sprintf(bufp, "%s: ", fname); 7259 bufp += strlen(bufp); 7260 7261 va_start(ap, fmt); 7262 (void) vsprintf(bufp, fmt, ap); 7263 va_end(ap); 7264 7265 if ((ldcp == NULL) ||(vgendbg_ldcid == -1) || 7266 (vgendbg_ldcid == ldcp->ldc_id)) { 7267 cmn_err(CE_CONT, "%s\n", buf); 7268 } 7269 } 7270 #endif 7271 7272 #ifdef VNET_IOC_DEBUG 7273 7274 static void 7275 vgen_ioctl(void *arg, queue_t *q, mblk_t *mp) 7276 { 7277 struct iocblk *iocp; 7278 vgen_port_t *portp; 7279 enum ioc_reply { 7280 IOC_INVAL = -1, /* bad, NAK with EINVAL */ 7281 IOC_ACK /* OK, just send ACK */ 7282 } status; 7283 int rv; 7284 7285 iocp = (struct iocblk *)(uintptr_t)mp->b_rptr; 7286 iocp->ioc_error = 0; 7287 portp = (vgen_port_t *)arg; 7288 7289 if (portp == NULL) { 7290 status = IOC_INVAL; 7291 goto vgen_ioc_exit; 7292 } 7293 7294 mutex_enter(&portp->lock); 7295 7296 switch (iocp->ioc_cmd) { 7297 7298 case VNET_FORCE_LINK_DOWN: 7299 case VNET_FORCE_LINK_UP: 7300 rv = vgen_force_link_state(portp, iocp->ioc_cmd); 7301 (rv == 0) ? (status = IOC_ACK) : (status = IOC_INVAL); 7302 break; 7303 7304 default: 7305 status = IOC_INVAL; 7306 break; 7307 7308 } 7309 7310 mutex_exit(&portp->lock); 7311 7312 vgen_ioc_exit: 7313 7314 switch (status) { 7315 default: 7316 case IOC_INVAL: 7317 /* Error, reply with a NAK and EINVAL error */ 7318 miocnak(q, mp, 0, EINVAL); 7319 break; 7320 case IOC_ACK: 7321 /* OK, reply with an ACK */ 7322 miocack(q, mp, 0, 0); 7323 break; 7324 } 7325 } 7326 7327 static int 7328 vgen_force_link_state(vgen_port_t *portp, int cmd) 7329 { 7330 ldc_status_t istatus; 7331 vgen_ldclist_t *ldclp; 7332 vgen_ldc_t *ldcp; 7333 vgen_t *vgenp = portp->vgenp; 7334 int rv; 7335 7336 ldclp = &portp->ldclist; 7337 READ_ENTER(&ldclp->rwlock); 7338 7339 /* 7340 * NOTE: for now, we will assume we have a single channel. 7341 */ 7342 if (ldclp->headp == NULL) { 7343 RW_EXIT(&ldclp->rwlock); 7344 return (1); 7345 } 7346 ldcp = ldclp->headp; 7347 mutex_enter(&ldcp->cblock); 7348 7349 switch (cmd) { 7350 7351 case VNET_FORCE_LINK_DOWN: 7352 (void) ldc_down(ldcp->ldc_handle); 7353 ldcp->link_down_forced = B_TRUE; 7354 break; 7355 7356 case VNET_FORCE_LINK_UP: 7357 rv = ldc_up(ldcp->ldc_handle); 7358 if (rv != 0) { 7359 DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv); 7360 } 7361 ldcp->link_down_forced = B_FALSE; 7362 7363 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) { 7364 DWARN(vgenp, ldcp, "ldc_status err\n"); 7365 } else { 7366 ldcp->ldc_status = istatus; 7367 } 7368 7369 /* if channel is already UP - restart handshake */ 7370 if (ldcp->ldc_status == LDC_UP) { 7371 vgen_handle_evt_up(ldcp); 7372 } 7373 break; 7374 7375 } 7376 7377 mutex_exit(&ldcp->cblock); 7378 RW_EXIT(&ldclp->rwlock); 7379 7380 return (0); 7381 } 7382 7383 #else 7384 7385 static void 7386 vgen_ioctl(void *arg, queue_t *q, mblk_t *mp) 7387 { 7388 vgen_port_t *portp; 7389 7390 portp = (vgen_port_t *)arg; 7391 7392 if (portp == NULL) { 7393 miocnak(q, mp, 0, EINVAL); 7394 return; 7395 } 7396 7397 miocnak(q, mp, 0, ENOTSUP); 7398 } 7399 7400 #endif 7401