1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/errno.h> 29 #include <sys/sysmacros.h> 30 #include <sys/param.h> 31 #include <sys/stream.h> 32 #include <sys/strsubr.h> 33 #include <sys/kmem.h> 34 #include <sys/conf.h> 35 #include <sys/devops.h> 36 #include <sys/ksynch.h> 37 #include <sys/stat.h> 38 #include <sys/modctl.h> 39 #include <sys/debug.h> 40 #include <sys/ethernet.h> 41 #include <sys/ddi.h> 42 #include <sys/sunddi.h> 43 #include <sys/strsun.h> 44 #include <sys/note.h> 45 #include <sys/mac_provider.h> 46 #include <sys/mac_ether.h> 47 #include <sys/ldc.h> 48 #include <sys/mach_descrip.h> 49 #include <sys/mdeg.h> 50 #include <net/if.h> 51 #include <sys/vnet.h> 52 #include <sys/vio_mailbox.h> 53 #include <sys/vio_common.h> 54 #include <sys/vnet_common.h> 55 #include <sys/vnet_mailbox.h> 56 #include <sys/vio_util.h> 57 #include <sys/vnet_gen.h> 58 #include <sys/atomic.h> 59 #include <sys/callb.h> 60 #include <sys/sdt.h> 61 #include <sys/intr.h> 62 #include <sys/pattr.h> 63 #include <sys/vlan.h> 64 65 /* 66 * 
Implementation of the mac functionality for vnet using the 67 * generic(default) transport layer of sun4v Logical Domain Channels(LDC). 68 */ 69 70 /* 71 * Function prototypes. 72 */ 73 /* vgen proxy entry points */ 74 int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip, 75 const uint8_t *macaddr, void **vgenhdl); 76 int vgen_uninit(void *arg); 77 int vgen_dds_tx(void *arg, void *dmsg); 78 void vgen_mod_init(void); 79 int vgen_mod_cleanup(void); 80 void vgen_mod_fini(void); 81 static int vgen_start(void *arg); 82 static void vgen_stop(void *arg); 83 static mblk_t *vgen_tx(void *arg, mblk_t *mp); 84 static int vgen_multicst(void *arg, boolean_t add, 85 const uint8_t *mca); 86 static int vgen_promisc(void *arg, boolean_t on); 87 static int vgen_unicst(void *arg, const uint8_t *mca); 88 static int vgen_stat(void *arg, uint_t stat, uint64_t *val); 89 static void vgen_ioctl(void *arg, queue_t *wq, mblk_t *mp); 90 91 /* vgen internal functions */ 92 static int vgen_read_mdprops(vgen_t *vgenp); 93 static void vgen_update_md_prop(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex); 94 static void vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp, 95 mde_cookie_t node); 96 static void vgen_mtu_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node, 97 uint32_t *mtu); 98 static void vgen_detach_ports(vgen_t *vgenp); 99 static void vgen_port_detach(vgen_port_t *portp); 100 static void vgen_port_list_insert(vgen_port_t *portp); 101 static void vgen_port_list_remove(vgen_port_t *portp); 102 static vgen_port_t *vgen_port_lookup(vgen_portlist_t *plistp, 103 int port_num); 104 static int vgen_mdeg_reg(vgen_t *vgenp); 105 static void vgen_mdeg_unreg(vgen_t *vgenp); 106 static int vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp); 107 static int vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp); 108 static int vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex); 109 static int vgen_port_read_props(vgen_port_t *portp, vgen_t *vgenp, md_t *mdp, 110 mde_cookie_t mdex); 111 
static int vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex); 112 static int vgen_port_attach(vgen_port_t *portp); 113 static void vgen_port_detach_mdeg(vgen_port_t *portp); 114 static void vgen_port_detach_mdeg(vgen_port_t *portp); 115 static int vgen_update_port(vgen_t *vgenp, md_t *curr_mdp, 116 mde_cookie_t curr_mdex, md_t *prev_mdp, mde_cookie_t prev_mdex); 117 static uint64_t vgen_port_stat(vgen_port_t *portp, uint_t stat); 118 119 static int vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id); 120 static void vgen_ldc_detach(vgen_ldc_t *ldcp); 121 static int vgen_alloc_tx_ring(vgen_ldc_t *ldcp); 122 static void vgen_free_tx_ring(vgen_ldc_t *ldcp); 123 static void vgen_init_ports(vgen_t *vgenp); 124 static void vgen_port_init(vgen_port_t *portp); 125 static void vgen_uninit_ports(vgen_t *vgenp); 126 static void vgen_port_uninit(vgen_port_t *portp); 127 static void vgen_init_ldcs(vgen_port_t *portp); 128 static void vgen_uninit_ldcs(vgen_port_t *portp); 129 static int vgen_ldc_init(vgen_ldc_t *ldcp); 130 static void vgen_ldc_uninit(vgen_ldc_t *ldcp); 131 static int vgen_init_tbufs(vgen_ldc_t *ldcp); 132 static void vgen_uninit_tbufs(vgen_ldc_t *ldcp); 133 static void vgen_clobber_tbufs(vgen_ldc_t *ldcp); 134 static void vgen_clobber_rxds(vgen_ldc_t *ldcp); 135 static uint64_t vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat); 136 static uint_t vgen_ldc_cb(uint64_t event, caddr_t arg); 137 static int vgen_portsend(vgen_port_t *portp, mblk_t *mp); 138 static int vgen_ldcsend(void *arg, mblk_t *mp); 139 static void vgen_ldcsend_pkt(void *arg, mblk_t *mp); 140 static int vgen_ldcsend_dring(void *arg, mblk_t *mp); 141 static void vgen_reclaim(vgen_ldc_t *ldcp); 142 static void vgen_reclaim_dring(vgen_ldc_t *ldcp); 143 static int vgen_num_txpending(vgen_ldc_t *ldcp); 144 static int vgen_tx_dring_full(vgen_ldc_t *ldcp); 145 static int vgen_ldc_txtimeout(vgen_ldc_t *ldcp); 146 static void vgen_ldc_watchdog(void *arg); 147 148 /* vgen handshake functions */ 149 
static vgen_ldc_t *vh_nextphase(vgen_ldc_t *ldcp); 150 static int vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg, size_t msglen, 151 boolean_t caller_holds_lock); 152 static int vgen_send_version_negotiate(vgen_ldc_t *ldcp); 153 static int vgen_send_attr_info(vgen_ldc_t *ldcp); 154 static int vgen_send_dring_reg(vgen_ldc_t *ldcp); 155 static int vgen_send_rdx_info(vgen_ldc_t *ldcp); 156 static int vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, int32_t end); 157 static int vgen_send_mcast_info(vgen_ldc_t *ldcp); 158 static int vgen_handshake_phase2(vgen_ldc_t *ldcp); 159 static void vgen_handshake_reset(vgen_ldc_t *ldcp); 160 static void vgen_reset_hphase(vgen_ldc_t *ldcp); 161 static void vgen_handshake(vgen_ldc_t *ldcp); 162 static int vgen_handshake_done(vgen_ldc_t *ldcp); 163 static void vgen_handshake_retry(vgen_ldc_t *ldcp); 164 static int vgen_handle_version_negotiate(vgen_ldc_t *ldcp, 165 vio_msg_tag_t *tagp); 166 static int vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp); 167 static int vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp); 168 static int vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp); 169 static int vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp); 170 static int vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp); 171 static void vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen); 172 static void vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen); 173 static int vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp); 174 static int vgen_handle_dring_data_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp); 175 static int vgen_process_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp); 176 static int vgen_handle_dring_data_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp); 177 static int vgen_handle_dring_data_nack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp); 178 static int vgen_send_dring_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, 179 uint32_t start, int32_t end, uint8_t 
pstate); 180 static int vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, 181 uint32_t msglen); 182 static void vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp); 183 static void vgen_handle_evt_up(vgen_ldc_t *ldcp); 184 static void vgen_handle_evt_reset(vgen_ldc_t *ldcp); 185 static int vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp); 186 static int vgen_check_datamsg_seq(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp); 187 static caddr_t vgen_print_ethaddr(uint8_t *a, char *ebuf); 188 static void vgen_hwatchdog(void *arg); 189 static void vgen_print_attr_info(vgen_ldc_t *ldcp, int endpoint); 190 static void vgen_print_hparams(vgen_hparams_t *hp); 191 static void vgen_print_ldcinfo(vgen_ldc_t *ldcp); 192 static void vgen_stop_rcv_thread(vgen_ldc_t *ldcp); 193 static void vgen_drain_rcv_thread(vgen_ldc_t *ldcp); 194 static void vgen_ldc_rcv_worker(void *arg); 195 static void vgen_handle_evt_read(vgen_ldc_t *ldcp); 196 static void vgen_rx(vgen_ldc_t *ldcp, mblk_t *bp); 197 static void vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp); 198 static void vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp); 199 200 /* VLAN routines */ 201 static void vgen_vlan_read_ids(void *arg, int type, md_t *mdp, 202 mde_cookie_t node, uint16_t *pvidp, uint16_t **vidspp, 203 uint16_t *nvidsp, uint16_t *default_idp); 204 static void vgen_vlan_create_hash(vgen_port_t *portp); 205 static void vgen_vlan_destroy_hash(vgen_port_t *portp); 206 static void vgen_vlan_add_ids(vgen_port_t *portp); 207 static void vgen_vlan_remove_ids(vgen_port_t *portp); 208 static boolean_t vgen_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid); 209 static boolean_t vgen_frame_lookup_vid(vnet_t *vnetp, struct ether_header *ehp, 210 uint16_t *vidp); 211 static mblk_t *vgen_vlan_frame_fixtag(vgen_port_t *portp, mblk_t *mp, 212 boolean_t is_tagged, uint16_t vid); 213 static void vgen_vlan_unaware_port_reset(vgen_port_t *portp); 214 static void vgen_reset_vlan_unaware_ports(vgen_t *vgenp); 215 static int 
vgen_dds_rx(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp); 216 217 /* externs */ 218 extern void vnet_dds_rx(void *arg, void *dmsg); 219 extern int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu); 220 221 /* 222 * The handshake process consists of 5 phases defined below, with VH_PHASE0 223 * being the pre-handshake phase and VH_DONE is the phase to indicate 224 * successful completion of all phases. 225 * Each phase may have one to several handshake states which are required 226 * to complete successfully to move to the next phase. 227 * Refer to the functions vgen_handshake() and vgen_handshake_done() for 228 * more details. 229 */ 230 /* handshake phases */ 231 enum { VH_PHASE0, VH_PHASE1, VH_PHASE2, VH_PHASE3, VH_DONE = 0x80 }; 232 233 /* handshake states */ 234 enum { 235 236 VER_INFO_SENT = 0x1, 237 VER_ACK_RCVD = 0x2, 238 VER_INFO_RCVD = 0x4, 239 VER_ACK_SENT = 0x8, 240 VER_NEGOTIATED = (VER_ACK_RCVD | VER_ACK_SENT), 241 242 ATTR_INFO_SENT = 0x10, 243 ATTR_ACK_RCVD = 0x20, 244 ATTR_INFO_RCVD = 0x40, 245 ATTR_ACK_SENT = 0x80, 246 ATTR_INFO_EXCHANGED = (ATTR_ACK_RCVD | ATTR_ACK_SENT), 247 248 DRING_INFO_SENT = 0x100, 249 DRING_ACK_RCVD = 0x200, 250 DRING_INFO_RCVD = 0x400, 251 DRING_ACK_SENT = 0x800, 252 DRING_INFO_EXCHANGED = (DRING_ACK_RCVD | DRING_ACK_SENT), 253 254 RDX_INFO_SENT = 0x1000, 255 RDX_ACK_RCVD = 0x2000, 256 RDX_INFO_RCVD = 0x4000, 257 RDX_ACK_SENT = 0x8000, 258 RDX_EXCHANGED = (RDX_ACK_RCVD | RDX_ACK_SENT) 259 260 }; 261 262 #define VGEN_PRI_ETH_DEFINED(vgenp) ((vgenp)->pri_num_types != 0) 263 264 #define LDC_LOCK(ldcp) \ 265 mutex_enter(&((ldcp)->cblock));\ 266 mutex_enter(&((ldcp)->rxlock));\ 267 mutex_enter(&((ldcp)->wrlock));\ 268 mutex_enter(&((ldcp)->txlock));\ 269 mutex_enter(&((ldcp)->tclock)); 270 #define LDC_UNLOCK(ldcp) \ 271 mutex_exit(&((ldcp)->tclock));\ 272 mutex_exit(&((ldcp)->txlock));\ 273 mutex_exit(&((ldcp)->wrlock));\ 274 mutex_exit(&((ldcp)->rxlock));\ 275 mutex_exit(&((ldcp)->cblock)); 276 277 #define VGEN_VER_EQ(ldcp, major, minor) 
\ 278 ((ldcp)->local_hparams.ver_major == (major) && \ 279 (ldcp)->local_hparams.ver_minor == (minor)) 280 281 #define VGEN_VER_LT(ldcp, major, minor) \ 282 (((ldcp)->local_hparams.ver_major < (major)) || \ 283 ((ldcp)->local_hparams.ver_major == (major) && \ 284 (ldcp)->local_hparams.ver_minor < (minor))) 285 286 #define VGEN_VER_GTEQ(ldcp, major, minor) \ 287 (((ldcp)->local_hparams.ver_major > (major)) || \ 288 ((ldcp)->local_hparams.ver_major == (major) && \ 289 (ldcp)->local_hparams.ver_minor >= (minor))) 290 291 static struct ether_addr etherbroadcastaddr = { 292 0xff, 0xff, 0xff, 0xff, 0xff, 0xff 293 }; 294 /* 295 * MIB II broadcast/multicast packets 296 */ 297 #define IS_BROADCAST(ehp) \ 298 (ether_cmp(&ehp->ether_dhost, ðerbroadcastaddr) == 0) 299 #define IS_MULTICAST(ehp) \ 300 ((ehp->ether_dhost.ether_addr_octet[0] & 01) == 1) 301 302 /* 303 * Property names 304 */ 305 static char macaddr_propname[] = "mac-address"; 306 static char rmacaddr_propname[] = "remote-mac-address"; 307 static char channel_propname[] = "channel-endpoint"; 308 static char reg_propname[] = "reg"; 309 static char port_propname[] = "port"; 310 static char swport_propname[] = "switch-port"; 311 static char id_propname[] = "id"; 312 static char vdev_propname[] = "virtual-device"; 313 static char vnet_propname[] = "network"; 314 static char pri_types_propname[] = "priority-ether-types"; 315 static char vgen_pvid_propname[] = "port-vlan-id"; 316 static char vgen_vid_propname[] = "vlan-id"; 317 static char vgen_dvid_propname[] = "default-vlan-id"; 318 static char port_pvid_propname[] = "remote-port-vlan-id"; 319 static char port_vid_propname[] = "remote-vlan-id"; 320 static char vgen_mtu_propname[] = "mtu"; 321 322 /* versions supported - in decreasing order */ 323 static vgen_ver_t vgen_versions[VGEN_NUM_VER] = { {1, 4} }; 324 325 /* Tunables */ 326 uint32_t vgen_hwd_interval = 5; /* handshake watchdog freq in sec */ 327 uint32_t vgen_max_hretries = VNET_NUM_HANDSHAKES; /* # of 
handshake retries */ 328 uint32_t vgen_ldcwr_retries = 10; /* max # of ldc_write() retries */ 329 uint32_t vgen_ldcup_retries = 5; /* max # of ldc_up() retries */ 330 uint32_t vgen_ldccl_retries = 5; /* max # of ldc_close() retries */ 331 uint32_t vgen_recv_delay = 1; /* delay when rx descr not ready */ 332 uint32_t vgen_recv_retries = 10; /* retry when rx descr not ready */ 333 uint32_t vgen_tx_retries = 0x4; /* retry when tx descr not available */ 334 uint32_t vgen_tx_delay = 0x30; /* delay when tx descr not available */ 335 336 int vgen_rcv_thread_enabled = 1; /* Enable Recieve thread */ 337 338 static vio_mblk_pool_t *vgen_rx_poolp = NULL; 339 static krwlock_t vgen_rw; 340 341 /* 342 * max # of packets accumulated prior to sending them up. It is best 343 * to keep this at 60% of the number of recieve buffers. 344 */ 345 uint32_t vgen_chain_len = (VGEN_NRBUFS * 0.6); 346 347 /* 348 * Internal tunables for receive buffer pools, that is, the size and number of 349 * mblks for each pool. At least 3 sizes must be specified if these are used. 350 * The sizes must be specified in increasing order. Non-zero value of the first 351 * size will be used as a hint to use these values instead of the algorithm 352 * that determines the sizes based on MTU. 353 */ 354 uint32_t vgen_rbufsz1 = 0; 355 uint32_t vgen_rbufsz2 = 0; 356 uint32_t vgen_rbufsz3 = 0; 357 uint32_t vgen_rbufsz4 = 0; 358 359 uint32_t vgen_nrbufs1 = VGEN_NRBUFS; 360 uint32_t vgen_nrbufs2 = VGEN_NRBUFS; 361 uint32_t vgen_nrbufs3 = VGEN_NRBUFS; 362 uint32_t vgen_nrbufs4 = VGEN_NRBUFS; 363 364 /* 365 * In the absence of "priority-ether-types" property in MD, the following 366 * internal tunable can be set to specify a single priority ethertype. 367 */ 368 uint64_t vgen_pri_eth_type = 0; 369 370 /* 371 * Number of transmit priority buffers that are preallocated per device. 372 * This number is chosen to be a small value to throttle transmission 373 * of priority packets. 
Note: Must be a power of 2 for vio_create_mblks(). 374 */ 375 uint32_t vgen_pri_tx_nmblks = 64; 376 377 uint32_t vgen_vlan_nchains = 4; /* # of chains in vlan id hash table */ 378 379 #ifdef DEBUG 380 /* flags to simulate error conditions for debugging */ 381 int vgen_trigger_txtimeout = 0; 382 int vgen_trigger_rxlost = 0; 383 #endif 384 385 /* 386 * Matching criteria passed to the MDEG to register interest 387 * in changes to 'virtual-device' nodes (i.e. vnet nodes) identified 388 * by their 'name' and 'cfg-handle' properties. 389 */ 390 static md_prop_match_t vdev_prop_match[] = { 391 { MDET_PROP_STR, "name" }, 392 { MDET_PROP_VAL, "cfg-handle" }, 393 { MDET_LIST_END, NULL } 394 }; 395 396 static mdeg_node_match_t vdev_match = { "virtual-device", 397 vdev_prop_match }; 398 399 /* MD update matching structure */ 400 static md_prop_match_t vport_prop_match[] = { 401 { MDET_PROP_VAL, "id" }, 402 { MDET_LIST_END, NULL } 403 }; 404 405 static mdeg_node_match_t vport_match = { "virtual-device-port", 406 vport_prop_match }; 407 408 /* template for matching a particular vnet instance */ 409 static mdeg_prop_spec_t vgen_prop_template[] = { 410 { MDET_PROP_STR, "name", "network" }, 411 { MDET_PROP_VAL, "cfg-handle", NULL }, 412 { MDET_LIST_END, NULL, NULL } 413 }; 414 415 #define VGEN_SET_MDEG_PROP_INST(specp, val) (specp)[1].ps_val = (val) 416 417 static int vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp); 418 419 static mac_callbacks_t vgen_m_callbacks = { 420 0, 421 vgen_stat, 422 vgen_start, 423 vgen_stop, 424 vgen_promisc, 425 vgen_multicst, 426 vgen_unicst, 427 vgen_tx, 428 NULL, 429 NULL, 430 NULL 431 }; 432 433 /* externs */ 434 extern pri_t maxclsyspri; 435 extern proc_t p0; 436 extern uint32_t vnet_ntxds; 437 extern uint32_t vnet_ldcwd_interval; 438 extern uint32_t vnet_ldcwd_txtimeout; 439 extern uint32_t vnet_ldc_mtu; 440 extern uint32_t vnet_nrbufs; 441 extern uint32_t vnet_ethermtu; 442 extern uint16_t vnet_default_vlan_id; 443 extern boolean_t 
vnet_jumbo_rxpools; 444 445 #ifdef DEBUG 446 447 extern int vnet_dbglevel; 448 static void debug_printf(const char *fname, vgen_t *vgenp, 449 vgen_ldc_t *ldcp, const char *fmt, ...); 450 451 /* -1 for all LDCs info, or ldc_id for a specific LDC info */ 452 int vgendbg_ldcid = -1; 453 454 /* simulate handshake error conditions for debug */ 455 uint32_t vgen_hdbg; 456 #define HDBG_VERSION 0x1 457 #define HDBG_TIMEOUT 0x2 458 #define HDBG_BAD_SID 0x4 459 #define HDBG_OUT_STATE 0x8 460 461 #endif 462 463 /* 464 * vgen_init() is called by an instance of vnet driver to initialize the 465 * corresponding generic proxy transport layer. The arguments passed by vnet 466 * are - an opaque pointer to the vnet instance, pointers to dev_info_t and 467 * the mac address of the vnet device, and a pointer to vgen_t is passed 468 * back as a handle to vnet. 469 */ 470 int 471 vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip, 472 const uint8_t *macaddr, void **vgenhdl) 473 { 474 vgen_t *vgenp; 475 int instance; 476 int rv; 477 478 if ((vnetp == NULL) || (vnetdip == NULL)) 479 return (DDI_FAILURE); 480 481 instance = ddi_get_instance(vnetdip); 482 483 DBG1(NULL, NULL, "vnet(%d): enter\n", instance); 484 485 vgenp = kmem_zalloc(sizeof (vgen_t), KM_SLEEP); 486 487 vgenp->vnetp = vnetp; 488 vgenp->instance = instance; 489 vgenp->regprop = regprop; 490 vgenp->vnetdip = vnetdip; 491 bcopy(macaddr, &(vgenp->macaddr), ETHERADDRL); 492 493 /* allocate multicast table */ 494 vgenp->mctab = kmem_zalloc(VGEN_INIT_MCTAB_SIZE * 495 sizeof (struct ether_addr), KM_SLEEP); 496 vgenp->mccount = 0; 497 vgenp->mcsize = VGEN_INIT_MCTAB_SIZE; 498 499 mutex_init(&vgenp->lock, NULL, MUTEX_DRIVER, NULL); 500 rw_init(&vgenp->vgenports.rwlock, NULL, RW_DRIVER, NULL); 501 502 rv = vgen_read_mdprops(vgenp); 503 if (rv != 0) { 504 goto vgen_init_fail; 505 } 506 507 /* register with MD event generator */ 508 rv = vgen_mdeg_reg(vgenp); 509 if (rv != DDI_SUCCESS) { 510 goto vgen_init_fail; 511 } 512 513 
*vgenhdl = (void *)vgenp; 514 515 DBG1(NULL, NULL, "vnet(%d): exit\n", instance); 516 return (DDI_SUCCESS); 517 518 vgen_init_fail: 519 rw_destroy(&vgenp->vgenports.rwlock); 520 mutex_destroy(&vgenp->lock); 521 kmem_free(vgenp->mctab, VGEN_INIT_MCTAB_SIZE * 522 sizeof (struct ether_addr)); 523 if (VGEN_PRI_ETH_DEFINED(vgenp)) { 524 kmem_free(vgenp->pri_types, 525 sizeof (uint16_t) * vgenp->pri_num_types); 526 (void) vio_destroy_mblks(vgenp->pri_tx_vmp); 527 } 528 KMEM_FREE(vgenp); 529 return (DDI_FAILURE); 530 } 531 532 /* 533 * Called by vnet to undo the initializations done by vgen_init(). 534 * The handle provided by generic transport during vgen_init() is the argument. 535 */ 536 int 537 vgen_uninit(void *arg) 538 { 539 vgen_t *vgenp = (vgen_t *)arg; 540 vio_mblk_pool_t *rp; 541 vio_mblk_pool_t *nrp; 542 543 if (vgenp == NULL) { 544 return (DDI_FAILURE); 545 } 546 547 DBG1(vgenp, NULL, "enter\n"); 548 549 /* unregister with MD event generator */ 550 vgen_mdeg_unreg(vgenp); 551 552 mutex_enter(&vgenp->lock); 553 554 /* detach all ports from the device */ 555 vgen_detach_ports(vgenp); 556 557 /* 558 * free any pending rx mblk pools, 559 * that couldn't be freed previously during channel detach. 
560 */ 561 rp = vgenp->rmp; 562 while (rp != NULL) { 563 nrp = vgenp->rmp = rp->nextp; 564 if (vio_destroy_mblks(rp)) { 565 WRITE_ENTER(&vgen_rw); 566 rp->nextp = vgen_rx_poolp; 567 vgen_rx_poolp = rp; 568 RW_EXIT(&vgen_rw); 569 } 570 rp = nrp; 571 } 572 573 /* free multicast table */ 574 kmem_free(vgenp->mctab, vgenp->mcsize * sizeof (struct ether_addr)); 575 576 /* free pri_types table */ 577 if (VGEN_PRI_ETH_DEFINED(vgenp)) { 578 kmem_free(vgenp->pri_types, 579 sizeof (uint16_t) * vgenp->pri_num_types); 580 (void) vio_destroy_mblks(vgenp->pri_tx_vmp); 581 } 582 583 mutex_exit(&vgenp->lock); 584 585 rw_destroy(&vgenp->vgenports.rwlock); 586 mutex_destroy(&vgenp->lock); 587 588 DBG1(vgenp, NULL, "exit\n"); 589 KMEM_FREE(vgenp); 590 591 return (DDI_SUCCESS); 592 } 593 594 /* 595 * module specific initialization common to all instances of vnet/vgen. 596 */ 597 void 598 vgen_mod_init(void) 599 { 600 rw_init(&vgen_rw, NULL, RW_DRIVER, NULL); 601 } 602 603 /* 604 * module specific cleanup common to all instances of vnet/vgen. 605 */ 606 int 607 vgen_mod_cleanup(void) 608 { 609 vio_mblk_pool_t *poolp, *npoolp; 610 611 /* 612 * If any rx mblk pools are still in use, return 613 * error and stop the module from unloading. 614 */ 615 WRITE_ENTER(&vgen_rw); 616 poolp = vgen_rx_poolp; 617 while (poolp != NULL) { 618 npoolp = vgen_rx_poolp = poolp->nextp; 619 if (vio_destroy_mblks(poolp) != 0) { 620 vgen_rx_poolp = poolp; 621 RW_EXIT(&vgen_rw); 622 return (EBUSY); 623 } 624 poolp = npoolp; 625 } 626 RW_EXIT(&vgen_rw); 627 628 return (0); 629 } 630 631 /* 632 * module specific uninitialization common to all instances of vnet/vgen. 
633 */ 634 void 635 vgen_mod_fini(void) 636 { 637 rw_destroy(&vgen_rw); 638 } 639 640 /* enable transmit/receive for the device */ 641 int 642 vgen_start(void *arg) 643 { 644 vgen_port_t *portp = (vgen_port_t *)arg; 645 vgen_t *vgenp = portp->vgenp; 646 647 DBG1(vgenp, NULL, "enter\n"); 648 mutex_enter(&portp->lock); 649 vgen_port_init(portp); 650 portp->flags |= VGEN_STARTED; 651 mutex_exit(&portp->lock); 652 DBG1(vgenp, NULL, "exit\n"); 653 654 return (DDI_SUCCESS); 655 } 656 657 /* stop transmit/receive */ 658 void 659 vgen_stop(void *arg) 660 { 661 vgen_port_t *portp = (vgen_port_t *)arg; 662 vgen_t *vgenp = portp->vgenp; 663 664 DBG1(vgenp, NULL, "enter\n"); 665 666 mutex_enter(&portp->lock); 667 vgen_port_uninit(portp); 668 portp->flags &= ~(VGEN_STARTED); 669 mutex_exit(&portp->lock); 670 DBG1(vgenp, NULL, "exit\n"); 671 672 } 673 674 /* vgen transmit function */ 675 static mblk_t * 676 vgen_tx(void *arg, mblk_t *mp) 677 { 678 int i; 679 vgen_port_t *portp; 680 int status = VGEN_FAILURE; 681 682 portp = (vgen_port_t *)arg; 683 /* 684 * Retry so that we avoid reporting a failure 685 * to the upper layer. Returning a failure may cause the 686 * upper layer to go into single threaded mode there by 687 * causing performance degradation, especially for a large 688 * number of connections. 689 */ 690 for (i = 0; i < vgen_tx_retries; ) { 691 status = vgen_portsend(portp, mp); 692 if (status == VGEN_SUCCESS) { 693 break; 694 } 695 if (++i < vgen_tx_retries) 696 delay(drv_usectohz(vgen_tx_delay)); 697 } 698 if (status != VGEN_SUCCESS) { 699 /* failure */ 700 return (mp); 701 } 702 /* success */ 703 return (NULL); 704 } 705 706 /* 707 * This function provides any necessary tagging/untagging of the frames 708 * that are being transmitted over the port. It first verifies the vlan 709 * membership of the destination(port) and drops the packet if the 710 * destination doesn't belong to the given vlan. 
711 * 712 * Arguments: 713 * portp: port over which the frames should be transmitted 714 * mp: frame to be transmitted 715 * is_tagged: 716 * B_TRUE: indicates frame header contains the vlan tag already. 717 * B_FALSE: indicates frame is untagged. 718 * vid: vlan in which the frame should be transmitted. 719 * 720 * Returns: 721 * Sucess: frame(mblk_t *) after doing the necessary tag/untag. 722 * Failure: NULL 723 */ 724 static mblk_t * 725 vgen_vlan_frame_fixtag(vgen_port_t *portp, mblk_t *mp, boolean_t is_tagged, 726 uint16_t vid) 727 { 728 vgen_t *vgenp; 729 boolean_t dst_tagged; 730 int rv; 731 732 vgenp = portp->vgenp; 733 734 /* 735 * If the packet is going to a vnet: 736 * Check if the destination vnet is in the same vlan. 737 * Check the frame header if tag or untag is needed. 738 * 739 * We do not check the above conditions if the packet is going to vsw: 740 * vsw must be present implicitly in all the vlans that a vnet device 741 * is configured into; even if vsw itself is not assigned to those 742 * vlans as an interface. For instance, the packet might be destined 743 * to another vnet(indirectly through vsw) or to an external host 744 * which is in the same vlan as this vnet and vsw itself may not be 745 * present in that vlan. Similarly packets going to vsw must be 746 * always tagged(unless in the default-vlan) if not already tagged, 747 * as we do not know the final destination. This is needed because 748 * vsw must always invoke its switching function only after tagging 749 * the packet; otherwise after switching function determines the 750 * destination we cannot figure out if the destination belongs to the 751 * the same vlan that the frame originated from and if it needs tag/ 752 * untag. Note that vsw will tag the packet itself when it receives 753 * it over the channel from a client if needed. However, that is 754 * needed only in the case of vlan unaware clients such as obp or 755 * earlier versions of vnet. 
756 * 757 */ 758 if (portp != vgenp->vsw_portp) { 759 /* 760 * Packet going to a vnet. Check if the destination vnet is in 761 * the same vlan. Then check the frame header if tag/untag is 762 * needed. 763 */ 764 rv = vgen_vlan_lookup(portp->vlan_hashp, vid); 765 if (rv == B_FALSE) { 766 /* drop the packet */ 767 freemsg(mp); 768 return (NULL); 769 } 770 771 /* is the destination tagged or untagged in this vlan? */ 772 (vid == portp->pvid) ? (dst_tagged = B_FALSE) : 773 (dst_tagged = B_TRUE); 774 775 if (is_tagged == dst_tagged) { 776 /* no tagging/untagging needed */ 777 return (mp); 778 } 779 780 if (is_tagged == B_TRUE) { 781 /* frame is tagged; destination needs untagged */ 782 mp = vnet_vlan_remove_tag(mp); 783 return (mp); 784 } 785 786 /* (is_tagged == B_FALSE): fallthru to tag tx packet: */ 787 } 788 789 /* 790 * Packet going to a vnet needs tagging. 791 * OR 792 * If the packet is going to vsw, then it must be tagged in all cases: 793 * unknown unicast, broadcast/multicast or to vsw interface. 794 */ 795 796 if (is_tagged == B_FALSE) { 797 mp = vnet_vlan_insert_tag(mp, vid); 798 } 799 800 return (mp); 801 } 802 803 /* transmit packets over the given port */ 804 static int 805 vgen_portsend(vgen_port_t *portp, mblk_t *mp) 806 { 807 vgen_ldclist_t *ldclp; 808 vgen_ldc_t *ldcp; 809 int status; 810 int rv = VGEN_SUCCESS; 811 vgen_t *vgenp = portp->vgenp; 812 vnet_t *vnetp = vgenp->vnetp; 813 boolean_t is_tagged; 814 boolean_t dec_refcnt = B_FALSE; 815 uint16_t vlan_id; 816 struct ether_header *ehp; 817 818 if (portp->use_vsw_port) { 819 (void) atomic_inc_32(&vgenp->vsw_port_refcnt); 820 portp = portp->vgenp->vsw_portp; 821 dec_refcnt = B_TRUE; 822 } 823 if (portp == NULL) { 824 return (VGEN_FAILURE); 825 } 826 827 /* 828 * Determine the vlan id that the frame belongs to. 
829 */ 830 ehp = (struct ether_header *)mp->b_rptr; 831 is_tagged = vgen_frame_lookup_vid(vnetp, ehp, &vlan_id); 832 833 if (vlan_id == vnetp->default_vlan_id) { 834 835 /* Frames in default vlan must be untagged */ 836 ASSERT(is_tagged == B_FALSE); 837 838 /* 839 * If the destination is a vnet-port verify it belongs to the 840 * default vlan; otherwise drop the packet. We do not need 841 * this check for vsw-port, as it should implicitly belong to 842 * this vlan; see comments in vgen_vlan_frame_fixtag(). 843 */ 844 if (portp != vgenp->vsw_portp && 845 portp->pvid != vnetp->default_vlan_id) { 846 freemsg(mp); 847 goto portsend_ret; 848 } 849 850 } else { /* frame not in default-vlan */ 851 852 mp = vgen_vlan_frame_fixtag(portp, mp, is_tagged, vlan_id); 853 if (mp == NULL) { 854 goto portsend_ret; 855 } 856 857 } 858 859 ldclp = &portp->ldclist; 860 READ_ENTER(&ldclp->rwlock); 861 /* 862 * NOTE: for now, we will assume we have a single channel. 863 */ 864 if (ldclp->headp == NULL) { 865 RW_EXIT(&ldclp->rwlock); 866 rv = VGEN_FAILURE; 867 goto portsend_ret; 868 } 869 ldcp = ldclp->headp; 870 871 status = ldcp->tx(ldcp, mp); 872 873 RW_EXIT(&ldclp->rwlock); 874 875 if (status != VGEN_TX_SUCCESS) { 876 rv = VGEN_FAILURE; 877 } 878 879 portsend_ret: 880 if (dec_refcnt == B_TRUE) { 881 (void) atomic_dec_32(&vgenp->vsw_port_refcnt); 882 } 883 return (rv); 884 } 885 886 /* 887 * Wrapper function to transmit normal and/or priority frames over the channel. 
888 */ 889 static int 890 vgen_ldcsend(void *arg, mblk_t *mp) 891 { 892 vgen_ldc_t *ldcp = (vgen_ldc_t *)arg; 893 int status; 894 struct ether_header *ehp; 895 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 896 uint32_t num_types; 897 uint16_t *types; 898 int i; 899 900 ASSERT(VGEN_PRI_ETH_DEFINED(vgenp)); 901 902 num_types = vgenp->pri_num_types; 903 types = vgenp->pri_types; 904 ehp = (struct ether_header *)mp->b_rptr; 905 906 for (i = 0; i < num_types; i++) { 907 908 if (ehp->ether_type == types[i]) { 909 /* priority frame, use pri tx function */ 910 vgen_ldcsend_pkt(ldcp, mp); 911 return (VGEN_SUCCESS); 912 } 913 914 } 915 916 status = vgen_ldcsend_dring(ldcp, mp); 917 918 return (status); 919 } 920 921 /* 922 * This functions handles ldc channel reset while in the context 923 * of transmit routines: vgen_ldcsend_pkt() or vgen_ldcsend_dring(). 924 */ 925 static void 926 vgen_ldcsend_process_reset(vgen_ldc_t *ldcp) 927 { 928 ldc_status_t istatus; 929 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 930 931 if (mutex_tryenter(&ldcp->cblock)) { 932 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) { 933 DWARN(vgenp, ldcp, "ldc_status() error\n"); 934 } else { 935 ldcp->ldc_status = istatus; 936 } 937 if (ldcp->ldc_status != LDC_UP) { 938 vgen_handle_evt_reset(ldcp); 939 } 940 mutex_exit(&ldcp->cblock); 941 } 942 } 943 944 /* 945 * This function transmits the frame in the payload of a raw data 946 * (VIO_PKT_DATA) message. Thus, it provides an Out-Of-Band path to 947 * send special frames with high priorities, without going through 948 * the normal data path which uses descriptor ring mechanism. 
 */
static void
vgen_ldcsend_pkt(void *arg, mblk_t *mp)
{
	vgen_ldc_t	*ldcp = (vgen_ldc_t *)arg;
	vio_raw_data_msg_t	*pkt;
	mblk_t		*bp;
	mblk_t		*nmp = NULL;
	caddr_t		dst;
	uint32_t	mblksz;
	uint32_t	size;
	uint32_t	nbytes;
	int		rv;
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
	vgen_stats_t	*statsp = &ldcp->stats;

	/*
	 * NOTE: mp is always consumed by this function (freed at
	 * send_pkt_exit), whether or not the transmit succeeds; there is
	 * no return value. Failures only bump the tx_pri_fail counter.
	 */

	/* drop the packet if ldc is not up or handshake is not done */
	if (ldcp->ldc_status != LDC_UP) {
		(void) atomic_inc_32(&statsp->tx_pri_fail);
		DWARN(vgenp, ldcp, "status(%d), dropping packet\n",
		    ldcp->ldc_status);
		goto send_pkt_exit;
	}

	if (ldcp->hphase != VH_DONE) {
		(void) atomic_inc_32(&statsp->tx_pri_fail);
		DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
		    ldcp->hphase);
		goto send_pkt_exit;
	}

	size = msgsize(mp);

	/* frame size bigger than available payload len of raw data msg ? */
	if (size > (size_t)(ldcp->msglen - VIO_PKT_DATA_HDRSIZE)) {
		(void) atomic_inc_32(&statsp->tx_pri_fail);
		DWARN(vgenp, ldcp, "invalid size(%d)\n", size);
		goto send_pkt_exit;
	}

	/* pad runt frames up to the ethernet minimum */
	if (size < ETHERMIN)
		size = ETHERMIN;

	/* alloc space for a raw data message */
	nmp = vio_allocb(vgenp->pri_tx_vmp);
	if (nmp == NULL) {
		(void) atomic_inc_32(&statsp->tx_pri_fail);
		DWARN(vgenp, ldcp, "vio_allocb failed\n");
		goto send_pkt_exit;
	}
	pkt = (vio_raw_data_msg_t *)nmp->b_rptr;

	/* copy frame into the payload of raw data message */
	dst = (caddr_t)pkt->data;
	for (bp = mp; bp != NULL; bp = bp->b_cont) {
		mblksz = MBLKL(bp);
		bcopy(bp->b_rptr, dst, mblksz);
		dst += mblksz;
	}

	/* setup the raw data msg */
	pkt->tag.vio_msgtype = VIO_TYPE_DATA;
	pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;
	pkt->tag.vio_subtype_env = VIO_PKT_DATA;
	pkt->tag.vio_sid = ldcp->local_sid;
	nbytes = VIO_PKT_DATA_HDRSIZE + size;

	/* send the msg over ldc */
	rv = vgen_sendmsg(ldcp, (caddr_t)pkt, nbytes, B_FALSE);
	if (rv != VGEN_SUCCESS) {
		(void) atomic_inc_32(&statsp->tx_pri_fail);
		DWARN(vgenp, ldcp, "Error sending priority frame\n");
		/* ECONNRESET from the send indicates a channel reset */
		if (rv == ECONNRESET) {
			vgen_ldcsend_process_reset(ldcp);
		}
		goto send_pkt_exit;
	}

	/* update stats */
	(void) atomic_inc_64(&statsp->tx_pri_packets);
	(void) atomic_add_64(&statsp->tx_pri_bytes, size);

send_pkt_exit:
	/* nmp is the raw-data message copy; mp is the original frame */
	if (nmp != NULL)
		freemsg(nmp);
	freemsg(mp);
}

/*
 * This function transmits normal (non-priority) data frames over
 * the channel. It queues the frame into the transmit descriptor ring
 * and sends a VIO_DRING_DATA message if needed, to wake up the
 * peer to (re)start processing.
 */
static int
vgen_ldcsend_dring(void *arg, mblk_t *mp)
{
	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
	vgen_private_desc_t	*tbufp;
	vgen_private_desc_t	*rtbufp;
	vnet_public_desc_t	*rtxdp;
	vgen_private_desc_t	*ntbufp;
	vnet_public_desc_t	*txdp;
	vio_dring_entry_hdr_t	*hdrp;
	vgen_stats_t		*statsp;
	struct ether_header	*ehp;
	boolean_t		is_bcast = B_FALSE;
	boolean_t		is_mcast = B_FALSE;
	size_t			mblksz;
	caddr_t			dst;
	mblk_t			*bp;
	size_t			size;
	int			rv = 0;
	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
	vgen_hparams_t		*lp = &ldcp->local_hparams;

	statsp = &ldcp->stats;
	size = msgsize(mp);

	DBG1(vgenp, ldcp, "enter\n");

	if (ldcp->ldc_status != LDC_UP) {
		DWARN(vgenp, ldcp, "status(%d), dropping packet\n",
		    ldcp->ldc_status);
		/* retry ldc_up() if needed */
		if (ldcp->flags & CHANNEL_STARTED)
			(void) ldc_up(ldcp->ldc_handle);
		goto send_dring_exit;
	}

	/* drop the packet if ldc is not up or handshake is not done */
	if (ldcp->hphase != VH_DONE) {
		DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
		    ldcp->hphase);
		goto send_dring_exit;
1084 } 1085 1086 if (size > (size_t)lp->mtu) { 1087 DWARN(vgenp, ldcp, "invalid size(%d)\n", size); 1088 goto send_dring_exit; 1089 } 1090 if (size < ETHERMIN) 1091 size = ETHERMIN; 1092 1093 ehp = (struct ether_header *)mp->b_rptr; 1094 is_bcast = IS_BROADCAST(ehp); 1095 is_mcast = IS_MULTICAST(ehp); 1096 1097 mutex_enter(&ldcp->txlock); 1098 /* 1099 * allocate a descriptor 1100 */ 1101 tbufp = ldcp->next_tbufp; 1102 ntbufp = NEXTTBUF(ldcp, tbufp); 1103 if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */ 1104 1105 mutex_enter(&ldcp->tclock); 1106 /* Try reclaiming now */ 1107 vgen_reclaim_dring(ldcp); 1108 ldcp->reclaim_lbolt = ddi_get_lbolt(); 1109 1110 if (ntbufp == ldcp->cur_tbufp) { 1111 /* Now we are really out of tbuf/txds */ 1112 ldcp->need_resched = B_TRUE; 1113 mutex_exit(&ldcp->tclock); 1114 1115 statsp->tx_no_desc++; 1116 mutex_exit(&ldcp->txlock); 1117 1118 return (VGEN_TX_NORESOURCES); 1119 } 1120 mutex_exit(&ldcp->tclock); 1121 } 1122 /* update next available tbuf in the ring and update tx index */ 1123 ldcp->next_tbufp = ntbufp; 1124 INCR_TXI(ldcp->next_txi, ldcp); 1125 1126 /* Mark the buffer busy before releasing the lock */ 1127 tbufp->flags = VGEN_PRIV_DESC_BUSY; 1128 mutex_exit(&ldcp->txlock); 1129 1130 /* copy data into pre-allocated transmit buffer */ 1131 dst = tbufp->datap + VNET_IPALIGN; 1132 for (bp = mp; bp != NULL; bp = bp->b_cont) { 1133 mblksz = MBLKL(bp); 1134 bcopy(bp->b_rptr, dst, mblksz); 1135 dst += mblksz; 1136 } 1137 1138 tbufp->datalen = size; 1139 1140 /* initialize the corresponding public descriptor (txd) */ 1141 txdp = tbufp->descp; 1142 hdrp = &txdp->hdr; 1143 txdp->nbytes = size; 1144 txdp->ncookies = tbufp->ncookies; 1145 bcopy((tbufp->memcookie), (txdp->memcookie), 1146 tbufp->ncookies * sizeof (ldc_mem_cookie_t)); 1147 1148 mutex_enter(&ldcp->wrlock); 1149 /* 1150 * If the flags not set to BUSY, it implies that the clobber 1151 * was done while we were copying the data. 
In such case, 1152 * discard the packet and return. 1153 */ 1154 if (tbufp->flags != VGEN_PRIV_DESC_BUSY) { 1155 statsp->oerrors++; 1156 mutex_exit(&ldcp->wrlock); 1157 goto send_dring_exit; 1158 } 1159 hdrp->dstate = VIO_DESC_READY; 1160 1161 /* update stats */ 1162 statsp->opackets++; 1163 statsp->obytes += size; 1164 if (is_bcast) 1165 statsp->brdcstxmt++; 1166 else if (is_mcast) 1167 statsp->multixmt++; 1168 1169 /* send dring datamsg to the peer */ 1170 if (ldcp->resched_peer) { 1171 1172 rtbufp = &ldcp->tbufp[ldcp->resched_peer_txi]; 1173 rtxdp = rtbufp->descp; 1174 1175 if (rtxdp->hdr.dstate == VIO_DESC_READY) { 1176 1177 rv = vgen_send_dring_data(ldcp, 1178 (uint32_t)ldcp->resched_peer_txi, -1); 1179 if (rv != 0) { 1180 /* error: drop the packet */ 1181 DWARN(vgenp, ldcp, "vgen_send_dring_data " 1182 "failed: rv(%d) len(%d)\n", 1183 ldcp->ldc_id, rv, size); 1184 statsp->oerrors++; 1185 } else { 1186 ldcp->resched_peer = B_FALSE; 1187 } 1188 1189 } 1190 1191 } 1192 1193 mutex_exit(&ldcp->wrlock); 1194 1195 send_dring_exit: 1196 if (rv == ECONNRESET) { 1197 vgen_ldcsend_process_reset(ldcp); 1198 } 1199 freemsg(mp); 1200 DBG1(vgenp, ldcp, "exit\n"); 1201 return (VGEN_TX_SUCCESS); 1202 } 1203 1204 /* enable/disable a multicast address */ 1205 int 1206 vgen_multicst(void *arg, boolean_t add, const uint8_t *mca) 1207 { 1208 vgen_t *vgenp; 1209 vnet_mcast_msg_t mcastmsg; 1210 vio_msg_tag_t *tagp; 1211 vgen_port_t *portp; 1212 vgen_portlist_t *plistp; 1213 vgen_ldc_t *ldcp; 1214 vgen_ldclist_t *ldclp; 1215 struct ether_addr *addrp; 1216 int rv = DDI_FAILURE; 1217 uint32_t i; 1218 1219 portp = (vgen_port_t *)arg; 1220 vgenp = portp->vgenp; 1221 1222 if (portp != vgenp->vsw_portp) { 1223 return (DDI_SUCCESS); 1224 } 1225 1226 addrp = (struct ether_addr *)mca; 1227 tagp = &mcastmsg.tag; 1228 bzero(&mcastmsg, sizeof (mcastmsg)); 1229 1230 mutex_enter(&vgenp->lock); 1231 1232 plistp = &(vgenp->vgenports); 1233 1234 READ_ENTER(&plistp->rwlock); 1235 1236 portp = 
vgenp->vsw_portp; 1237 if (portp == NULL) { 1238 RW_EXIT(&plistp->rwlock); 1239 mutex_exit(&vgenp->lock); 1240 return (rv); 1241 } 1242 ldclp = &portp->ldclist; 1243 1244 READ_ENTER(&ldclp->rwlock); 1245 1246 ldcp = ldclp->headp; 1247 if (ldcp == NULL) 1248 goto vgen_mcast_exit; 1249 1250 mutex_enter(&ldcp->cblock); 1251 1252 if (ldcp->hphase == VH_DONE) { 1253 /* 1254 * If handshake is done, send a msg to vsw to add/remove 1255 * the multicast address. Otherwise, we just update this 1256 * mcast address in our table and the table will be sync'd 1257 * with vsw when handshake completes. 1258 */ 1259 tagp->vio_msgtype = VIO_TYPE_CTRL; 1260 tagp->vio_subtype = VIO_SUBTYPE_INFO; 1261 tagp->vio_subtype_env = VNET_MCAST_INFO; 1262 tagp->vio_sid = ldcp->local_sid; 1263 bcopy(mca, &(mcastmsg.mca), ETHERADDRL); 1264 mcastmsg.set = add; 1265 mcastmsg.count = 1; 1266 if (vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (mcastmsg), 1267 B_FALSE) != VGEN_SUCCESS) { 1268 DWARN(vgenp, ldcp, "vgen_sendmsg failed\n"); 1269 mutex_exit(&ldcp->cblock); 1270 goto vgen_mcast_exit; 1271 } 1272 } 1273 1274 mutex_exit(&ldcp->cblock); 1275 1276 if (add) { 1277 1278 /* expand multicast table if necessary */ 1279 if (vgenp->mccount >= vgenp->mcsize) { 1280 struct ether_addr *newtab; 1281 uint32_t newsize; 1282 1283 1284 newsize = vgenp->mcsize * 2; 1285 1286 newtab = kmem_zalloc(newsize * 1287 sizeof (struct ether_addr), KM_NOSLEEP); 1288 if (newtab == NULL) 1289 goto vgen_mcast_exit; 1290 bcopy(vgenp->mctab, newtab, vgenp->mcsize * 1291 sizeof (struct ether_addr)); 1292 kmem_free(vgenp->mctab, 1293 vgenp->mcsize * sizeof (struct ether_addr)); 1294 1295 vgenp->mctab = newtab; 1296 vgenp->mcsize = newsize; 1297 } 1298 1299 /* add address to the table */ 1300 vgenp->mctab[vgenp->mccount++] = *addrp; 1301 1302 } else { 1303 1304 /* delete address from the table */ 1305 for (i = 0; i < vgenp->mccount; i++) { 1306 if (ether_cmp(addrp, &(vgenp->mctab[i])) == 0) { 1307 1308 /* 1309 * If there's more than 
one address in this 1310 * table, delete the unwanted one by moving 1311 * the last one in the list over top of it; 1312 * otherwise, just remove it. 1313 */ 1314 if (vgenp->mccount > 1) { 1315 vgenp->mctab[i] = 1316 vgenp->mctab[vgenp->mccount-1]; 1317 } 1318 vgenp->mccount--; 1319 break; 1320 } 1321 } 1322 } 1323 1324 rv = DDI_SUCCESS; 1325 1326 vgen_mcast_exit: 1327 RW_EXIT(&ldclp->rwlock); 1328 RW_EXIT(&plistp->rwlock); 1329 1330 mutex_exit(&vgenp->lock); 1331 return (rv); 1332 } 1333 1334 /* set or clear promiscuous mode on the device */ 1335 static int 1336 vgen_promisc(void *arg, boolean_t on) 1337 { 1338 _NOTE(ARGUNUSED(arg, on)) 1339 return (DDI_SUCCESS); 1340 } 1341 1342 /* set the unicast mac address of the device */ 1343 static int 1344 vgen_unicst(void *arg, const uint8_t *mca) 1345 { 1346 _NOTE(ARGUNUSED(arg, mca)) 1347 return (DDI_SUCCESS); 1348 } 1349 1350 /* get device statistics */ 1351 int 1352 vgen_stat(void *arg, uint_t stat, uint64_t *val) 1353 { 1354 vgen_port_t *portp = (vgen_port_t *)arg; 1355 1356 *val = vgen_port_stat(portp, stat); 1357 1358 return (0); 1359 } 1360 1361 static void 1362 vgen_ioctl(void *arg, queue_t *wq, mblk_t *mp) 1363 { 1364 _NOTE(ARGUNUSED(arg, wq, mp)) 1365 } 1366 1367 /* vgen internal functions */ 1368 /* detach all ports from the device */ 1369 static void 1370 vgen_detach_ports(vgen_t *vgenp) 1371 { 1372 vgen_port_t *portp; 1373 vgen_portlist_t *plistp; 1374 1375 plistp = &(vgenp->vgenports); 1376 WRITE_ENTER(&plistp->rwlock); 1377 while ((portp = plistp->headp) != NULL) { 1378 vgen_port_detach(portp); 1379 } 1380 RW_EXIT(&plistp->rwlock); 1381 } 1382 1383 /* 1384 * detach the given port. 
1385 */ 1386 static void 1387 vgen_port_detach(vgen_port_t *portp) 1388 { 1389 vgen_t *vgenp; 1390 vgen_ldclist_t *ldclp; 1391 int port_num; 1392 1393 vgenp = portp->vgenp; 1394 port_num = portp->port_num; 1395 1396 DBG1(vgenp, NULL, "port(%d):enter\n", port_num); 1397 1398 /* 1399 * If this port is connected to the vswitch, then 1400 * potentially there could be ports that may be using 1401 * this port to transmit packets. To address this do 1402 * the following: 1403 * - First set vgenp->vsw_portp to NULL, so that 1404 * its not used after that. 1405 * - Then wait for the refcnt to go down to 0. 1406 * - Now we can safely detach this port. 1407 */ 1408 if (vgenp->vsw_portp == portp) { 1409 vgenp->vsw_portp = NULL; 1410 while (vgenp->vsw_port_refcnt > 0) { 1411 delay(drv_usectohz(vgen_tx_delay)); 1412 } 1413 (void) atomic_swap_32(&vgenp->vsw_port_refcnt, 0); 1414 } 1415 1416 if (portp->vhp != NULL) { 1417 vio_net_resource_unreg(portp->vhp); 1418 portp->vhp = NULL; 1419 } 1420 1421 vgen_vlan_destroy_hash(portp); 1422 1423 /* remove it from port list */ 1424 vgen_port_list_remove(portp); 1425 1426 /* detach channels from this port */ 1427 ldclp = &portp->ldclist; 1428 WRITE_ENTER(&ldclp->rwlock); 1429 while (ldclp->headp) { 1430 vgen_ldc_detach(ldclp->headp); 1431 } 1432 RW_EXIT(&ldclp->rwlock); 1433 rw_destroy(&ldclp->rwlock); 1434 1435 if (portp->num_ldcs != 0) { 1436 kmem_free(portp->ldc_ids, portp->num_ldcs * sizeof (uint64_t)); 1437 portp->num_ldcs = 0; 1438 } 1439 1440 mutex_destroy(&portp->lock); 1441 KMEM_FREE(portp); 1442 1443 DBG1(vgenp, NULL, "port(%d):exit\n", port_num); 1444 } 1445 1446 /* add a port to port list */ 1447 static void 1448 vgen_port_list_insert(vgen_port_t *portp) 1449 { 1450 vgen_portlist_t *plistp; 1451 vgen_t *vgenp; 1452 1453 vgenp = portp->vgenp; 1454 plistp = &(vgenp->vgenports); 1455 1456 if (plistp->headp == NULL) { 1457 plistp->headp = portp; 1458 } else { 1459 plistp->tailp->nextp = portp; 1460 } 1461 plistp->tailp = portp; 1462 
portp->nextp = NULL; 1463 } 1464 1465 /* remove a port from port list */ 1466 static void 1467 vgen_port_list_remove(vgen_port_t *portp) 1468 { 1469 vgen_port_t *prevp; 1470 vgen_port_t *nextp; 1471 vgen_portlist_t *plistp; 1472 vgen_t *vgenp; 1473 1474 vgenp = portp->vgenp; 1475 1476 plistp = &(vgenp->vgenports); 1477 1478 if (plistp->headp == NULL) 1479 return; 1480 1481 if (portp == plistp->headp) { 1482 plistp->headp = portp->nextp; 1483 if (portp == plistp->tailp) 1484 plistp->tailp = plistp->headp; 1485 } else { 1486 for (prevp = plistp->headp; 1487 ((nextp = prevp->nextp) != NULL) && (nextp != portp); 1488 prevp = nextp) 1489 ; 1490 if (nextp == portp) { 1491 prevp->nextp = portp->nextp; 1492 } 1493 if (portp == plistp->tailp) 1494 plistp->tailp = prevp; 1495 } 1496 } 1497 1498 /* lookup a port in the list based on port_num */ 1499 static vgen_port_t * 1500 vgen_port_lookup(vgen_portlist_t *plistp, int port_num) 1501 { 1502 vgen_port_t *portp = NULL; 1503 1504 for (portp = plistp->headp; portp != NULL; portp = portp->nextp) { 1505 if (portp->port_num == port_num) { 1506 break; 1507 } 1508 } 1509 1510 return (portp); 1511 } 1512 1513 /* enable ports for transmit/receive */ 1514 static void 1515 vgen_init_ports(vgen_t *vgenp) 1516 { 1517 vgen_port_t *portp; 1518 vgen_portlist_t *plistp; 1519 1520 plistp = &(vgenp->vgenports); 1521 READ_ENTER(&plistp->rwlock); 1522 1523 for (portp = plistp->headp; portp != NULL; portp = portp->nextp) { 1524 vgen_port_init(portp); 1525 } 1526 1527 RW_EXIT(&plistp->rwlock); 1528 } 1529 1530 static void 1531 vgen_port_init(vgen_port_t *portp) 1532 { 1533 /* Add the port to the specified vlans */ 1534 vgen_vlan_add_ids(portp); 1535 1536 /* Bring up the channels of this port */ 1537 vgen_init_ldcs(portp); 1538 } 1539 1540 /* disable transmit/receive on ports */ 1541 static void 1542 vgen_uninit_ports(vgen_t *vgenp) 1543 { 1544 vgen_port_t *portp; 1545 vgen_portlist_t *plistp; 1546 1547 plistp = &(vgenp->vgenports); 1548 
READ_ENTER(&plistp->rwlock); 1549 1550 for (portp = plistp->headp; portp != NULL; portp = portp->nextp) { 1551 vgen_port_uninit(portp); 1552 } 1553 1554 RW_EXIT(&plistp->rwlock); 1555 } 1556 1557 static void 1558 vgen_port_uninit(vgen_port_t *portp) 1559 { 1560 vgen_uninit_ldcs(portp); 1561 1562 /* remove the port from vlans it has been assigned to */ 1563 vgen_vlan_remove_ids(portp); 1564 } 1565 1566 /* 1567 * Scan the machine description for this instance of vnet 1568 * and read its properties. Called only from vgen_init(). 1569 * Returns: 0 on success, 1 on failure. 1570 */ 1571 static int 1572 vgen_read_mdprops(vgen_t *vgenp) 1573 { 1574 vnet_t *vnetp = vgenp->vnetp; 1575 md_t *mdp = NULL; 1576 mde_cookie_t rootnode; 1577 mde_cookie_t *listp = NULL; 1578 uint64_t cfgh; 1579 char *name; 1580 int rv = 1; 1581 int num_nodes = 0; 1582 int num_devs = 0; 1583 int listsz = 0; 1584 int i; 1585 1586 if ((mdp = md_get_handle()) == NULL) { 1587 return (rv); 1588 } 1589 1590 num_nodes = md_node_count(mdp); 1591 ASSERT(num_nodes > 0); 1592 1593 listsz = num_nodes * sizeof (mde_cookie_t); 1594 listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP); 1595 1596 rootnode = md_root_node(mdp); 1597 1598 /* search for all "virtual_device" nodes */ 1599 num_devs = md_scan_dag(mdp, rootnode, 1600 md_find_name(mdp, vdev_propname), 1601 md_find_name(mdp, "fwd"), listp); 1602 if (num_devs <= 0) { 1603 goto vgen_readmd_exit; 1604 } 1605 1606 /* 1607 * Now loop through the list of virtual-devices looking for 1608 * devices with name "network" and for each such device compare 1609 * its instance with what we have from the 'reg' property to 1610 * find the right node in MD and then read all its properties. 1611 */ 1612 for (i = 0; i < num_devs; i++) { 1613 1614 if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) { 1615 goto vgen_readmd_exit; 1616 } 1617 1618 /* is this a "network" device? 
*/ 1619 if (strcmp(name, vnet_propname) != 0) 1620 continue; 1621 1622 if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) { 1623 goto vgen_readmd_exit; 1624 } 1625 1626 /* is this the required instance of vnet? */ 1627 if (vgenp->regprop != cfgh) 1628 continue; 1629 1630 /* 1631 * Read the mtu. Note that we set the mtu of vnet device within 1632 * this routine itself, after validating the range. 1633 */ 1634 vgen_mtu_read(vgenp, mdp, listp[i], &vnetp->mtu); 1635 if (vnetp->mtu < ETHERMTU || vnetp->mtu > VNET_MAX_MTU) { 1636 vnetp->mtu = ETHERMTU; 1637 } 1638 vgenp->max_frame_size = vnetp->mtu + 1639 sizeof (struct ether_header) + VLAN_TAGSZ; 1640 1641 /* read priority ether types */ 1642 vgen_read_pri_eth_types(vgenp, mdp, listp[i]); 1643 1644 /* read vlan id properties of this vnet instance */ 1645 vgen_vlan_read_ids(vgenp, VGEN_LOCAL, mdp, listp[i], 1646 &vnetp->pvid, &vnetp->vids, &vnetp->nvids, 1647 &vnetp->default_vlan_id); 1648 1649 rv = 0; 1650 break; 1651 } 1652 1653 vgen_readmd_exit: 1654 1655 kmem_free(listp, listsz); 1656 (void) md_fini_handle(mdp); 1657 return (rv); 1658 } 1659 1660 /* 1661 * Read vlan id properties of the given MD node. 
1662 * Arguments: 1663 * arg: device argument(vnet device or a port) 1664 * type: type of arg; VGEN_LOCAL(vnet device) or VGEN_PEER(port) 1665 * mdp: machine description 1666 * node: md node cookie 1667 * 1668 * Returns: 1669 * pvidp: port-vlan-id of the node 1670 * vidspp: list of vlan-ids of the node 1671 * nvidsp: # of vlan-ids in the list 1672 * default_idp: default-vlan-id of the node(if node is vnet device) 1673 */ 1674 static void 1675 vgen_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node, 1676 uint16_t *pvidp, uint16_t **vidspp, uint16_t *nvidsp, 1677 uint16_t *default_idp) 1678 { 1679 vgen_t *vgenp; 1680 vnet_t *vnetp; 1681 vgen_port_t *portp; 1682 char *pvid_propname; 1683 char *vid_propname; 1684 uint_t nvids; 1685 uint32_t vids_size; 1686 int rv; 1687 int i; 1688 uint64_t *data; 1689 uint64_t val; 1690 int size; 1691 int inst; 1692 1693 if (type == VGEN_LOCAL) { 1694 1695 vgenp = (vgen_t *)arg; 1696 vnetp = vgenp->vnetp; 1697 pvid_propname = vgen_pvid_propname; 1698 vid_propname = vgen_vid_propname; 1699 inst = vnetp->instance; 1700 1701 } else if (type == VGEN_PEER) { 1702 1703 portp = (vgen_port_t *)arg; 1704 vgenp = portp->vgenp; 1705 vnetp = vgenp->vnetp; 1706 pvid_propname = port_pvid_propname; 1707 vid_propname = port_vid_propname; 1708 inst = portp->port_num; 1709 1710 } else { 1711 return; 1712 } 1713 1714 if (type == VGEN_LOCAL && default_idp != NULL) { 1715 rv = md_get_prop_val(mdp, node, vgen_dvid_propname, &val); 1716 if (rv != 0) { 1717 DWARN(vgenp, NULL, "prop(%s) not found", 1718 vgen_dvid_propname); 1719 1720 *default_idp = vnet_default_vlan_id; 1721 } else { 1722 *default_idp = val & 0xFFF; 1723 DBG2(vgenp, NULL, "%s(%d): (%d)\n", vgen_dvid_propname, 1724 inst, *default_idp); 1725 } 1726 } 1727 1728 rv = md_get_prop_val(mdp, node, pvid_propname, &val); 1729 if (rv != 0) { 1730 DWARN(vgenp, NULL, "prop(%s) not found", pvid_propname); 1731 *pvidp = vnet_default_vlan_id; 1732 } else { 1733 1734 *pvidp = val & 0xFFF; 1735 
DBG2(vgenp, NULL, "%s(%d): (%d)\n", 1736 pvid_propname, inst, *pvidp); 1737 } 1738 1739 rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data, 1740 &size); 1741 if (rv != 0) { 1742 DBG2(vgenp, NULL, "prop(%s) not found", vid_propname); 1743 size = 0; 1744 } else { 1745 size /= sizeof (uint64_t); 1746 } 1747 nvids = size; 1748 1749 if (nvids != 0) { 1750 DBG2(vgenp, NULL, "%s(%d): ", vid_propname, inst); 1751 vids_size = sizeof (uint16_t) * nvids; 1752 *vidspp = kmem_zalloc(vids_size, KM_SLEEP); 1753 for (i = 0; i < nvids; i++) { 1754 (*vidspp)[i] = data[i] & 0xFFFF; 1755 DBG2(vgenp, NULL, " %d ", (*vidspp)[i]); 1756 } 1757 DBG2(vgenp, NULL, "\n"); 1758 } 1759 1760 *nvidsp = nvids; 1761 } 1762 1763 /* 1764 * Create a vlan id hash table for the given port. 1765 */ 1766 static void 1767 vgen_vlan_create_hash(vgen_port_t *portp) 1768 { 1769 char hashname[MAXNAMELEN]; 1770 1771 (void) snprintf(hashname, MAXNAMELEN, "port%d-vlan-hash", 1772 portp->port_num); 1773 1774 portp->vlan_nchains = vgen_vlan_nchains; 1775 portp->vlan_hashp = mod_hash_create_idhash(hashname, 1776 portp->vlan_nchains, mod_hash_null_valdtor); 1777 } 1778 1779 /* 1780 * Destroy the vlan id hash table in the given port. 1781 */ 1782 static void 1783 vgen_vlan_destroy_hash(vgen_port_t *portp) 1784 { 1785 if (portp->vlan_hashp != NULL) { 1786 mod_hash_destroy_hash(portp->vlan_hashp); 1787 portp->vlan_hashp = NULL; 1788 portp->vlan_nchains = 0; 1789 } 1790 } 1791 1792 /* 1793 * Add a port to the vlans specified in its port properites. 
1794 */ 1795 static void 1796 vgen_vlan_add_ids(vgen_port_t *portp) 1797 { 1798 int rv; 1799 int i; 1800 1801 rv = mod_hash_insert(portp->vlan_hashp, 1802 (mod_hash_key_t)VLAN_ID_KEY(portp->pvid), 1803 (mod_hash_val_t)B_TRUE); 1804 ASSERT(rv == 0); 1805 1806 for (i = 0; i < portp->nvids; i++) { 1807 rv = mod_hash_insert(portp->vlan_hashp, 1808 (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]), 1809 (mod_hash_val_t)B_TRUE); 1810 ASSERT(rv == 0); 1811 } 1812 } 1813 1814 /* 1815 * Remove a port from the vlans it has been assigned to. 1816 */ 1817 static void 1818 vgen_vlan_remove_ids(vgen_port_t *portp) 1819 { 1820 int rv; 1821 int i; 1822 mod_hash_val_t vp; 1823 1824 rv = mod_hash_remove(portp->vlan_hashp, 1825 (mod_hash_key_t)VLAN_ID_KEY(portp->pvid), 1826 (mod_hash_val_t *)&vp); 1827 ASSERT(rv == 0); 1828 1829 for (i = 0; i < portp->nvids; i++) { 1830 rv = mod_hash_remove(portp->vlan_hashp, 1831 (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]), 1832 (mod_hash_val_t *)&vp); 1833 ASSERT(rv == 0); 1834 } 1835 } 1836 1837 /* 1838 * Lookup the vlan id of the given tx frame. If it is a vlan-tagged frame, 1839 * then the vlan-id is available in the tag; otherwise, its vlan id is 1840 * implicitly obtained from the port-vlan-id of the vnet device. 1841 * The vlan id determined is returned in vidp. 1842 * Returns: B_TRUE if it is a tagged frame; B_FALSE if it is untagged. 1843 */ 1844 static boolean_t 1845 vgen_frame_lookup_vid(vnet_t *vnetp, struct ether_header *ehp, uint16_t *vidp) 1846 { 1847 struct ether_vlan_header *evhp; 1848 1849 /* If it's a tagged frame, get the vlan id from vlan header */ 1850 if (ehp->ether_type == ETHERTYPE_VLAN) { 1851 1852 evhp = (struct ether_vlan_header *)ehp; 1853 *vidp = VLAN_ID(ntohs(evhp->ether_tci)); 1854 return (B_TRUE); 1855 } 1856 1857 /* Untagged frame, vlan-id is the pvid of vnet device */ 1858 *vidp = vnetp->pvid; 1859 return (B_FALSE); 1860 } 1861 1862 /* 1863 * Find the given vlan id in the hash table. 
1864 * Return: B_TRUE if the id is found; B_FALSE if not found. 1865 */ 1866 static boolean_t 1867 vgen_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid) 1868 { 1869 int rv; 1870 mod_hash_val_t vp; 1871 1872 rv = mod_hash_find(vlan_hashp, VLAN_ID_KEY(vid), (mod_hash_val_t *)&vp); 1873 1874 if (rv != 0) 1875 return (B_FALSE); 1876 1877 return (B_TRUE); 1878 } 1879 1880 /* 1881 * This function reads "priority-ether-types" property from md. This property 1882 * is used to enable support for priority frames. Applications which need 1883 * guaranteed and timely delivery of certain high priority frames to/from 1884 * a vnet or vsw within ldoms, should configure this property by providing 1885 * the ether type(s) for which the priority facility is needed. 1886 * Normal data frames are delivered over a ldc channel using the descriptor 1887 * ring mechanism which is constrained by factors such as descriptor ring size, 1888 * the rate at which the ring is processed at the peer ldc end point, etc. 1889 * The priority mechanism provides an Out-Of-Band path to send/receive frames 1890 * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the 1891 * descriptor ring path and enables a more reliable and timely delivery of 1892 * frames to the peer. 1893 */ 1894 static void 1895 vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp, mde_cookie_t node) 1896 { 1897 int rv; 1898 uint16_t *types; 1899 uint64_t *data; 1900 int size; 1901 int i; 1902 size_t mblk_sz; 1903 1904 rv = md_get_prop_data(mdp, node, pri_types_propname, 1905 (uint8_t **)&data, &size); 1906 if (rv != 0) { 1907 /* 1908 * Property may not exist if we are running pre-ldoms1.1 f/w. 1909 * Check if 'vgen_pri_eth_type' has been set in that case. 
1910 */ 1911 if (vgen_pri_eth_type != 0) { 1912 size = sizeof (vgen_pri_eth_type); 1913 data = &vgen_pri_eth_type; 1914 } else { 1915 DBG2(vgenp, NULL, 1916 "prop(%s) not found", pri_types_propname); 1917 size = 0; 1918 } 1919 } 1920 1921 if (size == 0) { 1922 vgenp->pri_num_types = 0; 1923 return; 1924 } 1925 1926 /* 1927 * we have some priority-ether-types defined; 1928 * allocate a table of these types and also 1929 * allocate a pool of mblks to transmit these 1930 * priority packets. 1931 */ 1932 size /= sizeof (uint64_t); 1933 vgenp->pri_num_types = size; 1934 vgenp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP); 1935 for (i = 0, types = vgenp->pri_types; i < size; i++) { 1936 types[i] = data[i] & 0xFFFF; 1937 } 1938 mblk_sz = (VIO_PKT_DATA_HDRSIZE + vgenp->max_frame_size + 7) & ~7; 1939 (void) vio_create_mblks(vgen_pri_tx_nmblks, mblk_sz, 1940 &vgenp->pri_tx_vmp); 1941 } 1942 1943 static void 1944 vgen_mtu_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node, uint32_t *mtu) 1945 { 1946 int rv; 1947 uint64_t val; 1948 char *mtu_propname; 1949 1950 mtu_propname = vgen_mtu_propname; 1951 1952 rv = md_get_prop_val(mdp, node, mtu_propname, &val); 1953 if (rv != 0) { 1954 DWARN(vgenp, NULL, "prop(%s) not found", mtu_propname); 1955 *mtu = vnet_ethermtu; 1956 } else { 1957 1958 *mtu = val & 0xFFFF; 1959 DBG2(vgenp, NULL, "%s(%d): (%d)\n", mtu_propname, 1960 vgenp->instance, *mtu); 1961 } 1962 } 1963 1964 /* register with MD event generator */ 1965 static int 1966 vgen_mdeg_reg(vgen_t *vgenp) 1967 { 1968 mdeg_prop_spec_t *pspecp; 1969 mdeg_node_spec_t *parentp; 1970 uint_t templatesz; 1971 int rv; 1972 mdeg_handle_t dev_hdl = NULL; 1973 mdeg_handle_t port_hdl = NULL; 1974 1975 templatesz = sizeof (vgen_prop_template); 1976 pspecp = kmem_zalloc(templatesz, KM_NOSLEEP); 1977 if (pspecp == NULL) { 1978 return (DDI_FAILURE); 1979 } 1980 parentp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_NOSLEEP); 1981 if (parentp == NULL) { 1982 kmem_free(pspecp, templatesz); 
1983 return (DDI_FAILURE); 1984 } 1985 1986 bcopy(vgen_prop_template, pspecp, templatesz); 1987 1988 /* 1989 * NOTE: The instance here refers to the value of "reg" property and 1990 * not the dev_info instance (ddi_get_instance()) of vnet. 1991 */ 1992 VGEN_SET_MDEG_PROP_INST(pspecp, vgenp->regprop); 1993 1994 parentp->namep = "virtual-device"; 1995 parentp->specp = pspecp; 1996 1997 /* save parentp in vgen_t */ 1998 vgenp->mdeg_parentp = parentp; 1999 2000 /* 2001 * Register an interest in 'virtual-device' nodes with a 2002 * 'name' property of 'network' 2003 */ 2004 rv = mdeg_register(parentp, &vdev_match, vgen_mdeg_cb, vgenp, &dev_hdl); 2005 if (rv != MDEG_SUCCESS) { 2006 DERR(vgenp, NULL, "mdeg_register failed\n"); 2007 goto mdeg_reg_fail; 2008 } 2009 2010 /* Register an interest in 'port' nodes */ 2011 rv = mdeg_register(parentp, &vport_match, vgen_mdeg_port_cb, vgenp, 2012 &port_hdl); 2013 if (rv != MDEG_SUCCESS) { 2014 DERR(vgenp, NULL, "mdeg_register failed\n"); 2015 goto mdeg_reg_fail; 2016 } 2017 2018 /* save mdeg handle in vgen_t */ 2019 vgenp->mdeg_dev_hdl = dev_hdl; 2020 vgenp->mdeg_port_hdl = port_hdl; 2021 2022 return (DDI_SUCCESS); 2023 2024 mdeg_reg_fail: 2025 if (dev_hdl != NULL) { 2026 (void) mdeg_unregister(dev_hdl); 2027 } 2028 KMEM_FREE(parentp); 2029 kmem_free(pspecp, templatesz); 2030 vgenp->mdeg_parentp = NULL; 2031 return (DDI_FAILURE); 2032 } 2033 2034 /* unregister with MD event generator */ 2035 static void 2036 vgen_mdeg_unreg(vgen_t *vgenp) 2037 { 2038 (void) mdeg_unregister(vgenp->mdeg_dev_hdl); 2039 (void) mdeg_unregister(vgenp->mdeg_port_hdl); 2040 kmem_free(vgenp->mdeg_parentp->specp, sizeof (vgen_prop_template)); 2041 KMEM_FREE(vgenp->mdeg_parentp); 2042 vgenp->mdeg_parentp = NULL; 2043 vgenp->mdeg_dev_hdl = NULL; 2044 vgenp->mdeg_port_hdl = NULL; 2045 } 2046 2047 /* mdeg callback function for the port node */ 2048 static int 2049 vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp) 2050 { 2051 int idx; 2052 int vsw_idx = -1; 
2053 uint64_t val; 2054 vgen_t *vgenp; 2055 2056 if ((resp == NULL) || (cb_argp == NULL)) { 2057 return (MDEG_FAILURE); 2058 } 2059 2060 vgenp = (vgen_t *)cb_argp; 2061 DBG1(vgenp, NULL, "enter\n"); 2062 2063 mutex_enter(&vgenp->lock); 2064 2065 DBG1(vgenp, NULL, "ports: removed(%x), " 2066 "added(%x), updated(%x)\n", resp->removed.nelem, 2067 resp->added.nelem, resp->match_curr.nelem); 2068 2069 for (idx = 0; idx < resp->removed.nelem; idx++) { 2070 (void) vgen_remove_port(vgenp, resp->removed.mdp, 2071 resp->removed.mdep[idx]); 2072 } 2073 2074 if (vgenp->vsw_portp == NULL) { 2075 /* 2076 * find vsw_port and add it first, because other ports need 2077 * this when adding fdb entry (see vgen_port_init()). 2078 */ 2079 for (idx = 0; idx < resp->added.nelem; idx++) { 2080 if (!(md_get_prop_val(resp->added.mdp, 2081 resp->added.mdep[idx], swport_propname, &val))) { 2082 if (val == 0) { 2083 /* 2084 * This port is connected to the 2085 * vsw on service domain. 2086 */ 2087 vsw_idx = idx; 2088 if (vgen_add_port(vgenp, 2089 resp->added.mdp, 2090 resp->added.mdep[idx]) != 2091 DDI_SUCCESS) { 2092 cmn_err(CE_NOTE, "vnet%d Could " 2093 "not initialize virtual " 2094 "switch port.", 2095 vgenp->instance); 2096 mutex_exit(&vgenp->lock); 2097 return (MDEG_FAILURE); 2098 } 2099 break; 2100 } 2101 } 2102 } 2103 if (vsw_idx == -1) { 2104 DWARN(vgenp, NULL, "can't find vsw_port\n"); 2105 mutex_exit(&vgenp->lock); 2106 return (MDEG_FAILURE); 2107 } 2108 } 2109 2110 for (idx = 0; idx < resp->added.nelem; idx++) { 2111 if ((vsw_idx != -1) && (vsw_idx == idx)) /* skip vsw_port */ 2112 continue; 2113 2114 /* If this port can't be added just skip it. 
*/ 2115 (void) vgen_add_port(vgenp, resp->added.mdp, 2116 resp->added.mdep[idx]); 2117 } 2118 2119 for (idx = 0; idx < resp->match_curr.nelem; idx++) { 2120 (void) vgen_update_port(vgenp, resp->match_curr.mdp, 2121 resp->match_curr.mdep[idx], 2122 resp->match_prev.mdp, 2123 resp->match_prev.mdep[idx]); 2124 } 2125 2126 mutex_exit(&vgenp->lock); 2127 DBG1(vgenp, NULL, "exit\n"); 2128 return (MDEG_SUCCESS); 2129 } 2130 2131 /* mdeg callback function for the vnet node */ 2132 static int 2133 vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp) 2134 { 2135 vgen_t *vgenp; 2136 vnet_t *vnetp; 2137 md_t *mdp; 2138 mde_cookie_t node; 2139 uint64_t inst; 2140 char *node_name = NULL; 2141 2142 if ((resp == NULL) || (cb_argp == NULL)) { 2143 return (MDEG_FAILURE); 2144 } 2145 2146 vgenp = (vgen_t *)cb_argp; 2147 vnetp = vgenp->vnetp; 2148 2149 DBG1(vgenp, NULL, "added %d : removed %d : curr matched %d" 2150 " : prev matched %d", resp->added.nelem, resp->removed.nelem, 2151 resp->match_curr.nelem, resp->match_prev.nelem); 2152 2153 mutex_enter(&vgenp->lock); 2154 2155 /* 2156 * We get an initial callback for this node as 'added' after 2157 * registering with mdeg. Note that we would have already gathered 2158 * information about this vnet node by walking MD earlier during attach 2159 * (in vgen_read_mdprops()). So, there is a window where the properties 2160 * of this node might have changed when we get this initial 'added' 2161 * callback. We handle this as if an update occured and invoke the same 2162 * function which handles updates to the properties of this vnet-node 2163 * if any. A non-zero 'match' value indicates that the MD has been 2164 * updated and that a 'network' node is present which may or may not 2165 * have been updated. It is up to the clients to examine their own 2166 * nodes and determine if they have changed. 
2167 */ 2168 if (resp->added.nelem != 0) { 2169 2170 if (resp->added.nelem != 1) { 2171 cmn_err(CE_NOTE, "!vnet%d: number of nodes added " 2172 "invalid: %d\n", vnetp->instance, 2173 resp->added.nelem); 2174 goto vgen_mdeg_cb_err; 2175 } 2176 2177 mdp = resp->added.mdp; 2178 node = resp->added.mdep[0]; 2179 2180 } else if (resp->match_curr.nelem != 0) { 2181 2182 if (resp->match_curr.nelem != 1) { 2183 cmn_err(CE_NOTE, "!vnet%d: number of nodes updated " 2184 "invalid: %d\n", vnetp->instance, 2185 resp->match_curr.nelem); 2186 goto vgen_mdeg_cb_err; 2187 } 2188 2189 mdp = resp->match_curr.mdp; 2190 node = resp->match_curr.mdep[0]; 2191 2192 } else { 2193 goto vgen_mdeg_cb_err; 2194 } 2195 2196 /* Validate name and instance */ 2197 if (md_get_prop_str(mdp, node, "name", &node_name) != 0) { 2198 DERR(vgenp, NULL, "unable to get node name\n"); 2199 goto vgen_mdeg_cb_err; 2200 } 2201 2202 /* is this a virtual-network device? */ 2203 if (strcmp(node_name, vnet_propname) != 0) { 2204 DERR(vgenp, NULL, "%s: Invalid node name: %s\n", node_name); 2205 goto vgen_mdeg_cb_err; 2206 } 2207 2208 if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) { 2209 DERR(vgenp, NULL, "prop(cfg-handle) not found\n"); 2210 goto vgen_mdeg_cb_err; 2211 } 2212 2213 /* is this the right instance of vnet? */ 2214 if (inst != vgenp->regprop) { 2215 DERR(vgenp, NULL, "Invalid cfg-handle: %lx\n", inst); 2216 goto vgen_mdeg_cb_err; 2217 } 2218 2219 vgen_update_md_prop(vgenp, mdp, node); 2220 2221 mutex_exit(&vgenp->lock); 2222 return (MDEG_SUCCESS); 2223 2224 vgen_mdeg_cb_err: 2225 mutex_exit(&vgenp->lock); 2226 return (MDEG_FAILURE); 2227 } 2228 2229 /* 2230 * Check to see if the relevant properties in the specified node have 2231 * changed, and if so take the appropriate action. 
2232 */ 2233 static void 2234 vgen_update_md_prop(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex) 2235 { 2236 uint16_t pvid; 2237 uint16_t *vids; 2238 uint16_t nvids; 2239 vnet_t *vnetp = vgenp->vnetp; 2240 uint32_t mtu; 2241 enum { MD_init = 0x1, 2242 MD_vlans = 0x2, 2243 MD_mtu = 0x4 } updated; 2244 int rv; 2245 2246 updated = MD_init; 2247 2248 /* Read the vlan ids */ 2249 vgen_vlan_read_ids(vgenp, VGEN_LOCAL, mdp, mdex, &pvid, &vids, 2250 &nvids, NULL); 2251 2252 /* Determine if there are any vlan id updates */ 2253 if ((pvid != vnetp->pvid) || /* pvid changed? */ 2254 (nvids != vnetp->nvids) || /* # of vids changed? */ 2255 ((nvids != 0) && (vnetp->nvids != 0) && /* vids changed? */ 2256 bcmp(vids, vnetp->vids, sizeof (uint16_t) * nvids))) { 2257 updated |= MD_vlans; 2258 } 2259 2260 /* Read mtu */ 2261 vgen_mtu_read(vgenp, mdp, mdex, &mtu); 2262 if (mtu != vnetp->mtu) { 2263 if (mtu >= ETHERMTU && mtu <= VNET_MAX_MTU) { 2264 updated |= MD_mtu; 2265 } else { 2266 cmn_err(CE_NOTE, "!vnet%d: Unable to process mtu update" 2267 " as the specified value:%d is invalid\n", 2268 vnetp->instance, mtu); 2269 } 2270 } 2271 2272 /* Now process the updated props */ 2273 2274 if (updated & MD_vlans) { 2275 2276 /* save the new vlan ids */ 2277 vnetp->pvid = pvid; 2278 if (vnetp->nvids != 0) { 2279 kmem_free(vnetp->vids, 2280 sizeof (uint16_t) * vnetp->nvids); 2281 vnetp->nvids = 0; 2282 } 2283 if (nvids != 0) { 2284 vnetp->nvids = nvids; 2285 vnetp->vids = vids; 2286 } 2287 2288 /* reset vlan-unaware peers (ver < 1.3) and restart handshake */ 2289 vgen_reset_vlan_unaware_ports(vgenp); 2290 2291 } else { 2292 2293 if (nvids != 0) { 2294 kmem_free(vids, sizeof (uint16_t) * nvids); 2295 } 2296 } 2297 2298 if (updated & MD_mtu) { 2299 2300 DBG2(vgenp, NULL, "curr_mtu(%d) new_mtu(%d)\n", 2301 vnetp->mtu, mtu); 2302 2303 rv = vnet_mtu_update(vnetp, mtu); 2304 if (rv == 0) { 2305 vgenp->max_frame_size = mtu + 2306 sizeof (struct ether_header) + VLAN_TAGSZ; 2307 } 2308 } 2309 } 2310 
/*
 * add a new port to the device
 *
 * Allocates a vgen_port_t, populates it from the port's MD node and attaches
 * it. On property-read failure the port is freed here; on attach failure
 * vgen_port_attach() performs its own cleanup (it invokes vgen_port_detach()).
 */
static int
vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
{
	vgen_port_t	*portp;
	int		rv;

	portp = kmem_zalloc(sizeof (vgen_port_t), KM_SLEEP);

	rv = vgen_port_read_props(portp, vgenp, mdp, mdex);
	if (rv != DDI_SUCCESS) {
		KMEM_FREE(portp);
		return (DDI_FAILURE);
	}

	rv = vgen_port_attach(portp);
	if (rv != DDI_SUCCESS) {
		return (DDI_FAILURE);
	}

	return (DDI_SUCCESS);
}

/*
 * read properties of the port from its md node
 *
 * Reads the port number ("id"), the list of channel endpoint ids under the
 * port node, the remote MAC address and the vlan id properties, and stores
 * them into portp. Also records the first port with a zero-valued switch-port
 * property as the vswitch port (vgenp->vsw_portp).
 *
 * Returns DDI_SUCCESS or DDI_FAILURE; on failure all temporary allocations
 * made here are freed.
 */
static int
vgen_port_read_props(vgen_port_t *portp, vgen_t *vgenp, md_t *mdp,
	mde_cookie_t mdex)
{
	uint64_t		port_num;
	uint64_t		*ldc_ids;
	uint64_t		macaddr;
	uint64_t		val;
	int			num_ldcs;
	int			i;
	int			addrsz;
	int			num_nodes = 0;
	int			listsz = 0;
	mde_cookie_t		*listp = NULL;
	uint8_t			*addrp;
	struct ether_addr	ea;

	/* read "id" property to get the port number */
	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
		return (DDI_FAILURE);
	}

	/*
	 * Find the channel endpoint node(s) under this port node.
	 */
	if ((num_nodes = md_node_count(mdp)) <= 0) {
		DWARN(vgenp, NULL, "invalid number of nodes found (%d)",
		    num_nodes);
		return (DDI_FAILURE);
	}

	/* allocate space for node list */
	listsz = num_nodes * sizeof (mde_cookie_t);
	listp = kmem_zalloc(listsz, KM_NOSLEEP);
	if (listp == NULL)
		return (DDI_FAILURE);

	num_ldcs = md_scan_dag(mdp, mdex,
	    md_find_name(mdp, channel_propname),
	    md_find_name(mdp, "fwd"), listp);

	if (num_ldcs <= 0) {
		DWARN(vgenp, NULL, "can't find %s nodes", channel_propname);
		kmem_free(listp, listsz);
		return (DDI_FAILURE);
	}

	DBG2(vgenp, NULL, "num_ldcs %d", num_ldcs);

	ldc_ids = kmem_zalloc(num_ldcs * sizeof (uint64_t), KM_NOSLEEP);
	if (ldc_ids == NULL) {
		kmem_free(listp, listsz);
		return (DDI_FAILURE);
	}

	for (i = 0; i < num_ldcs; i++) {
		/* read channel ids */
		if (md_get_prop_val(mdp, listp[i], id_propname, &ldc_ids[i])) {
			DWARN(vgenp, NULL, "prop(%s) not found\n",
			    id_propname);
			kmem_free(listp, listsz);
			kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
			return (DDI_FAILURE);
		}
		DBG2(vgenp, NULL, "ldc_id 0x%llx", ldc_ids[i]);
	}

	kmem_free(listp, listsz);

	if (md_get_prop_data(mdp, mdex, rmacaddr_propname, &addrp,
	    &addrsz)) {
		DWARN(vgenp, NULL, "prop(%s) not found\n", rmacaddr_propname);
		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
		return (DDI_FAILURE);
	}

	if (addrsz < ETHERADDRL) {
		DWARN(vgenp, NULL, "invalid address size (%d)\n", addrsz);
		kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));
		return (DDI_FAILURE);
	}

	/*
	 * NOTE(review): direct uint64_t read of the property data — assumes
	 * the buffer returned by md_get_prop_data() is 8-byte aligned; verify
	 * (a misaligned access would trap on sparc).
	 */
	macaddr = *((uint64_t *)addrp);

	DBG2(vgenp, NULL, "remote mac address 0x%llx\n", macaddr);

	/* unpack the 48-bit MAC from the low bytes, most significant first */
	for (i = ETHERADDRL - 1; i >= 0; i--) {
		ea.ether_addr_octet[i] = macaddr & 0xFF;
		macaddr >>= 8;
	}

	/* the first port with switch-port property == 0 is the vsw port */
	if (vgenp->vsw_portp == NULL) {
		if (!(md_get_prop_val(mdp, mdex, swport_propname, &val))) {
			if (val == 0) {
				(void) atomic_swap_32(
				    &vgenp->vsw_port_refcnt, 0);
				/* This port is connected to the vsw */
				vgenp->vsw_portp = portp;
			}
		}
	}

	/* now update all properties into the port */
	portp->vgenp = vgenp;
	portp->port_num = port_num;
	ether_copy(&ea, &portp->macaddr);
	portp->ldc_ids = kmem_zalloc(sizeof (uint64_t) * num_ldcs, KM_SLEEP);
	bcopy(ldc_ids, portp->ldc_ids, sizeof (uint64_t) * num_ldcs);
	portp->num_ldcs = num_ldcs;

	/* read vlan id properties of this port node */
	vgen_vlan_read_ids(portp, VGEN_PEER, mdp, mdex, &portp->pvid,
	    &portp->vids, &portp->nvids, NULL);

	kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t));

	return (DDI_SUCCESS);
}

/*
 * remove a port from the device
 *
 * Looks the port up by its MD "id" property and detaches it while holding
 * the port list write lock.
 */
static int
vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
{
	uint64_t	port_num;
	vgen_port_t	*portp;
	vgen_portlist_t	*plistp;

	/* read "id" property to get the port number */
	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
		return (DDI_FAILURE);
	}

	plistp = &(vgenp->vgenports);

	WRITE_ENTER(&plistp->rwlock);
	portp = vgen_port_lookup(plistp, (int)port_num);
	if (portp == NULL) {
		DWARN(vgenp, NULL, "can't find port(%lx)\n", port_num);
		RW_EXIT(&plistp->rwlock);
		return (DDI_FAILURE);
	}

	vgen_port_detach_mdeg(portp);
	RW_EXIT(&plistp->rwlock);

	return (DDI_SUCCESS);
}

/*
 * attach a port to the device based on mdeg data
 *
 * Initializes the port's locks and channel list, attaches each channel
 * listed in portp->ldc_ids, creates the vlan hash, and registers the port
 * as a vio network resource (LDC_SERVICE for the vswitch port, LDC_GUEST
 * otherwise). On success the port is linked into vgenp->vgenports.
 *
 * On any failure path the partially-attached port is torn down via
 * vgen_port_detach().
 */
static int
vgen_port_attach(vgen_port_t *portp)
{
	int			i;
	vgen_portlist_t		*plistp;
	vgen_t			*vgenp;
	uint64_t		*ldcids;
	uint32_t		num_ldcs;
	mac_register_t		*macp;
	vio_net_res_type_t	type;
	int			rv;

	ASSERT(portp != NULL);

	vgenp = portp->vgenp;
	ldcids = portp->ldc_ids;
	num_ldcs = portp->num_ldcs;

	DBG1(vgenp, NULL, "port_num(%d)\n", portp->port_num);

	mutex_init(&portp->lock, NULL, MUTEX_DRIVER, NULL);
	rw_init(&portp->ldclist.rwlock, NULL, RW_DRIVER, NULL);
	portp->ldclist.headp = NULL;

	for (i = 0; i < num_ldcs; i++) {
		DBG2(vgenp, NULL, "ldcid (%lx)\n", ldcids[i]);
		if (vgen_ldc_attach(portp, ldcids[i]) == DDI_FAILURE) {
			vgen_port_detach(portp);
			return (DDI_FAILURE);
		}
	}

	/* create vlan id hash table */
	vgen_vlan_create_hash(portp);

	if (portp == vgenp->vsw_portp) {
		/* This port is connected to the switch port */
		vgenp->vsw_portp = portp;
		(void) atomic_swap_32(&portp->use_vsw_port, B_FALSE);
		type = VIO_NET_RES_LDC_SERVICE;
	} else {
		(void) atomic_swap_32(&portp->use_vsw_port, B_TRUE);
		type = VIO_NET_RES_LDC_GUEST;
	}

	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
		vgen_port_detach(portp);
		return (DDI_FAILURE);
	}
	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
	macp->m_driver = portp;
	macp->m_dip = vgenp->vnetdip;
	macp->m_src_addr = (uint8_t *)&(vgenp->macaddr);
	macp->m_callbacks = &vgen_m_callbacks;
	macp->m_min_sdu = 0;
	macp->m_max_sdu = ETHERMTU;

	mutex_enter(&portp->lock);
	rv = vio_net_resource_reg(macp, type, vgenp->macaddr,
	    portp->macaddr, &portp->vhp, &portp->vcb);
	mutex_exit(&portp->lock);
	mac_free(macp);

	if (rv == 0) {
		/* link it into the list of ports */
		plistp = &(vgenp->vgenports);
		WRITE_ENTER(&plistp->rwlock);
		vgen_port_list_insert(portp);
		RW_EXIT(&plistp->rwlock);
	} else {
		DERR(vgenp, NULL, "vio_net_resource_reg failed for portp=0x%p",
		    portp);
		vgen_port_detach(portp);
	}

	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
	/*
	 * NOTE(review): DDI_SUCCESS is returned even when
	 * vio_net_resource_reg() failed and the port was detached above;
	 * callers cannot distinguish this case — confirm this is intended.
	 */
	return (DDI_SUCCESS);
}

/*
 * detach a port from the device based on mdeg data
 *
 * Stops the port if it is running, then detaches it. Called with the
 * port list write lock held (see vgen_remove_port()).
 */
static void
vgen_port_detach_mdeg(vgen_port_t *portp)
{
	vgen_t	*vgenp = portp->vgenp;

	DBG1(vgenp, NULL, "enter: port_num(%d)\n", portp->port_num);

	mutex_enter(&portp->lock);

	/* stop the port if needed */
	if (portp->flags & VGEN_STARTED) {
		vgen_port_uninit(portp);
	}

	mutex_exit(&portp->lock);
	vgen_port_detach(portp);

	DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num);
}

/*
 * Handle a 'match' update for a port node: currently only vlan id changes
 * are processed. Verifies the port number is unchanged between the previous
 * and current MD, then compares and (if changed) installs the new vlan ids.
 */
static int
vgen_update_port(vgen_t *vgenp, md_t *curr_mdp, mde_cookie_t curr_mdex,
	md_t *prev_mdp, mde_cookie_t prev_mdex)
{
	uint64_t	cport_num;
	uint64_t	pport_num;
	vgen_portlist_t	*plistp;
	vgen_port_t	*portp;
	boolean_t	updated_vlans = B_FALSE;
	uint16_t	pvid;
	uint16_t	*vids;
	uint16_t	nvids;

	/*
	 * For now, we get port updates only if vlan ids changed.
	 * We read the port num and do some sanity check.
	 */
	if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) {
		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
		return (DDI_FAILURE);
	}

	if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) {
		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
		return (DDI_FAILURE);
	}
	if (cport_num != pport_num)
		return (DDI_FAILURE);

	plistp = &(vgenp->vgenports);

	READ_ENTER(&plistp->rwlock);

	portp = vgen_port_lookup(plistp, (int)cport_num);
	if (portp == NULL) {
		DWARN(vgenp, NULL, "can't find port(%lx)\n", cport_num);
		RW_EXIT(&plistp->rwlock);
		return (DDI_FAILURE);
	}

	/* Read the vlan ids */
	vgen_vlan_read_ids(portp, VGEN_PEER, curr_mdp, curr_mdex, &pvid, &vids,
	    &nvids, NULL);

	/* Determine if there are any vlan id updates */
	if ((pvid != portp->pvid) ||		/* pvid changed?
*/ 2632 (nvids != portp->nvids) || /* # of vids changed? */ 2633 ((nvids != 0) && (portp->nvids != 0) && /* vids changed? */ 2634 bcmp(vids, portp->vids, sizeof (uint16_t) * nvids))) { 2635 updated_vlans = B_TRUE; 2636 } 2637 2638 if (updated_vlans == B_FALSE) { 2639 RW_EXIT(&plistp->rwlock); 2640 return (DDI_FAILURE); 2641 } 2642 2643 /* remove the port from vlans it has been assigned to */ 2644 vgen_vlan_remove_ids(portp); 2645 2646 /* save the new vlan ids */ 2647 portp->pvid = pvid; 2648 if (portp->nvids != 0) { 2649 kmem_free(portp->vids, sizeof (uint16_t) * portp->nvids); 2650 portp->nvids = 0; 2651 } 2652 if (nvids != 0) { 2653 portp->vids = kmem_zalloc(sizeof (uint16_t) * nvids, KM_SLEEP); 2654 bcopy(vids, portp->vids, sizeof (uint16_t) * nvids); 2655 portp->nvids = nvids; 2656 kmem_free(vids, sizeof (uint16_t) * nvids); 2657 } 2658 2659 /* add port to the new vlans */ 2660 vgen_vlan_add_ids(portp); 2661 2662 /* reset the port if it is vlan unaware (ver < 1.3) */ 2663 vgen_vlan_unaware_port_reset(portp); 2664 2665 RW_EXIT(&plistp->rwlock); 2666 2667 return (DDI_SUCCESS); 2668 } 2669 2670 static uint64_t 2671 vgen_port_stat(vgen_port_t *portp, uint_t stat) 2672 { 2673 vgen_ldclist_t *ldclp; 2674 vgen_ldc_t *ldcp; 2675 uint64_t val; 2676 2677 val = 0; 2678 ldclp = &portp->ldclist; 2679 2680 READ_ENTER(&ldclp->rwlock); 2681 for (ldcp = ldclp->headp; ldcp != NULL; ldcp = ldcp->nextp) { 2682 val += vgen_ldc_stat(ldcp, stat); 2683 } 2684 RW_EXIT(&ldclp->rwlock); 2685 2686 return (val); 2687 } 2688 2689 /* allocate receive resources */ 2690 static int 2691 vgen_init_multipools(vgen_ldc_t *ldcp) 2692 { 2693 size_t data_sz; 2694 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 2695 int status; 2696 uint32_t sz1 = 0; 2697 uint32_t sz2 = 0; 2698 uint32_t sz3 = 0; 2699 uint32_t sz4 = 0; 2700 2701 /* 2702 * We round up the mtu specified to be a multiple of 2K. 2703 * We then create rx pools based on the rounded up size. 
 */
	data_sz = vgenp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN;
	data_sz = VNET_ROUNDUP_2K(data_sz);

	/*
	 * If pool sizes are specified, use them. Note that the presence of
	 * the first tunable will be used as a hint.
	 */
	if (vgen_rbufsz1 != 0) {

		sz1 = vgen_rbufsz1;
		sz2 = vgen_rbufsz2;
		sz3 = vgen_rbufsz3;
		sz4 = vgen_rbufsz4;

		if (sz4 == 0) { /* need 3 pools */

			ldcp->max_rxpool_size = sz3;
			status = vio_init_multipools(&ldcp->vmp,
			    VGEN_NUM_VMPOOLS, sz1, sz2, sz3, vgen_nrbufs1,
			    vgen_nrbufs2, vgen_nrbufs3);

		} else {

			ldcp->max_rxpool_size = sz4;
			status = vio_init_multipools(&ldcp->vmp,
			    VGEN_NUM_VMPOOLS + 1, sz1, sz2, sz3, sz4,
			    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3,
			    vgen_nrbufs4);
		}
		return (status);
	}

	/*
	 * Pool sizes are not specified. We select the pool sizes based on the
	 * mtu if vnet_jumbo_rxpools is enabled.
	 */
	if (vnet_jumbo_rxpools == B_FALSE || data_sz == VNET_2K) {
		/*
		 * Receive buffer pool allocation based on mtu is disabled.
		 * Use the default mechanism of standard size pool allocation.
		 */
		sz1 = VGEN_DBLK_SZ_128;
		sz2 = VGEN_DBLK_SZ_256;
		sz3 = VGEN_DBLK_SZ_2048;
		ldcp->max_rxpool_size = sz3;

		status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS,
		    sz1, sz2, sz3,
		    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3);

		return (status);
	}

	/* jumbo rx pools: add a fourth pool sized from the rounded mtu */
	switch (data_sz) {

	case VNET_4K:

		sz1 = VGEN_DBLK_SZ_128;
		sz2 = VGEN_DBLK_SZ_256;
		sz3 = VGEN_DBLK_SZ_2048;
		sz4 = sz3 << 1;			/* 4K */
		ldcp->max_rxpool_size = sz4;

		status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS + 1,
		    sz1, sz2, sz3, sz4,
		    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3, vgen_nrbufs4);
		break;

	default:	/* data_sz: 4K+ to 16K */

		sz1 = VGEN_DBLK_SZ_256;
		sz2 = VGEN_DBLK_SZ_2048;
		sz3 = data_sz >> 1;		/* Jumbo-size/2 */
		sz4 = data_sz;			/* Jumbo-size */
		ldcp->max_rxpool_size = sz4;

		status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS + 1,
		    sz1, sz2, sz3, sz4,
		    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3, vgen_nrbufs4);
		break;

	}

	return (status);
}

/*
 * attach the channel corresponding to the given ldc_id to the port
 *
 * Allocates and initializes a vgen_ldc_t for the channel: locks, the LDC
 * handle and callback, the optional receive worker thread, the read
 * message buffer, transmit ring, receive pools and kstats. Progress is
 * tracked in 'attach_state' so that a failure at any step unwinds exactly
 * what was set up (see ldc_attach_failed below).
 */
static int
vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id)
{
	vgen_t		*vgenp;
	vgen_ldclist_t	*ldclp;
	vgen_ldc_t	*ldcp, **prev_ldcp;
	ldc_attr_t	attr;
	int		status;
	ldc_status_t	istatus;
	char		kname[MAXNAMELEN];
	int		instance;
	enum	{AST_init = 0x0, AST_ldc_alloc = 0x1,
		AST_mutex_init = 0x2, AST_ldc_init = 0x4,
		AST_ldc_reg_cb = 0x8, AST_alloc_tx_ring = 0x10,
		AST_create_rxmblks = 0x20,
		AST_create_rcv_thread = 0x40} attach_state;

	attach_state = AST_init;
	vgenp = portp->vgenp;
	ldclp = &portp->ldclist;

	ldcp = kmem_zalloc(sizeof (vgen_ldc_t), KM_NOSLEEP);
	if (ldcp == NULL) {
		goto ldc_attach_failed;
	}
	ldcp->ldc_id = ldc_id;
	ldcp->portp = portp;

	attach_state |= AST_ldc_alloc;

	mutex_init(&ldcp->txlock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->cblock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->tclock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->wrlock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&ldcp->rxlock, NULL, MUTEX_DRIVER, NULL);

	attach_state |= AST_mutex_init;

	/* initialize the LDC endpoint in unreliable mode */
	attr.devclass = LDC_DEV_NT;
	attr.instance = vgenp->instance;
	attr.mode = LDC_MODE_UNRELIABLE;
	attr.mtu = vnet_ldc_mtu;
	status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
	if (status != 0) {
		DWARN(vgenp, ldcp, "ldc_init failed,rv (%d)\n", status);
		goto ldc_attach_failed;
	}
	attach_state |= AST_ldc_init;

	/* optionally offload receive processing to a dedicated worker */
	if (vgen_rcv_thread_enabled) {
		ldcp->rcv_thr_flags = 0;

		mutex_init(&ldcp->rcv_thr_lock, NULL, MUTEX_DRIVER, NULL);
		cv_init(&ldcp->rcv_thr_cv, NULL, CV_DRIVER, NULL);
		ldcp->rcv_thread = thread_create(NULL, 2 * DEFAULTSTKSZ,
		    vgen_ldc_rcv_worker, ldcp, 0, &p0, TS_RUN, maxclsyspri);

		/*
		 * flag is set before the NULL check so the failure path
		 * still destroys rcv_thr_lock/rcv_thr_cv.
		 */
		attach_state |= AST_create_rcv_thread;
		if (ldcp->rcv_thread == NULL) {
			DWARN(vgenp, ldcp, "Failed to create worker thread");
			goto ldc_attach_failed;
		}
	}

	status = ldc_reg_callback(ldcp->ldc_handle, vgen_ldc_cb, (caddr_t)ldcp);
	if (status != 0) {
		DWARN(vgenp, ldcp, "ldc_reg_callback failed, rv (%d)\n",
		    status);
		goto ldc_attach_failed;
	}
	/*
	 * allocate a message for ldc_read()s, big enough to hold ctrl and
	 * data msgs, including raw data msgs used to recv priority frames.
	 */
	ldcp->msglen = VIO_PKT_DATA_HDRSIZE + vgenp->max_frame_size;
	ldcp->ldcmsg = kmem_alloc(ldcp->msglen, KM_SLEEP);
	attach_state |= AST_ldc_reg_cb;

	(void) ldc_status(ldcp->ldc_handle, &istatus);
	ASSERT(istatus == LDC_INIT);
	ldcp->ldc_status = istatus;

	/* allocate transmit resources */
	status = vgen_alloc_tx_ring(ldcp);
	if (status != 0) {
		goto ldc_attach_failed;
	}
	attach_state |= AST_alloc_tx_ring;

	/* allocate receive resources */
	status = vgen_init_multipools(ldcp);
	if (status != 0) {
		/*
		 * We do not return failure if receive mblk pools can't be
		 * allocated; instead allocb(9F) will be used to dynamically
		 * allocate buffers during receive.
		 */
		DWARN(vgenp, ldcp,
		    "vnet%d: status(%d), failed to allocate rx mblk pools for "
		    "channel(0x%lx)\n",
		    vgenp->instance, status, ldcp->ldc_id);
	} else {
		attach_state |= AST_create_rxmblks;
	}

	/* Setup kstats for the channel */
	instance = vgenp->instance;
	(void) sprintf(kname, "vnetldc0x%lx", ldcp->ldc_id);
	ldcp->ksp = vgen_setup_kstats("vnet", instance, kname, &ldcp->stats);
	if (ldcp->ksp == NULL) {
		goto ldc_attach_failed;
	}

	/* initialize vgen_versions supported */
	bcopy(vgen_versions, ldcp->vgen_versions, sizeof (ldcp->vgen_versions));
	vgen_reset_vnet_proto_ops(ldcp);

	/* link it into the list of channels for this port */
	WRITE_ENTER(&ldclp->rwlock);
	prev_ldcp = (vgen_ldc_t **)(&ldclp->headp);
	ldcp->nextp = *prev_ldcp;
	*prev_ldcp = ldcp;
	RW_EXIT(&ldclp->rwlock);

	ldcp->flags |= CHANNEL_ATTACHED;
	return (DDI_SUCCESS);

ldc_attach_failed:
	/* unwind only what was set up, in reverse order of acquisition */
	if (attach_state & AST_ldc_reg_cb) {
		(void) ldc_unreg_callback(ldcp->ldc_handle);
		kmem_free(ldcp->ldcmsg, ldcp->msglen);
	}
	if (attach_state & AST_create_rcv_thread) {
		if (ldcp->rcv_thread != NULL) {
			vgen_stop_rcv_thread(ldcp);
		}
		mutex_destroy(&ldcp->rcv_thr_lock);
		cv_destroy(&ldcp->rcv_thr_cv);
	}
	if (attach_state & AST_create_rxmblks) {
		vio_mblk_pool_t *fvmp = NULL;
		vio_destroy_multipools(&ldcp->vmp, &fvmp);
		ASSERT(fvmp == NULL);
	}
	if (attach_state & AST_alloc_tx_ring) {
		vgen_free_tx_ring(ldcp);
	}
	if (attach_state & AST_ldc_init) {
		(void) ldc_fini(ldcp->ldc_handle);
	}
	if (attach_state & AST_mutex_init) {
		mutex_destroy(&ldcp->tclock);
		mutex_destroy(&ldcp->txlock);
		mutex_destroy(&ldcp->cblock);
		mutex_destroy(&ldcp->wrlock);
		mutex_destroy(&ldcp->rxlock);
	}
	if (attach_state & AST_ldc_alloc) {
		KMEM_FREE(ldcp);
	}
	return (DDI_FAILURE);
}

/*
 * detach a channel from the port
 *
 * Unlinks the channel from the port's channel list and, if it was fully
 * attached, releases all resources acquired in vgen_ldc_attach() in
 * reverse order and frees the vgen_ldc_t.
 */
static void
vgen_ldc_detach(vgen_ldc_t *ldcp)
{
	vgen_port_t	*portp;
	vgen_t		*vgenp;
	vgen_ldc_t	*pldcp;
	vgen_ldc_t	**prev_ldcp;
	vgen_ldclist_t	*ldclp;

	portp = ldcp->portp;
	vgenp = portp->vgenp;
	ldclp = &portp->ldclist;

	/* find ldcp in the list, keeping the link pointer for unlinking */
	prev_ldcp = (vgen_ldc_t **)&ldclp->headp;
	for (; (pldcp = *prev_ldcp) != NULL; prev_ldcp = &pldcp->nextp) {
		if (pldcp == ldcp) {
			break;
		}
	}

	if (pldcp == NULL) {
		/* invalid ldcp? */
		return;
	}

	if (ldcp->ldc_status != LDC_INIT) {
		DWARN(vgenp, ldcp, "ldc_status is not INIT\n");
	}

	if (ldcp->flags & CHANNEL_ATTACHED) {
		ldcp->flags &= ~(CHANNEL_ATTACHED);

		(void) ldc_unreg_callback(ldcp->ldc_handle);
		if (ldcp->rcv_thread != NULL) {
			/* First stop the receive thread */
			vgen_stop_rcv_thread(ldcp);
			mutex_destroy(&ldcp->rcv_thr_lock);
			cv_destroy(&ldcp->rcv_thr_cv);
		}
		kmem_free(ldcp->ldcmsg, ldcp->msglen);

		vgen_destroy_kstats(ldcp->ksp);
		ldcp->ksp = NULL;

		/*
		 * if we cannot reclaim all mblks, put this
		 * on the list of pools(vgenp->rmp) to be reclaimed when the
		 * device gets detached (see vgen_uninit()).
		 */
		vio_destroy_multipools(&ldcp->vmp, &vgenp->rmp);

		/* free transmit resources */
		vgen_free_tx_ring(ldcp);

		(void) ldc_fini(ldcp->ldc_handle);
		mutex_destroy(&ldcp->tclock);
		mutex_destroy(&ldcp->txlock);
		mutex_destroy(&ldcp->cblock);
		mutex_destroy(&ldcp->wrlock);
		mutex_destroy(&ldcp->rxlock);

		/* unlink it from the list */
		*prev_ldcp = ldcp->nextp;
		KMEM_FREE(ldcp);
	}
}

/*
 * This function allocates transmit resources for the channel.
 * The resources consist of a transmit descriptor ring and an associated
 * transmit buffer ring.
 */
static int
vgen_alloc_tx_ring(vgen_ldc_t *ldcp)
{
	void		*tbufp;
	ldc_mem_info_t	minfo;
	uint32_t	txdsize;
	uint32_t	tbufsize;
	int		status;
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);

	ldcp->num_txds = vnet_ntxds;
	txdsize = sizeof (vnet_public_desc_t);
	tbufsize = sizeof (vgen_private_desc_t);

	/* allocate transmit buffer ring */
	tbufp = kmem_zalloc(ldcp->num_txds * tbufsize, KM_NOSLEEP);
	if (tbufp == NULL) {
		return (DDI_FAILURE);
	}

	/* create transmit descriptor ring */
	status = ldc_mem_dring_create(ldcp->num_txds, txdsize,
	    &ldcp->tx_dhandle);
	if (status) {
		DWARN(vgenp, ldcp, "ldc_mem_dring_create() failed\n");
		kmem_free(tbufp, ldcp->num_txds * tbufsize);
		return (DDI_FAILURE);
	}

	/* get the addr of descriptor ring */
	status = ldc_mem_dring_info(ldcp->tx_dhandle, &minfo);
	if (status) {
		DWARN(vgenp, ldcp, "ldc_mem_dring_info() failed\n");
		kmem_free(tbufp, ldcp->num_txds * tbufsize);
		(void) ldc_mem_dring_destroy(ldcp->tx_dhandle);
		ldcp->tbufp = NULL;
		return (DDI_FAILURE);
	}
	ldcp->txdp = (vnet_public_desc_t *)(minfo.vaddr);
	ldcp->tbufp = tbufp;

	/* cache one-past-the-end pointers for ring walking */
	ldcp->txdendp = &((ldcp->txdp)[ldcp->num_txds]);
	ldcp->tbufendp = &((ldcp->tbufp)[ldcp->num_txds]);

	return (DDI_SUCCESS);
}

/* Free transmit resources for the channel */
static void
vgen_free_tx_ring(vgen_ldc_t *ldcp)
{
	int	tbufsize = sizeof (vgen_private_desc_t);

	/* free transmit descriptor ring */
	(void) ldc_mem_dring_destroy(ldcp->tx_dhandle);

	/* free transmit buffer ring */
	kmem_free(ldcp->tbufp, ldcp->num_txds * tbufsize);
	ldcp->txdp = ldcp->txdendp = NULL;
	ldcp->tbufp = ldcp->tbufendp = NULL;
}

/* enable transmit/receive on the channels for the port */
static void
vgen_init_ldcs(vgen_port_t *portp)
{
	vgen_ldclist_t	*ldclp = &portp->ldclist;
	vgen_ldc_t	*ldcp;

	READ_ENTER(&ldclp->rwlock);
	ldcp = ldclp->headp;
	for (; ldcp != NULL; ldcp = ldcp->nextp) {
		(void) vgen_ldc_init(ldcp);
	}
	RW_EXIT(&ldclp->rwlock);
}

/* stop transmit/receive on the channels for the port */
static void
vgen_uninit_ldcs(vgen_port_t *portp)
{
	vgen_ldclist_t	*ldclp = &portp->ldclist;
	vgen_ldc_t	*ldcp;

	READ_ENTER(&ldclp->rwlock);
	ldcp = ldclp->headp;
	for (; ldcp != NULL; ldcp = ldcp->nextp) {
		vgen_ldc_uninit(ldcp);
	}
	RW_EXIT(&ldclp->rwlock);
}

/*
 * enable transmit/receive on the channel
 *
 * Opens the channel, initializes the transmit buffers, enables the
 * callback, and brings the link up (retrying ldc_up() while it returns
 * EWOULDBLOCK, up to vgen_ldcup_retries). If the channel is already UP,
 * the handshake with the peer is initiated before returning. Progress is
 * tracked in 'init_state' for the failure-unwind path.
 */
static int
vgen_ldc_init(vgen_ldc_t *ldcp)
{
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
	ldc_status_t	istatus;
	int		rv;
	uint32_t	retries = 0;
	enum	{ ST_init = 0x0, ST_ldc_open = 0x1,
		ST_init_tbufs = 0x2, ST_cb_enable = 0x4} init_state;
	init_state = ST_init;

	DBG1(vgenp, ldcp, "enter\n");
	LDC_LOCK(ldcp);

	rv = ldc_open(ldcp->ldc_handle);
	if (rv != 0) {
		DWARN(vgenp, ldcp, "ldc_open failed: rv(%d)\n", rv);
		goto ldcinit_failed;
	}
	init_state |= ST_ldc_open;

	(void) ldc_status(ldcp->ldc_handle, &istatus);
	if (istatus != LDC_OPEN && istatus != LDC_READY) {
		DWARN(vgenp, ldcp, "status(%d) is not OPEN/READY\n", istatus);
		goto ldcinit_failed;
	}
	ldcp->ldc_status = istatus;

	rv = vgen_init_tbufs(ldcp);
	if (rv != 0) {
		DWARN(vgenp, ldcp, "vgen_init_tbufs() failed\n");
		goto ldcinit_failed;
	}
	init_state |= ST_init_tbufs;

	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_ENABLE);
	if (rv != 0) {
		DWARN(vgenp, ldcp, "ldc_set_cb_mode failed: rv(%d)\n", rv);
		goto ldcinit_failed;
	}

	init_state |= ST_cb_enable;

	/* bring the link up, retrying while the peer is not ready */
	do {
		rv = ldc_up(ldcp->ldc_handle);
		if ((rv != 0) && (rv == EWOULDBLOCK)) {
			DBG2(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
			drv_usecwait(VGEN_LDC_UP_DELAY);
		}
		if (retries++ >= vgen_ldcup_retries)
			break;
	} while (rv == EWOULDBLOCK);

	(void) ldc_status(ldcp->ldc_handle, &istatus);
	if (istatus == LDC_UP) {
		DWARN(vgenp, ldcp, "status(%d) is UP\n", istatus);
	}

	ldcp->ldc_status = istatus;

	/* initialize transmit watchdog timeout */
	ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp,
	    drv_usectohz(vnet_ldcwd_interval * 1000));

	ldcp->hphase = -1;
	ldcp->flags |= CHANNEL_STARTED;

	/* if channel is already UP - start handshake */
	if (istatus == LDC_UP) {
		vgen_t *vgenp = LDC_TO_VGEN(ldcp);
		if (ldcp->portp != vgenp->vsw_portp) {
			/*
			 * As the channel is up, use this port from now on.
			 */
			(void) atomic_swap_32(
			    &ldcp->portp->use_vsw_port, B_FALSE);
		}

		/* Initialize local session id */
		ldcp->local_sid = ddi_get_lbolt();

		/* clear peer session id */
		ldcp->peer_sid = 0;
		ldcp->hretries = 0;

		/* Initiate Handshake process with peer ldc endpoint */
		vgen_reset_hphase(ldcp);

		/*
		 * drop all locks except cblock before starting the
		 * handshake; vgen_handshake() runs with cblock held.
		 */
		mutex_exit(&ldcp->tclock);
		mutex_exit(&ldcp->txlock);
		mutex_exit(&ldcp->wrlock);
		mutex_exit(&ldcp->rxlock);
		vgen_handshake(vh_nextphase(ldcp));
		mutex_exit(&ldcp->cblock);
	} else {
		LDC_UNLOCK(ldcp);
	}

	return (DDI_SUCCESS);

ldcinit_failed:
	/* unwind in reverse order of setup */
	if (init_state & ST_cb_enable) {
		(void) ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
	}
	if (init_state & ST_init_tbufs) {
		vgen_uninit_tbufs(ldcp);
	}
	if (init_state & ST_ldc_open) {
		(void) ldc_close(ldcp->ldc_handle);
	}
	LDC_UNLOCK(ldcp);
	DBG1(vgenp, ldcp, "exit\n");
	return (DDI_FAILURE);
}

/*
 * stop transmit/receive on the channel
 *
 * Disables callbacks, cancels the handshake/transmit timeouts, drains the
 * receive worker (if any), resets the handshake state and buffers, and
 * closes the channel (retrying on EAGAIN).
 */
static void
vgen_ldc_uninit(vgen_ldc_t *ldcp)
{
	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);
	int	rv;
	uint_t	retries = 0;

	DBG1(vgenp, ldcp, "enter\n");
	LDC_LOCK(ldcp);

	if ((ldcp->flags & CHANNEL_STARTED) == 0) {
		LDC_UNLOCK(ldcp);
		DWARN(vgenp, ldcp, "CHANNEL_STARTED flag is not set\n");
		return;
	}

	/* disable further callbacks */
	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
	if (rv != 0) {
		DWARN(vgenp, ldcp, "ldc_set_cb_mode failed\n");
	}

	/* if this is the vswitch channel, report the resource as down */
	if (vgenp->vsw_portp == ldcp->portp) {
		vio_net_report_err_t rep_err =
		    ldcp->portp->vcb.vio_net_report_err;
		rep_err(ldcp->portp->vhp, VIO_NET_RES_DOWN);
	}

	/*
	 * clear handshake done bit and wait for pending tx and cb to finish.
	 * release locks before untimeout(9F) is invoked to cancel timeouts.
	 */
	ldcp->hphase &= ~(VH_DONE);
	LDC_UNLOCK(ldcp);

	/* cancel handshake watchdog timeout */
	if (ldcp->htid) {
		(void) untimeout(ldcp->htid);
		ldcp->htid = 0;
	}

	if (ldcp->cancel_htid) {
		(void) untimeout(ldcp->cancel_htid);
		ldcp->cancel_htid = 0;
	}

	/* cancel transmit watchdog timeout */
	if (ldcp->wd_tid) {
		(void) untimeout(ldcp->wd_tid);
		ldcp->wd_tid = 0;
	}

	/* let any in-flight transmits/callbacks drain */
	drv_usecwait(1000);

	if (ldcp->rcv_thread != NULL) {
		/*
		 * Note that callbacks have been disabled already(above). The
		 * drain function takes care of the condition when an already
		 * executing callback signals the worker to start processing or
		 * the worker has already been signalled and is in the middle of
		 * processing.
		 */
		vgen_drain_rcv_thread(ldcp);
	}

	/* acquire locks again; any pending transmits and callbacks are done */
	LDC_LOCK(ldcp);

	vgen_reset_hphase(ldcp);

	vgen_uninit_tbufs(ldcp);

	/* close the channel - retry on EAGAIN */
	while ((rv = ldc_close(ldcp->ldc_handle)) == EAGAIN) {
		if (++retries > vgen_ldccl_retries) {
			break;
		}
		drv_usecwait(VGEN_LDC_CLOSE_DELAY);
	}
	if (rv != 0) {
		cmn_err(CE_NOTE,
		    "!vnet%d: Error(%d) closing the channel(0x%lx)\n",
		    vgenp->instance, rv, ldcp->ldc_id);
	}

	ldcp->ldc_status = LDC_INIT;
	ldcp->flags &= ~(CHANNEL_STARTED);

	LDC_UNLOCK(ldcp);

	DBG1(vgenp, ldcp, "exit\n");
}

/*
 * Initialize the transmit buffer ring for the channel
 *
 * Allocates one contiguous, 8K-aligned data area and binds a per-descriptor
 * LDC memory handle to each slice of it, then marks every private/public
 * descriptor free and resets the tx walking state.
 *
 * Returns DDI_SUCCESS, or DDI_FAILURE after undoing partial setup via
 * vgen_uninit_tbufs().
 */
static int
vgen_init_tbufs(vgen_ldc_t *ldcp)
{
	vgen_private_desc_t	*tbufp;
	vnet_public_desc_t	*txdp;
	vio_dring_entry_hdr_t	*hdrp;
	int			i;
	int			rv;
	caddr_t			datap = NULL;
	int			ci;
	uint32_t		ncookies;
	size_t			data_sz;
	vgen_t			*vgenp;

	vgenp = LDC_TO_VGEN(ldcp);

	bzero(ldcp->tbufp, sizeof (*tbufp) * (ldcp->num_txds));
	bzero(ldcp->txdp, sizeof (*txdp) * (ldcp->num_txds));

	/*
	 * In order to ensure that the number of ldc cookies per descriptor is
	 * limited to be within the default MAX_COOKIES (2), we take the steps
	 * outlined below:
	 *
	 * Align the entire data buffer area to 8K and carve out per descriptor
	 * data buffers starting from this 8K aligned base address.
	 *
	 * We round up the mtu specified to be a multiple of 2K or 4K.
	 * For sizes up to 12K we round up the size to the next 2K.
	 * For sizes > 12K we round up to the next 4K (otherwise sizes such as
	 * 14K could end up needing 3 cookies, with the buffer spread across
	 * 3 8K pages: 8K+6K, 2K+8K+2K, 6K+8K, ...).
	 */
	data_sz = vgenp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN;
	if (data_sz <= VNET_12K) {
		data_sz = VNET_ROUNDUP_2K(data_sz);
	} else {
		data_sz = VNET_ROUNDUP_4K(data_sz);
	}

	/* allocate extra 8K bytes for alignment */
	ldcp->tx_data_sz = (data_sz * ldcp->num_txds) + VNET_8K;
	datap = kmem_zalloc(ldcp->tx_data_sz, KM_SLEEP);
	ldcp->tx_datap = datap;


	/* align the starting address of the data area to 8K */
	datap = (caddr_t)VNET_ROUNDUP_8K((uintptr_t)datap);

	/*
	 * for each private descriptor, allocate a ldc mem_handle which is
	 * required to map the data during transmit, set the flags
	 * to free (available for use by transmit routine).
	 */

	for (i = 0; i < ldcp->num_txds; i++) {

		tbufp = &(ldcp->tbufp[i]);
		rv = ldc_mem_alloc_handle(ldcp->ldc_handle,
		    &(tbufp->memhandle));
		if (rv) {
			tbufp->memhandle = 0;
			goto init_tbufs_failed;
		}

		/*
		 * bind ldc memhandle to the corresponding transmit buffer.
		 */
		ci = ncookies = 0;
		rv = ldc_mem_bind_handle(tbufp->memhandle,
		    (caddr_t)datap, data_sz, LDC_SHADOW_MAP,
		    LDC_MEM_R, &(tbufp->memcookie[ci]), &ncookies);
		if (rv != 0) {
			goto init_tbufs_failed;
		}

		/*
		 * successful in binding the handle to tx data buffer.
		 * set datap in the private descr to this buffer.
		 */
		tbufp->datap = datap;

		if ((ncookies == 0) ||
		    (ncookies > MAX_COOKIES)) {
			goto init_tbufs_failed;
		}

		/* fetch the remaining cookies of this binding */
		for (ci = 1; ci < ncookies; ci++) {
			rv = ldc_mem_nextcookie(tbufp->memhandle,
			    &(tbufp->memcookie[ci]));
			if (rv != 0) {
				goto init_tbufs_failed;
			}
		}

		tbufp->ncookies = ncookies;
		datap += data_sz;

		tbufp->flags = VGEN_PRIV_DESC_FREE;
		txdp = &(ldcp->txdp[i]);
		hdrp = &txdp->hdr;
		hdrp->dstate = VIO_DESC_FREE;
		hdrp->ack = B_FALSE;
		tbufp->descp = txdp;

	}

	/* reset tbuf walking pointers */
	ldcp->next_tbufp = ldcp->tbufp;
	ldcp->cur_tbufp = ldcp->tbufp;

	/* initialize tx seqnum and index */
	ldcp->next_txseq = VNET_ISS;
	ldcp->next_txi = 0;

	ldcp->resched_peer = B_TRUE;
	ldcp->resched_peer_txi = 0;

	return (DDI_SUCCESS);

init_tbufs_failed:;
	vgen_uninit_tbufs(ldcp);
	return (DDI_FAILURE);
}

/* Uninitialize transmit buffer ring for the channel */
static void
vgen_uninit_tbufs(vgen_ldc_t *ldcp)
{
	vgen_private_desc_t	*tbufp = ldcp->tbufp;
	int			i;

	/* for each tbuf (priv_desc), free ldc mem_handle */
	for (i = 0; i < ldcp->num_txds; i++) {

		tbufp = &(ldcp->tbufp[i]);

		if (tbufp->datap) { /* if bound to a ldc memhandle */
			(void) ldc_mem_unbind_handle(tbufp->memhandle);
			tbufp->datap = NULL;
		}
		if (tbufp->memhandle) {
			(void) ldc_mem_free_handle(tbufp->memhandle);
			tbufp->memhandle = 0;
		}
	}

	if (ldcp->tx_datap) {
		/* prealloc'd tx data buffer */
		kmem_free(ldcp->tx_datap, ldcp->tx_data_sz);
		ldcp->tx_datap = NULL;
		ldcp->tx_data_sz = 0;
	}

	bzero(ldcp->tbufp, sizeof (vgen_private_desc_t) * (ldcp->num_txds));
	bzero(ldcp->txdp, sizeof (vnet_public_desc_t) * (ldcp->num_txds));
}

/* clobber tx descriptor ring */
static void
vgen_clobber_tbufs(vgen_ldc_t *ldcp)
{
	vnet_public_desc_t	*txdp;
	vgen_private_desc_t	*tbufp;
	vio_dring_entry_hdr_t	*hdrp;
	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
	int			i;
#ifdef DEBUG
	int			ndone = 0;
#endif

	/*
	 * Force every busy private descriptor (and its public counterpart)
	 * back to the FREE state; used when the connection is torn down.
	 */
	for (i = 0; i < ldcp->num_txds; i++) {

		tbufp = &(ldcp->tbufp[i]);
		txdp = tbufp->descp;
		hdrp = &txdp->hdr;

		if (tbufp->flags & VGEN_PRIV_DESC_BUSY) {
			tbufp->flags = VGEN_PRIV_DESC_FREE;
#ifdef DEBUG
			/* count descriptors the peer had already processed */
			if (hdrp->dstate == VIO_DESC_DONE)
				ndone++;
#endif
			hdrp->dstate = VIO_DESC_FREE;
			hdrp->ack = B_FALSE;
		}
	}
	/* reset tbuf walking pointers */
	ldcp->next_tbufp = ldcp->tbufp;
	ldcp->cur_tbufp = ldcp->tbufp;

	/* reset tx seqnum and index */
	ldcp->next_txseq = VNET_ISS;
	ldcp->next_txi = 0;

	ldcp->resched_peer = B_TRUE;
	ldcp->resched_peer_txi = 0;

	DBG2(vgenp, ldcp, "num descrs done (%d)\n", ndone);
}

/*
 * Clobber receive descriptor ring: forget the mapped peer dring and
 * reset all rx bookkeeping. The dring itself is unmapped by the caller.
 */
static void
vgen_clobber_rxds(vgen_ldc_t *ldcp)
{
	ldcp->rx_dhandle = 0;
	bzero(&ldcp->rx_dcookie, sizeof (ldcp->rx_dcookie));
	ldcp->rxdp = NULL;
	ldcp->next_rxi = 0;
	ldcp->num_rxds = 0;
	ldcp->next_rxseq = VNET_ISS;
}

/* initialize receive descriptor ring */
static int
vgen_init_rxds(vgen_ldc_t *ldcp, uint32_t num_desc, uint32_t desc_size,
    ldc_mem_cookie_t *dcookie, uint32_t ncookies)
{
	int		rv;
	ldc_mem_info_t	minfo;

	/* map the peer's descriptor ring exported over this channel */
	rv = ldc_mem_dring_map(ldcp->ldc_handle, dcookie, ncookies, num_desc,
	    desc_size, LDC_DIRECT_MAP, &(ldcp->rx_dhandle));
	if (rv != 0) {
		return (DDI_FAILURE);
	}

	/*
	 * successfully mapped, now try to
	 * get info about the mapped dring
	 */
	rv = ldc_mem_dring_info(ldcp->rx_dhandle, &minfo);
	if (rv != 0) {
		(void) ldc_mem_dring_unmap(ldcp->rx_dhandle);
		return (DDI_FAILURE);
	}

	/*
	 * save ring address, number of descriptors.
	 */
	ldcp->rxdp = (vnet_public_desc_t *)(minfo.vaddr);
	bcopy(dcookie, &(ldcp->rx_dcookie), sizeof (*dcookie));
	ldcp->num_rxdcookies = ncookies;
	ldcp->num_rxds = num_desc;
	ldcp->next_rxi = 0;
	ldcp->next_rxseq = VNET_ISS;
	ldcp->dring_mtype = minfo.mtype;

	return (DDI_SUCCESS);
}

/*
 * Get channel statistics: map a MAC/ether stat id to the corresponding
 * counter in ldcp->stats. Stats that do not apply to an LDC channel
 * (link speed, collision/PHY capabilities, etc.) return 0.
 */
static uint64_t
vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat)
{
	vgen_stats_t	*statsp;
	uint64_t	val;

	val = 0;
	statsp = &ldcp->stats;
	switch (stat) {

	case MAC_STAT_MULTIRCV:
		val = statsp->multircv;
		break;

	case MAC_STAT_BRDCSTRCV:
		val = statsp->brdcstrcv;
		break;

	case MAC_STAT_MULTIXMT:
		val = statsp->multixmt;
		break;

	case MAC_STAT_BRDCSTXMT:
		val = statsp->brdcstxmt;
		break;

	case MAC_STAT_NORCVBUF:
		val = statsp->norcvbuf;
		break;

	case MAC_STAT_IERRORS:
		val = statsp->ierrors;
		break;

	case MAC_STAT_NOXMTBUF:
		val = statsp->noxmtbuf;
		break;

	case MAC_STAT_OERRORS:
		val = statsp->oerrors;
		break;

	case MAC_STAT_COLLISIONS:
		break;

	case MAC_STAT_RBYTES:
		val = statsp->rbytes;
		break;

	case MAC_STAT_IPACKETS:
		val = statsp->ipackets;
		break;

	case MAC_STAT_OBYTES:
		val = statsp->obytes;
		break;

	case MAC_STAT_OPACKETS:
		val = statsp->opackets;
		break;

	/* stats not relevant to ldc, return 0 */
	case MAC_STAT_IFSPEED:
	case ETHER_STAT_ALIGN_ERRORS:
	case ETHER_STAT_FCS_ERRORS:
	case ETHER_STAT_FIRST_COLLISIONS:
	case ETHER_STAT_MULTI_COLLISIONS:
	case ETHER_STAT_DEFER_XMTS:
	case ETHER_STAT_TX_LATE_COLLISIONS:
	case ETHER_STAT_EX_COLLISIONS:
	case ETHER_STAT_MACXMT_ERRORS:
	case ETHER_STAT_CARRIER_ERRORS:
	case ETHER_STAT_TOOLONG_ERRORS:
	case ETHER_STAT_XCVR_ADDR:
	case ETHER_STAT_XCVR_ID:
	case ETHER_STAT_XCVR_INUSE:
	case ETHER_STAT_CAP_1000FDX:
	case ETHER_STAT_CAP_1000HDX:
	case ETHER_STAT_CAP_100FDX:
	case ETHER_STAT_CAP_100HDX:
	case ETHER_STAT_CAP_10FDX:
	case ETHER_STAT_CAP_10HDX:
	case ETHER_STAT_CAP_ASMPAUSE:
	case ETHER_STAT_CAP_PAUSE:
	case ETHER_STAT_CAP_AUTONEG:
	case ETHER_STAT_ADV_CAP_1000FDX:
	case ETHER_STAT_ADV_CAP_1000HDX:
	case ETHER_STAT_ADV_CAP_100FDX:
	case ETHER_STAT_ADV_CAP_100HDX:
	case ETHER_STAT_ADV_CAP_10FDX:
	case ETHER_STAT_ADV_CAP_10HDX:
	case ETHER_STAT_ADV_CAP_ASMPAUSE:
	case ETHER_STAT_ADV_CAP_PAUSE:
	case ETHER_STAT_ADV_CAP_AUTONEG:
	case ETHER_STAT_LP_CAP_1000FDX:
	case ETHER_STAT_LP_CAP_1000HDX:
	case ETHER_STAT_LP_CAP_100FDX:
	case ETHER_STAT_LP_CAP_100HDX:
	case ETHER_STAT_LP_CAP_10FDX:
	case ETHER_STAT_LP_CAP_10HDX:
	case ETHER_STAT_LP_CAP_ASMPAUSE:
	case ETHER_STAT_LP_CAP_PAUSE:
	case ETHER_STAT_LP_CAP_AUTONEG:
	case ETHER_STAT_LINK_ASMPAUSE:
	case ETHER_STAT_LINK_PAUSE:
	case ETHER_STAT_LINK_AUTONEG:
	case ETHER_STAT_LINK_DUPLEX:
	default:
		val = 0;
		break;

	}
	return (val);
}

/*
 * LDC channel is UP, start handshake process with peer.
 */
static void
vgen_handle_evt_up(vgen_ldc_t *ldcp)
{
	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);

	DBG1(vgenp, ldcp, "enter\n");

	ASSERT(MUTEX_HELD(&ldcp->cblock));

	if (ldcp->portp != vgenp->vsw_portp) {
		/*
		 * As the channel is up, use this port from now on.
3713 */ 3714 (void) atomic_swap_32(&ldcp->portp->use_vsw_port, B_FALSE); 3715 } 3716 3717 /* Initialize local session id */ 3718 ldcp->local_sid = ddi_get_lbolt(); 3719 3720 /* clear peer session id */ 3721 ldcp->peer_sid = 0; 3722 ldcp->hretries = 0; 3723 3724 if (ldcp->hphase != VH_PHASE0) { 3725 vgen_handshake_reset(ldcp); 3726 } 3727 3728 /* Initiate Handshake process with peer ldc endpoint */ 3729 vgen_handshake(vh_nextphase(ldcp)); 3730 3731 DBG1(vgenp, ldcp, "exit\n"); 3732 } 3733 3734 /* 3735 * LDC channel is Reset, terminate connection with peer and try to 3736 * bring the channel up again. 3737 */ 3738 static void 3739 vgen_handle_evt_reset(vgen_ldc_t *ldcp) 3740 { 3741 ldc_status_t istatus; 3742 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 3743 int rv; 3744 3745 DBG1(vgenp, ldcp, "enter\n"); 3746 3747 ASSERT(MUTEX_HELD(&ldcp->cblock)); 3748 3749 if ((ldcp->portp != vgenp->vsw_portp) && 3750 (vgenp->vsw_portp != NULL)) { 3751 /* 3752 * As the channel is down, use the switch port until 3753 * the channel becomes ready to be used. 
3754 */ 3755 (void) atomic_swap_32(&ldcp->portp->use_vsw_port, B_TRUE); 3756 } 3757 3758 if (vgenp->vsw_portp == ldcp->portp) { 3759 vio_net_report_err_t rep_err = 3760 ldcp->portp->vcb.vio_net_report_err; 3761 3762 /* Post a reset message */ 3763 rep_err(ldcp->portp->vhp, VIO_NET_RES_DOWN); 3764 } 3765 3766 if (ldcp->hphase != VH_PHASE0) { 3767 vgen_handshake_reset(ldcp); 3768 } 3769 3770 /* try to bring the channel up */ 3771 rv = ldc_up(ldcp->ldc_handle); 3772 if (rv != 0) { 3773 DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv); 3774 } 3775 3776 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) { 3777 DWARN(vgenp, ldcp, "ldc_status err\n"); 3778 } else { 3779 ldcp->ldc_status = istatus; 3780 } 3781 3782 /* if channel is already UP - restart handshake */ 3783 if (ldcp->ldc_status == LDC_UP) { 3784 vgen_handle_evt_up(ldcp); 3785 } 3786 3787 DBG1(vgenp, ldcp, "exit\n"); 3788 } 3789 3790 /* Interrupt handler for the channel */ 3791 static uint_t 3792 vgen_ldc_cb(uint64_t event, caddr_t arg) 3793 { 3794 _NOTE(ARGUNUSED(event)) 3795 vgen_ldc_t *ldcp; 3796 vgen_t *vgenp; 3797 ldc_status_t istatus; 3798 vgen_stats_t *statsp; 3799 timeout_id_t cancel_htid = 0; 3800 uint_t ret = LDC_SUCCESS; 3801 3802 ldcp = (vgen_ldc_t *)arg; 3803 vgenp = LDC_TO_VGEN(ldcp); 3804 statsp = &ldcp->stats; 3805 3806 DBG1(vgenp, ldcp, "enter\n"); 3807 3808 mutex_enter(&ldcp->cblock); 3809 statsp->callbacks++; 3810 if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) { 3811 DWARN(vgenp, ldcp, "status(%d) is LDC_INIT\n", 3812 ldcp->ldc_status); 3813 mutex_exit(&ldcp->cblock); 3814 return (LDC_SUCCESS); 3815 } 3816 3817 /* 3818 * cache cancel_htid before the events specific 3819 * code may overwrite it. Do not clear ldcp->cancel_htid 3820 * as it is also used to indicate the timer to quit immediately. 3821 */ 3822 cancel_htid = ldcp->cancel_htid; 3823 3824 /* 3825 * NOTE: not using switch() as event could be triggered by 3826 * a state change and a read request. 
Also the ordering of the 3827 * check for the event types is deliberate. 3828 */ 3829 if (event & LDC_EVT_UP) { 3830 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) { 3831 DWARN(vgenp, ldcp, "ldc_status err\n"); 3832 /* status couldn't be determined */ 3833 ret = LDC_FAILURE; 3834 goto ldc_cb_ret; 3835 } 3836 ldcp->ldc_status = istatus; 3837 if (ldcp->ldc_status != LDC_UP) { 3838 DWARN(vgenp, ldcp, "LDC_EVT_UP received " 3839 " but ldc status is not UP(0x%x)\n", 3840 ldcp->ldc_status); 3841 /* spurious interrupt, return success */ 3842 goto ldc_cb_ret; 3843 } 3844 DWARN(vgenp, ldcp, "event(%lx) UP, status(%d)\n", 3845 event, ldcp->ldc_status); 3846 3847 vgen_handle_evt_up(ldcp); 3848 3849 ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0); 3850 } 3851 3852 /* Handle RESET/DOWN before READ event */ 3853 if (event & (LDC_EVT_RESET | LDC_EVT_DOWN)) { 3854 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) { 3855 DWARN(vgenp, ldcp, "ldc_status error\n"); 3856 /* status couldn't be determined */ 3857 ret = LDC_FAILURE; 3858 goto ldc_cb_ret; 3859 } 3860 ldcp->ldc_status = istatus; 3861 DWARN(vgenp, ldcp, "event(%lx) RESET/DOWN, status(%d)\n", 3862 event, ldcp->ldc_status); 3863 3864 vgen_handle_evt_reset(ldcp); 3865 3866 /* 3867 * As the channel is down/reset, ignore READ event 3868 * but print a debug warning message. 3869 */ 3870 if (event & LDC_EVT_READ) { 3871 DWARN(vgenp, ldcp, 3872 "LDC_EVT_READ set along with RESET/DOWN\n"); 3873 event &= ~LDC_EVT_READ; 3874 } 3875 } 3876 3877 if (event & LDC_EVT_READ) { 3878 DBG2(vgenp, ldcp, "event(%lx) READ, status(%d)\n", 3879 event, ldcp->ldc_status); 3880 3881 ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0); 3882 3883 if (ldcp->rcv_thread != NULL) { 3884 /* 3885 * If the receive thread is enabled, then 3886 * wakeup the receive thread to process the 3887 * LDC messages. 
3888 */ 3889 mutex_exit(&ldcp->cblock); 3890 mutex_enter(&ldcp->rcv_thr_lock); 3891 if (!(ldcp->rcv_thr_flags & VGEN_WTHR_DATARCVD)) { 3892 ldcp->rcv_thr_flags |= VGEN_WTHR_DATARCVD; 3893 cv_signal(&ldcp->rcv_thr_cv); 3894 } 3895 mutex_exit(&ldcp->rcv_thr_lock); 3896 mutex_enter(&ldcp->cblock); 3897 } else { 3898 vgen_handle_evt_read(ldcp); 3899 } 3900 } 3901 3902 ldc_cb_ret: 3903 /* 3904 * Check to see if the status of cancel_htid has 3905 * changed. If another timer needs to be cancelled, 3906 * then let the next callback to clear it. 3907 */ 3908 if (cancel_htid == 0) { 3909 cancel_htid = ldcp->cancel_htid; 3910 } 3911 mutex_exit(&ldcp->cblock); 3912 3913 if (cancel_htid) { 3914 /* 3915 * Cancel handshake timer. 3916 * untimeout(9F) will not return until the pending callback is 3917 * cancelled or has run. No problems will result from calling 3918 * untimeout if the handler has already completed. 3919 * If the timeout handler did run, then it would just 3920 * return as cancel_htid is set. 3921 */ 3922 DBG2(vgenp, ldcp, "calling cance_htid =0x%X \n", cancel_htid); 3923 (void) untimeout(cancel_htid); 3924 mutex_enter(&ldcp->cblock); 3925 /* clear it only if its the same as the one we cancelled */ 3926 if (ldcp->cancel_htid == cancel_htid) { 3927 ldcp->cancel_htid = 0; 3928 } 3929 mutex_exit(&ldcp->cblock); 3930 } 3931 DBG1(vgenp, ldcp, "exit\n"); 3932 return (ret); 3933 } 3934 3935 static void 3936 vgen_handle_evt_read(vgen_ldc_t *ldcp) 3937 { 3938 int rv; 3939 uint64_t *ldcmsg; 3940 size_t msglen; 3941 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 3942 vio_msg_tag_t *tagp; 3943 ldc_status_t istatus; 3944 boolean_t has_data; 3945 3946 DBG1(vgenp, ldcp, "enter\n"); 3947 3948 ldcmsg = ldcp->ldcmsg; 3949 /* 3950 * If the receive thread is enabled, then the cblock 3951 * need to be acquired here. If not, the vgen_ldc_cb() 3952 * calls this function with cblock held already. 
3953 */ 3954 if (ldcp->rcv_thread != NULL) { 3955 mutex_enter(&ldcp->cblock); 3956 } else { 3957 ASSERT(MUTEX_HELD(&ldcp->cblock)); 3958 } 3959 3960 vgen_evt_read: 3961 do { 3962 msglen = ldcp->msglen; 3963 rv = ldc_read(ldcp->ldc_handle, (caddr_t)ldcmsg, &msglen); 3964 3965 if (rv != 0) { 3966 DWARN(vgenp, ldcp, "err rv(%d) len(%d)\n", 3967 rv, msglen); 3968 if (rv == ECONNRESET) 3969 goto vgen_evtread_error; 3970 break; 3971 } 3972 if (msglen == 0) { 3973 DBG2(vgenp, ldcp, "ldc_read NODATA"); 3974 break; 3975 } 3976 DBG2(vgenp, ldcp, "ldc_read msglen(%d)", msglen); 3977 3978 tagp = (vio_msg_tag_t *)ldcmsg; 3979 3980 if (ldcp->peer_sid) { 3981 /* 3982 * check sid only after we have received peer's sid 3983 * in the version negotiate msg. 3984 */ 3985 #ifdef DEBUG 3986 if (vgen_hdbg & HDBG_BAD_SID) { 3987 /* simulate bad sid condition */ 3988 tagp->vio_sid = 0; 3989 vgen_hdbg &= ~(HDBG_BAD_SID); 3990 } 3991 #endif 3992 rv = vgen_check_sid(ldcp, tagp); 3993 if (rv != VGEN_SUCCESS) { 3994 /* 3995 * If sid mismatch is detected, 3996 * reset the channel. 3997 */ 3998 ldcp->need_ldc_reset = B_TRUE; 3999 goto vgen_evtread_error; 4000 } 4001 } 4002 4003 switch (tagp->vio_msgtype) { 4004 case VIO_TYPE_CTRL: 4005 rv = vgen_handle_ctrlmsg(ldcp, tagp); 4006 break; 4007 4008 case VIO_TYPE_DATA: 4009 rv = vgen_handle_datamsg(ldcp, tagp, msglen); 4010 break; 4011 4012 case VIO_TYPE_ERR: 4013 vgen_handle_errmsg(ldcp, tagp); 4014 break; 4015 4016 default: 4017 DWARN(vgenp, ldcp, "Unknown VIO_TYPE(%x)\n", 4018 tagp->vio_msgtype); 4019 break; 4020 } 4021 4022 /* 4023 * If an error is encountered, stop processing and 4024 * handle the error. 
4025 */ 4026 if (rv != 0) { 4027 goto vgen_evtread_error; 4028 } 4029 4030 } while (msglen); 4031 4032 /* check once more before exiting */ 4033 rv = ldc_chkq(ldcp->ldc_handle, &has_data); 4034 if ((rv == 0) && (has_data == B_TRUE)) { 4035 DTRACE_PROBE(vgen_chkq); 4036 goto vgen_evt_read; 4037 } 4038 4039 vgen_evtread_error: 4040 if (rv == ECONNRESET) { 4041 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) { 4042 DWARN(vgenp, ldcp, "ldc_status err\n"); 4043 } else { 4044 ldcp->ldc_status = istatus; 4045 } 4046 vgen_handle_evt_reset(ldcp); 4047 } else if (rv) { 4048 vgen_handshake_retry(ldcp); 4049 } 4050 4051 /* 4052 * If the receive thread is enabled, then cancel the 4053 * handshake timeout here. 4054 */ 4055 if (ldcp->rcv_thread != NULL) { 4056 timeout_id_t cancel_htid = ldcp->cancel_htid; 4057 4058 mutex_exit(&ldcp->cblock); 4059 if (cancel_htid) { 4060 /* 4061 * Cancel handshake timer. untimeout(9F) will 4062 * not return until the pending callback is cancelled 4063 * or has run. No problems will result from calling 4064 * untimeout if the handler has already completed. 4065 * If the timeout handler did run, then it would just 4066 * return as cancel_htid is set. 
4067 */ 4068 DBG2(vgenp, ldcp, "calling cance_htid =0x%X \n", 4069 cancel_htid); 4070 (void) untimeout(cancel_htid); 4071 4072 /* 4073 * clear it only if its the same as the one we 4074 * cancelled 4075 */ 4076 mutex_enter(&ldcp->cblock); 4077 if (ldcp->cancel_htid == cancel_htid) { 4078 ldcp->cancel_htid = 0; 4079 } 4080 mutex_exit(&ldcp->cblock); 4081 } 4082 } 4083 4084 DBG1(vgenp, ldcp, "exit\n"); 4085 } 4086 4087 /* vgen handshake functions */ 4088 4089 /* change the hphase for the channel to the next phase */ 4090 static vgen_ldc_t * 4091 vh_nextphase(vgen_ldc_t *ldcp) 4092 { 4093 if (ldcp->hphase == VH_PHASE3) { 4094 ldcp->hphase = VH_DONE; 4095 } else { 4096 ldcp->hphase++; 4097 } 4098 return (ldcp); 4099 } 4100 4101 /* 4102 * wrapper routine to send the given message over ldc using ldc_write(). 4103 */ 4104 static int 4105 vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg, size_t msglen, 4106 boolean_t caller_holds_lock) 4107 { 4108 int rv; 4109 size_t len; 4110 uint32_t retries = 0; 4111 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 4112 vio_msg_tag_t *tagp = (vio_msg_tag_t *)msg; 4113 vio_dring_msg_t *dmsg; 4114 vio_raw_data_msg_t *rmsg; 4115 boolean_t data_msg = B_FALSE; 4116 4117 len = msglen; 4118 if ((len == 0) || (msg == NULL)) 4119 return (VGEN_FAILURE); 4120 4121 if (!caller_holds_lock) { 4122 mutex_enter(&ldcp->wrlock); 4123 } 4124 4125 if (tagp->vio_subtype == VIO_SUBTYPE_INFO) { 4126 if (tagp->vio_subtype_env == VIO_DRING_DATA) { 4127 dmsg = (vio_dring_msg_t *)tagp; 4128 dmsg->seq_num = ldcp->next_txseq; 4129 data_msg = B_TRUE; 4130 } else if (tagp->vio_subtype_env == VIO_PKT_DATA) { 4131 rmsg = (vio_raw_data_msg_t *)tagp; 4132 rmsg->seq_num = ldcp->next_txseq; 4133 data_msg = B_TRUE; 4134 } 4135 } 4136 4137 do { 4138 len = msglen; 4139 rv = ldc_write(ldcp->ldc_handle, (caddr_t)msg, &len); 4140 if (retries++ >= vgen_ldcwr_retries) 4141 break; 4142 } while (rv == EWOULDBLOCK); 4143 4144 if (rv == 0 && data_msg == B_TRUE) { 4145 ldcp->next_txseq++; 4146 } 4147 4148 
if (!caller_holds_lock) { 4149 mutex_exit(&ldcp->wrlock); 4150 } 4151 4152 if (rv != 0) { 4153 DWARN(vgenp, ldcp, "ldc_write failed: rv(%d)\n", 4154 rv, msglen); 4155 return (rv); 4156 } 4157 4158 if (len != msglen) { 4159 DWARN(vgenp, ldcp, "ldc_write failed: rv(%d) msglen (%d)\n", 4160 rv, msglen); 4161 return (VGEN_FAILURE); 4162 } 4163 4164 return (VGEN_SUCCESS); 4165 } 4166 4167 /* send version negotiate message to the peer over ldc */ 4168 static int 4169 vgen_send_version_negotiate(vgen_ldc_t *ldcp) 4170 { 4171 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 4172 vio_ver_msg_t vermsg; 4173 vio_msg_tag_t *tagp = &vermsg.tag; 4174 int rv; 4175 4176 bzero(&vermsg, sizeof (vermsg)); 4177 4178 tagp->vio_msgtype = VIO_TYPE_CTRL; 4179 tagp->vio_subtype = VIO_SUBTYPE_INFO; 4180 tagp->vio_subtype_env = VIO_VER_INFO; 4181 tagp->vio_sid = ldcp->local_sid; 4182 4183 /* get version msg payload from ldcp->local */ 4184 vermsg.ver_major = ldcp->local_hparams.ver_major; 4185 vermsg.ver_minor = ldcp->local_hparams.ver_minor; 4186 vermsg.dev_class = ldcp->local_hparams.dev_class; 4187 4188 rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vermsg), B_FALSE); 4189 if (rv != VGEN_SUCCESS) { 4190 DWARN(vgenp, ldcp, "vgen_sendmsg failed\n"); 4191 return (rv); 4192 } 4193 4194 ldcp->hstate |= VER_INFO_SENT; 4195 DBG2(vgenp, ldcp, "VER_INFO_SENT ver(%d,%d)\n", 4196 vermsg.ver_major, vermsg.ver_minor); 4197 4198 return (VGEN_SUCCESS); 4199 } 4200 4201 /* send attr info message to the peer over ldc */ 4202 static int 4203 vgen_send_attr_info(vgen_ldc_t *ldcp) 4204 { 4205 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 4206 vnet_attr_msg_t attrmsg; 4207 vio_msg_tag_t *tagp = &attrmsg.tag; 4208 int rv; 4209 4210 bzero(&attrmsg, sizeof (attrmsg)); 4211 4212 tagp->vio_msgtype = VIO_TYPE_CTRL; 4213 tagp->vio_subtype = VIO_SUBTYPE_INFO; 4214 tagp->vio_subtype_env = VIO_ATTR_INFO; 4215 tagp->vio_sid = ldcp->local_sid; 4216 4217 /* get attr msg payload from ldcp->local */ 4218 attrmsg.mtu = ldcp->local_hparams.mtu; 4219 
	attrmsg.addr = ldcp->local_hparams.addr;
	attrmsg.addr_type = ldcp->local_hparams.addr_type;
	attrmsg.xfer_mode = ldcp->local_hparams.xfer_mode;
	attrmsg.ack_freq = ldcp->local_hparams.ack_freq;

	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (attrmsg), B_FALSE);
	if (rv != VGEN_SUCCESS) {
		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
		return (rv);
	}

	ldcp->hstate |= ATTR_INFO_SENT;
	DBG2(vgenp, ldcp, "ATTR_INFO_SENT\n");

	return (VGEN_SUCCESS);
}

/* send descriptor ring register message to the peer over ldc */
static int
vgen_send_dring_reg(vgen_ldc_t *ldcp)
{
	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
	vio_dring_reg_msg_t	msg;
	vio_msg_tag_t		*tagp = &msg.tag;
	int			rv;

	bzero(&msg, sizeof (msg));

	tagp->vio_msgtype = VIO_TYPE_CTRL;
	tagp->vio_subtype = VIO_SUBTYPE_INFO;
	tagp->vio_subtype_env = VIO_DRING_REG;
	tagp->vio_sid = ldcp->local_sid;

	/* get dring info msg payload from ldcp->local */
	bcopy(&(ldcp->local_hparams.dring_cookie), (msg.cookie),
	    sizeof (ldc_mem_cookie_t));
	msg.ncookies = ldcp->local_hparams.num_dcookies;
	msg.num_descriptors = ldcp->local_hparams.num_desc;
	msg.descriptor_size = ldcp->local_hparams.desc_size;

	/*
	 * dring_ident is set to 0. After mapping the dring, peer sets this
	 * value and sends it in the ack, which is saved in
	 * vgen_handle_dring_reg().
	 */
	msg.dring_ident = 0;

	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (msg), B_FALSE);
	if (rv != VGEN_SUCCESS) {
		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
		return (rv);
	}

	ldcp->hstate |= DRING_INFO_SENT;
	DBG2(vgenp, ldcp, "DRING_INFO_SENT \n");

	return (VGEN_SUCCESS);
}

/* send an RDX (ready-to-exchange-data) info message to the peer */
static int
vgen_send_rdx_info(vgen_ldc_t *ldcp)
{
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
	vio_rdx_msg_t	rdxmsg;
	vio_msg_tag_t	*tagp = &rdxmsg.tag;
	int		rv;

	bzero(&rdxmsg, sizeof (rdxmsg));

	tagp->vio_msgtype = VIO_TYPE_CTRL;
	tagp->vio_subtype = VIO_SUBTYPE_INFO;
	tagp->vio_subtype_env = VIO_RDX;
	tagp->vio_sid = ldcp->local_sid;

	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (rdxmsg), B_FALSE);
	if (rv != VGEN_SUCCESS) {
		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
		return (rv);
	}

	ldcp->hstate |= RDX_INFO_SENT;
	DBG2(vgenp, ldcp, "RDX_INFO_SENT\n");

	return (VGEN_SUCCESS);
}

/* send descriptor ring data message to the peer over ldc */
static int
vgen_send_dring_data(vgen_ldc_t *ldcp, uint32_t start, int32_t end)
{
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
	vio_dring_msg_t	dringmsg, *msgp = &dringmsg;
	vio_msg_tag_t	*tagp = &msgp->tag;
	vgen_stats_t	*statsp = &ldcp->stats;
	int		rv;

	bzero(msgp, sizeof (*msgp));

	tagp->vio_msgtype = VIO_TYPE_DATA;
	tagp->vio_subtype = VIO_SUBTYPE_INFO;
	tagp->vio_subtype_env = VIO_DRING_DATA;
	tagp->vio_sid = ldcp->local_sid;

	msgp->dring_ident = ldcp->local_hparams.dring_ident;
	msgp->start_idx = start;
	msgp->end_idx = end;

	/* caller holds wrlock: see vgen_sendmsg(..., B_TRUE) */
	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (dringmsg), B_TRUE);
	if (rv != VGEN_SUCCESS) {
		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
		return (rv);
	}

	statsp->dring_data_msgs++;

	DBG2(vgenp, ldcp, "DRING_DATA_SENT \n");

	return (VGEN_SUCCESS);
}

/*
 * send multicast addr info message to vsw; the table is sent in chunks
 * of at most VNET_NUM_MCAST addresses per message.
 */
static int
vgen_send_mcast_info(vgen_ldc_t *ldcp)
{
	vnet_mcast_msg_t	mcastmsg;
	vnet_mcast_msg_t	*msgp;
	vio_msg_tag_t		*tagp;
	vgen_t			*vgenp;
	struct ether_addr	*mca;
	int			rv;
	int			i;
	uint32_t		size;
	uint32_t		mccount;
	uint32_t		n;

	msgp = &mcastmsg;
	tagp = &msgp->tag;
	vgenp = LDC_TO_VGEN(ldcp);

	mccount = vgenp->mccount;
	i = 0;

	do {
		tagp->vio_msgtype = VIO_TYPE_CTRL;
		tagp->vio_subtype = VIO_SUBTYPE_INFO;
		tagp->vio_subtype_env = VNET_MCAST_INFO;
		tagp->vio_sid = ldcp->local_sid;

		n = ((mccount >= VNET_NUM_MCAST) ? VNET_NUM_MCAST : mccount);
		size = n * sizeof (struct ether_addr);

		mca = &(vgenp->mctab[i]);
		bcopy(mca, (msgp->mca), size);
		msgp->set = B_TRUE;
		msgp->count = n;

		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp),
		    B_FALSE);
		if (rv != VGEN_SUCCESS) {
			DWARN(vgenp, ldcp, "vgen_sendmsg err(%d)\n", rv);
			return (rv);
		}

		mccount -= n;
		i += n;

	} while (mccount);

	return (VGEN_SUCCESS);
}

/* Initiate Phase 2 of handshake */
static int
vgen_handshake_phase2(vgen_ldc_t *ldcp)
{
	int		rv;
	uint32_t	ncookies = 0;
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);

#ifdef DEBUG
	if (vgen_hdbg & HDBG_OUT_STATE) {
		/* simulate out of state condition */
		vgen_hdbg &= ~(HDBG_OUT_STATE);
		rv = vgen_send_rdx_info(ldcp);
		return (rv);
	}
	if (vgen_hdbg & HDBG_TIMEOUT) {
		/* simulate timeout condition */
		vgen_hdbg &= ~(HDBG_TIMEOUT);
		return (VGEN_SUCCESS);
	}
#endif
	rv = vgen_send_attr_info(ldcp);
	if (rv != VGEN_SUCCESS) {
		return (rv);
	}

	/* Bind descriptor ring to the channel */
	if (ldcp->num_txdcookies == 0) {
		rv =
ldc_mem_dring_bind(ldcp->ldc_handle, ldcp->tx_dhandle,
		    LDC_DIRECT_MAP | LDC_SHADOW_MAP, LDC_MEM_RW,
		    &ldcp->tx_dcookie, &ncookies);
		if (rv != 0) {
			DWARN(vgenp, ldcp, "ldc_mem_dring_bind failed "
			    "rv(%x)\n", rv);
			return (rv);
		}
		ASSERT(ncookies == 1);
		ldcp->num_txdcookies = ncookies;
	}

	/* update local dring_info params */
	bcopy(&(ldcp->tx_dcookie), &(ldcp->local_hparams.dring_cookie),
	    sizeof (ldc_mem_cookie_t));
	ldcp->local_hparams.num_dcookies = ldcp->num_txdcookies;
	ldcp->local_hparams.num_desc = ldcp->num_txds;
	ldcp->local_hparams.desc_size = sizeof (vnet_public_desc_t);

	rv = vgen_send_dring_reg(ldcp);
	if (rv != VGEN_SUCCESS) {
		return (rv);
	}

	return (VGEN_SUCCESS);
}

/*
 * Set vnet-protocol-version dependent functions based on version.
 */
static void
vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp)
{
	vgen_hparams_t	*lp = &ldcp->local_hparams;
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);

	if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
		/*
		 * If the version negotiated with peer is >= 1.4(Jumbo Frame
		 * Support), set the mtu in our attributes to max_frame_size.
		 */
		lp->mtu = vgenp->max_frame_size;
	} else if (VGEN_VER_EQ(ldcp, 1, 3)) {
		/*
		 * If the version negotiated with peer is == 1.3 (Vlan Tag
		 * Support) set the attr.mtu to ETHERMAX + VLAN_TAGSZ.
		 */
		lp->mtu = ETHERMAX + VLAN_TAGSZ;
	} else {
		vgen_port_t	*portp = ldcp->portp;
		vnet_t		*vnetp = vgenp->vnetp;
		/*
		 * Pre-1.3 peers expect max frame size of ETHERMAX.
		 * We can negotiate that size with those peers provided the
		 * following conditions are true:
		 * - Only pvid is defined for our peer and there are no vids.
		 * - pvids are equal.
		 * If the above conditions are true, then we can send/recv only
		 * untagged frames of max size ETHERMAX.
		 */
		if (portp->nvids == 0 && portp->pvid == vnetp->pvid) {
			lp->mtu = ETHERMAX;
		}
	}

	if (VGEN_VER_GTEQ(ldcp, 1, 2)) {
		/* Versions >= 1.2 */

		if (VGEN_PRI_ETH_DEFINED(vgenp)) {
			/*
			 * enable priority routines and pkt mode only if
			 * at least one pri-eth-type is specified in MD.
			 */

			ldcp->tx = vgen_ldcsend;
			ldcp->rx_pktdata = vgen_handle_pkt_data;

			/* set xfer mode for vgen_send_attr_info() */
			lp->xfer_mode = VIO_PKT_MODE | VIO_DRING_MODE_V1_2;

		} else {
			/* no priority eth types defined in MD */

			ldcp->tx = vgen_ldcsend_dring;
			ldcp->rx_pktdata = vgen_handle_pkt_data_nop;

			/* set xfer mode for vgen_send_attr_info() */
			lp->xfer_mode = VIO_DRING_MODE_V1_2;

		}
	} else {
		/* Versions prior to 1.2 */

		vgen_reset_vnet_proto_ops(ldcp);
	}
}

/*
 * Reset vnet-protocol-version dependent functions to pre-v1.2.
 */
static void
vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp)
{
	vgen_hparams_t	*lp = &ldcp->local_hparams;

	ldcp->tx = vgen_ldcsend_dring;
	ldcp->rx_pktdata = vgen_handle_pkt_data_nop;

	/* set xfer mode for vgen_send_attr_info() */
	lp->xfer_mode = VIO_DRING_MODE_V1_0;
}

/*
 * Reset a port whose peer negotiated a pre-1.3 (vlan-unaware) protocol
 * version, if the port's current vlan configuration is incompatible
 * with that version.
 */
static void
vgen_vlan_unaware_port_reset(vgen_port_t *portp)
{
	vgen_ldclist_t	*ldclp;
	vgen_ldc_t	*ldcp;
	vgen_t		*vgenp = portp->vgenp;
	vnet_t		*vnetp = vgenp->vnetp;

	ldclp = &portp->ldclist;

	READ_ENTER(&ldclp->rwlock);

	/*
	 * NOTE: for now, we will assume we have a single channel.
	 */
	if (ldclp->headp == NULL) {
		RW_EXIT(&ldclp->rwlock);
		return;
	}
	ldcp = ldclp->headp;

	mutex_enter(&ldcp->cblock);

	/*
	 * If the peer is vlan_unaware(ver < 1.3), reset channel and terminate
	 * the connection. See comments in vgen_set_vnet_proto_ops().
 */
	if (ldcp->hphase == VH_DONE && VGEN_VER_LT(ldcp, 1, 3) &&
	    (portp->nvids != 0 || portp->pvid != vnetp->pvid)) {
		ldcp->need_ldc_reset = B_TRUE;
		vgen_handshake_retry(ldcp);
	}

	mutex_exit(&ldcp->cblock);

	RW_EXIT(&ldclp->rwlock);
}

/* apply vgen_vlan_unaware_port_reset() to every port of the device */
static void
vgen_reset_vlan_unaware_ports(vgen_t *vgenp)
{
	vgen_port_t	*portp;
	vgen_portlist_t	*plistp;

	plistp = &(vgenp->vgenports);
	READ_ENTER(&plistp->rwlock);

	for (portp = plistp->headp; portp != NULL; portp = portp->nextp) {

		vgen_vlan_unaware_port_reset(portp);

	}

	RW_EXIT(&plistp->rwlock);
}

/*
 * This function resets the handshake phase to VH_PHASE0(pre-handshake phase).
 * This can happen after a channel comes up (status: LDC_UP) or
 * when handshake gets terminated due to various conditions.
 */
static void
vgen_reset_hphase(vgen_ldc_t *ldcp)
{
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
	ldc_status_t	istatus;
	int		rv;

	DBG1(vgenp, ldcp, "enter\n");
	/* reset hstate and hphase */
	ldcp->hstate = 0;
	ldcp->hphase = VH_PHASE0;

	/* fall back to pre-1.2 protocol ops until renegotiated */
	vgen_reset_vnet_proto_ops(ldcp);

	/*
	 * Save the id of pending handshake timer in cancel_htid.
	 * This will be checked in vgen_ldc_cb() and the handshake timer will
	 * be cancelled after releasing cblock.
	 */
	if (ldcp->htid) {
		ldcp->cancel_htid = ldcp->htid;
		ldcp->htid = 0;
	}

	if (ldcp->local_hparams.dring_ready) {
		ldcp->local_hparams.dring_ready = B_FALSE;
	}

	/* Unbind tx descriptor ring from the channel */
	if (ldcp->num_txdcookies) {
		rv = ldc_mem_dring_unbind(ldcp->tx_dhandle);
		if (rv != 0) {
			DWARN(vgenp, ldcp, "ldc_mem_dring_unbind failed\n");
		}
		ldcp->num_txdcookies = 0;
	}

	if (ldcp->peer_hparams.dring_ready) {
		ldcp->peer_hparams.dring_ready = B_FALSE;
		/* Unmap peer's dring */
		(void) ldc_mem_dring_unmap(ldcp->rx_dhandle);
		vgen_clobber_rxds(ldcp);
	}

	vgen_clobber_tbufs(ldcp);

	/*
	 * clear local handshake params and initialize.
	 */
	bzero(&(ldcp->local_hparams), sizeof (ldcp->local_hparams));

	/* set version to the highest version supported */
	ldcp->local_hparams.ver_major =
	    ldcp->vgen_versions[0].ver_major;
	ldcp->local_hparams.ver_minor =
	    ldcp->vgen_versions[0].ver_minor;
	ldcp->local_hparams.dev_class = VDEV_NETWORK;

	/* set attr_info params */
	ldcp->local_hparams.mtu = vgenp->max_frame_size;
	ldcp->local_hparams.addr =
	    vnet_macaddr_strtoul(vgenp->macaddr);
	ldcp->local_hparams.addr_type = ADDR_TYPE_MAC;
	ldcp->local_hparams.xfer_mode = VIO_DRING_MODE_V1_0;
	ldcp->local_hparams.ack_freq = 0;	/* don't need acks */

	/*
	 * Note: dring is created, but not bound yet.
	 * local dring_info params will be updated when we bind the dring in
	 * vgen_handshake_phase2().
	 * dring_ident is set to 0. After mapping the dring, peer sets this
	 * value and sends it in the ack, which is saved in
	 * vgen_handle_dring_reg().
	 */
	ldcp->local_hparams.dring_ident = 0;

	/* clear peer_hparams */
	bzero(&(ldcp->peer_hparams), sizeof (ldcp->peer_hparams));

	/* reset the channel if required */
	if (ldcp->need_ldc_reset) {
		DWARN(vgenp, ldcp, "Doing Channel Reset...\n");
		ldcp->need_ldc_reset = B_FALSE;
		(void) ldc_down(ldcp->ldc_handle);
		(void) ldc_status(ldcp->ldc_handle, &istatus);
		DBG2(vgenp, ldcp, "Reset Done,ldc_status(%x)\n", istatus);
		ldcp->ldc_status = istatus;

		/* clear sids */
		ldcp->local_sid = 0;
		ldcp->peer_sid = 0;

		/* try to bring the channel up */
		rv = ldc_up(ldcp->ldc_handle);
		if (rv != 0) {
			DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
		}

		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
			DWARN(vgenp, ldcp, "ldc_status err\n");
		} else {
			ldcp->ldc_status = istatus;
		}
	}
}

/*
 * wrapper function for vgen_reset_hphase: acquires all the data-path
 * locks (rx, write, tx, transmit-cleanup) in order before resetting.
 */
static void
vgen_handshake_reset(vgen_ldc_t *ldcp)
{
	ASSERT(MUTEX_HELD(&ldcp->cblock));
	mutex_enter(&ldcp->rxlock);
	mutex_enter(&ldcp->wrlock);
	mutex_enter(&ldcp->txlock);
	mutex_enter(&ldcp->tclock);

	vgen_reset_hphase(ldcp);

	mutex_exit(&ldcp->tclock);
	mutex_exit(&ldcp->txlock);
	mutex_exit(&ldcp->wrlock);
	mutex_exit(&ldcp->rxlock);
}

/*
 * Initiate handshake with the peer by sending various messages
 * based on the handshake-phase that the channel is currently in.
4718 */ 4719 static void 4720 vgen_handshake(vgen_ldc_t *ldcp) 4721 { 4722 uint32_t hphase = ldcp->hphase; 4723 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 4724 ldc_status_t istatus; 4725 int rv = 0; 4726 4727 switch (hphase) { 4728 4729 case VH_PHASE1: 4730 4731 /* 4732 * start timer, for entire handshake process, turn this timer 4733 * off if all phases of handshake complete successfully and 4734 * hphase goes to VH_DONE(below) or 4735 * vgen_reset_hphase() gets called or 4736 * channel is reset due to errors or 4737 * vgen_ldc_uninit() is invoked(vgen_stop). 4738 */ 4739 ASSERT(ldcp->htid == 0); 4740 ldcp->htid = timeout(vgen_hwatchdog, (caddr_t)ldcp, 4741 drv_usectohz(vgen_hwd_interval * MICROSEC)); 4742 4743 /* Phase 1 involves negotiating the version */ 4744 rv = vgen_send_version_negotiate(ldcp); 4745 break; 4746 4747 case VH_PHASE2: 4748 rv = vgen_handshake_phase2(ldcp); 4749 break; 4750 4751 case VH_PHASE3: 4752 rv = vgen_send_rdx_info(ldcp); 4753 break; 4754 4755 case VH_DONE: 4756 /* 4757 * Save the id of pending handshake timer in cancel_htid. 4758 * This will be checked in vgen_ldc_cb() and the handshake 4759 * timer will be cancelled after releasing cblock. 4760 */ 4761 if (ldcp->htid) { 4762 ldcp->cancel_htid = ldcp->htid; 4763 ldcp->htid = 0; 4764 } 4765 ldcp->hretries = 0; 4766 DBG1(vgenp, ldcp, "Handshake Done\n"); 4767 4768 if (ldcp->portp == vgenp->vsw_portp) { 4769 /* 4770 * If this channel(port) is connected to vsw, 4771 * need to sync multicast table with vsw. 4772 */ 4773 mutex_exit(&ldcp->cblock); 4774 4775 mutex_enter(&vgenp->lock); 4776 rv = vgen_send_mcast_info(ldcp); 4777 mutex_exit(&vgenp->lock); 4778 4779 mutex_enter(&ldcp->cblock); 4780 if (rv != VGEN_SUCCESS) 4781 break; 4782 } 4783 4784 /* 4785 * Check if mac layer should be notified to restart 4786 * transmissions. This can happen if the channel got 4787 * reset and vgen_clobber_tbufs() is called, while 4788 * need_resched is set. 
4789 */ 4790 mutex_enter(&ldcp->tclock); 4791 if (ldcp->need_resched) { 4792 vio_net_tx_update_t vtx_update = 4793 ldcp->portp->vcb.vio_net_tx_update; 4794 4795 ldcp->need_resched = B_FALSE; 4796 vtx_update(ldcp->portp->vhp); 4797 } 4798 mutex_exit(&ldcp->tclock); 4799 4800 break; 4801 4802 default: 4803 break; 4804 } 4805 4806 if (rv == ECONNRESET) { 4807 if (ldc_status(ldcp->ldc_handle, &istatus) != 0) { 4808 DWARN(vgenp, ldcp, "ldc_status err\n"); 4809 } else { 4810 ldcp->ldc_status = istatus; 4811 } 4812 vgen_handle_evt_reset(ldcp); 4813 } else if (rv) { 4814 vgen_handshake_reset(ldcp); 4815 } 4816 } 4817 4818 /* 4819 * Check if the current handshake phase has completed successfully and 4820 * return the status. 4821 */ 4822 static int 4823 vgen_handshake_done(vgen_ldc_t *ldcp) 4824 { 4825 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 4826 uint32_t hphase = ldcp->hphase; 4827 int status = 0; 4828 4829 switch (hphase) { 4830 4831 case VH_PHASE1: 4832 /* 4833 * Phase1 is done, if version negotiation 4834 * completed successfully. 4835 */ 4836 status = ((ldcp->hstate & VER_NEGOTIATED) == 4837 VER_NEGOTIATED); 4838 break; 4839 4840 case VH_PHASE2: 4841 /* 4842 * Phase 2 is done, if attr info and dring info 4843 * have been exchanged successfully. 
4844 */ 4845 status = (((ldcp->hstate & ATTR_INFO_EXCHANGED) == 4846 ATTR_INFO_EXCHANGED) && 4847 ((ldcp->hstate & DRING_INFO_EXCHANGED) == 4848 DRING_INFO_EXCHANGED)); 4849 break; 4850 4851 case VH_PHASE3: 4852 /* Phase 3 is done, if rdx msg has been exchanged */ 4853 status = ((ldcp->hstate & RDX_EXCHANGED) == 4854 RDX_EXCHANGED); 4855 break; 4856 4857 default: 4858 break; 4859 } 4860 4861 if (status == 0) { 4862 return (VGEN_FAILURE); 4863 } 4864 DBG2(vgenp, ldcp, "PHASE(%d)\n", hphase); 4865 return (VGEN_SUCCESS); 4866 } 4867 4868 /* retry handshake on failure */ 4869 static void 4870 vgen_handshake_retry(vgen_ldc_t *ldcp) 4871 { 4872 /* reset handshake phase */ 4873 vgen_handshake_reset(ldcp); 4874 4875 /* handshake retry is specified and the channel is UP */ 4876 if (vgen_max_hretries && (ldcp->ldc_status == LDC_UP)) { 4877 if (ldcp->hretries++ < vgen_max_hretries) { 4878 ldcp->local_sid = ddi_get_lbolt(); 4879 vgen_handshake(vh_nextphase(ldcp)); 4880 } 4881 } 4882 } 4883 4884 /* 4885 * Handle a version info msg from the peer or an ACK/NACK from the peer 4886 * to a version info msg that we sent. 4887 */ 4888 static int 4889 vgen_handle_version_negotiate(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp) 4890 { 4891 vgen_t *vgenp; 4892 vio_ver_msg_t *vermsg = (vio_ver_msg_t *)tagp; 4893 int ack = 0; 4894 int failed = 0; 4895 int idx; 4896 vgen_ver_t *versions = ldcp->vgen_versions; 4897 int rv = 0; 4898 4899 vgenp = LDC_TO_VGEN(ldcp); 4900 DBG1(vgenp, ldcp, "enter\n"); 4901 switch (tagp->vio_subtype) { 4902 case VIO_SUBTYPE_INFO: 4903 4904 /* Cache sid of peer if this is the first time */ 4905 if (ldcp->peer_sid == 0) { 4906 DBG2(vgenp, ldcp, "Caching peer_sid(%x)\n", 4907 tagp->vio_sid); 4908 ldcp->peer_sid = tagp->vio_sid; 4909 } 4910 4911 if (ldcp->hphase != VH_PHASE1) { 4912 /* 4913 * If we are not already in VH_PHASE1, reset to 4914 * pre-handshake state, and initiate handshake 4915 * to the peer too. 
4916 */ 4917 vgen_handshake_reset(ldcp); 4918 vgen_handshake(vh_nextphase(ldcp)); 4919 } 4920 ldcp->hstate |= VER_INFO_RCVD; 4921 4922 /* save peer's requested values */ 4923 ldcp->peer_hparams.ver_major = vermsg->ver_major; 4924 ldcp->peer_hparams.ver_minor = vermsg->ver_minor; 4925 ldcp->peer_hparams.dev_class = vermsg->dev_class; 4926 4927 if ((vermsg->dev_class != VDEV_NETWORK) && 4928 (vermsg->dev_class != VDEV_NETWORK_SWITCH)) { 4929 /* unsupported dev_class, send NACK */ 4930 4931 DWARN(vgenp, ldcp, "Version Negotiation Failed\n"); 4932 4933 tagp->vio_subtype = VIO_SUBTYPE_NACK; 4934 tagp->vio_sid = ldcp->local_sid; 4935 /* send reply msg back to peer */ 4936 rv = vgen_sendmsg(ldcp, (caddr_t)tagp, 4937 sizeof (*vermsg), B_FALSE); 4938 if (rv != VGEN_SUCCESS) { 4939 return (rv); 4940 } 4941 return (VGEN_FAILURE); 4942 } 4943 4944 DBG2(vgenp, ldcp, "VER_INFO_RCVD, ver(%d,%d)\n", 4945 vermsg->ver_major, vermsg->ver_minor); 4946 4947 idx = 0; 4948 4949 for (;;) { 4950 4951 if (vermsg->ver_major > versions[idx].ver_major) { 4952 4953 /* nack with next lower version */ 4954 tagp->vio_subtype = VIO_SUBTYPE_NACK; 4955 vermsg->ver_major = versions[idx].ver_major; 4956 vermsg->ver_minor = versions[idx].ver_minor; 4957 break; 4958 } 4959 4960 if (vermsg->ver_major == versions[idx].ver_major) { 4961 4962 /* major version match - ACK version */ 4963 tagp->vio_subtype = VIO_SUBTYPE_ACK; 4964 ack = 1; 4965 4966 /* 4967 * lower minor version to the one this endpt 4968 * supports, if necessary 4969 */ 4970 if (vermsg->ver_minor > 4971 versions[idx].ver_minor) { 4972 vermsg->ver_minor = 4973 versions[idx].ver_minor; 4974 ldcp->peer_hparams.ver_minor = 4975 versions[idx].ver_minor; 4976 } 4977 break; 4978 } 4979 4980 idx++; 4981 4982 if (idx == VGEN_NUM_VER) { 4983 4984 /* no version match - send NACK */ 4985 tagp->vio_subtype = VIO_SUBTYPE_NACK; 4986 vermsg->ver_major = 0; 4987 vermsg->ver_minor = 0; 4988 failed = 1; 4989 break; 4990 } 4991 4992 } 4993 4994 tagp->vio_sid = 
ldcp->local_sid; 4995 4996 /* send reply msg back to peer */ 4997 rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*vermsg), 4998 B_FALSE); 4999 if (rv != VGEN_SUCCESS) { 5000 return (rv); 5001 } 5002 5003 if (ack) { 5004 ldcp->hstate |= VER_ACK_SENT; 5005 DBG2(vgenp, ldcp, "VER_ACK_SENT, ver(%d,%d) \n", 5006 vermsg->ver_major, vermsg->ver_minor); 5007 } 5008 if (failed) { 5009 DWARN(vgenp, ldcp, "Negotiation Failed\n"); 5010 return (VGEN_FAILURE); 5011 } 5012 if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) { 5013 5014 /* VER_ACK_SENT and VER_ACK_RCVD */ 5015 5016 /* local and peer versions match? */ 5017 ASSERT((ldcp->local_hparams.ver_major == 5018 ldcp->peer_hparams.ver_major) && 5019 (ldcp->local_hparams.ver_minor == 5020 ldcp->peer_hparams.ver_minor)); 5021 5022 vgen_set_vnet_proto_ops(ldcp); 5023 5024 /* move to the next phase */ 5025 vgen_handshake(vh_nextphase(ldcp)); 5026 } 5027 5028 break; 5029 5030 case VIO_SUBTYPE_ACK: 5031 5032 if (ldcp->hphase != VH_PHASE1) { 5033 /* This should not happen. */ 5034 DWARN(vgenp, ldcp, "Invalid Phase(%u)\n", ldcp->hphase); 5035 return (VGEN_FAILURE); 5036 } 5037 5038 /* SUCCESS - we have agreed on a version */ 5039 ldcp->local_hparams.ver_major = vermsg->ver_major; 5040 ldcp->local_hparams.ver_minor = vermsg->ver_minor; 5041 ldcp->hstate |= VER_ACK_RCVD; 5042 5043 DBG2(vgenp, ldcp, "VER_ACK_RCVD, ver(%d,%d) \n", 5044 vermsg->ver_major, vermsg->ver_minor); 5045 5046 if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) { 5047 5048 /* VER_ACK_SENT and VER_ACK_RCVD */ 5049 5050 /* local and peer versions match? */ 5051 ASSERT((ldcp->local_hparams.ver_major == 5052 ldcp->peer_hparams.ver_major) && 5053 (ldcp->local_hparams.ver_minor == 5054 ldcp->peer_hparams.ver_minor)); 5055 5056 vgen_set_vnet_proto_ops(ldcp); 5057 5058 /* move to the next phase */ 5059 vgen_handshake(vh_nextphase(ldcp)); 5060 } 5061 break; 5062 5063 case VIO_SUBTYPE_NACK: 5064 5065 if (ldcp->hphase != VH_PHASE1) { 5066 /* This should not happen. 
*/ 5067 DWARN(vgenp, ldcp, "VER_NACK_RCVD Invalid " 5068 "Phase(%u)\n", ldcp->hphase); 5069 return (VGEN_FAILURE); 5070 } 5071 5072 DBG2(vgenp, ldcp, "VER_NACK_RCVD next ver(%d,%d)\n", 5073 vermsg->ver_major, vermsg->ver_minor); 5074 5075 /* check if version in NACK is zero */ 5076 if (vermsg->ver_major == 0 && vermsg->ver_minor == 0) { 5077 /* 5078 * Version Negotiation has failed. 5079 */ 5080 DWARN(vgenp, ldcp, "Version Negotiation Failed\n"); 5081 return (VGEN_FAILURE); 5082 } 5083 5084 idx = 0; 5085 5086 for (;;) { 5087 5088 if (vermsg->ver_major > versions[idx].ver_major) { 5089 /* select next lower version */ 5090 5091 ldcp->local_hparams.ver_major = 5092 versions[idx].ver_major; 5093 ldcp->local_hparams.ver_minor = 5094 versions[idx].ver_minor; 5095 break; 5096 } 5097 5098 if (vermsg->ver_major == versions[idx].ver_major) { 5099 /* major version match */ 5100 5101 ldcp->local_hparams.ver_major = 5102 versions[idx].ver_major; 5103 5104 ldcp->local_hparams.ver_minor = 5105 versions[idx].ver_minor; 5106 break; 5107 } 5108 5109 idx++; 5110 5111 if (idx == VGEN_NUM_VER) { 5112 /* 5113 * no version match. 5114 * Version Negotiation has failed. 
5115 */ 5116 DWARN(vgenp, ldcp, 5117 "Version Negotiation Failed\n"); 5118 return (VGEN_FAILURE); 5119 } 5120 5121 } 5122 5123 rv = vgen_send_version_negotiate(ldcp); 5124 if (rv != VGEN_SUCCESS) { 5125 return (rv); 5126 } 5127 5128 break; 5129 } 5130 5131 DBG1(vgenp, ldcp, "exit\n"); 5132 return (VGEN_SUCCESS); 5133 } 5134 5135 /* Check if the attributes are supported */ 5136 static int 5137 vgen_check_attr_info(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg) 5138 { 5139 vgen_hparams_t *lp = &ldcp->local_hparams; 5140 5141 if ((msg->addr_type != ADDR_TYPE_MAC) || 5142 (msg->ack_freq > 64) || 5143 (msg->xfer_mode != lp->xfer_mode)) { 5144 return (VGEN_FAILURE); 5145 } 5146 5147 if (VGEN_VER_LT(ldcp, 1, 4)) { 5148 /* versions < 1.4, mtu must match */ 5149 if (msg->mtu != lp->mtu) { 5150 return (VGEN_FAILURE); 5151 } 5152 } else { 5153 /* Ver >= 1.4, validate mtu of the peer is at least ETHERMAX */ 5154 if (msg->mtu < ETHERMAX) { 5155 return (VGEN_FAILURE); 5156 } 5157 } 5158 5159 return (VGEN_SUCCESS); 5160 } 5161 5162 /* 5163 * Handle an attribute info msg from the peer or an ACK/NACK from the peer 5164 * to an attr info msg that we sent. 
5165 */ 5166 static int 5167 vgen_handle_attr_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp) 5168 { 5169 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 5170 vnet_attr_msg_t *msg = (vnet_attr_msg_t *)tagp; 5171 vgen_hparams_t *lp = &ldcp->local_hparams; 5172 vgen_hparams_t *rp = &ldcp->peer_hparams; 5173 int ack = 1; 5174 int rv = 0; 5175 uint32_t mtu; 5176 5177 DBG1(vgenp, ldcp, "enter\n"); 5178 if (ldcp->hphase != VH_PHASE2) { 5179 DWARN(vgenp, ldcp, "Rcvd ATTR_INFO subtype(%d)," 5180 " Invalid Phase(%u)\n", 5181 tagp->vio_subtype, ldcp->hphase); 5182 return (VGEN_FAILURE); 5183 } 5184 switch (tagp->vio_subtype) { 5185 case VIO_SUBTYPE_INFO: 5186 5187 DBG2(vgenp, ldcp, "ATTR_INFO_RCVD \n"); 5188 ldcp->hstate |= ATTR_INFO_RCVD; 5189 5190 /* save peer's values */ 5191 rp->mtu = msg->mtu; 5192 rp->addr = msg->addr; 5193 rp->addr_type = msg->addr_type; 5194 rp->xfer_mode = msg->xfer_mode; 5195 rp->ack_freq = msg->ack_freq; 5196 5197 rv = vgen_check_attr_info(ldcp, msg); 5198 if (rv == VGEN_FAILURE) { 5199 /* unsupported attr, send NACK */ 5200 ack = 0; 5201 } else { 5202 5203 if (VGEN_VER_GTEQ(ldcp, 1, 4)) { 5204 5205 /* 5206 * Versions >= 1.4: 5207 * The mtu is negotiated down to the 5208 * minimum of our mtu and peer's mtu. 5209 */ 5210 mtu = MIN(msg->mtu, vgenp->max_frame_size); 5211 5212 /* 5213 * If we have received an ack for the attr info 5214 * that we sent, then check if the mtu computed 5215 * above matches the mtu that the peer had ack'd 5216 * (saved in local hparams). If they don't 5217 * match, we fail the handshake. 5218 */ 5219 if (ldcp->hstate & ATTR_ACK_RCVD) { 5220 if (mtu != lp->mtu) { 5221 /* send NACK */ 5222 ack = 0; 5223 } 5224 } else { 5225 /* 5226 * Save the mtu computed above in our 5227 * attr parameters, so it gets sent in 5228 * the attr info from us to the peer. 
5229 */ 5230 lp->mtu = mtu; 5231 } 5232 5233 /* save the MIN mtu in the msg to be replied */ 5234 msg->mtu = mtu; 5235 5236 } 5237 } 5238 5239 5240 if (ack) { 5241 tagp->vio_subtype = VIO_SUBTYPE_ACK; 5242 } else { 5243 tagp->vio_subtype = VIO_SUBTYPE_NACK; 5244 } 5245 tagp->vio_sid = ldcp->local_sid; 5246 5247 /* send reply msg back to peer */ 5248 rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msg), 5249 B_FALSE); 5250 if (rv != VGEN_SUCCESS) { 5251 return (rv); 5252 } 5253 5254 if (ack) { 5255 ldcp->hstate |= ATTR_ACK_SENT; 5256 DBG2(vgenp, ldcp, "ATTR_ACK_SENT \n"); 5257 } else { 5258 /* failed */ 5259 DWARN(vgenp, ldcp, "ATTR_NACK_SENT \n"); 5260 return (VGEN_FAILURE); 5261 } 5262 5263 if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) { 5264 vgen_handshake(vh_nextphase(ldcp)); 5265 } 5266 5267 break; 5268 5269 case VIO_SUBTYPE_ACK: 5270 5271 if (VGEN_VER_GTEQ(ldcp, 1, 4)) { 5272 /* 5273 * Versions >= 1.4: 5274 * The ack msg sent by the peer contains the minimum of 5275 * our mtu (that we had sent in our attr info) and the 5276 * peer's mtu. 5277 * 5278 * If we have sent an ack for the attr info msg from 5279 * the peer, check if the mtu that was computed then 5280 * (saved in local hparams) matches the mtu that the 5281 * peer has ack'd. If they don't match, we fail the 5282 * handshake. 5283 */ 5284 if (ldcp->hstate & ATTR_ACK_SENT) { 5285 if (lp->mtu != msg->mtu) { 5286 return (VGEN_FAILURE); 5287 } 5288 } else { 5289 /* 5290 * If the mtu ack'd by the peer is > our mtu 5291 * fail handshake. Otherwise, save the mtu, so 5292 * we can validate it when we receive attr info 5293 * from our peer. 
5294 */ 5295 if (msg->mtu > lp->mtu) { 5296 return (VGEN_FAILURE); 5297 } 5298 if (msg->mtu <= lp->mtu) { 5299 lp->mtu = msg->mtu; 5300 } 5301 } 5302 } 5303 5304 ldcp->hstate |= ATTR_ACK_RCVD; 5305 5306 DBG2(vgenp, ldcp, "ATTR_ACK_RCVD \n"); 5307 5308 if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) { 5309 vgen_handshake(vh_nextphase(ldcp)); 5310 } 5311 break; 5312 5313 case VIO_SUBTYPE_NACK: 5314 5315 DBG2(vgenp, ldcp, "ATTR_NACK_RCVD \n"); 5316 return (VGEN_FAILURE); 5317 } 5318 DBG1(vgenp, ldcp, "exit\n"); 5319 return (VGEN_SUCCESS); 5320 } 5321 5322 /* Check if the dring info msg is ok */ 5323 static int 5324 vgen_check_dring_reg(vio_dring_reg_msg_t *msg) 5325 { 5326 /* check if msg contents are ok */ 5327 if ((msg->num_descriptors < 128) || (msg->descriptor_size < 5328 sizeof (vnet_public_desc_t))) { 5329 return (VGEN_FAILURE); 5330 } 5331 return (VGEN_SUCCESS); 5332 } 5333 5334 /* 5335 * Handle a descriptor ring register msg from the peer or an ACK/NACK from 5336 * the peer to a dring register msg that we sent. 5337 */ 5338 static int 5339 vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp) 5340 { 5341 vio_dring_reg_msg_t *msg = (vio_dring_reg_msg_t *)tagp; 5342 ldc_mem_cookie_t dcookie; 5343 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 5344 int ack = 0; 5345 int rv = 0; 5346 5347 DBG1(vgenp, ldcp, "enter\n"); 5348 if (ldcp->hphase < VH_PHASE2) { 5349 /* dring_info can be rcvd in any of the phases after Phase1 */ 5350 DWARN(vgenp, ldcp, 5351 "Rcvd DRING_INFO Subtype (%d), Invalid Phase(%u)\n", 5352 tagp->vio_subtype, ldcp->hphase); 5353 return (VGEN_FAILURE); 5354 } 5355 switch (tagp->vio_subtype) { 5356 case VIO_SUBTYPE_INFO: 5357 5358 DBG2(vgenp, ldcp, "DRING_INFO_RCVD \n"); 5359 ldcp->hstate |= DRING_INFO_RCVD; 5360 bcopy((msg->cookie), &dcookie, sizeof (dcookie)); 5361 5362 ASSERT(msg->ncookies == 1); 5363 5364 if (vgen_check_dring_reg(msg) == VGEN_SUCCESS) { 5365 /* 5366 * verified dring info msg to be ok, 5367 * now try to map the remote dring. 
5368 */ 5369 rv = vgen_init_rxds(ldcp, msg->num_descriptors, 5370 msg->descriptor_size, &dcookie, 5371 msg->ncookies); 5372 if (rv == DDI_SUCCESS) { 5373 /* now we can ack the peer */ 5374 ack = 1; 5375 } 5376 } 5377 if (ack == 0) { 5378 /* failed, send NACK */ 5379 tagp->vio_subtype = VIO_SUBTYPE_NACK; 5380 } else { 5381 if (!(ldcp->peer_hparams.dring_ready)) { 5382 5383 /* save peer's dring_info values */ 5384 bcopy(&dcookie, 5385 &(ldcp->peer_hparams.dring_cookie), 5386 sizeof (dcookie)); 5387 ldcp->peer_hparams.num_desc = 5388 msg->num_descriptors; 5389 ldcp->peer_hparams.desc_size = 5390 msg->descriptor_size; 5391 ldcp->peer_hparams.num_dcookies = 5392 msg->ncookies; 5393 5394 /* set dring_ident for the peer */ 5395 ldcp->peer_hparams.dring_ident = 5396 (uint64_t)ldcp->rxdp; 5397 /* return the dring_ident in ack msg */ 5398 msg->dring_ident = 5399 (uint64_t)ldcp->rxdp; 5400 5401 ldcp->peer_hparams.dring_ready = B_TRUE; 5402 } 5403 tagp->vio_subtype = VIO_SUBTYPE_ACK; 5404 } 5405 tagp->vio_sid = ldcp->local_sid; 5406 /* send reply msg back to peer */ 5407 rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msg), 5408 B_FALSE); 5409 if (rv != VGEN_SUCCESS) { 5410 return (rv); 5411 } 5412 5413 if (ack) { 5414 ldcp->hstate |= DRING_ACK_SENT; 5415 DBG2(vgenp, ldcp, "DRING_ACK_SENT"); 5416 } else { 5417 DWARN(vgenp, ldcp, "DRING_NACK_SENT"); 5418 return (VGEN_FAILURE); 5419 } 5420 5421 if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) { 5422 vgen_handshake(vh_nextphase(ldcp)); 5423 } 5424 5425 break; 5426 5427 case VIO_SUBTYPE_ACK: 5428 5429 ldcp->hstate |= DRING_ACK_RCVD; 5430 5431 DBG2(vgenp, ldcp, "DRING_ACK_RCVD"); 5432 5433 if (!(ldcp->local_hparams.dring_ready)) { 5434 /* local dring is now ready */ 5435 ldcp->local_hparams.dring_ready = B_TRUE; 5436 5437 /* save dring_ident acked by peer */ 5438 ldcp->local_hparams.dring_ident = 5439 msg->dring_ident; 5440 } 5441 5442 if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) { 5443 vgen_handshake(vh_nextphase(ldcp)); 5444 } 
5445 5446 break; 5447 5448 case VIO_SUBTYPE_NACK: 5449 5450 DBG2(vgenp, ldcp, "DRING_NACK_RCVD"); 5451 return (VGEN_FAILURE); 5452 } 5453 DBG1(vgenp, ldcp, "exit\n"); 5454 return (VGEN_SUCCESS); 5455 } 5456 5457 /* 5458 * Handle a rdx info msg from the peer or an ACK/NACK 5459 * from the peer to a rdx info msg that we sent. 5460 */ 5461 static int 5462 vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp) 5463 { 5464 int rv = 0; 5465 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 5466 5467 DBG1(vgenp, ldcp, "enter\n"); 5468 if (ldcp->hphase != VH_PHASE3) { 5469 DWARN(vgenp, ldcp, 5470 "Rcvd RDX_INFO Subtype (%d), Invalid Phase(%u)\n", 5471 tagp->vio_subtype, ldcp->hphase); 5472 return (VGEN_FAILURE); 5473 } 5474 switch (tagp->vio_subtype) { 5475 case VIO_SUBTYPE_INFO: 5476 5477 DBG2(vgenp, ldcp, "RDX_INFO_RCVD \n"); 5478 ldcp->hstate |= RDX_INFO_RCVD; 5479 5480 tagp->vio_subtype = VIO_SUBTYPE_ACK; 5481 tagp->vio_sid = ldcp->local_sid; 5482 /* send reply msg back to peer */ 5483 rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vio_rdx_msg_t), 5484 B_FALSE); 5485 if (rv != VGEN_SUCCESS) { 5486 return (rv); 5487 } 5488 5489 ldcp->hstate |= RDX_ACK_SENT; 5490 DBG2(vgenp, ldcp, "RDX_ACK_SENT \n"); 5491 5492 if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) { 5493 vgen_handshake(vh_nextphase(ldcp)); 5494 } 5495 5496 break; 5497 5498 case VIO_SUBTYPE_ACK: 5499 5500 ldcp->hstate |= RDX_ACK_RCVD; 5501 5502 DBG2(vgenp, ldcp, "RDX_ACK_RCVD \n"); 5503 5504 if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) { 5505 vgen_handshake(vh_nextphase(ldcp)); 5506 } 5507 break; 5508 5509 case VIO_SUBTYPE_NACK: 5510 5511 DBG2(vgenp, ldcp, "RDX_NACK_RCVD \n"); 5512 return (VGEN_FAILURE); 5513 } 5514 DBG1(vgenp, ldcp, "exit\n"); 5515 return (VGEN_SUCCESS); 5516 } 5517 5518 /* Handle ACK/NACK from vsw to a set multicast msg that we sent */ 5519 static int 5520 vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp) 5521 { 5522 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 5523 vnet_mcast_msg_t *msgp = 
(vnet_mcast_msg_t *)tagp; 5524 struct ether_addr *addrp; 5525 int count; 5526 int i; 5527 5528 DBG1(vgenp, ldcp, "enter\n"); 5529 switch (tagp->vio_subtype) { 5530 5531 case VIO_SUBTYPE_INFO: 5532 5533 /* vnet shouldn't recv set mcast msg, only vsw handles it */ 5534 DWARN(vgenp, ldcp, "rcvd SET_MCAST_INFO \n"); 5535 break; 5536 5537 case VIO_SUBTYPE_ACK: 5538 5539 /* success adding/removing multicast addr */ 5540 DBG1(vgenp, ldcp, "rcvd SET_MCAST_ACK \n"); 5541 break; 5542 5543 case VIO_SUBTYPE_NACK: 5544 5545 DWARN(vgenp, ldcp, "rcvd SET_MCAST_NACK \n"); 5546 if (!(msgp->set)) { 5547 /* multicast remove request failed */ 5548 break; 5549 } 5550 5551 /* multicast add request failed */ 5552 for (count = 0; count < msgp->count; count++) { 5553 addrp = &(msgp->mca[count]); 5554 5555 /* delete address from the table */ 5556 for (i = 0; i < vgenp->mccount; i++) { 5557 if (ether_cmp(addrp, 5558 &(vgenp->mctab[i])) == 0) { 5559 if (vgenp->mccount > 1) { 5560 int t = vgenp->mccount - 1; 5561 vgenp->mctab[i] = 5562 vgenp->mctab[t]; 5563 } 5564 vgenp->mccount--; 5565 break; 5566 } 5567 } 5568 } 5569 break; 5570 5571 } 5572 DBG1(vgenp, ldcp, "exit\n"); 5573 5574 return (VGEN_SUCCESS); 5575 } 5576 5577 /* handler for control messages received from the peer ldc end-point */ 5578 static int 5579 vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp) 5580 { 5581 int rv = 0; 5582 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 5583 5584 DBG1(vgenp, ldcp, "enter\n"); 5585 switch (tagp->vio_subtype_env) { 5586 5587 case VIO_VER_INFO: 5588 rv = vgen_handle_version_negotiate(ldcp, tagp); 5589 break; 5590 5591 case VIO_ATTR_INFO: 5592 rv = vgen_handle_attr_info(ldcp, tagp); 5593 break; 5594 5595 case VIO_DRING_REG: 5596 rv = vgen_handle_dring_reg(ldcp, tagp); 5597 break; 5598 5599 case VIO_RDX: 5600 rv = vgen_handle_rdx_info(ldcp, tagp); 5601 break; 5602 5603 case VNET_MCAST_INFO: 5604 rv = vgen_handle_mcast_info(ldcp, tagp); 5605 break; 5606 5607 case VIO_DDS_INFO: 5608 rv = vgen_dds_rx(ldcp, 
tagp); 5609 break; 5610 } 5611 5612 DBG1(vgenp, ldcp, "exit rv(%d)\n", rv); 5613 return (rv); 5614 } 5615 5616 /* handler for data messages received from the peer ldc end-point */ 5617 static int 5618 vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t msglen) 5619 { 5620 int rv = 0; 5621 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 5622 5623 DBG1(vgenp, ldcp, "enter\n"); 5624 5625 if (ldcp->hphase != VH_DONE) 5626 return (rv); 5627 5628 if (tagp->vio_subtype == VIO_SUBTYPE_INFO) { 5629 rv = vgen_check_datamsg_seq(ldcp, tagp); 5630 if (rv != 0) { 5631 return (rv); 5632 } 5633 } 5634 5635 switch (tagp->vio_subtype_env) { 5636 case VIO_DRING_DATA: 5637 rv = vgen_handle_dring_data(ldcp, tagp); 5638 break; 5639 5640 case VIO_PKT_DATA: 5641 ldcp->rx_pktdata((void *)ldcp, (void *)tagp, msglen); 5642 break; 5643 default: 5644 break; 5645 } 5646 5647 DBG1(vgenp, ldcp, "exit rv(%d)\n", rv); 5648 return (rv); 5649 } 5650 5651 /* 5652 * dummy pkt data handler function for vnet protocol version 1.0 5653 */ 5654 static void 5655 vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen) 5656 { 5657 _NOTE(ARGUNUSED(arg1, arg2, msglen)) 5658 } 5659 5660 /* 5661 * This function handles raw pkt data messages received over the channel. 5662 * Currently, only priority-eth-type frames are received through this mechanism. 5663 * In this case, the frame(data) is present within the message itself which 5664 * is copied into an mblk before sending it up the stack. 
5665 */ 5666 static void 5667 vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen) 5668 { 5669 vgen_ldc_t *ldcp = (vgen_ldc_t *)arg1; 5670 vio_raw_data_msg_t *pkt = (vio_raw_data_msg_t *)arg2; 5671 uint32_t size; 5672 mblk_t *mp; 5673 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 5674 vgen_stats_t *statsp = &ldcp->stats; 5675 vgen_hparams_t *lp = &ldcp->local_hparams; 5676 vio_net_rx_cb_t vrx_cb; 5677 5678 ASSERT(MUTEX_HELD(&ldcp->cblock)); 5679 5680 mutex_exit(&ldcp->cblock); 5681 5682 size = msglen - VIO_PKT_DATA_HDRSIZE; 5683 if (size < ETHERMIN || size > lp->mtu) { 5684 (void) atomic_inc_32(&statsp->rx_pri_fail); 5685 goto exit; 5686 } 5687 5688 mp = vio_multipool_allocb(&ldcp->vmp, size); 5689 if (mp == NULL) { 5690 mp = allocb(size, BPRI_MED); 5691 if (mp == NULL) { 5692 (void) atomic_inc_32(&statsp->rx_pri_fail); 5693 DWARN(vgenp, ldcp, "allocb failure, " 5694 "unable to process priority frame\n"); 5695 goto exit; 5696 } 5697 } 5698 5699 /* copy the frame from the payload of raw data msg into the mblk */ 5700 bcopy(pkt->data, mp->b_rptr, size); 5701 mp->b_wptr = mp->b_rptr + size; 5702 5703 /* update stats */ 5704 (void) atomic_inc_64(&statsp->rx_pri_packets); 5705 (void) atomic_add_64(&statsp->rx_pri_bytes, size); 5706 5707 /* send up; call vrx_cb() as cblock is already released */ 5708 vrx_cb = ldcp->portp->vcb.vio_net_rx_cb; 5709 vrx_cb(ldcp->portp->vhp, mp); 5710 5711 exit: 5712 mutex_enter(&ldcp->cblock); 5713 } 5714 5715 static int 5716 vgen_send_dring_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t start, 5717 int32_t end, uint8_t pstate) 5718 { 5719 int rv = 0; 5720 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 5721 vio_dring_msg_t *msgp = (vio_dring_msg_t *)tagp; 5722 5723 tagp->vio_subtype = VIO_SUBTYPE_ACK; 5724 tagp->vio_sid = ldcp->local_sid; 5725 msgp->start_idx = start; 5726 msgp->end_idx = end; 5727 msgp->dring_process_state = pstate; 5728 rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp), B_FALSE); 5729 if (rv != VGEN_SUCCESS) { 5730 DWARN(vgenp, 
ldcp, "vgen_sendmsg failed\n"); 5731 } 5732 return (rv); 5733 } 5734 5735 static int 5736 vgen_handle_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp) 5737 { 5738 int rv = 0; 5739 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 5740 5741 5742 DBG1(vgenp, ldcp, "enter\n"); 5743 switch (tagp->vio_subtype) { 5744 5745 case VIO_SUBTYPE_INFO: 5746 /* 5747 * To reduce the locking contention, release the 5748 * cblock here and re-acquire it once we are done 5749 * receiving packets. 5750 */ 5751 mutex_exit(&ldcp->cblock); 5752 mutex_enter(&ldcp->rxlock); 5753 rv = vgen_handle_dring_data_info(ldcp, tagp); 5754 mutex_exit(&ldcp->rxlock); 5755 mutex_enter(&ldcp->cblock); 5756 break; 5757 5758 case VIO_SUBTYPE_ACK: 5759 rv = vgen_handle_dring_data_ack(ldcp, tagp); 5760 break; 5761 5762 case VIO_SUBTYPE_NACK: 5763 rv = vgen_handle_dring_data_nack(ldcp, tagp); 5764 break; 5765 } 5766 DBG1(vgenp, ldcp, "exit rv(%d)\n", rv); 5767 return (rv); 5768 } 5769 5770 static int 5771 vgen_handle_dring_data_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp) 5772 { 5773 uint32_t start; 5774 int32_t end; 5775 int rv = 0; 5776 vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp; 5777 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 5778 #ifdef VGEN_HANDLE_LOST_PKTS 5779 vgen_stats_t *statsp = &ldcp->stats; 5780 uint32_t rxi; 5781 int n; 5782 #endif 5783 5784 DBG1(vgenp, ldcp, "enter\n"); 5785 5786 start = dringmsg->start_idx; 5787 end = dringmsg->end_idx; 5788 /* 5789 * received a data msg, which contains the start and end 5790 * indices of the descriptors within the rx ring holding data, 5791 * the seq_num of data packet corresponding to the start index, 5792 * and the dring_ident. 5793 * We can now read the contents of each of these descriptors 5794 * and gather data from it. 
5795 */ 5796 DBG1(vgenp, ldcp, "INFO: start(%d), end(%d)\n", 5797 start, end); 5798 5799 /* validate rx start and end indeces */ 5800 if (!(CHECK_RXI(start, ldcp)) || ((end != -1) && 5801 !(CHECK_RXI(end, ldcp)))) { 5802 DWARN(vgenp, ldcp, "Invalid Rx start(%d) or end(%d)\n", 5803 start, end); 5804 /* drop the message if invalid index */ 5805 return (rv); 5806 } 5807 5808 /* validate dring_ident */ 5809 if (dringmsg->dring_ident != ldcp->peer_hparams.dring_ident) { 5810 DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n", 5811 dringmsg->dring_ident); 5812 /* invalid dring_ident, drop the msg */ 5813 return (rv); 5814 } 5815 #ifdef DEBUG 5816 if (vgen_trigger_rxlost) { 5817 /* drop this msg to simulate lost pkts for debugging */ 5818 vgen_trigger_rxlost = 0; 5819 return (rv); 5820 } 5821 #endif 5822 5823 #ifdef VGEN_HANDLE_LOST_PKTS 5824 5825 /* receive start index doesn't match expected index */ 5826 if (ldcp->next_rxi != start) { 5827 DWARN(vgenp, ldcp, "next_rxi(%d) != start(%d)\n", 5828 ldcp->next_rxi, start); 5829 5830 /* calculate the number of pkts lost */ 5831 if (start >= ldcp->next_rxi) { 5832 n = start - ldcp->next_rxi; 5833 } else { 5834 n = ldcp->num_rxds - (ldcp->next_rxi - start); 5835 } 5836 5837 statsp->rx_lost_pkts += n; 5838 tagp->vio_subtype = VIO_SUBTYPE_NACK; 5839 tagp->vio_sid = ldcp->local_sid; 5840 /* indicate the range of lost descriptors */ 5841 dringmsg->start_idx = ldcp->next_rxi; 5842 rxi = start; 5843 DECR_RXI(rxi, ldcp); 5844 dringmsg->end_idx = rxi; 5845 /* dring ident is left unchanged */ 5846 rv = vgen_sendmsg(ldcp, (caddr_t)tagp, 5847 sizeof (*dringmsg), B_FALSE); 5848 if (rv != VGEN_SUCCESS) { 5849 DWARN(vgenp, ldcp, 5850 "vgen_sendmsg failed, stype:NACK\n"); 5851 return (rv); 5852 } 5853 /* 5854 * treat this range of descrs/pkts as dropped 5855 * and set the new expected value of next_rxi 5856 * and continue(below) to process from the new 5857 * start index. 
5858 */ 5859 ldcp->next_rxi = start; 5860 } 5861 5862 #endif /* VGEN_HANDLE_LOST_PKTS */ 5863 5864 /* Now receive messages */ 5865 rv = vgen_process_dring_data(ldcp, tagp); 5866 5867 DBG1(vgenp, ldcp, "exit rv(%d)\n", rv); 5868 return (rv); 5869 } 5870 5871 static int 5872 vgen_process_dring_data(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp) 5873 { 5874 boolean_t set_ack_start = B_FALSE; 5875 uint32_t start; 5876 uint32_t ack_end; 5877 uint32_t next_rxi; 5878 uint32_t rxi; 5879 int count = 0; 5880 int rv = 0; 5881 uint32_t retries = 0; 5882 vgen_stats_t *statsp; 5883 vnet_public_desc_t rxd; 5884 vio_dring_entry_hdr_t *hdrp; 5885 mblk_t *bp = NULL; 5886 mblk_t *bpt = NULL; 5887 uint32_t ack_start; 5888 boolean_t rxd_err = B_FALSE; 5889 mblk_t *mp = NULL; 5890 size_t nbytes; 5891 boolean_t ack_needed = B_FALSE; 5892 size_t nread; 5893 uint64_t off = 0; 5894 struct ether_header *ehp; 5895 vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp; 5896 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 5897 vgen_hparams_t *lp = &ldcp->local_hparams; 5898 5899 DBG1(vgenp, ldcp, "enter\n"); 5900 5901 statsp = &ldcp->stats; 5902 start = dringmsg->start_idx; 5903 5904 /* 5905 * start processing the descriptors from the specified 5906 * start index, up to the index a descriptor is not ready 5907 * to be processed or we process the entire descriptor ring 5908 * and wrap around upto the start index. 
5909 */ 5910 5911 /* need to set the start index of descriptors to be ack'd */ 5912 set_ack_start = B_TRUE; 5913 5914 /* index upto which we have ack'd */ 5915 ack_end = start; 5916 DECR_RXI(ack_end, ldcp); 5917 5918 next_rxi = rxi = start; 5919 do { 5920 vgen_recv_retry: 5921 rv = vnet_dring_entry_copy(&(ldcp->rxdp[rxi]), &rxd, 5922 ldcp->dring_mtype, ldcp->rx_dhandle, rxi, rxi); 5923 if (rv != 0) { 5924 DWARN(vgenp, ldcp, "ldc_mem_dring_acquire() failed" 5925 " rv(%d)\n", rv); 5926 statsp->ierrors++; 5927 return (rv); 5928 } 5929 5930 hdrp = &rxd.hdr; 5931 5932 if (hdrp->dstate != VIO_DESC_READY) { 5933 /* 5934 * Before waiting and retry here, send up 5935 * the packets that are received already 5936 */ 5937 if (bp != NULL) { 5938 DTRACE_PROBE1(vgen_rcv_msgs, int, count); 5939 vgen_rx(ldcp, bp); 5940 count = 0; 5941 bp = bpt = NULL; 5942 } 5943 /* 5944 * descriptor is not ready. 5945 * retry descriptor acquire, stop processing 5946 * after max # retries. 5947 */ 5948 if (retries == vgen_recv_retries) 5949 break; 5950 retries++; 5951 drv_usecwait(vgen_recv_delay); 5952 goto vgen_recv_retry; 5953 } 5954 retries = 0; 5955 5956 if (set_ack_start) { 5957 /* 5958 * initialize the start index of the range 5959 * of descriptors to be ack'd. 5960 */ 5961 ack_start = rxi; 5962 set_ack_start = B_FALSE; 5963 } 5964 5965 if ((rxd.nbytes < ETHERMIN) || 5966 (rxd.nbytes > lp->mtu) || 5967 (rxd.ncookies == 0) || 5968 (rxd.ncookies > MAX_COOKIES)) { 5969 rxd_err = B_TRUE; 5970 } else { 5971 /* 5972 * Try to allocate an mblk from the free pool 5973 * of recv mblks for the channel. 5974 * If this fails, use allocb(). 5975 */ 5976 nbytes = (VNET_IPALIGN + rxd.nbytes + 7) & ~7; 5977 if (nbytes > ldcp->max_rxpool_size) { 5978 mp = allocb(VNET_IPALIGN + rxd.nbytes + 8, 5979 BPRI_MED); 5980 } else { 5981 mp = vio_multipool_allocb(&ldcp->vmp, nbytes); 5982 if (mp == NULL) { 5983 statsp->rx_vio_allocb_fail++; 5984 /* 5985 * Data buffer returned by allocb(9F) 5986 * is 8byte aligned. 
We allocate extra 5987 * 8 bytes to ensure size is multiple 5988 * of 8 bytes for ldc_mem_copy(). 5989 */ 5990 mp = allocb(VNET_IPALIGN + 5991 rxd.nbytes + 8, BPRI_MED); 5992 } 5993 } 5994 } 5995 if ((rxd_err) || (mp == NULL)) { 5996 /* 5997 * rxd_err or allocb() failure, 5998 * drop this packet, get next. 5999 */ 6000 if (rxd_err) { 6001 statsp->ierrors++; 6002 rxd_err = B_FALSE; 6003 } else { 6004 statsp->rx_allocb_fail++; 6005 } 6006 6007 ack_needed = hdrp->ack; 6008 6009 /* set descriptor done bit */ 6010 rv = vnet_dring_entry_set_dstate(&(ldcp->rxdp[rxi]), 6011 ldcp->dring_mtype, ldcp->rx_dhandle, rxi, rxi, 6012 VIO_DESC_DONE); 6013 if (rv != 0) { 6014 DWARN(vgenp, ldcp, 6015 "vnet_dring_entry_set_dstate err rv(%d)\n", 6016 rv); 6017 return (rv); 6018 } 6019 6020 if (ack_needed) { 6021 ack_needed = B_FALSE; 6022 /* 6023 * sender needs ack for this packet, 6024 * ack pkts upto this index. 6025 */ 6026 ack_end = rxi; 6027 6028 rv = vgen_send_dring_ack(ldcp, tagp, 6029 ack_start, ack_end, 6030 VIO_DP_ACTIVE); 6031 if (rv != VGEN_SUCCESS) { 6032 goto error_ret; 6033 } 6034 6035 /* need to set new ack start index */ 6036 set_ack_start = B_TRUE; 6037 } 6038 goto vgen_next_rxi; 6039 } 6040 6041 nread = nbytes; 6042 rv = ldc_mem_copy(ldcp->ldc_handle, 6043 (caddr_t)mp->b_rptr, off, &nread, 6044 rxd.memcookie, rxd.ncookies, LDC_COPY_IN); 6045 6046 /* if ldc_mem_copy() failed */ 6047 if (rv) { 6048 DWARN(vgenp, ldcp, "ldc_mem_copy err rv(%d)\n", rv); 6049 statsp->ierrors++; 6050 freemsg(mp); 6051 goto error_ret; 6052 } 6053 6054 ack_needed = hdrp->ack; 6055 6056 rv = vnet_dring_entry_set_dstate(&(ldcp->rxdp[rxi]), 6057 ldcp->dring_mtype, ldcp->rx_dhandle, rxi, rxi, 6058 VIO_DESC_DONE); 6059 if (rv != 0) { 6060 DWARN(vgenp, ldcp, 6061 "vnet_dring_entry_set_dstate err rv(%d)\n", rv); 6062 goto error_ret; 6063 } 6064 6065 mp->b_rptr += VNET_IPALIGN; 6066 6067 if (ack_needed) { 6068 ack_needed = B_FALSE; 6069 /* 6070 * sender needs ack for this packet, 6071 * ack pkts upto 
this index. 6072 */ 6073 ack_end = rxi; 6074 6075 rv = vgen_send_dring_ack(ldcp, tagp, 6076 ack_start, ack_end, VIO_DP_ACTIVE); 6077 if (rv != VGEN_SUCCESS) { 6078 goto error_ret; 6079 } 6080 6081 /* need to set new ack start index */ 6082 set_ack_start = B_TRUE; 6083 } 6084 6085 if (nread != nbytes) { 6086 DWARN(vgenp, ldcp, 6087 "ldc_mem_copy nread(%lx), nbytes(%lx)\n", 6088 nread, nbytes); 6089 statsp->ierrors++; 6090 freemsg(mp); 6091 goto vgen_next_rxi; 6092 } 6093 6094 /* point to the actual end of data */ 6095 mp->b_wptr = mp->b_rptr + rxd.nbytes; 6096 6097 /* update stats */ 6098 statsp->ipackets++; 6099 statsp->rbytes += rxd.nbytes; 6100 ehp = (struct ether_header *)mp->b_rptr; 6101 if (IS_BROADCAST(ehp)) 6102 statsp->brdcstrcv++; 6103 else if (IS_MULTICAST(ehp)) 6104 statsp->multircv++; 6105 6106 /* build a chain of received packets */ 6107 if (bp == NULL) { 6108 /* first pkt */ 6109 bp = mp; 6110 bpt = bp; 6111 bpt->b_next = NULL; 6112 } else { 6113 mp->b_next = NULL; 6114 bpt->b_next = mp; 6115 bpt = mp; 6116 } 6117 6118 if (count++ > vgen_chain_len) { 6119 DTRACE_PROBE1(vgen_rcv_msgs, int, count); 6120 vgen_rx(ldcp, bp); 6121 count = 0; 6122 bp = bpt = NULL; 6123 } 6124 6125 vgen_next_rxi: 6126 /* update end index of range of descrs to be ack'd */ 6127 ack_end = rxi; 6128 6129 /* update the next index to be processed */ 6130 INCR_RXI(next_rxi, ldcp); 6131 if (next_rxi == start) { 6132 /* 6133 * processed the entire descriptor ring upto 6134 * the index at which we started. 6135 */ 6136 break; 6137 } 6138 6139 rxi = next_rxi; 6140 6141 _NOTE(CONSTCOND) 6142 } while (1); 6143 6144 /* 6145 * send an ack message to peer indicating that we have stopped 6146 * processing descriptors. 6147 */ 6148 if (set_ack_start) { 6149 /* 6150 * We have ack'd upto some index and we have not 6151 * processed any descriptors beyond that index. 6152 * Use the last ack'd index as both the start and 6153 * end of range of descrs being ack'd. 
6154 * Note: This results in acking the last index twice 6155 * and should be harmless. 6156 */ 6157 ack_start = ack_end; 6158 } 6159 6160 rv = vgen_send_dring_ack(ldcp, tagp, ack_start, ack_end, 6161 VIO_DP_STOPPED); 6162 if (rv != VGEN_SUCCESS) { 6163 goto error_ret; 6164 } 6165 6166 /* save new recv index of next dring msg */ 6167 ldcp->next_rxi = next_rxi; 6168 6169 error_ret: 6170 /* send up packets received so far */ 6171 if (bp != NULL) { 6172 DTRACE_PROBE1(vgen_rcv_msgs, int, count); 6173 vgen_rx(ldcp, bp); 6174 bp = bpt = NULL; 6175 } 6176 DBG1(vgenp, ldcp, "exit rv(%d)\n", rv); 6177 return (rv); 6178 6179 } 6180 6181 static int 6182 vgen_handle_dring_data_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp) 6183 { 6184 int rv = 0; 6185 uint32_t start; 6186 int32_t end; 6187 uint32_t txi; 6188 boolean_t ready_txd = B_FALSE; 6189 vgen_stats_t *statsp; 6190 vgen_private_desc_t *tbufp; 6191 vnet_public_desc_t *txdp; 6192 vio_dring_entry_hdr_t *hdrp; 6193 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 6194 vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp; 6195 6196 DBG1(vgenp, ldcp, "enter\n"); 6197 start = dringmsg->start_idx; 6198 end = dringmsg->end_idx; 6199 statsp = &ldcp->stats; 6200 6201 /* 6202 * received an ack corresponding to a specific descriptor for 6203 * which we had set the ACK bit in the descriptor (during 6204 * transmit). This enables us to reclaim descriptors. 
6205 */ 6206 6207 DBG2(vgenp, ldcp, "ACK: start(%d), end(%d)\n", start, end); 6208 6209 /* validate start and end indeces in the tx ack msg */ 6210 if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) { 6211 /* drop the message if invalid index */ 6212 DWARN(vgenp, ldcp, "Invalid Tx ack start(%d) or end(%d)\n", 6213 start, end); 6214 return (rv); 6215 } 6216 /* validate dring_ident */ 6217 if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) { 6218 /* invalid dring_ident, drop the msg */ 6219 DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n", 6220 dringmsg->dring_ident); 6221 return (rv); 6222 } 6223 statsp->dring_data_acks++; 6224 6225 /* reclaim descriptors that are done */ 6226 vgen_reclaim(ldcp); 6227 6228 if (dringmsg->dring_process_state != VIO_DP_STOPPED) { 6229 /* 6230 * receiver continued processing descriptors after 6231 * sending us the ack. 6232 */ 6233 return (rv); 6234 } 6235 6236 statsp->dring_stopped_acks++; 6237 6238 /* receiver stopped processing descriptors */ 6239 mutex_enter(&ldcp->wrlock); 6240 mutex_enter(&ldcp->tclock); 6241 6242 /* 6243 * determine if there are any pending tx descriptors 6244 * ready to be processed by the receiver(peer) and if so, 6245 * send a message to the peer to restart receiving. 6246 */ 6247 ready_txd = B_FALSE; 6248 6249 /* 6250 * using the end index of the descriptor range for which 6251 * we received the ack, check if the next descriptor is 6252 * ready. 6253 */ 6254 txi = end; 6255 INCR_TXI(txi, ldcp); 6256 tbufp = &ldcp->tbufp[txi]; 6257 txdp = tbufp->descp; 6258 hdrp = &txdp->hdr; 6259 if (hdrp->dstate == VIO_DESC_READY) { 6260 ready_txd = B_TRUE; 6261 } else { 6262 /* 6263 * descr next to the end of ack'd descr range is not 6264 * ready. 6265 * starting from the current reclaim index, check 6266 * if any descriptor is ready. 
6267 */ 6268 6269 txi = ldcp->cur_tbufp - ldcp->tbufp; 6270 tbufp = &ldcp->tbufp[txi]; 6271 6272 txdp = tbufp->descp; 6273 hdrp = &txdp->hdr; 6274 if (hdrp->dstate == VIO_DESC_READY) { 6275 ready_txd = B_TRUE; 6276 } 6277 6278 } 6279 6280 if (ready_txd) { 6281 /* 6282 * we have tx descriptor(s) ready to be 6283 * processed by the receiver. 6284 * send a message to the peer with the start index 6285 * of ready descriptors. 6286 */ 6287 rv = vgen_send_dring_data(ldcp, txi, -1); 6288 if (rv != VGEN_SUCCESS) { 6289 ldcp->resched_peer = B_TRUE; 6290 ldcp->resched_peer_txi = txi; 6291 mutex_exit(&ldcp->tclock); 6292 mutex_exit(&ldcp->wrlock); 6293 return (rv); 6294 } 6295 } else { 6296 /* 6297 * no ready tx descriptors. set the flag to send a 6298 * message to peer when tx descriptors are ready in 6299 * transmit routine. 6300 */ 6301 ldcp->resched_peer = B_TRUE; 6302 ldcp->resched_peer_txi = ldcp->cur_tbufp - ldcp->tbufp; 6303 } 6304 6305 mutex_exit(&ldcp->tclock); 6306 mutex_exit(&ldcp->wrlock); 6307 DBG1(vgenp, ldcp, "exit rv(%d)\n", rv); 6308 return (rv); 6309 } 6310 6311 static int 6312 vgen_handle_dring_data_nack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp) 6313 { 6314 int rv = 0; 6315 uint32_t start; 6316 int32_t end; 6317 uint32_t txi; 6318 vnet_public_desc_t *txdp; 6319 vio_dring_entry_hdr_t *hdrp; 6320 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 6321 vio_dring_msg_t *dringmsg = (vio_dring_msg_t *)tagp; 6322 6323 DBG1(vgenp, ldcp, "enter\n"); 6324 start = dringmsg->start_idx; 6325 end = dringmsg->end_idx; 6326 6327 /* 6328 * peer sent a NACK msg to indicate lost packets. 6329 * The start and end correspond to the range of descriptors 6330 * for which the peer didn't receive a dring data msg and so 6331 * didn't receive the corresponding data. 
6332 */ 6333 DWARN(vgenp, ldcp, "NACK: start(%d), end(%d)\n", start, end); 6334 6335 /* validate start and end indeces in the tx nack msg */ 6336 if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) { 6337 /* drop the message if invalid index */ 6338 DWARN(vgenp, ldcp, "Invalid Tx nack start(%d) or end(%d)\n", 6339 start, end); 6340 return (rv); 6341 } 6342 /* validate dring_ident */ 6343 if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) { 6344 /* invalid dring_ident, drop the msg */ 6345 DWARN(vgenp, ldcp, "Invalid dring ident 0x%x\n", 6346 dringmsg->dring_ident); 6347 return (rv); 6348 } 6349 mutex_enter(&ldcp->txlock); 6350 mutex_enter(&ldcp->tclock); 6351 6352 if (ldcp->next_tbufp == ldcp->cur_tbufp) { 6353 /* no busy descriptors, bogus nack ? */ 6354 mutex_exit(&ldcp->tclock); 6355 mutex_exit(&ldcp->txlock); 6356 return (rv); 6357 } 6358 6359 /* we just mark the descrs as done so they can be reclaimed */ 6360 for (txi = start; txi <= end; ) { 6361 txdp = &(ldcp->txdp[txi]); 6362 hdrp = &txdp->hdr; 6363 if (hdrp->dstate == VIO_DESC_READY) 6364 hdrp->dstate = VIO_DESC_DONE; 6365 INCR_TXI(txi, ldcp); 6366 } 6367 mutex_exit(&ldcp->tclock); 6368 mutex_exit(&ldcp->txlock); 6369 DBG1(vgenp, ldcp, "exit rv(%d)\n", rv); 6370 return (rv); 6371 } 6372 6373 static void 6374 vgen_reclaim(vgen_ldc_t *ldcp) 6375 { 6376 mutex_enter(&ldcp->tclock); 6377 6378 vgen_reclaim_dring(ldcp); 6379 ldcp->reclaim_lbolt = ddi_get_lbolt(); 6380 6381 mutex_exit(&ldcp->tclock); 6382 } 6383 6384 /* 6385 * transmit reclaim function. starting from the current reclaim index 6386 * look for descriptors marked DONE and reclaim the descriptor and the 6387 * corresponding buffers (tbuf). 
6388 */ 6389 static void 6390 vgen_reclaim_dring(vgen_ldc_t *ldcp) 6391 { 6392 int count = 0; 6393 vnet_public_desc_t *txdp; 6394 vgen_private_desc_t *tbufp; 6395 vio_dring_entry_hdr_t *hdrp; 6396 6397 #ifdef DEBUG 6398 if (vgen_trigger_txtimeout) 6399 return; 6400 #endif 6401 6402 tbufp = ldcp->cur_tbufp; 6403 txdp = tbufp->descp; 6404 hdrp = &txdp->hdr; 6405 6406 while ((hdrp->dstate == VIO_DESC_DONE) && 6407 (tbufp != ldcp->next_tbufp)) { 6408 tbufp->flags = VGEN_PRIV_DESC_FREE; 6409 hdrp->dstate = VIO_DESC_FREE; 6410 hdrp->ack = B_FALSE; 6411 6412 tbufp = NEXTTBUF(ldcp, tbufp); 6413 txdp = tbufp->descp; 6414 hdrp = &txdp->hdr; 6415 count++; 6416 } 6417 6418 ldcp->cur_tbufp = tbufp; 6419 6420 /* 6421 * Check if mac layer should be notified to restart transmissions 6422 */ 6423 if ((ldcp->need_resched) && (count > 0)) { 6424 vio_net_tx_update_t vtx_update = 6425 ldcp->portp->vcb.vio_net_tx_update; 6426 6427 ldcp->need_resched = B_FALSE; 6428 vtx_update(ldcp->portp->vhp); 6429 } 6430 } 6431 6432 /* return the number of pending transmits for the channel */ 6433 static int 6434 vgen_num_txpending(vgen_ldc_t *ldcp) 6435 { 6436 int n; 6437 6438 if (ldcp->next_tbufp >= ldcp->cur_tbufp) { 6439 n = ldcp->next_tbufp - ldcp->cur_tbufp; 6440 } else { 6441 /* cur_tbufp > next_tbufp */ 6442 n = ldcp->num_txds - (ldcp->cur_tbufp - ldcp->next_tbufp); 6443 } 6444 6445 return (n); 6446 } 6447 6448 /* determine if the transmit descriptor ring is full */ 6449 static int 6450 vgen_tx_dring_full(vgen_ldc_t *ldcp) 6451 { 6452 vgen_private_desc_t *tbufp; 6453 vgen_private_desc_t *ntbufp; 6454 6455 tbufp = ldcp->next_tbufp; 6456 ntbufp = NEXTTBUF(ldcp, tbufp); 6457 if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */ 6458 return (VGEN_SUCCESS); 6459 } 6460 return (VGEN_FAILURE); 6461 } 6462 6463 /* determine if timeout condition has occured */ 6464 static int 6465 vgen_ldc_txtimeout(vgen_ldc_t *ldcp) 6466 { 6467 if (((ddi_get_lbolt() - ldcp->reclaim_lbolt) > 6468 
drv_usectohz(vnet_ldcwd_txtimeout * 1000)) && 6469 (vnet_ldcwd_txtimeout) && 6470 (vgen_tx_dring_full(ldcp) == VGEN_SUCCESS)) { 6471 return (VGEN_SUCCESS); 6472 } else { 6473 return (VGEN_FAILURE); 6474 } 6475 } 6476 6477 /* transmit watchdog timeout handler */ 6478 static void 6479 vgen_ldc_watchdog(void *arg) 6480 { 6481 vgen_ldc_t *ldcp; 6482 vgen_t *vgenp; 6483 int rv; 6484 6485 ldcp = (vgen_ldc_t *)arg; 6486 vgenp = LDC_TO_VGEN(ldcp); 6487 6488 rv = vgen_ldc_txtimeout(ldcp); 6489 if (rv == VGEN_SUCCESS) { 6490 DWARN(vgenp, ldcp, "transmit timeout\n"); 6491 #ifdef DEBUG 6492 if (vgen_trigger_txtimeout) { 6493 /* tx timeout triggered for debugging */ 6494 vgen_trigger_txtimeout = 0; 6495 } 6496 #endif 6497 mutex_enter(&ldcp->cblock); 6498 ldcp->need_ldc_reset = B_TRUE; 6499 vgen_handshake_retry(ldcp); 6500 mutex_exit(&ldcp->cblock); 6501 if (ldcp->need_resched) { 6502 vio_net_tx_update_t vtx_update = 6503 ldcp->portp->vcb.vio_net_tx_update; 6504 6505 ldcp->need_resched = B_FALSE; 6506 vtx_update(ldcp->portp->vhp); 6507 } 6508 } 6509 6510 ldcp->wd_tid = timeout(vgen_ldc_watchdog, (caddr_t)ldcp, 6511 drv_usectohz(vnet_ldcwd_interval * 1000)); 6512 } 6513 6514 /* handler for error messages received from the peer ldc end-point */ 6515 static void 6516 vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp) 6517 { 6518 _NOTE(ARGUNUSED(ldcp, tagp)) 6519 } 6520 6521 static int 6522 vgen_check_datamsg_seq(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp) 6523 { 6524 vio_raw_data_msg_t *rmsg; 6525 vio_dring_msg_t *dmsg; 6526 uint64_t seq_num; 6527 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 6528 6529 if (tagp->vio_subtype_env == VIO_DRING_DATA) { 6530 dmsg = (vio_dring_msg_t *)tagp; 6531 seq_num = dmsg->seq_num; 6532 } else if (tagp->vio_subtype_env == VIO_PKT_DATA) { 6533 rmsg = (vio_raw_data_msg_t *)tagp; 6534 seq_num = rmsg->seq_num; 6535 } else { 6536 return (EINVAL); 6537 } 6538 6539 if (seq_num != ldcp->next_rxseq) { 6540 6541 /* seqnums don't match */ 6542 DWARN(vgenp, ldcp, 6543 
"next_rxseq(0x%lx) != seq_num(0x%lx)\n", 6544 ldcp->next_rxseq, seq_num); 6545 6546 ldcp->need_ldc_reset = B_TRUE; 6547 return (EINVAL); 6548 6549 } 6550 6551 ldcp->next_rxseq++; 6552 6553 return (0); 6554 } 6555 6556 /* Check if the session id in the received message is valid */ 6557 static int 6558 vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp) 6559 { 6560 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 6561 6562 if (tagp->vio_sid != ldcp->peer_sid) { 6563 DWARN(vgenp, ldcp, "sid mismatch: expected(%x), rcvd(%x)\n", 6564 ldcp->peer_sid, tagp->vio_sid); 6565 return (VGEN_FAILURE); 6566 } 6567 else 6568 return (VGEN_SUCCESS); 6569 } 6570 6571 static caddr_t 6572 vgen_print_ethaddr(uint8_t *a, char *ebuf) 6573 { 6574 (void) sprintf(ebuf, 6575 "%x:%x:%x:%x:%x:%x", a[0], a[1], a[2], a[3], a[4], a[5]); 6576 return (ebuf); 6577 } 6578 6579 /* Handshake watchdog timeout handler */ 6580 static void 6581 vgen_hwatchdog(void *arg) 6582 { 6583 vgen_ldc_t *ldcp = (vgen_ldc_t *)arg; 6584 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 6585 6586 DWARN(vgenp, ldcp, 6587 "handshake timeout ldc(%lx) phase(%x) state(%x)\n", 6588 ldcp->hphase, ldcp->hstate); 6589 6590 mutex_enter(&ldcp->cblock); 6591 if (ldcp->cancel_htid) { 6592 ldcp->cancel_htid = 0; 6593 mutex_exit(&ldcp->cblock); 6594 return; 6595 } 6596 ldcp->htid = 0; 6597 ldcp->need_ldc_reset = B_TRUE; 6598 vgen_handshake_retry(ldcp); 6599 mutex_exit(&ldcp->cblock); 6600 } 6601 6602 static void 6603 vgen_print_hparams(vgen_hparams_t *hp) 6604 { 6605 uint8_t addr[6]; 6606 char ea[6]; 6607 ldc_mem_cookie_t *dc; 6608 6609 cmn_err(CE_CONT, "version_info:\n"); 6610 cmn_err(CE_CONT, 6611 "\tver_major: %d, ver_minor: %d, dev_class: %d\n", 6612 hp->ver_major, hp->ver_minor, hp->dev_class); 6613 6614 vnet_macaddr_ultostr(hp->addr, addr); 6615 cmn_err(CE_CONT, "attr_info:\n"); 6616 cmn_err(CE_CONT, "\tMTU: %lx, addr: %s\n", hp->mtu, 6617 vgen_print_ethaddr(addr, ea)); 6618 cmn_err(CE_CONT, 6619 "\taddr_type: %x, xfer_mode: %x, ack_freq: %x\n", 6620 
hp->addr_type, hp->xfer_mode, hp->ack_freq); 6621 6622 dc = &hp->dring_cookie; 6623 cmn_err(CE_CONT, "dring_info:\n"); 6624 cmn_err(CE_CONT, 6625 "\tlength: %d, dsize: %d\n", hp->num_desc, hp->desc_size); 6626 cmn_err(CE_CONT, 6627 "\tldc_addr: 0x%lx, ldc_size: %ld\n", 6628 dc->addr, dc->size); 6629 cmn_err(CE_CONT, "\tdring_ident: 0x%lx\n", hp->dring_ident); 6630 } 6631 6632 static void 6633 vgen_print_ldcinfo(vgen_ldc_t *ldcp) 6634 { 6635 vgen_hparams_t *hp; 6636 6637 cmn_err(CE_CONT, "Channel Information:\n"); 6638 cmn_err(CE_CONT, 6639 "\tldc_id: 0x%lx, ldc_status: 0x%x\n", 6640 ldcp->ldc_id, ldcp->ldc_status); 6641 cmn_err(CE_CONT, 6642 "\tlocal_sid: 0x%x, peer_sid: 0x%x\n", 6643 ldcp->local_sid, ldcp->peer_sid); 6644 cmn_err(CE_CONT, 6645 "\thphase: 0x%x, hstate: 0x%x\n", 6646 ldcp->hphase, ldcp->hstate); 6647 6648 cmn_err(CE_CONT, "Local handshake params:\n"); 6649 hp = &ldcp->local_hparams; 6650 vgen_print_hparams(hp); 6651 6652 cmn_err(CE_CONT, "Peer handshake params:\n"); 6653 hp = &ldcp->peer_hparams; 6654 vgen_print_hparams(hp); 6655 } 6656 6657 /* 6658 * Send received packets up the stack. 6659 */ 6660 static void 6661 vgen_rx(vgen_ldc_t *ldcp, mblk_t *bp) 6662 { 6663 vio_net_rx_cb_t vrx_cb = ldcp->portp->vcb.vio_net_rx_cb; 6664 6665 if (ldcp->rcv_thread != NULL) { 6666 ASSERT(MUTEX_HELD(&ldcp->rxlock)); 6667 mutex_exit(&ldcp->rxlock); 6668 } else { 6669 ASSERT(MUTEX_HELD(&ldcp->cblock)); 6670 mutex_exit(&ldcp->cblock); 6671 } 6672 6673 vrx_cb(ldcp->portp->vhp, bp); 6674 6675 if (ldcp->rcv_thread != NULL) { 6676 mutex_enter(&ldcp->rxlock); 6677 } else { 6678 mutex_enter(&ldcp->cblock); 6679 } 6680 } 6681 6682 /* 6683 * vgen_ldc_rcv_worker -- A per LDC worker thread to receive data. 6684 * This thread is woken up by the LDC interrupt handler to process 6685 * LDC packets and receive data. 
6686 */ 6687 static void 6688 vgen_ldc_rcv_worker(void *arg) 6689 { 6690 callb_cpr_t cprinfo; 6691 vgen_ldc_t *ldcp = (vgen_ldc_t *)arg; 6692 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 6693 6694 DBG1(vgenp, ldcp, "enter\n"); 6695 CALLB_CPR_INIT(&cprinfo, &ldcp->rcv_thr_lock, callb_generic_cpr, 6696 "vnet_rcv_thread"); 6697 mutex_enter(&ldcp->rcv_thr_lock); 6698 while (!(ldcp->rcv_thr_flags & VGEN_WTHR_STOP)) { 6699 6700 CALLB_CPR_SAFE_BEGIN(&cprinfo); 6701 /* 6702 * Wait until the data is received or a stop 6703 * request is received. 6704 */ 6705 while (!(ldcp->rcv_thr_flags & 6706 (VGEN_WTHR_DATARCVD | VGEN_WTHR_STOP))) { 6707 cv_wait(&ldcp->rcv_thr_cv, &ldcp->rcv_thr_lock); 6708 } 6709 CALLB_CPR_SAFE_END(&cprinfo, &ldcp->rcv_thr_lock) 6710 6711 /* 6712 * First process the stop request. 6713 */ 6714 if (ldcp->rcv_thr_flags & VGEN_WTHR_STOP) { 6715 DBG2(vgenp, ldcp, "stopped\n"); 6716 break; 6717 } 6718 ldcp->rcv_thr_flags &= ~VGEN_WTHR_DATARCVD; 6719 ldcp->rcv_thr_flags |= VGEN_WTHR_PROCESSING; 6720 mutex_exit(&ldcp->rcv_thr_lock); 6721 DBG2(vgenp, ldcp, "calling vgen_handle_evt_read\n"); 6722 vgen_handle_evt_read(ldcp); 6723 mutex_enter(&ldcp->rcv_thr_lock); 6724 ldcp->rcv_thr_flags &= ~VGEN_WTHR_PROCESSING; 6725 } 6726 6727 /* 6728 * Update the run status and wakeup the thread that 6729 * has sent the stop request. 6730 */ 6731 ldcp->rcv_thr_flags &= ~VGEN_WTHR_STOP; 6732 ldcp->rcv_thread = NULL; 6733 CALLB_CPR_EXIT(&cprinfo); 6734 6735 thread_exit(); 6736 DBG1(vgenp, ldcp, "exit\n"); 6737 } 6738 6739 /* vgen_stop_rcv_thread -- Co-ordinate with receive thread to stop it */ 6740 static void 6741 vgen_stop_rcv_thread(vgen_ldc_t *ldcp) 6742 { 6743 kt_did_t tid = 0; 6744 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 6745 6746 DBG1(vgenp, ldcp, "enter\n"); 6747 /* 6748 * Send a stop request by setting the stop flag and 6749 * wait until the receive thread stops. 
6750 */ 6751 mutex_enter(&ldcp->rcv_thr_lock); 6752 if (ldcp->rcv_thread != NULL) { 6753 tid = ldcp->rcv_thread->t_did; 6754 ldcp->rcv_thr_flags |= VGEN_WTHR_STOP; 6755 cv_signal(&ldcp->rcv_thr_cv); 6756 } 6757 mutex_exit(&ldcp->rcv_thr_lock); 6758 6759 if (tid != 0) { 6760 thread_join(tid); 6761 } 6762 DBG1(vgenp, ldcp, "exit\n"); 6763 } 6764 6765 /* 6766 * Wait for the channel rx-queue to be drained by allowing the receive 6767 * worker thread to read all messages from the rx-queue of the channel. 6768 * Assumption: further callbacks are disabled at this time. 6769 */ 6770 static void 6771 vgen_drain_rcv_thread(vgen_ldc_t *ldcp) 6772 { 6773 clock_t tm; 6774 clock_t wt; 6775 clock_t rv; 6776 6777 /* 6778 * If there is data in ldc rx queue, wait until the rx 6779 * worker thread runs and drains all msgs in the queue. 6780 */ 6781 wt = drv_usectohz(MILLISEC); 6782 6783 mutex_enter(&ldcp->rcv_thr_lock); 6784 6785 tm = ddi_get_lbolt() + wt; 6786 6787 /* 6788 * We need to check both bits - DATARCVD and PROCESSING, to be cleared. 6789 * If DATARCVD is set, that means the callback has signalled the worker 6790 * thread, but the worker hasn't started processing yet. If PROCESSING 6791 * is set, that means the thread is awake and processing. Note that the 6792 * DATARCVD state can only be seen once, as the assumption is that 6793 * further callbacks have been disabled at this point. 6794 */ 6795 while (ldcp->rcv_thr_flags & 6796 (VGEN_WTHR_DATARCVD | VGEN_WTHR_PROCESSING)) { 6797 rv = cv_timedwait(&ldcp->rcv_thr_cv, &ldcp->rcv_thr_lock, tm); 6798 if (rv == -1) { /* timeout */ 6799 /* 6800 * Note that the only way we return is due to a timeout; 6801 * we set the new time to wait, before we go back and 6802 * check the condition. The other(unlikely) possibility 6803 * is a premature wakeup(see cv_timedwait(9F)) in which 6804 * case we just continue to use the same time to wait. 
6805 */ 6806 tm = ddi_get_lbolt() + wt; 6807 } 6808 } 6809 6810 mutex_exit(&ldcp->rcv_thr_lock); 6811 } 6812 6813 /* 6814 * vgen_dds_rx -- post DDS messages to vnet. 6815 */ 6816 static int 6817 vgen_dds_rx(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp) 6818 { 6819 vio_dds_msg_t *dmsg = (vio_dds_msg_t *)tagp; 6820 vgen_t *vgenp = LDC_TO_VGEN(ldcp); 6821 6822 if (dmsg->dds_class != DDS_VNET_NIU) { 6823 DWARN(vgenp, ldcp, "Unknown DDS class, dropping"); 6824 return (EBADMSG); 6825 } 6826 vnet_dds_rx(vgenp->vnetp, dmsg); 6827 return (0); 6828 } 6829 6830 /* 6831 * vgen_dds_tx -- an interface called by vnet to send DDS messages. 6832 */ 6833 int 6834 vgen_dds_tx(void *arg, void *msg) 6835 { 6836 vgen_t *vgenp = arg; 6837 vio_dds_msg_t *dmsg = msg; 6838 vgen_portlist_t *plistp = &vgenp->vgenports; 6839 vgen_ldc_t *ldcp; 6840 vgen_ldclist_t *ldclp; 6841 int rv = EIO; 6842 6843 6844 READ_ENTER(&plistp->rwlock); 6845 ldclp = &(vgenp->vsw_portp->ldclist); 6846 READ_ENTER(&ldclp->rwlock); 6847 ldcp = ldclp->headp; 6848 if ((ldcp == NULL) || (ldcp->hphase != VH_DONE)) { 6849 goto vgen_dsend_exit; 6850 } 6851 6852 dmsg->tag.vio_sid = ldcp->local_sid; 6853 rv = vgen_sendmsg(ldcp, (caddr_t)dmsg, sizeof (vio_dds_msg_t), B_FALSE); 6854 if (rv != VGEN_SUCCESS) { 6855 rv = EIO; 6856 } else { 6857 rv = 0; 6858 } 6859 6860 vgen_dsend_exit: 6861 RW_EXIT(&ldclp->rwlock); 6862 RW_EXIT(&plistp->rwlock); 6863 return (rv); 6864 6865 } 6866 6867 #if DEBUG 6868 6869 /* 6870 * Print debug messages - set to 0xf to enable all msgs 6871 */ 6872 static void 6873 debug_printf(const char *fname, vgen_t *vgenp, 6874 vgen_ldc_t *ldcp, const char *fmt, ...) 
6875 { 6876 char buf[256]; 6877 char *bufp = buf; 6878 va_list ap; 6879 6880 if ((vgenp != NULL) && (vgenp->vnetp != NULL)) { 6881 (void) sprintf(bufp, "vnet%d:", 6882 ((vnet_t *)(vgenp->vnetp))->instance); 6883 bufp += strlen(bufp); 6884 } 6885 if (ldcp != NULL) { 6886 (void) sprintf(bufp, "ldc(%ld):", ldcp->ldc_id); 6887 bufp += strlen(bufp); 6888 } 6889 (void) sprintf(bufp, "%s: ", fname); 6890 bufp += strlen(bufp); 6891 6892 va_start(ap, fmt); 6893 (void) vsprintf(bufp, fmt, ap); 6894 va_end(ap); 6895 6896 if ((ldcp == NULL) ||(vgendbg_ldcid == -1) || 6897 (vgendbg_ldcid == ldcp->ldc_id)) { 6898 cmn_err(CE_CONT, "%s\n", buf); 6899 } 6900 } 6901 #endif 6902