1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/kmem.h> 28 #include <sys/conf.h> 29 #include <sys/ddi.h> 30 #include <sys/sunddi.h> 31 #include <sys/ksynch.h> 32 33 #include <sys/ib/clients/eoib/eib_impl.h> 34 35 /* 36 * Declarations private to this file 37 */ 38 static int eib_vnic_get_instance(eib_t *, int *); 39 static void eib_vnic_ret_instance(eib_t *, int); 40 static void eib_vnic_modify_enter(eib_t *, uint_t); 41 static void eib_vnic_modify_exit(eib_t *, uint_t); 42 static int eib_vnic_create_common(eib_t *, eib_vnic_t *, int *); 43 static int eib_vnic_set_partition(eib_t *, eib_vnic_t *, int *); 44 static void eib_vnic_make_vhub_mgid(uint8_t *, uint8_t, uint8_t *, uint8_t, 45 uint8_t, uint32_t, ib_gid_t *); 46 static int eib_vnic_attach_ctl_mcgs(eib_t *, eib_vnic_t *, int *); 47 static int eib_vnic_attach_vhub_table(eib_t *, eib_vnic_t *); 48 static int eib_vnic_attach_vhub_update(eib_t *, eib_vnic_t *); 49 static void eib_vnic_start_keepalives(eib_t *, eib_vnic_t *); 50 static int eib_vnic_lookup_dest(eib_vnic_t *, uint8_t *, 
uint16_t, 51 eib_vhub_map_t *, ibt_mcg_info_t *, int *); 52 static void eib_vnic_leave_all_data_mcgs(eib_t *, eib_vnic_t *); 53 static void eib_vnic_rejoin_data_mcgs(eib_t *, eib_vnic_t *); 54 static void eib_vnic_reattach_ctl_mcgs(eib_t *, eib_vnic_t *); 55 static void eib_rb_vnic_create_common(eib_t *, eib_vnic_t *, uint_t); 56 static void eib_rb_vnic_attach_ctl_mcgs(eib_t *, eib_vnic_t *); 57 static void eib_rb_vnic_attach_vhub_table(eib_t *, eib_vnic_t *); 58 static void eib_rb_vnic_attach_vhub_update(eib_t *, eib_vnic_t *); 59 static void eib_rb_vnic_start_keepalives(eib_t *, eib_vnic_t *); 60 static void eib_rb_vnic_join_data_mcg(eib_t *, eib_vnic_t *, uint8_t *); 61 62 /* 63 * Definitions private to this file 64 */ 65 #define EIB_VNIC_STRUCT_ALLOCD 0x0001 66 #define EIB_VNIC_GOT_INSTANCE 0x0002 67 #define EIB_VNIC_CREATE_COMMON_DONE 0x0004 68 #define EIB_VNIC_CTLQP_CREATED 0x0008 69 #define EIB_VNIC_DATAQP_CREATED 0x0010 70 #define EIB_VNIC_LOGIN_DONE 0x0020 71 #define EIB_VNIC_PARTITION_SET 0x0040 72 #define EIB_VNIC_RX_POSTED_TO_CTLQP 0x0080 73 #define EIB_VNIC_RX_POSTED_TO_DATAQP 0x0100 74 #define EIB_VNIC_ATTACHED_TO_CTL_MCGS 0x0200 75 #define EIB_VNIC_GOT_VHUB_TABLE 0x0400 76 #define EIB_VNIC_KEEPALIVES_STARTED 0x0800 77 #define EIB_VNIC_BROADCAST_JOINED 0x1000 78 79 /* 80 * Destination type 81 */ 82 #define EIB_TX_UNICAST 1 83 #define EIB_TX_MULTICAST 2 84 #define EIB_TX_BROADCAST 3 85 86 int 87 eib_vnic_create(eib_t *ss, uint8_t *macaddr, uint16_t vlan, eib_vnic_t **vnicp, 88 int *err) 89 { 90 eib_vnic_t *vnic = NULL; 91 boolean_t failed_vnic = B_FALSE; 92 uint_t progress = 0; 93 94 eib_vnic_modify_enter(ss, EIB_VN_BEING_CREATED); 95 96 /* 97 * When a previously created vnic is being resurrected due to a 98 * gateway reboot, there's a race possible where a creation request 99 * for the existing vnic could get filed with the vnic creator 100 * thread. So, before we go ahead with the creation of this vnic, 101 * make sure we already don't have the vnic. 
102 */ 103 if (macaddr) { 104 if (eib_data_lookup_vnic(ss, macaddr, vlan, vnicp, 105 &failed_vnic) == EIB_E_SUCCESS) { 106 EIB_DPRINTF_WARN(ss->ei_instance, "eib_vnic_create: " 107 "vnic for mac=%x:%x:%x:%x:%x:%x, vlan=0x%x " 108 "already there, no duplicate creation", macaddr[0], 109 macaddr[1], macaddr[2], macaddr[3], macaddr[4], 110 macaddr[5], vlan); 111 112 eib_vnic_modify_exit(ss, EIB_VN_BEING_CREATED); 113 return (EIB_E_SUCCESS); 114 } else if (failed_vnic) { 115 EIB_DPRINTF_WARN(ss->ei_instance, "eib_vnic_create: " 116 "vnic for mac=%x:%x:%x:%x:%x:%x, vlan=0x%x " 117 "failed earlier, shouldn't be here at all", 118 macaddr[0], macaddr[1], macaddr[2], macaddr[3], 119 macaddr[4], macaddr[5], vlan); 120 121 *err = EEXIST; 122 123 eib_vnic_modify_exit(ss, EIB_VN_BEING_CREATED); 124 return (EIB_E_FAILURE); 125 } 126 } 127 128 /* 129 * Allocate a vnic structure for this instance 130 */ 131 vnic = kmem_zalloc(sizeof (eib_vnic_t), KM_SLEEP); 132 vnic->vn_ss = ss; 133 vnic->vn_instance = -1; 134 mutex_init(&vnic->vn_lock, NULL, MUTEX_DRIVER, NULL); 135 cv_init(&vnic->vn_cv, NULL, CV_DEFAULT, NULL); 136 137 progress |= EIB_VNIC_STRUCT_ALLOCD; 138 139 /* 140 * Get a vnic instance 141 */ 142 if (eib_vnic_get_instance(ss, &vnic->vn_instance) != EIB_E_SUCCESS) { 143 *err = EMFILE; 144 goto vnic_create_fail; 145 } 146 progress |= EIB_VNIC_GOT_INSTANCE; 147 148 /* 149 * Initialize vnic's basic parameters. Note that we set the 15-bit 150 * vnic id to send to gw during a login to be a 2-tuple of 151 * {devi_instance#, eoib_vnic_instance#}. 
152 */ 153 vnic->vn_vlan = vlan; 154 if (macaddr) { 155 bcopy(macaddr, vnic->vn_macaddr, sizeof (vnic->vn_macaddr)); 156 } 157 vnic->vn_id = (uint16_t)EIB_VNIC_ID(ss->ei_instance, vnic->vn_instance); 158 159 /* 160 * Start up this vnic instance 161 */ 162 if (eib_vnic_create_common(ss, vnic, err) != EIB_E_SUCCESS) 163 goto vnic_create_fail; 164 165 progress |= EIB_VNIC_CREATE_COMMON_DONE; 166 167 /* 168 * Return the created vnic 169 */ 170 if (vnicp) { 171 *vnicp = vnic; 172 } 173 174 eib_vnic_modify_exit(ss, EIB_VN_BEING_CREATED); 175 return (EIB_E_SUCCESS); 176 177 vnic_create_fail: 178 eib_rb_vnic_create(ss, vnic, progress); 179 eib_vnic_modify_exit(ss, EIB_VN_BEING_CREATED); 180 return (EIB_E_FAILURE); 181 } 182 183 void 184 eib_vnic_delete(eib_t *ss, eib_vnic_t *vnic) 185 { 186 eib_vnic_modify_enter(ss, EIB_VN_BEING_DELETED); 187 eib_rb_vnic_create(ss, vnic, ~0); 188 eib_vnic_modify_exit(ss, EIB_VN_BEING_DELETED); 189 } 190 191 /*ARGSUSED*/ 192 int 193 eib_vnic_wait_for_login_ack(eib_t *ss, eib_vnic_t *vnic, int *err) 194 { 195 clock_t deadline; 196 int ret = EIB_E_SUCCESS; 197 198 deadline = ddi_get_lbolt() + drv_usectohz(EIB_LOGIN_TIMEOUT_USEC); 199 200 /* 201 * Wait for login ack/nack or wait time to get over. If we wake up 202 * with a login failure, record the reason. 203 */ 204 mutex_enter(&vnic->vn_lock); 205 while (vnic->vn_state == EIB_LOGIN_ACK_WAIT) { 206 if (cv_timedwait(&vnic->vn_cv, &vnic->vn_lock, 207 deadline) == -1) { 208 if (vnic->vn_state == EIB_LOGIN_ACK_WAIT) 209 vnic->vn_state = EIB_LOGIN_TIMED_OUT; 210 } 211 } 212 213 if (vnic->vn_state != EIB_LOGIN_ACK_RCVD) { 214 ret = EIB_E_FAILURE; 215 *err = (vnic->vn_state == EIB_LOGIN_TIMED_OUT) ? 
216 ETIME : ECANCELED; 217 } 218 mutex_exit(&vnic->vn_lock); 219 220 return (ret); 221 } 222 223 void 224 eib_vnic_login_ack(eib_t *ss, eib_login_data_t *ld) 225 { 226 eib_vnic_t *vnic; 227 uint_t vnic_instance; 228 uint_t hdrs_sz; 229 uint16_t vnic_id; 230 int nack = 1; 231 232 /* 233 * The msb in the vnic id in login ack message is not 234 * part of our vNIC id. 235 */ 236 vnic_id = ld->ld_vnic_id & (~FIP_VL_VNIC_ID_MSBIT); 237 238 /* 239 * Now, we deconstruct the vnic id and determine the vnic 240 * instance number. If this vnic_instance number isn't 241 * valid or the vnic_id of the vnic for this instance 242 * number doesn't match in our records, we quit. 243 */ 244 vnic_instance = EIB_VNIC_INSTANCE(vnic_id); 245 if (vnic_instance >= EIB_MAX_VNICS) 246 return; 247 248 /* 249 * At this point, we haven't fully created the vnic, so 250 * this vnic should be present as ei_vnic_pending. 251 */ 252 mutex_enter(&ss->ei_vnic_lock); 253 if ((vnic = ss->ei_vnic_pending) == NULL) { 254 mutex_exit(&ss->ei_vnic_lock); 255 return; 256 } else if (vnic->vn_id != vnic_id) { 257 mutex_exit(&ss->ei_vnic_lock); 258 return; 259 } 260 mutex_exit(&ss->ei_vnic_lock); 261 262 /* 263 * First check if the vnic is still sleeping, waiting 264 * for login ack. If not, we might as well quit now. 265 */ 266 mutex_enter(&vnic->vn_lock); 267 if (vnic->vn_state != EIB_LOGIN_ACK_WAIT) { 268 mutex_exit(&vnic->vn_lock); 269 return; 270 } 271 272 /* 273 * We NACK the waiter under these conditions: 274 * 275 * . syndrome was set 276 * . vhub mtu is bigger than our max mtu (minus eoib/eth hdrs sz) 277 * . assigned vlan is different from requested vlan (except 278 * when we didn't request a specific vlan) 279 * . when the assigned mac is different from the requested mac 280 * (except when we didn't request a specific mac) 281 * . when the VP bit indicates that vlan tag should be used 282 * but we had not specified a vlan tag in our request 283 * . 
when the VP bit indicates that vlan tag should not be 284 * present and we'd specified a vlan tag in our request 285 * 286 * The last case is interesting: if we had not specified any vlan id 287 * in our request, but the gateway has assigned a vlan and asks us 288 * to use/expect that tag on every packet dealt by this vnic, it 289 * means effectively the EoIB driver has to insert/remove vlan 290 * tagging on this vnic traffic, since the nw layer on Solaris 291 * won't be using/expecting any tag on traffic for this vnic. This 292 * feature is not supported currently. 293 */ 294 hdrs_sz = EIB_ENCAP_HDR_SZ + sizeof (struct ether_header) + VLAN_TAGSZ; 295 if (ld->ld_syndrome) { 296 EIB_DPRINTF_WARN(ss->ei_instance, "eib_vnic_login_ack: " 297 "non-zero syndrome 0x%lx, NACK", ld->ld_syndrome); 298 299 } else if (ld->ld_vhub_mtu > (ss->ei_props->ep_mtu - hdrs_sz)) { 300 EIB_DPRINTF_WARN(ss->ei_instance, "eib_vnic_login_ack: " 301 "vhub mtu (0x%x) bigger than port mtu (0x%x), NACK", 302 ld->ld_vhub_mtu, ss->ei_props->ep_mtu); 303 304 } else if ((vnic->vn_vlan) && (vnic->vn_vlan != ld->ld_assigned_vlan)) { 305 EIB_DPRINTF_WARN(ss->ei_instance, "eib_vnic_login_ack: " 306 "assigned vlan (0x%x) different from asked (0x%x), " 307 "for vnic id 0x%x, NACK", ld->ld_assigned_vlan, 308 vnic->vn_vlan, vnic->vn_id); 309 310 } else if (bcmp(vnic->vn_macaddr, eib_zero_mac, ETHERADDRL) && 311 bcmp(vnic->vn_macaddr, ld->ld_assigned_mac, ETHERADDRL)) { 312 uint8_t *asked, *got; 313 314 asked = vnic->vn_macaddr; 315 got = ld->ld_assigned_mac; 316 317 EIB_DPRINTF_WARN(ss->ei_instance, "eib_vnic_login_ack: " 318 "assigned mac (%x:%x:%x:%x:%x:%x) different from " 319 "asked (%x:%x:%x:%x:%x:%x) for vnic id 0x%x, NACK", 320 got[0], got[1], got[2], got[3], got[4], got[5], asked[0], 321 asked[1], asked[2], asked[3], asked[4], asked[5]); 322 323 } else if ((vnic->vn_vlan == 0) && (ld->ld_vlan_in_packets)) { 324 EIB_DPRINTF_WARN(ss->ei_instance, "eib_vnic_login_ack: " 325 "asked for tagless vlan, 
but VP flag is set " 326 "for vnic id 0x%x, NACK", vnic->vn_id); 327 328 } else if ((vnic->vn_vlan) && (!ld->ld_vlan_in_packets)) { 329 if (eib_wa_no_good_vp_flag) { 330 ld->ld_vlan_in_packets = 1; 331 ld->ld_vhub_id = EIB_VHUB_ID(ld->ld_gw_port_id, 332 ld->ld_assigned_vlan); 333 nack = 0; 334 } else { 335 EIB_DPRINTF_WARN(ss->ei_instance, "eib_vnic_login_ack: " 336 "vlan was assigned correctly, but VP flag is not " 337 "set for vnic id 0x%x, NACK", vnic->vn_id); 338 } 339 } else { 340 ld->ld_vhub_id = EIB_VHUB_ID(ld->ld_gw_port_id, 341 ld->ld_assigned_vlan); 342 nack = 0; 343 } 344 345 /* 346 * ACK/NACK the waiter 347 */ 348 if (nack) { 349 vnic->vn_state = EIB_LOGIN_NACK_RCVD; 350 } else { 351 bcopy(ld, &vnic->vn_login_data, sizeof (eib_login_data_t)); 352 vnic->vn_state = EIB_LOGIN_ACK_RCVD; 353 } 354 355 cv_signal(&vnic->vn_cv); 356 mutex_exit(&vnic->vn_lock); 357 } 358 359 int 360 eib_vnic_wait_for_table(eib_t *ss, eib_vnic_t *vnic, int *err) 361 { 362 clock_t deadline; 363 int ret = EIB_E_SUCCESS; 364 365 /* 366 * The EoIB spec does not detail exactly within what time a vhub table 367 * request is expected to be answered. However, it does mention that 368 * in the worst case, the vhub update messages from the gateway must 369 * be seen atleast once in 2.5 * GW_KA_PERIOD (already saved in 370 * pp_gw_ka_ticks), so we'll settle for that limit. 371 */ 372 deadline = ddi_get_lbolt() + ss->ei_gw_props->pp_gw_ka_ticks; 373 374 /* 375 * Wait for vhub table to be constructed. If we wake up with a 376 * vhub table construction failure, record the reason. 377 */ 378 mutex_enter(&vnic->vn_lock); 379 while (vnic->vn_state == EIB_LOGIN_TBL_WAIT) { 380 if (cv_timedwait(&vnic->vn_cv, &vnic->vn_lock, 381 deadline) == -1) { 382 if (vnic->vn_state == EIB_LOGIN_TBL_WAIT) 383 vnic->vn_state = EIB_LOGIN_TIMED_OUT; 384 } 385 } 386 387 if (vnic->vn_state != EIB_LOGIN_TBL_DONE) { 388 ret = EIB_E_FAILURE; 389 *err = (vnic->vn_state == EIB_LOGIN_TIMED_OUT) ? 
390 ETIME : ECANCELED; 391 } 392 mutex_exit(&vnic->vn_lock); 393 394 return (ret); 395 } 396 397 void 398 eib_vnic_vhub_table_done(eib_vnic_t *vnic, uint_t result_state) 399 { 400 ASSERT(result_state == EIB_LOGIN_TBL_DONE || 401 result_state == EIB_LOGIN_TBL_FAILED); 402 403 /* 404 * Construction of vhub table for the vnic is done one way or 405 * the other. Set the login wait state appropriately and signal 406 * the waiter. If it's a vhub table failure, we shouldn't parse 407 * any more vhub table or vhub update packets until the vnic state 408 * is changed. 409 */ 410 mutex_enter(&vnic->vn_lock); 411 vnic->vn_state = result_state; 412 cv_signal(&vnic->vn_cv); 413 mutex_exit(&vnic->vn_lock); 414 } 415 416 int 417 eib_vnic_join_data_mcg(eib_t *ss, eib_vnic_t *vnic, uint8_t *mcast_mac, 418 boolean_t rejoin, int *err) 419 { 420 eib_chan_t *chan = vnic->vn_data_chan; 421 eib_login_data_t *ld = &vnic->vn_login_data; 422 eib_mcg_t *mcg; 423 eib_mcg_t *elem; 424 eib_mcg_t *tail; 425 ibt_mcg_info_t *mcg_info; 426 ibt_mcg_attr_t mcg_attr; 427 ibt_status_t ret; 428 429 /* 430 * Compose the multicast MGID to join 431 */ 432 bzero(&mcg_attr, sizeof (ibt_mcg_attr_t)); 433 434 eib_vnic_make_vhub_mgid(ld->ld_gw_mgid_prefix, 435 (uint8_t)EIB_MGID_VHUB_DATA, mcast_mac, ld->ld_n_mac_mcgid, 0, 436 ld->ld_vhub_id, &(mcg_attr.mc_mgid)); 437 mcg_attr.mc_pkey = (ib_pkey_t)ld->ld_vhub_pkey; 438 mcg_attr.mc_qkey = (ib_qkey_t)EIB_DATA_QKEY; 439 440 /* 441 * Allocate for and prepare the mcg to add to our list 442 */ 443 mcg_info = kmem_zalloc(sizeof (ibt_mcg_info_t), KM_NOSLEEP); 444 if (mcg_info == NULL) { 445 EIB_DPRINTF_WARN(ss->ei_instance, "eib_vnic_join_data_mcg: " 446 "no memory, failed to join mcg (mac=%x:%x:%x:%x:%x:%x)", 447 mcast_mac[0], mcast_mac[1], mcast_mac[2], 448 mcast_mac[3], mcast_mac[4], mcast_mac[5]); 449 450 *err = ENOMEM; 451 goto vnic_join_data_mcg_fail; 452 } 453 mcg = kmem_zalloc(sizeof (eib_mcg_t), KM_NOSLEEP); 454 if (mcg == NULL) { 455 
EIB_DPRINTF_WARN(ss->ei_instance, "eib_vnic_join_data_mcg: " 456 "no memory, failed to join mcg (mac=%x:%x:%x:%x:%x:%x)", 457 mcast_mac[0], mcast_mac[1], mcast_mac[2], 458 mcast_mac[3], mcast_mac[4], mcast_mac[5]); 459 460 *err = ENOMEM; 461 goto vnic_join_data_mcg_fail; 462 } 463 mcg->mg_next = NULL; 464 mcg->mg_rgid = ss->ei_props->ep_sgid; 465 mcg->mg_mgid = mcg_attr.mc_mgid; 466 mcg->mg_join_state = IB_MC_JSTATE_FULL; 467 mcg->mg_mcginfo = mcg_info; 468 bcopy(mcast_mac, mcg->mg_mac, ETHERADDRL); 469 470 /* 471 * Join the multicast group 472 * 473 * Should we query for the mcg and join instead of attempting to 474 * join directly ? 475 */ 476 mcg_attr.mc_join_state = mcg->mg_join_state; 477 mcg_attr.mc_flow = 0; 478 mcg_attr.mc_tclass = 0; 479 mcg_attr.mc_sl = 0; 480 mcg_attr.mc_scope = 0; /* IB_MC_SCOPE_SUBNET_LOCAL perhaps ? */ 481 482 ret = ibt_join_mcg(mcg->mg_rgid, &mcg_attr, mcg_info, NULL, NULL); 483 if (ret != IBT_SUCCESS) { 484 EIB_DPRINTF_WARN(ss->ei_instance, "eib_vnic_join_data_mcg: " 485 "ibt_join_mcg(mgid=%llx.%llx, pkey=0x%x, qkey=0x%lx, " 486 "jstate=0x%x) failed, ret=%d", mcg_attr.mc_mgid.gid_prefix, 487 mcg_attr.mc_mgid.gid_guid, mcg_attr.mc_pkey, 488 mcg_attr.mc_qkey, mcg_attr.mc_join_state, ret); 489 490 *err = EINVAL; 491 goto vnic_join_data_mcg_fail; 492 } 493 494 /* 495 * Attach to the group to receive multicast messages 496 */ 497 ret = ibt_attach_mcg(chan->ch_chan, mcg_info); 498 if (ret != IBT_SUCCESS) { 499 *err = EINVAL; 500 501 ret = ibt_leave_mcg(mcg->mg_rgid, mcg->mg_mgid, 502 eib_reserved_gid, mcg->mg_join_state); 503 if (ret != EIB_E_SUCCESS) { 504 EIB_DPRINTF_WARN(ss->ei_instance, 505 "eib_vnic_join_data_mcg: " 506 "ibt_leave_mcg(mgid=%llx.%llx, jstate=0x%x) " 507 "failed, ret=%d", mcg->mg_mgid.gid_prefix, 508 mcg->mg_mgid.gid_guid, mcg->mg_join_state, ret); 509 } 510 511 goto vnic_join_data_mcg_fail; 512 } 513 514 mutex_enter(&chan->ch_vhub_lock); 515 516 tail = NULL; 517 for (elem = chan->ch_vhub_data; elem != NULL; elem = 
elem->mg_next) { 518 if ((elem->mg_mgid.gid_prefix == mcg_attr.mc_mgid.gid_prefix) && 519 (elem->mg_mgid.gid_guid == mcg_attr.mc_mgid.gid_guid)) { 520 break; 521 } 522 tail = elem; 523 } 524 525 /* 526 * If we had't already joined to this mcg, add the newly joined mcg 527 * to the tail and return success 528 */ 529 if (elem == NULL) { 530 if (tail) 531 tail->mg_next = mcg; 532 else 533 chan->ch_vhub_data = mcg; 534 mutex_exit(&chan->ch_vhub_lock); 535 return (EIB_E_SUCCESS); 536 } 537 538 /* 539 * Duplicate. We need to leave one of the two joins. If "rejoin" 540 * was requested, leave the old join, otherwise leave the new join. 541 * 542 * Note that we must not detach the qp from the mcg, since if this 543 * was a dup, a second ibt_attach_mcg() above would've simply been 544 * a nop. 545 * 546 * Note also that the leave may not be successful here if our presence 547 * has been removed by the SM, but we need to do this to prevent leaks 548 * in ibtf. 549 */ 550 if (rejoin) { 551 ASSERT(elem->mg_mcginfo != NULL); 552 kmem_free(elem->mg_mcginfo, sizeof (ibt_mcg_info_t)); 553 (void) ibt_leave_mcg(elem->mg_rgid, elem->mg_mgid, 554 eib_reserved_gid, elem->mg_join_state); 555 /* 556 * Copy the new mcg over the old one (including the new 557 * mg_mcginfo), but preserve the link to the next element 558 * on the list 559 */ 560 mcg->mg_next = elem->mg_next; 561 bcopy(mcg, elem, sizeof (eib_mcg_t)); 562 } else { 563 ASSERT(mcg->mg_mcginfo != NULL); 564 kmem_free(mcg->mg_mcginfo, sizeof (ibt_mcg_info_t)); 565 (void) ibt_leave_mcg(mcg->mg_rgid, mcg->mg_mgid, 566 eib_reserved_gid, mcg->mg_join_state); 567 } 568 mutex_exit(&chan->ch_vhub_lock); 569 570 kmem_free(mcg, sizeof (eib_mcg_t)); 571 return (EIB_E_SUCCESS); 572 573 vnic_join_data_mcg_fail: 574 if (mcg) { 575 kmem_free(mcg, sizeof (eib_mcg_t)); 576 } 577 if (mcg_info) { 578 kmem_free(mcg_info, sizeof (ibt_mcg_info_t)); 579 } 580 return (EIB_E_FAILURE); 581 } 582 583 int 584 eib_vnic_setup_dest(eib_vnic_t *vnic, eib_wqe_t 
*swqe, uint8_t *dmac, 585 uint16_t vlan) 586 { 587 eib_t *ss = vnic->vn_ss; 588 eib_stats_t *stats = ss->ei_stats; 589 eib_avect_t *av; 590 eib_vhub_map_t ucast; 591 ibt_mcg_info_t mcast; 592 ibt_status_t ret; 593 int dtype; 594 int rv; 595 596 /* 597 * Lookup the destination in the vhub table or in our mcg list 598 */ 599 rv = eib_vnic_lookup_dest(vnic, dmac, vlan, &ucast, &mcast, &dtype); 600 if (rv != EIB_E_SUCCESS) { 601 EIB_DPRINTF_WARN(ss->ei_instance, "eib_vnic_setup_dest: " 602 "eib_vnic_lookup_dest(dmac=%x:%x:%x:%x:%x:%x, vlan=0x%x) " 603 "failed", dmac[0], dmac[1], dmac[2], dmac[3], dmac[4], 604 dmac[5], vlan); 605 606 return (EIB_E_FAILURE); 607 } 608 609 /* 610 * If we found a unicast address, get an address vector for the lid 611 * and sl, modify the ud dest based on the address vector and return. 612 * If we found a multicast address, use the address vector in the 613 * mcg info to modify the ud dest and return. 614 */ 615 if (dtype == EIB_TX_UNICAST) { 616 if ((av = eib_ibt_hold_avect(ss, ucast.mp_lid, 617 ucast.mp_sl)) == NULL) { 618 EIB_DPRINTF_WARN(ss->ei_instance, 619 "eib_vnic_setup_dest: " 620 "eib_ibt_hold_avect(lid=0x%x, sl=0x%x) failed", 621 ucast.mp_lid, ucast.mp_sl); 622 623 return (EIB_E_FAILURE); 624 } 625 ret = ibt_modify_ud_dest(swqe->qe_dest, EIB_DATA_QKEY, 626 ucast.mp_qpn, &av->av_vect); 627 628 eib_ibt_release_avect(ss, av); 629 630 if (ret != IBT_SUCCESS) { 631 EIB_DPRINTF_WARN(ss->ei_instance, 632 "eib_vnic_setup_dest: " 633 "ibt_modify_ud_dest(qpn=0x%lx, qkey=0x%lx) " 634 "failed, ret=%d", ucast.mp_qpn, EIB_DATA_QKEY, ret); 635 return (EIB_E_FAILURE); 636 } 637 } else { 638 ret = ibt_modify_ud_dest(swqe->qe_dest, EIB_DATA_QKEY, 639 IB_MC_QPN, &(mcast.mc_adds_vect)); 640 641 if (dtype == EIB_TX_BROADCAST) 642 EIB_INCR_COUNTER(&stats->st_brdcstxmit); 643 else 644 EIB_INCR_COUNTER(&stats->st_multixmit); 645 646 if (ret != IBT_SUCCESS) { 647 EIB_DPRINTF_WARN(ss->ei_instance, 648 "eib_vnic_setup_dest: " 649 
"ibt_modify_ud_dest(mc_qpn=0x%lx, qkey=0x%lx) " 650 "failed, ret=%d", IB_MC_QPN, EIB_DATA_QKEY, ret); 651 return (EIB_E_FAILURE); 652 } 653 } 654 655 return (EIB_E_SUCCESS); 656 } 657 658 void 659 eib_vnic_leave_data_mcg(eib_t *ss, eib_vnic_t *vnic, uint8_t *mcast_mac) 660 { 661 eib_rb_vnic_join_data_mcg(ss, vnic, mcast_mac); 662 } 663 664 /*ARGSUSED*/ 665 void 666 eib_vnic_init_tables(eib_t *ss, eib_vnic_t *vnic) 667 { 668 eib_vhub_table_t *tbl; 669 eib_vhub_update_t *upd; 670 671 tbl = kmem_zalloc(sizeof (eib_vhub_table_t), KM_SLEEP); 672 mutex_init(&tbl->tb_lock, NULL, MUTEX_DRIVER, NULL); 673 tbl->tb_eport_state = FIP_EPORT_UP; 674 675 upd = kmem_zalloc(sizeof (eib_vhub_update_t), KM_SLEEP); 676 mutex_init(&upd->up_lock, NULL, MUTEX_DRIVER, NULL); 677 678 mutex_enter(&vnic->vn_lock); 679 vnic->vn_vhub_table = tbl; 680 vnic->vn_vhub_update = upd; 681 mutex_exit(&vnic->vn_lock); 682 } 683 684 /*ARGSUSED*/ 685 void 686 eib_vnic_fini_tables(eib_t *ss, eib_vnic_t *vnic, boolean_t clobber) 687 { 688 eib_vhub_update_t *upd; 689 eib_vhub_table_t *tbl; 690 eib_vhub_map_t *elem; 691 eib_vhub_map_t *nxt; 692 int i; 693 694 /* 695 * We come here only when we've either completely detached from 696 * the vhub multicast groups and so cannot receive anymore table 697 * or update control messages, or we've had a recent vhub table 698 * construction failure and the vnic state is currently 699 * EIB_LOGIN_TBL_FAILED and so won't parse any table or update 700 * control messages. Also, since we haven't completed the vnic 701 * creation, no one from the tx path will be accessing the 702 * vn_vhub_table entries either. All said, we're free to play 703 * around with the vnic's vn_vhub_table and vn_vhub_update here. 
704 */ 705 706 mutex_enter(&vnic->vn_lock); 707 upd = vnic->vn_vhub_update; 708 tbl = vnic->vn_vhub_table; 709 if (clobber) { 710 vnic->vn_vhub_update = NULL; 711 vnic->vn_vhub_table = NULL; 712 } 713 mutex_exit(&vnic->vn_lock); 714 715 /* 716 * Destroy the vhub update entries if any 717 */ 718 if (upd) { 719 /* 720 * Wipe clean the list of vnic entries accumulated via 721 * vhub updates so far. Release eib_vhub_update_t only 722 * if explicitly asked to do so 723 */ 724 mutex_enter(&upd->up_lock); 725 for (elem = upd->up_vnic_entry; elem != NULL; elem = nxt) { 726 nxt = elem->mp_next; 727 kmem_free(elem, sizeof (eib_vhub_map_t)); 728 } 729 upd->up_vnic_entry = NULL; 730 upd->up_tusn = 0; 731 upd->up_eport_state = 0; 732 mutex_exit(&upd->up_lock); 733 734 if (clobber) { 735 mutex_destroy(&upd->up_lock); 736 kmem_free(upd, sizeof (eib_vhub_update_t)); 737 } 738 } 739 740 /* 741 * Destroy the vhub table entries 742 */ 743 if (tbl == NULL) 744 return; 745 746 /* 747 * Wipe clean the list of entries in the vhub table collected so 748 * far. Release eib_vhub_table_t only if explicitly asked to do so. 
749 */ 750 mutex_enter(&tbl->tb_lock); 751 752 if (tbl->tb_gateway) { 753 kmem_free(tbl->tb_gateway, sizeof (eib_vhub_map_t)); 754 tbl->tb_gateway = NULL; 755 } 756 757 if (tbl->tb_unicast_miss) { 758 kmem_free(tbl->tb_unicast_miss, sizeof (eib_vhub_map_t)); 759 tbl->tb_unicast_miss = NULL; 760 } 761 762 if (tbl->tb_vhub_multicast) { 763 kmem_free(tbl->tb_vhub_multicast, sizeof (eib_vhub_map_t)); 764 tbl->tb_vhub_multicast = NULL; 765 } 766 767 if (!eib_wa_no_mcast_entries) { 768 for (i = 0; i < EIB_TB_NBUCKETS; i++) { 769 for (elem = tbl->tb_mcast_entry[i]; elem != NULL; 770 elem = nxt) { 771 nxt = elem->mp_next; 772 kmem_free(elem, sizeof (eib_vhub_map_t)); 773 } 774 tbl->tb_mcast_entry[i] = NULL; 775 } 776 } 777 778 for (i = 0; i < EIB_TB_NBUCKETS; i++) { 779 for (elem = tbl->tb_vnic_entry[i]; elem != NULL; elem = nxt) { 780 nxt = elem->mp_next; 781 kmem_free(elem, sizeof (eib_vhub_map_t)); 782 } 783 tbl->tb_vnic_entry[i] = NULL; 784 } 785 786 tbl->tb_tusn = 0; 787 tbl->tb_eport_state = 0; 788 tbl->tb_entries_seen = 0; 789 tbl->tb_entries_in_table = 0; 790 tbl->tb_checksum = 0; 791 792 mutex_exit(&tbl->tb_lock); 793 794 /* 795 * Don't throw away space created for holding vhub table if we haven't 796 * been explicitly asked to do so 797 */ 798 if (clobber) { 799 mutex_destroy(&tbl->tb_lock); 800 kmem_free(tbl, sizeof (eib_vhub_table_t)); 801 } 802 } 803 804 eib_chan_t * 805 eib_vnic_get_data_chan(eib_t *ss, int vinst) 806 { 807 eib_vnic_t *vnic; 808 eib_chan_t *chan = NULL; 809 810 if (vinst >= 0 && vinst < EIB_MAX_VNICS) { 811 mutex_enter(&ss->ei_vnic_lock); 812 if ((vnic = ss->ei_vnic[vinst]) != NULL) 813 chan = vnic->vn_data_chan; 814 mutex_exit(&ss->ei_vnic_lock); 815 } 816 817 return (chan); 818 } 819 820 void 821 eib_vnic_need_new(eib_t *ss, uint8_t *mac, uint16_t vlan) 822 { 823 eib_vnic_req_t *vrq; 824 825 EIB_INCR_COUNTER(&ss->ei_stats->st_noxmitbuf); 826 827 /* 828 * Create a new vnic request for this {mac,vlan} tuple 829 */ 830 vrq = kmem_zalloc(sizeof 
(eib_vnic_req_t), KM_NOSLEEP); 831 if (vrq == NULL) { 832 EIB_DPRINTF_WARN(ss->ei_instance, "eib_vnic_need_new: " 833 "no memory, failed to queue new vnic creation request"); 834 return; 835 } 836 vrq->vr_next = NULL; 837 vrq->vr_req = EIB_CR_REQ_NEW_VNIC; 838 bcopy(mac, vrq->vr_mac, ETHERADDRL); 839 vrq->vr_vlan = vlan; 840 841 eib_vnic_enqueue_req(ss, vrq); 842 } 843 844 void 845 eib_vnic_enqueue_req(eib_t *ss, eib_vnic_req_t *vrq) 846 { 847 eib_vnic_req_t *elem = NULL; 848 uint8_t *m; 849 850 /* 851 * Enqueue this new vnic request with the vnic creator and 852 * signal it. 853 */ 854 m = vrq->vr_mac; 855 EIB_DPRINTF_DEBUG(ss->ei_instance, "eib_vnic_enqueue_req: " 856 "BEGIN file request for creation of %x:%x:%x:%x:%x:%x, 0x%x", 857 m[0], m[1], m[2], m[3], m[4], m[5], vrq->vr_vlan); 858 859 860 mutex_enter(&ss->ei_vnic_req_lock); 861 862 /* 863 * Death request has the highest priority. If we've already been asked 864 * to die, we don't entertain any more requests. 865 */ 866 if (ss->ei_vnic_req) { 867 if (ss->ei_vnic_req->vr_req == EIB_CR_REQ_DIE) { 868 mutex_exit(&ss->ei_vnic_req_lock); 869 kmem_free(vrq, sizeof (eib_vnic_req_t)); 870 return; 871 } 872 } 873 874 if (vrq->vr_req == EIB_CR_REQ_DIE || vrq->vr_req == EIB_CR_REQ_FLUSH) { 875 vrq->vr_next = ss->ei_vnic_req; 876 ss->ei_vnic_req = vrq; 877 } else { 878 /* 879 * If there's already a creation request for this vnic that's 880 * being processed, return immediately without adding a new 881 * request. 
882 */ 883 if ((elem = ss->ei_pending_vnic_req) != NULL) { 884 EIB_DPRINTF_DEBUG(ss->ei_instance, 885 "eib_vnic_enqueue_req: " 886 "ei_pending_vnic_req not NULL"); 887 888 if ((elem->vr_vlan == vrq->vr_vlan) && 889 (bcmp(elem->vr_mac, vrq->vr_mac, 890 ETHERADDRL) == 0)) { 891 EIB_DPRINTF_DEBUG(ss->ei_instance, 892 "eib_vnic_enqueue_req: " 893 "pending request already present for " 894 "%x:%x:%x:%x:%x:%x, 0x%x", m[0], m[1], m[2], 895 m[3], m[4], m[5], vrq->vr_vlan); 896 897 mutex_exit(&ss->ei_vnic_req_lock); 898 kmem_free(vrq, sizeof (eib_vnic_req_t)); 899 900 EIB_DPRINTF_DEBUG(ss->ei_instance, 901 "eib_vnic_enqueue_req: " 902 "END file request"); 903 return; 904 } 905 906 EIB_DPRINTF_DEBUG(ss->ei_instance, 907 "eib_vnic_enqueue_req: " 908 "NO pending request for %x:%x:%x:%x:%x:%x, 0x%x", 909 m[0], m[1], m[2], m[3], m[4], m[5], vrq->vr_vlan); 910 } 911 912 /* 913 * Or if there's one waiting in the queue for processing, do 914 * the same thing 915 */ 916 for (elem = ss->ei_vnic_req; elem; elem = elem->vr_next) { 917 /* 918 * If there's already a create request for this vnic 919 * waiting in the queue, return immediately 920 */ 921 if (elem->vr_req == EIB_CR_REQ_NEW_VNIC) { 922 if ((elem->vr_vlan == vrq->vr_vlan) && 923 (bcmp(elem->vr_mac, vrq->vr_mac, 924 ETHERADDRL) == 0)) { 925 926 EIB_DPRINTF_DEBUG(ss->ei_instance, 927 "eib_vnic_enqueue_req: " 928 "request already present for " 929 "%x:%x:%x:%x:%x:%x, 0x%x", m[0], 930 m[1], m[2], m[3], m[4], m[5], 931 vrq->vr_vlan); 932 933 mutex_exit(&ss->ei_vnic_req_lock); 934 kmem_free(vrq, sizeof (eib_vnic_req_t)); 935 936 EIB_DPRINTF_DEBUG(ss->ei_instance, 937 "eib_vnic_enqueue_req: " 938 "END file request"); 939 return; 940 } 941 } 942 943 if (elem->vr_next == NULL) { 944 EIB_DPRINTF_DEBUG(ss->ei_instance, 945 "eib_vnic_enqueue_req: " 946 "request not found, filing afresh"); 947 break; 948 } 949 } 950 951 /* 952 * Otherwise queue up this new creation request and signal the 953 * service thread. 
954 */ 955 if (elem) { 956 elem->vr_next = vrq; 957 } else { 958 ss->ei_vnic_req = vrq; 959 } 960 } 961 962 cv_signal(&ss->ei_vnic_req_cv); 963 mutex_exit(&ss->ei_vnic_req_lock); 964 965 EIB_DPRINTF_DEBUG(ss->ei_instance, 966 "eib_vnic_enqueue_req: END file request"); 967 } 968 969 void 970 eib_vnic_update_failed_macs(eib_t *ss, uint8_t *old_mac, uint16_t old_vlan, 971 uint8_t *new_mac, uint16_t new_vlan) 972 { 973 eib_vnic_req_t *vrq; 974 eib_vnic_req_t *elem; 975 eib_vnic_req_t *prev; 976 977 vrq = kmem_zalloc(sizeof (eib_vnic_req_t), KM_NOSLEEP); 978 if (vrq == NULL) { 979 EIB_DPRINTF_WARN(ss->ei_instance, 980 "eib_vnic_update_failed_macs: " 981 "no memory, failed to drop old mac"); 982 } else { 983 vrq->vr_next = NULL; 984 vrq->vr_req = 0; /* unused */ 985 bcopy(old_mac, vrq->vr_mac, ETHERADDRL); 986 vrq->vr_vlan = old_vlan; 987 } 988 989 mutex_enter(&ss->ei_vnic_req_lock); 990 991 /* 992 * We'll search the failed vnics list to see if the new {mac,vlan} 993 * tuple is in there and remove it if present (since the new address 994 * is no longer "failed"). 995 */ 996 prev = NULL; 997 for (elem = ss->ei_failed_vnic_req; elem; elem = elem->vr_next) { 998 if ((bcmp(elem->vr_mac, new_mac, ETHERADDRL) == 0) && 999 (elem->vr_vlan == new_vlan)) { 1000 if (prev) { 1001 prev->vr_next = elem->vr_next; 1002 } else { 1003 ss->ei_failed_vnic_req = elem->vr_next; 1004 } 1005 elem->vr_next = NULL; 1006 break; 1007 } 1008 } 1009 if (elem) { 1010 kmem_free(elem, sizeof (eib_vnic_req_t)); 1011 } 1012 1013 /* 1014 * We'll also insert the old {mac,vlan} tuple to the "failed vnic req" 1015 * list (it shouldn't be there already), to avoid trying to recreate 1016 * the vnic we just explicitly discarded. 
1017 */ 1018 if (vrq) { 1019 vrq->vr_next = ss->ei_failed_vnic_req; 1020 ss->ei_failed_vnic_req = vrq; 1021 } 1022 1023 mutex_exit(&ss->ei_vnic_req_lock); 1024 } 1025 1026 void 1027 eib_vnic_resurrect_zombies(eib_t *ss, uint8_t *vn0_mac) 1028 { 1029 int inst; 1030 1031 /* 1032 * We want to restart/relogin each vnic instance with the gateway, 1033 * but with the same vnic id and instance as before. 1034 */ 1035 while ((inst = EIB_FIND_LSB_SET(ss->ei_zombie_vnics)) != -1) { 1036 EIB_DPRINTF_DEBUG(ss->ei_instance, 1037 "eib_vnic_resurrect_zombies: " 1038 "calling eib_vnic_restart(vn_inst=%d)", inst); 1039 1040 eib_vnic_restart(ss, inst, vn0_mac); 1041 1042 EIB_DPRINTF_DEBUG(ss->ei_instance, 1043 "eib_vnic_resurrect_zombies: " 1044 "eib_vnic_restart(vn_inst=%d) done", inst); 1045 } 1046 } 1047 1048 void 1049 eib_vnic_restart(eib_t *ss, int inst, uint8_t *vn0_mac) 1050 { 1051 eib_vnic_t *vnic; 1052 eib_login_data_t *ld; 1053 uint8_t old_mac[ETHERADDRL]; 1054 int ret; 1055 int err; 1056 1057 if (inst < 0 || inst >= EIB_MAX_VNICS) { 1058 EIB_DPRINTF_WARN(ss->ei_instance, 1059 "eib_vnic_restart: " 1060 "vnic instance (%d) invalid", inst); 1061 return; 1062 } 1063 1064 eib_vnic_modify_enter(ss, EIB_VN_BEING_MODIFIED); 1065 if ((vnic = ss->ei_vnic[inst]) != NULL) { 1066 /* 1067 * Remember what mac was allocated for this vnic last time 1068 */ 1069 bcopy(vnic->vn_login_data.ld_assigned_mac, old_mac, ETHERADDRL); 1070 1071 /* 1072 * Tear down and restart this vnic instance 1073 */ 1074 eib_rb_vnic_create_common(ss, vnic, ~0); 1075 ret = eib_vnic_create_common(ss, vnic, &err); 1076 if (ret != EIB_E_SUCCESS) { 1077 EIB_DPRINTF_WARN(ss->ei_instance, 1078 "eib_vnic_restart: " 1079 "eib_vnic_create_common(vnic_inst=%d) failed, " 1080 "ret=%d", inst, err); 1081 } 1082 1083 /* 1084 * If this is vnic instance 0 and if our current assigned mac is 1085 * different from what was assigned last time, we need to pass 1086 * this information back to the caller, so the mac layer can be 1087 * 
appropriately informed. We will also queue up the old mac 1088 * and vlan in the "failed vnic req" list, so any future packets 1089 * to this address on this interface will be dropped. 1090 */ 1091 ld = &vnic->vn_login_data; 1092 if ((inst == 0) && 1093 (bcmp(ld->ld_assigned_mac, old_mac, ETHERADDRL) != 0)) { 1094 uint8_t *m = ld->ld_assigned_mac; 1095 1096 if (vn0_mac != NULL) { 1097 bcopy(ld->ld_assigned_mac, vn0_mac, 1098 ETHERADDRL); 1099 } 1100 1101 EIB_DPRINTF_VERBOSE(ss->ei_instance, 1102 "eib_vnic_restart: updating failed macs list " 1103 "old=%x:%x:%x:%x:%x:%x, new=%x:%x:%x:%x:%x:%x, " 1104 "vlan=0x%x", old_mac[0], old_mac[1], old_mac[2], 1105 old_mac[3], old_mac[4], old_mac[5], m[0], m[1], 1106 m[2], m[3], m[4], m[5], vnic->vn_vlan); 1107 1108 eib_vnic_update_failed_macs(ss, old_mac, vnic->vn_vlan, 1109 ld->ld_assigned_mac, vnic->vn_vlan); 1110 } 1111 1112 /* 1113 * No longer a zombie or need to rejoin mcgs 1114 */ 1115 mutex_enter(&ss->ei_vnic_lock); 1116 ss->ei_zombie_vnics &= (~((uint64_t)1 << inst)); 1117 ss->ei_rejoin_vnics &= (~((uint64_t)1 << inst)); 1118 mutex_exit(&ss->ei_vnic_lock); 1119 } 1120 eib_vnic_modify_exit(ss, EIB_VN_BEING_MODIFIED); 1121 } 1122 1123 void 1124 eib_vnic_rejoin_mcgs(eib_t *ss) 1125 { 1126 eib_vnic_t *vnic; 1127 int inst; 1128 1129 /* 1130 * For each vnic that still requires re-join, go through the 1131 * control channels and data channel and reattach/rejoin mcgs. 
1132 */ 1133 mutex_enter(&ss->ei_vnic_lock); 1134 while ((inst = EIB_FIND_LSB_SET(ss->ei_rejoin_vnics)) != -1) { 1135 if ((vnic = ss->ei_vnic[inst]) != NULL) { 1136 eib_vnic_reattach_ctl_mcgs(ss, vnic); 1137 eib_vnic_rejoin_data_mcgs(ss, vnic); 1138 } 1139 ss->ei_rejoin_vnics &= (~((uint64_t)1 << inst)); 1140 } 1141 mutex_exit(&ss->ei_vnic_lock); 1142 } 1143 1144 void 1145 eib_rb_vnic_create(eib_t *ss, eib_vnic_t *vnic, uint_t progress) 1146 { 1147 if (progress & EIB_VNIC_CREATE_COMMON_DONE) { 1148 eib_rb_vnic_create_common(ss, vnic, ~0); 1149 } 1150 1151 if (progress & EIB_VNIC_GOT_INSTANCE) { 1152 eib_vnic_ret_instance(ss, vnic->vn_instance); 1153 vnic->vn_instance = -1; 1154 } 1155 1156 if (progress & EIB_VNIC_STRUCT_ALLOCD) { 1157 cv_destroy(&vnic->vn_cv); 1158 mutex_destroy(&vnic->vn_lock); 1159 kmem_free(vnic, sizeof (eib_vnic_t)); 1160 } 1161 } 1162 1163 /* 1164 * Currently, we only allow 64 vnics per eoib device instance, for 1165 * reasons described in eib.h (see EIB_VNIC_ID() definition), so we 1166 * could use a simple bitmap to assign the vnic instance numbers. 1167 * Once we start allowing more vnics per device instance, this 1168 * allocation scheme will need to be changed. 1169 */ 1170 static int 1171 eib_vnic_get_instance(eib_t *ss, int *vinst) 1172 { 1173 int bitpos; 1174 uint64_t nval; 1175 1176 mutex_enter(&ss->ei_vnic_lock); 1177 1178 /* 1179 * What we have is the active vnics list -- the in-use vnics are 1180 * indicated by a 1 in the bit position, and the free ones are 1181 * indicated by 0. We need to find the least significant '0' bit 1182 * to get the first free vnic instance. Or we could bit-reverse 1183 * the active list and locate the least significant '1'. 
1184 */ 1185 nval = ~(ss->ei_active_vnics); 1186 if (nval == 0) 1187 return (EIB_E_FAILURE); 1188 1189 /* 1190 * The single bit-position values in a 64-bit integer are relatively 1191 * prime with 67, so performing a modulus division with 67 guarantees 1192 * a unique number between 0 and 63 for each value (setbit_mod67[]). 1193 */ 1194 bitpos = EIB_FIND_LSB_SET(nval); 1195 if (bitpos == -1) 1196 return (EIB_E_FAILURE); 1197 1198 ss->ei_active_vnics |= ((uint64_t)1 << bitpos); 1199 *vinst = bitpos; 1200 1201 mutex_exit(&ss->ei_vnic_lock); 1202 1203 return (EIB_E_SUCCESS); 1204 } 1205 1206 static void 1207 eib_vnic_ret_instance(eib_t *ss, int vinst) 1208 { 1209 mutex_enter(&ss->ei_vnic_lock); 1210 1211 if (vinst >= EIB_MAX_VNICS) { 1212 EIB_DPRINTF_WARN(ss->ei_instance, 1213 "eib_vnic_ret_instance: " 1214 "vnic instance (%d) invalid", vinst); 1215 } else if ((ss->ei_active_vnics & ((uint64_t)1 << vinst)) == 0) { 1216 EIB_DPRINTF_WARN(ss->ei_instance, 1217 "eib_vnic_ret_instance: " 1218 "vnic instance (%d) not active!", vinst); 1219 } else { 1220 ss->ei_active_vnics &= (~((uint64_t)1 << vinst)); 1221 } 1222 1223 mutex_exit(&ss->ei_vnic_lock); 1224 } 1225 1226 static void 1227 eib_vnic_modify_enter(eib_t *ss, uint_t op) 1228 { 1229 mutex_enter(&ss->ei_vnic_lock); 1230 while (ss->ei_vnic_state & EIB_VN_BEING_MODIFIED) 1231 cv_wait(&ss->ei_vnic_cv, &ss->ei_vnic_lock); 1232 1233 ss->ei_vnic_state |= op; 1234 mutex_exit(&ss->ei_vnic_lock); 1235 } 1236 1237 static void 1238 eib_vnic_modify_exit(eib_t *ss, uint_t op) 1239 { 1240 mutex_enter(&ss->ei_vnic_lock); 1241 ss->ei_vnic_state &= (~op); 1242 cv_broadcast(&ss->ei_vnic_cv); 1243 mutex_exit(&ss->ei_vnic_lock); 1244 } 1245 1246 static int 1247 eib_vnic_create_common(eib_t *ss, eib_vnic_t *vnic, int *err) 1248 { 1249 uint_t progress = 0; 1250 1251 /* 1252 * When we receive login acks within this vnic creation 1253 * routine we need a way to retrieve the vnic structure 1254 * from the vnic instance, so store this somewhere. 
Note 1255 * that there can be only one outstanding vnic creation 1256 * at any point of time, so we only need one vnic struct. 1257 */ 1258 mutex_enter(&ss->ei_vnic_lock); 1259 ASSERT(ss->ei_vnic_pending == NULL); 1260 ss->ei_vnic_pending = vnic; 1261 mutex_exit(&ss->ei_vnic_lock); 1262 1263 /* 1264 * Create a control qp for this vnic 1265 */ 1266 if (eib_ctl_create_qp(ss, vnic, err) != EIB_E_SUCCESS) { 1267 EIB_DPRINTF_WARN(ss->ei_instance, 1268 "eib_vnic_create_common: " 1269 "eib_ctl_create_qp(vn_id=0x%x) failed, ret=%d", 1270 vnic->vn_id, *err); 1271 goto vnic_create_common_fail; 1272 } 1273 progress |= EIB_VNIC_CTLQP_CREATED; 1274 1275 /* 1276 * Create a data qp for this vnic 1277 */ 1278 if (eib_data_create_qp(ss, vnic, err) != EIB_E_SUCCESS) { 1279 EIB_DPRINTF_WARN(ss->ei_instance, 1280 "eib_vnic_create_common: " 1281 "eib_data_create_qp(vn_id=0x%x) failed, ret=%d", 1282 vnic->vn_id, *err); 1283 goto vnic_create_common_fail; 1284 } 1285 progress |= EIB_VNIC_DATAQP_CREATED; 1286 1287 /* 1288 * Login to the gateway with this vnic's parameters 1289 */ 1290 if (eib_fip_login(ss, vnic, err) != EIB_E_SUCCESS) { 1291 EIB_DPRINTF_WARN(ss->ei_instance, 1292 "eib_vnic_create_common: " 1293 "eib_fip_login(vn_id=0x%x) failed, ret=%d", 1294 vnic->vn_id, *err); 1295 goto vnic_create_common_fail; 1296 } 1297 progress |= EIB_VNIC_LOGIN_DONE; 1298 1299 /* 1300 * Associate the control and data qps for the vnic with the 1301 * vHUB partition 1302 */ 1303 if (eib_vnic_set_partition(ss, vnic, err) != EIB_E_SUCCESS) { 1304 EIB_DPRINTF_WARN(ss->ei_instance, 1305 "eib_vnic_create_common: " 1306 "eib_vnic_set_partition(vn_id=0x%x) failed, ret=%d", 1307 vnic->vn_id, *err); 1308 goto vnic_create_common_fail; 1309 } 1310 progress |= EIB_VNIC_PARTITION_SET; 1311 1312 /* 1313 * Post initial set of rx buffers on the control qp to the HCA 1314 */ 1315 if (eib_chan_post_rx(ss, vnic->vn_ctl_chan, NULL) != EIB_E_SUCCESS) { 1316 EIB_DPRINTF_WARN(ss->ei_instance, 1317 "eib_vnic_create_common: " 
1318 "eib_chan_post_rx(vn_id=0x%x, CTL_QP) failed, ret=%d", 1319 vnic->vn_id, *err); 1320 1321 *err = ENOMEM; 1322 goto vnic_create_common_fail; 1323 } 1324 progress |= EIB_VNIC_RX_POSTED_TO_CTLQP; 1325 1326 /* 1327 * Post initial set of rx buffers on the data qp to the HCA 1328 */ 1329 if (eib_chan_post_rx(ss, vnic->vn_data_chan, NULL) != EIB_E_SUCCESS) { 1330 EIB_DPRINTF_WARN(ss->ei_instance, 1331 "eib_vnic_create_common: " 1332 "eib_chan_post_rx(vn_id=0x%x, DATA_QP) failed, ret=%d", 1333 vnic->vn_id, *err); 1334 1335 *err = ENOMEM; 1336 goto vnic_create_common_fail; 1337 } 1338 progress |= EIB_VNIC_RX_POSTED_TO_DATAQP; 1339 1340 /* 1341 * Attach to the vHUB table and vHUB update multicast groups 1342 */ 1343 if (eib_vnic_attach_ctl_mcgs(ss, vnic, err) != EIB_E_SUCCESS) { 1344 EIB_DPRINTF_WARN(ss->ei_instance, 1345 "eib_vnic_create_common: " 1346 "eib_vnic_attach_ctl_mcgs(vn_id=0x%x) failed, ret=%d", 1347 vnic->vn_id, *err); 1348 goto vnic_create_common_fail; 1349 } 1350 progress |= EIB_VNIC_ATTACHED_TO_CTL_MCGS; 1351 1352 /* 1353 * Send the vHUB table request and construct the vhub table 1354 */ 1355 if (eib_fip_vhub_table(ss, vnic, err) != EIB_E_SUCCESS) { 1356 EIB_DPRINTF_WARN(ss->ei_instance, 1357 "eib_vnic_create_common: " 1358 "eib_fip_vhub_table(vn_id=0x%x) failed, ret=%d", 1359 vnic->vn_id, *err); 1360 goto vnic_create_common_fail; 1361 } 1362 progress |= EIB_VNIC_GOT_VHUB_TABLE; 1363 1364 /* 1365 * Detach from the vHUB table mcg (we no longer need the vHUB 1366 * table messages) and start the keepalives for this vnic. 1367 */ 1368 eib_vnic_start_keepalives(ss, vnic); 1369 eib_rb_vnic_attach_vhub_table(ss, vnic); 1370 1371 progress |= EIB_VNIC_KEEPALIVES_STARTED; 1372 1373 /* 1374 * All ethernet vnics are automatically members of the broadcast 1375 * group for the vlan they are participating in, so join the 1376 * ethernet broadcast group. Note that when we restart vnics, 1377 * we rejoin the mcgs, so we pass B_TRUE to eib_vnic_join_data_mcg(). 
1378 */ 1379 if (eib_vnic_join_data_mcg(ss, vnic, eib_broadcast_mac, B_TRUE, 1380 err) != EIB_E_SUCCESS) { 1381 EIB_DPRINTF_WARN(ss->ei_instance, 1382 "eib_vnic_create_common: " 1383 "eib_vnic_join_data_mcg(vn_id=0x%x, BCAST_GROUP) failed, " 1384 "ret=%d", vnic->vn_id, *err); 1385 goto vnic_create_common_fail; 1386 } 1387 progress |= EIB_VNIC_BROADCAST_JOINED; 1388 1389 mutex_enter(&ss->ei_vnic_lock); 1390 if (ss->ei_vnic[vnic->vn_instance] == NULL) { 1391 ss->ei_vnic[vnic->vn_instance] = vnic; 1392 } 1393 ss->ei_vnic_pending = NULL; 1394 mutex_exit(&ss->ei_vnic_lock); 1395 1396 return (EIB_E_SUCCESS); 1397 1398 vnic_create_common_fail: 1399 eib_rb_vnic_create_common(ss, vnic, progress); 1400 return (EIB_E_FAILURE); 1401 } 1402 1403 static int 1404 eib_vnic_set_partition(eib_t *ss, eib_vnic_t *vnic, int *err) 1405 { 1406 int ret; 1407 1408 /* 1409 * Associate the control channel with the vhub partition 1410 */ 1411 ret = eib_ibt_modify_chan_pkey(ss, vnic->vn_ctl_chan, 1412 vnic->vn_login_data.ld_vhub_pkey); 1413 if (ret != EIB_E_SUCCESS) { 1414 EIB_DPRINTF_WARN(ss->ei_instance, 1415 "eib_vnic_set_partition: " 1416 "eib_ibt_modify_chan_pkey(vn_id=0x%x, CTL_CHAN, " 1417 "vhub_pkey=0x%x) failed", vnic->vn_id, 1418 vnic->vn_login_data.ld_vhub_pkey); 1419 *err = EINVAL; 1420 return (EIB_E_FAILURE); 1421 } 1422 1423 /* 1424 * Now, do the same thing for the data channel. Note that if a 1425 * failure happens, the channel state(s) are left as-is, since 1426 * it is pointless to try to change them back using the same 1427 * interfaces that have just failed. 
1428 */ 1429 ret = eib_ibt_modify_chan_pkey(ss, vnic->vn_data_chan, 1430 vnic->vn_login_data.ld_vhub_pkey); 1431 if (ret != EIB_E_SUCCESS) { 1432 EIB_DPRINTF_WARN(ss->ei_instance, 1433 "eib_vnic_set_partition: " 1434 "eib_ibt_modify_chan_pkey(vn_id=0x%x, DATA_CHAN, " 1435 "vhub_pkey=0x%x) failed", vnic->vn_id, 1436 vnic->vn_login_data.ld_vhub_pkey); 1437 *err = EINVAL; 1438 return (EIB_E_FAILURE); 1439 } 1440 1441 return (EIB_E_SUCCESS); 1442 } 1443 1444 static void 1445 eib_vnic_make_vhub_mgid(uint8_t *mg_prefix, uint8_t mg_type, 1446 uint8_t *mcast_mac, uint8_t n_mac, uint8_t rss_hash, uint32_t vhub_id, 1447 ib_gid_t *mgid) 1448 { 1449 eib_mgid_t em; 1450 uint64_t dmac_mask; 1451 uint64_t dmac = 0; 1452 uint8_t *dmac_str = (uint8_t *)&dmac; 1453 uint_t vhub_id_nw; 1454 uint8_t *vhub_id_str = (uint8_t *)&vhub_id_nw; 1455 1456 /* 1457 * Copy mgid prefix and type 1458 */ 1459 bcopy(mg_prefix, em.gd_spec.sp_mgid_prefix, FIP_MGID_PREFIX_LEN); 1460 em.gd_spec.sp_type = mg_type; 1461 1462 /* 1463 * Take n_mac bits from mcast_mac and copy dmac 1464 */ 1465 bcopy(mcast_mac, dmac_str + 2, ETHERADDRL); 1466 dmac_mask = ((uint64_t)1 << n_mac) - 1; 1467 dmac_mask = htonll(dmac_mask); 1468 dmac &= dmac_mask; 1469 bcopy(dmac_str + 2, em.gd_spec.sp_dmac, ETHERADDRL); 1470 1471 /* 1472 * Copy rss hash and prepare vhub id from gw port id and vlan 1473 */ 1474 em.gd_spec.sp_rss_hash = rss_hash; 1475 1476 vhub_id_nw = htonl(vhub_id); 1477 bcopy(vhub_id_str + 1, em.gd_spec.sp_vhub_id, FIP_VHUBID_LEN); 1478 1479 /* 1480 * Ok, now we've assembled the mgid as per EoIB spec. We now have to 1481 * represent it in the way Solaris IBTF wants it and return (sigh). 
	 */
	mgid->gid_prefix = ntohll(em.gd_sol.gid_prefix);
	mgid->gid_guid = ntohll(em.gd_sol.gid_guid);
}

/*
 * Attach the vnic's control qp to the vHUB update and vHUB table mcgs.
 * The vhub table/update structures are initialized first, since the
 * control cq handler expects them once the attach is done.  On failure
 * everything set up here is undone before returning with *err = EINVAL.
 */
static int
eib_vnic_attach_ctl_mcgs(eib_t *ss, eib_vnic_t *vnic, int *err)
{
	/*
	 * Get tb_vhub_table and tb_vhub_update allocated and ready before
	 * attaching to the vhub table and vhub update mcgs
	 */
	eib_vnic_init_tables(ss, vnic);

	if (eib_vnic_attach_vhub_update(ss, vnic) != EIB_E_SUCCESS) {
		EIB_DPRINTF_WARN(ss->ei_instance,
		    "eib_vnic_attach_ctl_mcgs: "
		    "eib_vnic_attach_vhub_update(vn_id=0x%x) failed",
		    vnic->vn_id);

		*err = EINVAL;
		eib_vnic_fini_tables(ss, vnic, B_TRUE);
		return (EIB_E_FAILURE);
	}

	if (eib_vnic_attach_vhub_table(ss, vnic) != EIB_E_SUCCESS) {
		EIB_DPRINTF_WARN(ss->ei_instance,
		    "eib_vnic_attach_ctl_mcgs: "
		    "eib_vnic_attach_vhub_table(vn_id=0x%x) failed",
		    vnic->vn_id);

		*err = EINVAL;
		/* Undo the update-mcg attach before tearing down tables */
		eib_rb_vnic_attach_vhub_update(ss, vnic);
		eib_vnic_fini_tables(ss, vnic, B_TRUE);
		return (EIB_E_FAILURE);
	}

	return (EIB_E_SUCCESS);
}

/*
 * Query the SA for the vHUB table mcg, join it and attach the control
 * qp to it so we receive the gateway's vhub table messages.  On
 * success the joined mcg (with its mcginfo from ibt_query_mcg) is
 * recorded in ch_vhub_table under ch_vhub_lock.
 *
 * NOTE(review): this is nearly identical to
 * eib_vnic_attach_vhub_update() below, differing only in the mgid type
 * and the field the result is stored in; a common helper could fold
 * the two together.
 */
static int
eib_vnic_attach_vhub_table(eib_t *ss, eib_vnic_t *vnic)
{
	eib_chan_t *chan = vnic->vn_ctl_chan;
	eib_login_data_t *ld = &vnic->vn_login_data;
	eib_mcg_t *mcg;
	ibt_mcg_info_t *tbl_mcginfo;
	ibt_mcg_attr_t mcg_attr;
	ibt_status_t ret;
	uint_t entries;

	/*
	 * Compose the MGID for receiving VHUB table
	 */
	bzero(&mcg_attr, sizeof (ibt_mcg_attr_t));

	eib_vnic_make_vhub_mgid(ld->ld_gw_mgid_prefix,
	    (uint8_t)EIB_MGID_VHUB_TABLE, eib_broadcast_mac, ld->ld_n_mac_mcgid,
	    0, ld->ld_vhub_id, &(mcg_attr.mc_mgid));
	mcg_attr.mc_pkey = (ib_pkey_t)ld->ld_vhub_pkey;
	mcg_attr.mc_qkey = (ib_qkey_t)EIB_FIP_QKEY;

	/*
	 * Locate the multicast group for receiving vhub table
	 */
	ret = ibt_query_mcg(ss->ei_props->ep_sgid, &mcg_attr, 1,
	    &tbl_mcginfo, &entries);
	if (ret != IBT_SUCCESS) {
		EIB_DPRINTF_WARN(ss->ei_instance,
		    "eib_vnic_attach_vhub_table: "
		    "ibt_query_mcg(mgid=%llx.%llx, pkey=0x%x) failed, "
		    "ret=%d", mcg_attr.mc_mgid.gid_prefix,
		    mcg_attr.mc_mgid.gid_guid, mcg_attr.mc_pkey, ret);
		return (EIB_E_FAILURE);
	}

	/*
	 * Allocate for and prepare the mcg to add to our list
	 */
	mcg = kmem_zalloc(sizeof (eib_mcg_t), KM_NOSLEEP);
	if (mcg == NULL) {
		EIB_DPRINTF_WARN(ss->ei_instance,
		    "eib_vnic_attach_vhub_table: "
		    "no memory, failed to attach to vhub table "
		    "(mgid=%llx.%llx, pkey=0x%x)", mcg_attr.mc_mgid.gid_prefix,
		    mcg_attr.mc_mgid.gid_guid, mcg_attr.mc_pkey);
		ibt_free_mcg_info(tbl_mcginfo, 1);
		return (EIB_E_FAILURE);
	}

	mcg->mg_next = NULL;
	mcg->mg_rgid = ss->ei_props->ep_sgid;
	mcg->mg_mgid = mcg_attr.mc_mgid;
	mcg->mg_join_state = IB_MC_JSTATE_FULL;
	mcg->mg_mcginfo = tbl_mcginfo;
	bcopy(eib_broadcast_mac, mcg->mg_mac, ETHERADDRL);

	/*
	 * Join the multicast group, reusing the flow/tclass/sl values
	 * the SA returned in the query above.
	 */
	mcg_attr.mc_join_state = mcg->mg_join_state;
	mcg_attr.mc_flow = tbl_mcginfo->mc_adds_vect.av_flow;
	mcg_attr.mc_tclass = tbl_mcginfo->mc_adds_vect.av_tclass;
	mcg_attr.mc_sl = tbl_mcginfo->mc_adds_vect.av_srvl;
	mcg_attr.mc_scope = 0;	/* IB_MC_SCOPE_SUBNET_LOCAL perhaps ? */

	ret = ibt_join_mcg(mcg->mg_rgid, &mcg_attr, tbl_mcginfo, NULL, NULL);
	if (ret != IBT_SUCCESS) {
		EIB_DPRINTF_WARN(ss->ei_instance,
		    "eib_vnic_attach_vhub_table: "
		    "ibt_join_mcg(mgid=%llx.%llx, pkey=0x%x, jstate=0x%x) "
		    "failed, ret=%d", mcg_attr.mc_mgid.gid_prefix,
		    mcg_attr.mc_mgid.gid_guid, mcg_attr.mc_pkey,
		    mcg_attr.mc_join_state, ret);

		kmem_free(mcg, sizeof (eib_mcg_t));
		ibt_free_mcg_info(tbl_mcginfo, 1);
		return (EIB_E_FAILURE);
	}

	/*
	 * Attach to the multicast group to receive tbl multicasts
	 */
	ret = ibt_attach_mcg(chan->ch_chan, tbl_mcginfo);
	if (ret != IBT_SUCCESS) {
		EIB_DPRINTF_WARN(ss->ei_instance,
		    "eib_vnic_attach_vhub_table: "
		    "ibt_attach_mcg(mgid=%llx.%llx, pkey=0x%x) "
		    "failed, ret=%d", mcg_attr.mc_mgid.gid_prefix,
		    mcg_attr.mc_mgid.gid_guid, mcg_attr.mc_pkey);

		/* Undo the join before freeing the mcg state */
		(void) ibt_leave_mcg(mcg->mg_rgid, mcg->mg_mgid,
		    eib_reserved_gid, mcg->mg_join_state);
		kmem_free(mcg, sizeof (eib_mcg_t));
		ibt_free_mcg_info(tbl_mcginfo, 1);
		return (EIB_E_FAILURE);
	}

	mutex_enter(&chan->ch_vhub_lock);
	chan->ch_vhub_table = mcg;
	mutex_exit(&chan->ch_vhub_lock);

	return (EIB_E_SUCCESS);
}

/*
 * Query the SA for the vHUB update mcg, join it and attach the control
 * qp to it so we receive the gateway's incremental vhub updates.  On
 * success the joined mcg is recorded in ch_vhub_update under
 * ch_vhub_lock.  (See the near-duplicate eib_vnic_attach_vhub_table()
 * above.)
 */
static int
eib_vnic_attach_vhub_update(eib_t *ss, eib_vnic_t *vnic)
{
	eib_chan_t *chan = vnic->vn_ctl_chan;
	eib_login_data_t *ld = &vnic->vn_login_data;
	eib_mcg_t *mcg;
	ibt_mcg_info_t *upd_mcginfo;
	ibt_mcg_attr_t mcg_attr;
	ibt_status_t ret;
	uint_t entries;

	/*
	 * Compose the MGID for receiving VHUB updates
	 */
	bzero(&mcg_attr, sizeof (ibt_mcg_attr_t));

	eib_vnic_make_vhub_mgid(ld->ld_gw_mgid_prefix,
	    (uint8_t)EIB_MGID_VHUB_UPDATE, eib_broadcast_mac,
	    ld->ld_n_mac_mcgid, 0, ld->ld_vhub_id, &(mcg_attr.mc_mgid));
	mcg_attr.mc_pkey = (ib_pkey_t)ld->ld_vhub_pkey;
	mcg_attr.mc_qkey = (ib_qkey_t)EIB_FIP_QKEY;

	/*
	 * Locate the multicast group for receiving vhub updates
	 */
	ret = ibt_query_mcg(ss->ei_props->ep_sgid, &mcg_attr, 1,
	    &upd_mcginfo, &entries);
	if (ret != IBT_SUCCESS) {
		EIB_DPRINTF_WARN(ss->ei_instance,
		    "eib_vnic_attach_vhub_update: "
		    "ibt_query_mcg(mgid=%llx.%llx, pkey=0x%x) failed, "
		    "ret=%d", mcg_attr.mc_mgid.gid_prefix,
		    mcg_attr.mc_mgid.gid_guid, mcg_attr.mc_pkey, ret);
		return (EIB_E_FAILURE);
	}

	/*
	 * Allocate for and prepare the mcg to add to our list
	 */
	mcg = kmem_zalloc(sizeof (eib_mcg_t), KM_NOSLEEP);
	if (mcg == NULL) {
		EIB_DPRINTF_WARN(ss->ei_instance,
		    "eib_vnic_attach_vhub_update: "
		    "no memory, failed to attach to vhub update "
		    "(mgid=%llx.%llx, pkey=0x%x)", mcg_attr.mc_mgid.gid_prefix,
		    mcg_attr.mc_mgid.gid_guid, mcg_attr.mc_pkey);

		ibt_free_mcg_info(upd_mcginfo, 1);
		return (EIB_E_FAILURE);
	}

	mcg->mg_next = NULL;
	mcg->mg_rgid = ss->ei_props->ep_sgid;
	mcg->mg_mgid = mcg_attr.mc_mgid;
	mcg->mg_join_state = IB_MC_JSTATE_FULL;
	mcg->mg_mcginfo = upd_mcginfo;
	bcopy(eib_broadcast_mac, mcg->mg_mac, ETHERADDRL);

	/*
	 * Join the multicast group, reusing the flow/tclass/sl values
	 * the SA returned in the query above.
	 */
	mcg_attr.mc_join_state = mcg->mg_join_state;
	mcg_attr.mc_flow = upd_mcginfo->mc_adds_vect.av_flow;
	mcg_attr.mc_tclass = upd_mcginfo->mc_adds_vect.av_tclass;
	mcg_attr.mc_sl = upd_mcginfo->mc_adds_vect.av_srvl;
	mcg_attr.mc_scope = 0;	/* IB_MC_SCOPE_SUBNET_LOCAL perhaps ?
	 */

	ret = ibt_join_mcg(mcg->mg_rgid, &mcg_attr, upd_mcginfo, NULL, NULL);
	if (ret != IBT_SUCCESS) {
		EIB_DPRINTF_WARN(ss->ei_instance,
		    "eib_vnic_attach_vhub_update: "
		    "ibt_join_mcg(mgid=%llx.%llx, pkey=0x%x, jstate=0x%x) "
		    "failed, ret=%d", mcg_attr.mc_mgid.gid_prefix,
		    mcg_attr.mc_mgid.gid_guid, mcg_attr.mc_pkey,
		    mcg_attr.mc_join_state, ret);

		kmem_free(mcg, sizeof (eib_mcg_t));
		ibt_free_mcg_info(upd_mcginfo, 1);
		return (EIB_E_FAILURE);
	}

	/*
	 * Attach to the multicast group to receive upd multicasts
	 */
	ret = ibt_attach_mcg(chan->ch_chan, upd_mcginfo);
	if (ret != IBT_SUCCESS) {
		EIB_DPRINTF_WARN(ss->ei_instance,
		    "eib_vnic_attach_vhub_update: "
		    "ibt_attach_mcg(mgid=%llx.%llx, pkey=0x%x) "
		    "failed, ret=%d", mcg_attr.mc_mgid.gid_prefix,
		    mcg_attr.mc_mgid.gid_guid, mcg_attr.mc_pkey);

		/* Undo the join before freeing the mcg state */
		(void) ibt_leave_mcg(mcg->mg_rgid, mcg->mg_mgid,
		    eib_reserved_gid, mcg->mg_join_state);
		kmem_free(mcg, sizeof (eib_mcg_t));
		ibt_free_mcg_info(upd_mcginfo, 1);
		return (EIB_E_FAILURE);
	}

	mutex_enter(&chan->ch_vhub_lock);
	chan->ch_vhub_update = mcg;
	mutex_exit(&chan->ch_vhub_lock);

	return (EIB_E_SUCCESS);
}

/*
 * Send the first keepalive (heartbeat) for this vnic and queue it at
 * the tail of the keepalives manager's list so periodic heartbeats
 * continue from there.  The heartbeat's return value is deliberately
 * ignored; missing one keepalive is not fatal.
 */
static void
eib_vnic_start_keepalives(eib_t *ss, eib_vnic_t *vnic)
{
	eib_ka_vnics_t *kav;
	eib_ka_vnics_t *elem;
	int err;

	kav = kmem_zalloc(sizeof (eib_ka_vnics_t), KM_SLEEP);
	kav->ka_vnic = vnic;
	kav->ka_next = NULL;

	/*
	 * Send the first keepalive and then queue this vnic up with
	 * the keepalives manager
	 */
	(void) eib_fip_heartbeat(ss, vnic, &err);

	mutex_enter(&ss->ei_ka_vnics_lock);
	/* Walk to the last element so kav is appended at the tail */
	for (elem = ss->ei_ka_vnics; elem; elem = elem->ka_next) {
		if (elem->ka_next == NULL)
			break;
	}
	if (elem) {
		elem->ka_next = kav;
	} else {
		/* List was empty; kav becomes the head */
		ss->ei_ka_vnics = kav;
	}

	mutex_exit(&ss->ei_ka_vnics_lock);
}

/*
 * Resolve a destination mac/vlan for transmit.  For a unicast dmac,
 * copy the matching vhub table entry (or the gateway entry as the
 * fallback) into 'ucast' and set *dtype to EIB_TX_UNICAST.  Otherwise
 * set *dtype to EIB_TX_BROADCAST or EIB_TX_MULTICAST and copy the
 * mcginfo of the joined (broadcast) data mcg into 'mcast'.  Returns
 * EIB_E_FAILURE if no mapping can be found.
 */
/*ARGSUSED*/
static int
eib_vnic_lookup_dest(eib_vnic_t *vnic, uint8_t *dmac, uint16_t vlan,
    eib_vhub_map_t *ucast, ibt_mcg_info_t *mcast, int *dtype)
{
	eib_t *ss = vnic->vn_ss;
	eib_vhub_map_t *elem;
	eib_mcg_t *mcg;
	eib_chan_t *chan = vnic->vn_data_chan;
	eib_login_data_t *ld = &vnic->vn_login_data;
	eib_vhub_map_t *gw;
	eib_vhub_table_t *tbl;
	/* Hash bucket is keyed off the last byte of the dmac */
	uint8_t bkt = (dmac[ETHERADDRL-1]) % EIB_TB_NBUCKETS;
	ib_gid_t mgid;

	/*
	 * If this was a unicast dmac, locate the vhub entry matching the
	 * unicast dmac in our vhub table. If it's not found, return the
	 * gateway entry
	 */
	if (EIB_UNICAST_MAC(dmac)) {

		mutex_enter(&vnic->vn_lock);
		if ((tbl = vnic->vn_vhub_table) == NULL) {
			mutex_exit(&vnic->vn_lock);
			return (EIB_E_FAILURE);
		}

		mutex_enter(&tbl->tb_lock);
		gw = tbl->tb_gateway;
		for (elem = tbl->tb_vnic_entry[bkt]; elem != NULL;
		    elem = elem->mp_next) {
			if (bcmp(elem->mp_mac, dmac, ETHERADDRL) == 0)
				break;
		}
		mutex_exit(&tbl->tb_lock);

		if ((elem == NULL) && (gw == NULL)) {
			mutex_exit(&vnic->vn_lock);
			return (EIB_E_FAILURE);
		}

		/*
		 * NOTE(review): elem/gw point into the table after
		 * tb_lock has been dropped; the bcopy below is done
		 * under vn_lock only -- presumably entries are not
		 * freed while vn_vhub_table is held stable under
		 * vn_lock; confirm against the table update path.
		 */
		*dtype = EIB_TX_UNICAST;
		if (elem) {
			bcopy(elem, ucast, sizeof (eib_vhub_map_t));
		} else {
			bcopy(gw, ucast, sizeof (eib_vhub_map_t));
		}
		mutex_exit(&vnic->vn_lock);

		return (EIB_E_SUCCESS);
	}

	/*
	 * Is it a broadcast ?
	 */
	*dtype = (bcmp(dmac, eib_broadcast_mac, ETHERADDRL) == 0) ?
	    EIB_TX_BROADCAST : EIB_TX_MULTICAST;

	/*
	 * If this was a multicast dmac, prepare the mgid and look for it
	 * in the list of mcgs we've joined and use the address vector from
	 * the mcginfo stored there.
	 *
	 * Note that since we don't have a way to associate each vlan with
	 * the mcg (see eib_m_multicast()), we'll prepare the mgid to use
	 * the broadcast channel all the time.
	 */
	eib_vnic_make_vhub_mgid(ld->ld_gw_mgid_prefix,
	    (uint8_t)EIB_MGID_VHUB_DATA, eib_broadcast_mac, ld->ld_n_mac_mcgid,
	    0, ld->ld_vhub_id, &mgid);

	mutex_enter(&chan->ch_vhub_lock);
	for (mcg = chan->ch_vhub_data; mcg; mcg = mcg->mg_next) {
		if ((mcg->mg_mgid.gid_prefix == mgid.gid_prefix) &&
		    (mcg->mg_mgid.gid_guid == mgid.gid_guid)) {
			break;
		}
	}
	if (mcg == NULL) {
		mutex_exit(&chan->ch_vhub_lock);

		EIB_DPRINTF_WARN(ss->ei_instance, "eib_vnic_lookup_dest: "
		    "could not find mgid %llx.%llx",
		    mgid.gid_prefix, mgid.gid_guid);

		return (EIB_E_FAILURE);
	}

	bcopy(mcg->mg_mcginfo, mcast, sizeof (ibt_mcg_info_t));
	mutex_exit(&chan->ch_vhub_lock);

	return (EIB_E_SUCCESS);
}

/*
 * Detach the data qp from every data mcg it has joined, leave each
 * group and free the associated state.  The chain is unhooked from
 * ch_vhub_data under ch_vhub_lock first so the teardown itself runs
 * lock-free.
 */
/*ARGSUSED*/
static void
eib_vnic_leave_all_data_mcgs(eib_t *ss, eib_vnic_t *vnic)
{
	eib_chan_t *chan = vnic->vn_data_chan;
	eib_mcg_t *mcglist;
	eib_mcg_t *mcg;
	eib_mcg_t *nxt = NULL;
	ibt_status_t ret;

	/*
	 * First, take the ch_vhub_data mcg chain out of chan
	 */
	mutex_enter(&chan->ch_vhub_lock);
	mcglist = chan->ch_vhub_data;
	chan->ch_vhub_data = NULL;
	mutex_exit(&chan->ch_vhub_lock);

	/*
	 * Go through the chain of mcgs we've joined, detach the qp from the
	 * mcg, leave the group and free all associated stuff
	 */
	for (mcg = mcglist; mcg != NULL; mcg = nxt) {
		nxt = mcg->mg_next;

		ret = ibt_detach_mcg(chan->ch_chan, mcg->mg_mcginfo);
		if (ret != IBT_SUCCESS) {
			EIB_DPRINTF_WARN(ss->ei_instance,
			    "eib_vnic_leave_all_data_mcgs: "
			    "ibt_detach_mcg(chan_hdl=0x%llx, mcinfo=0x%llx, "
			    "mgid=%llx.%llx) failed, ret=%d", chan->ch_chan,
			    mcg->mg_mcginfo, mcg->mg_mgid.gid_prefix,
			    mcg->mg_mgid.gid_guid, ret);
		}

		ret = ibt_leave_mcg(mcg->mg_rgid, mcg->mg_mgid,
		    eib_reserved_gid, mcg->mg_join_state);
		if (ret != IBT_SUCCESS) {
			EIB_DPRINTF_WARN(ss->ei_instance,
			    "eib_vnic_leave_all_data_mcgs: "
			    "ibt_leave_mcg(mgid=%llx.%llx, jstate=0x%x) "
			    "failed, ret=%d", mcg->mg_mgid.gid_prefix,
			    mcg->mg_mgid.gid_guid, mcg->mg_join_state, ret);
		}

		if (mcg->mg_mcginfo)
			kmem_free(mcg->mg_mcginfo, sizeof (ibt_mcg_info_t));

		kmem_free(mcg, sizeof (eib_mcg_t));
	}
}

/*
 * Leave and rejoin every data mcg on the vnic's data channel, used
 * after events that may have invalidated our SM membership (the joins
 * go through eib_vnic_join_data_mcg() with rejoin set to B_TRUE).
 */
static void
eib_vnic_rejoin_data_mcgs(eib_t *ss, eib_vnic_t *vnic)
{
	eib_chan_t *chan = vnic->vn_data_chan;
	eib_mcg_t *mcglist;
	eib_mcg_t *mcg;
	eib_mcg_t *next;
	int err;

	/*
	 * Grab the current list of mcgs
	 */
	mutex_enter(&chan->ch_vhub_lock);
	mcglist = chan->ch_vhub_data;
	chan->ch_vhub_data = NULL;
	mutex_exit(&chan->ch_vhub_lock);

	/*
	 * When rejoin data mcgs is called, we may not even be marked as
	 * joined in SM's records. But we still have to leave the old
	 * one first to prevent leaks in ibtf.
	 */
	for (mcg = mcglist; mcg != NULL; mcg = next) {
		next = mcg->mg_next;
		mcg->mg_next = NULL;

		/* Best-effort detach/leave; failures are not actionable */
		(void) ibt_detach_mcg(chan->ch_chan, mcg->mg_mcginfo);
		(void) ibt_leave_mcg(mcg->mg_rgid, mcg->mg_mgid,
		    eib_reserved_gid, mcg->mg_join_state);

		if (eib_vnic_join_data_mcg(ss, vnic, mcg->mg_mac, B_TRUE,
		    &err) != EIB_E_SUCCESS) {
			uint8_t *m;

			m = mcg->mg_mac;
			EIB_DPRINTF_WARN(ss->ei_instance,
			    "eib_vnic_rejoin_data_mcgs: "
			    "eib_vnic_join_data_mcg(mcmac=%x:%x:%x:%x:%x:%x) "
			    "failed, ret=%d", m[0], m[1], m[2], m[3],
			    m[4], m[5], err);
		}
		/* The rejoin allocated fresh state; free the old one */
		if (mcg->mg_mcginfo) {
			kmem_free(mcg->mg_mcginfo, sizeof (ibt_mcg_info_t));
		}
		kmem_free(mcg, sizeof (eib_mcg_t));
	}
}

/*
 * Detach from and reattach to the vHUB table and update mcgs on the
 * control channel, preserving the vhub table/update structures we've
 * already built (unlike a full vnic restart).
 */
static void
eib_vnic_reattach_ctl_mcgs(eib_t *ss, eib_vnic_t *vnic)
{
	/*
	 * For reattaching to control mcgs, we will not reinitialize the
	 * vhub table/vhub update we've constructed. We'll simply detach
	 * from the table and update mcgs and reattach to them. Hopefully,
	 * we wouldn't have missed any updates and won't have to restart
	 * the vnic.
	 */
	eib_rb_vnic_attach_vhub_table(ss, vnic);
	eib_rb_vnic_attach_vhub_update(ss, vnic);

	if (eib_vnic_attach_vhub_update(ss, vnic) != EIB_E_SUCCESS) {
		EIB_DPRINTF_WARN(ss->ei_instance,
		    "eib_vnic_reattach_ctl_mcgs: "
		    "eib_vnic_attach_vhub_update(vn_id=0x%x) failed",
		    vnic->vn_id);
	}

	if (eib_vnic_attach_vhub_table(ss, vnic) != EIB_E_SUCCESS) {
		EIB_DPRINTF_WARN(ss->ei_instance,
		    "eib_vnic_reattach_ctl_mcgs: "
		    "eib_vnic_attach_vhub_table(vn_id=0x%x) failed",
		    vnic->vn_id);

		/* Don't stay half-attached if the table attach failed */
		eib_rb_vnic_attach_vhub_update(ss, vnic);
	}
}

/*
 * Roll back eib_vnic_create_common(), undoing only the steps recorded
 * in 'progress' (EIB_VNIC_* bits), in reverse order of setup: data
 * mcgs, keepalives, control mcgs, gateway logout, then the data and
 * control qps.  Also unpublishes the vnic from ei_vnic[] and clears
 * the pending pointer.
 */
static void
eib_rb_vnic_create_common(eib_t *ss, eib_vnic_t *vnic, uint_t progress)
{
	int err;

	mutex_enter(&ss->ei_vnic_lock);
	ss->ei_vnic[vnic->vn_instance] = NULL;
	ss->ei_vnic_pending = NULL;
	mutex_exit(&ss->ei_vnic_lock);

	if (progress & EIB_VNIC_BROADCAST_JOINED) {
		eib_vnic_leave_all_data_mcgs(ss, vnic);
	}

	if (progress & EIB_VNIC_KEEPALIVES_STARTED) {
		eib_rb_vnic_start_keepalives(ss, vnic);
	}

	if (progress & EIB_VNIC_ATTACHED_TO_CTL_MCGS) {
		eib_rb_vnic_attach_ctl_mcgs(ss, vnic);
	}

	if (progress & EIB_VNIC_LOGIN_DONE) {
		(void) eib_fip_logout(ss, vnic, &err);
	}

	if (progress & EIB_VNIC_DATAQP_CREATED) {
		eib_rb_data_create_qp(ss, vnic);
	}

	if (progress & EIB_VNIC_CTLQP_CREATED) {
		eib_rb_ctl_create_qp(ss, vnic);
	}
}

/*
 * Roll back eib_vnic_attach_ctl_mcgs(): detach from both control mcgs
 * and then free the vhub table/update structures.
 */
static void
eib_rb_vnic_attach_ctl_mcgs(eib_t *ss, eib_vnic_t *vnic)
{
	/*
	 * Detach from the vhub table and vhub update mcgs before blowing
	 * up vn_vhub_table and vn_vhub_update, since these are assumed to
	 * be available by the control cq handler.
	 */
	eib_rb_vnic_attach_vhub_table(ss, vnic);
	eib_rb_vnic_attach_vhub_update(ss, vnic);
	eib_vnic_fini_tables(ss, vnic, B_TRUE);
}

/*
 * Roll back eib_vnic_attach_vhub_table(): unhook ch_vhub_table, detach
 * the control qp, leave the mcg and free its state.  mg_mcginfo came
 * from ibt_query_mcg(), so it is released with ibt_free_mcg_info()
 * here (data mcgs, by contrast, use kmem_free on theirs).
 */
/*ARGSUSED*/
static void
eib_rb_vnic_attach_vhub_table(eib_t *ss, eib_vnic_t *vnic)
{
	eib_chan_t *chan = vnic->vn_ctl_chan;
	eib_mcg_t *mcg;
	ibt_channel_hdl_t chan_hdl;
	ibt_status_t ret;

	if (chan == NULL)
		return;

	mutex_enter(&chan->ch_vhub_lock);
	chan_hdl = chan->ch_chan;
	mcg = chan->ch_vhub_table;
	chan->ch_vhub_table = NULL;
	mutex_exit(&chan->ch_vhub_lock);

	if (chan_hdl && mcg) {
		ret = ibt_detach_mcg(chan_hdl, mcg->mg_mcginfo);
		if (ret != IBT_SUCCESS) {
			EIB_DPRINTF_WARN(ss->ei_instance,
			    "eib_rb_vnic_attach_vhub_table: "
			    "ibt_detach_mcg(chan_hdl=0x%llx, mcinfo=0x%llx, "
			    "mgid=%llx.%llx) failed, ret=%d", chan_hdl,
			    mcg->mg_mcginfo, mcg->mg_mgid.gid_prefix,
			    mcg->mg_mgid.gid_guid, ret);
		}

		ret = ibt_leave_mcg(mcg->mg_rgid, mcg->mg_mgid,
		    eib_reserved_gid, mcg->mg_join_state);
		if (ret != IBT_SUCCESS) {
			EIB_DPRINTF_WARN(ss->ei_instance,
			    "eib_rb_vnic_attach_vhub_table: "
			    "ibt_leave_mcg(mgid=%llx.%llx, jstate=0x%x) "
			    "failed, ret=%d", mcg->mg_mgid.gid_prefix,
			    mcg->mg_mgid.gid_guid, mcg->mg_join_state, ret);
		}

		if (mcg->mg_mcginfo) {
			ibt_free_mcg_info(mcg->mg_mcginfo, 1);
		}
		kmem_free(mcg, sizeof (eib_mcg_t));
	}
}

/*
 * Roll back eib_vnic_attach_vhub_update(): unhook ch_vhub_update,
 * detach the control qp, leave the mcg and free its state (mirror of
 * eib_rb_vnic_attach_vhub_table() above).
 */
/*ARGSUSED*/
static void
eib_rb_vnic_attach_vhub_update(eib_t *ss, eib_vnic_t *vnic)
{
	eib_chan_t *chan = vnic->vn_ctl_chan;
	eib_mcg_t *mcg;
	ibt_channel_hdl_t chan_hdl;
	ibt_status_t ret;

	if (chan == NULL)
		return;

	mutex_enter(&chan->ch_vhub_lock);
	chan_hdl = chan->ch_chan;
	mcg = chan->ch_vhub_update;
	chan->ch_vhub_update = NULL;
	mutex_exit(&chan->ch_vhub_lock);

	if (chan_hdl && mcg) {
		ret = ibt_detach_mcg(chan_hdl, mcg->mg_mcginfo);
		if (ret != IBT_SUCCESS) {
			EIB_DPRINTF_WARN(ss->ei_instance,
			    "eib_rb_vnic_attach_vhub_update: "
			    "ibt_detach_mcg(chan_hdl=0x%llx, mcinfo=0x%llx, "
			    "mgid=%llx.%llx) failed, ret=%d", chan_hdl,
			    mcg->mg_mcginfo, mcg->mg_mgid.gid_prefix,
			    mcg->mg_mgid.gid_guid, ret);
		}

		ret = ibt_leave_mcg(mcg->mg_rgid, mcg->mg_mgid,
		    eib_reserved_gid, mcg->mg_join_state);
		if (ret != IBT_SUCCESS) {
			EIB_DPRINTF_WARN(ss->ei_instance,
			    "eib_rb_vnic_attach_vhub_update: "
			    "ibt_leave_mcg(mgid=%llx.%llx, jstate=0x%x) "
			    "failed, ret=%d", mcg->mg_mgid.gid_prefix,
			    mcg->mg_mgid.gid_guid, mcg->mg_join_state, ret);
		}

		if (mcg->mg_mcginfo) {
			ibt_free_mcg_info(mcg->mg_mcginfo, 1);
		}
		kmem_free(mcg, sizeof (eib_mcg_t));
	}
}

/*
 * Roll back eib_vnic_start_keepalives(): unlink this vnic's entry from
 * the keepalives manager's list and free it.  A missing entry is only
 * logged (debug), since the list may already have been pruned.
 */
/*ARGSUSED*/
static void
eib_rb_vnic_start_keepalives(eib_t *ss, eib_vnic_t *vnic)
{
	eib_ka_vnics_t *prev;
	eib_ka_vnics_t *elem;

	/*
	 * We only need to locate and remove the vnic entry from the
	 * keepalives manager list
	 */

	mutex_enter(&ss->ei_ka_vnics_lock);

	prev = NULL;
	for (elem = ss->ei_ka_vnics; elem; elem = elem->ka_next) {
		if (elem->ka_vnic == vnic)
			break;

		prev = elem;
	}
	if (elem == NULL) {
		EIB_DPRINTF_DEBUG(ss->ei_instance,
		    "eib_rb_vnic_start_keepalives: no keepalive element found "
		    "for vnic 0x%llx (vn_inst=%d) with keepalive manager",
		    vnic, vnic->vn_instance);
	} else {
		if (prev) {
			prev->ka_next = elem->ka_next;
		} else {
			/* Entry was the list head */
			ss->ei_ka_vnics = elem->ka_next;
		}
		kmem_free(elem, sizeof (eib_ka_vnics_t));
	}
	mutex_exit(&ss->ei_ka_vnics_lock);
}

/*
 * Roll back eib_vnic_join_data_mcg() for the data mcg matching
 * 'mcast_mac': unlink it from ch_vhub_data, detach the data qp, leave
 * the group and free its state.  A no-op if the mac is not found.
 */
/*ARGSUSED*/
static void
eib_rb_vnic_join_data_mcg(eib_t *ss, eib_vnic_t *vnic, uint8_t *mcast_mac)
{
	eib_chan_t *chan = vnic->vn_data_chan;
	eib_mcg_t *prev;
	eib_mcg_t *mcg;
	ibt_status_t ret;

	/*
	 * Search our list and remove the item if found
	 */
	mutex_enter(&chan->ch_vhub_lock);

	prev = NULL;
	for (mcg = chan->ch_vhub_data; mcg != NULL; mcg = mcg->mg_next) {
		if (bcmp(mcg->mg_mac, mcast_mac, ETHERADDRL) == 0)
			break;
		prev = mcg;
	}

	if (mcg == NULL) {
		mutex_exit(&chan->ch_vhub_lock);
		return;
	}

	if (prev != NULL)
		prev->mg_next = mcg->mg_next;
	else
		chan->ch_vhub_data = mcg->mg_next;

	mcg->mg_next = NULL;

	mutex_exit(&chan->ch_vhub_lock);

	/*
	 * Detach data channel qp from the mcg, leave the group and free
	 * all associated stuff
	 */
	ret = ibt_detach_mcg(chan->ch_chan, mcg->mg_mcginfo);
	if (ret != IBT_SUCCESS) {
		EIB_DPRINTF_WARN(ss->ei_instance,
		    "eib_rb_vnic_join_data_mcg: "
		    "ibt_detach_mcg(chan_hdl=0x%llx, mcinfo=0x%llx, "
		    "mgid=%llx.%llx) failed, ret=%d", chan->ch_chan,
		    mcg->mg_mcginfo, mcg->mg_mgid.gid_prefix,
		    mcg->mg_mgid.gid_guid, ret);
	}

	ret = ibt_leave_mcg(mcg->mg_rgid, mcg->mg_mgid, eib_reserved_gid,
	    mcg->mg_join_state);
	if (ret != IBT_SUCCESS) {
		EIB_DPRINTF_WARN(ss->ei_instance,
		    "eib_rb_vnic_join_data_mcg: "
		    "ibt_leave_mcg(mgid=%llx.%llx, jstate=0x%x) "
		    "failed, ret=%d", mcg->mg_mgid.gid_prefix,
		    mcg->mg_mgid.gid_guid, mcg->mg_join_state, ret);
	}

	if (mcg->mg_mcginfo)
		kmem_free(mcg->mg_mcginfo, sizeof (ibt_mcg_info_t));

	kmem_free(mcg, sizeof (eib_mcg_t));
}