1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/ddi.h> 28 #include <sys/sunddi.h> 29 #include <sys/ksynch.h> 30 #include <sys/byteorder.h> 31 32 #include <sys/ib/clients/eoib/eib_impl.h> 33 34 /* 35 * Declarations private to this file 36 */ 37 static int eib_fip_make_login(eib_t *, eib_vnic_t *, eib_wqe_t *, int *); 38 static int eib_fip_make_update(eib_t *, eib_vnic_t *, eib_wqe_t *, int, int *); 39 static int eib_fip_make_table(eib_t *, eib_vnic_t *, eib_wqe_t *, int *); 40 static int eib_fip_make_ka(eib_t *, eib_vnic_t *, eib_wqe_t *, int *); 41 static int eib_fip_make_logout(eib_t *, eib_vnic_t *, eib_wqe_t *, int *); 42 43 static int eib_fip_send_login(eib_t *, eib_vnic_t *, eib_wqe_t *, int *); 44 static int eib_fip_send_update(eib_t *, eib_vnic_t *, eib_wqe_t *, 45 uint_t, int *); 46 static int eib_fip_send_table(eib_t *, eib_vnic_t *, eib_wqe_t *, int *); 47 static int eib_fip_send_ka(eib_t *, eib_vnic_t *, eib_wqe_t *, int *); 48 static int eib_fip_send_logout(eib_t *, eib_vnic_t *, eib_wqe_t *, int *); 49 50 static int eib_fip_parse_vhub_table(uint8_t *, eib_vnic_t *); 51 static int eib_fip_parse_vhub_update(uint8_t *, eib_vnic_t *); 52 static void eib_fip_update_eport_state(eib_t *, eib_vhub_table_t *, 53 eib_vhub_update_t *, boolean_t, uint8_t); 54 static void eib_fip_queue_tbl_entry(eib_vhub_table_t *, eib_vhub_map_t *, 55 uint32_t, uint8_t); 56 static void eib_fip_queue_upd_entry(eib_vhub_update_t *, eib_vhub_map_t *, 57 uint32_t, uint8_t); 58 static void eib_fip_queue_gw_entry(eib_vnic_t *, eib_vhub_table_t *, uint32_t, 59 uint8_t); 60 static int eib_fip_apply_updates(eib_t *, eib_vhub_table_t *, 61 eib_vhub_update_t *); 62 static void eib_fip_dequeue_tbl_entry(eib_vhub_table_t *, uint8_t *, uint32_t, 63 uint8_t); 64 static eib_vhub_map_t *eib_fip_get_vhub_map(void); 65 66 /* 67 * Definitions private to this file 68 */ 69 const char eib_vendor_mellanox[] = { 70 0x4d, 0x65, 0x6c, 0x6c, 0x61, 0x6e, 0x6f, 0x78 71 }; 72 73 /* 74 * The three requests to the gateway - request a vHUB table, request a 75 * vHUB update (aka keepalive) and vNIC logout - all need the same 76 * vnic identity descriptor to be sent with different flag settings. 77 * 78 * vHUB table: R=1, U=0, TUSN=last, subcode=KEEPALIVE 79 * keepalive/vHUB update: R=0, U=1, TUSN=last, subcode=KEEPALIVE 80 * vNIC logout: R=0, U=0, TUSN=0, subcode=LOGOUT 81 */ 82 #define EIB_UPD_REQ_TABLE 1 83 #define EIB_UPD_REQ_KA 2 84 #define EIB_UPD_REQ_LOGOUT 3 85 86 int 87 eib_fip_login(eib_t *ss, eib_vnic_t *vnic, int *err) 88 { 89 eib_wqe_t *swqe; 90 int ret; 91 int ntries = 0; 92 93 do { 94 if ((swqe = eib_rsrc_grab_swqe(ss, EIB_WPRI_LO)) == NULL) { 95 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_login: " 96 "no swqe available, not sending " 97 "vnic login request"); 98 *err = ENOMEM; 99 return (EIB_E_FAILURE); 100 } 101 102 ret = eib_fip_make_login(ss, vnic, swqe, err); 103 if (ret != EIB_E_SUCCESS) { 104 eib_rsrc_return_swqe(ss, swqe, NULL); 105 return (EIB_E_FAILURE); 106 } 107 108 ret = eib_fip_send_login(ss, vnic, swqe, err); 109 if (ret != EIB_E_SUCCESS) { 110 eib_rsrc_return_swqe(ss, swqe, NULL); 111 return (EIB_E_FAILURE); 112 } 113 114 ret = eib_vnic_wait_for_login_ack(ss, vnic, err); 115 if (ret == EIB_E_SUCCESS) 116 break; 117 118 } while ((*err == ETIME) && (ntries++ < EIB_MAX_LOGIN_ATTEMPTS)); 119 120 return (ret); 121 } 122 123 int 124 eib_fip_vhub_table(eib_t *ss, eib_vnic_t *vnic, int *err) 125 { 126 eib_wqe_t *swqe; 127 int ret; 128 int ntries = 0; 129 130 do { 131 if ((swqe = eib_rsrc_grab_swqe(ss, EIB_WPRI_LO)) == NULL) { 132 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_vhub_table: " 133 "no swqe available, not sending " 134 "vhub table request"); 135 *err = ENOMEM; 136 return (EIB_E_FAILURE); 137 } 138 139 ret = eib_fip_make_table(ss, vnic, swqe, err); 140 if (ret != EIB_E_SUCCESS) { 141 eib_rsrc_return_swqe(ss, swqe, NULL); 142 return (EIB_E_FAILURE); 143 } 144 145 ret = eib_fip_send_table(ss, vnic, swqe, err); 146 if (ret != EIB_E_SUCCESS) { 147 eib_rsrc_return_swqe(ss, swqe, NULL); 148 return (EIB_E_FAILURE); 149 } 150 151 ret = eib_vnic_wait_for_table(ss, vnic, err); 152 if (ret == EIB_E_SUCCESS) { 153 return (EIB_E_SUCCESS); 154 } 155 156 /* 157 * If we'd failed in constructing a proper vhub table above, 158 * the vnic login state would be set to EIB_LOGIN_TBL_FAILED. 159 * We need to clean up any pending entries from the vhub 160 * table and vhub update structures and reset the vnic state 161 * to EIB_LOGIN_ACK_RCVD before we can try again. 162 */ 163 eib_vnic_fini_tables(ss, vnic, B_FALSE); 164 mutex_enter(&vnic->vn_lock); 165 vnic->vn_state = EIB_LOGIN_ACK_RCVD; 166 mutex_exit(&vnic->vn_lock); 167 168 } while ((*err == ETIME) && (ntries++ < EIB_MAX_VHUB_TBL_ATTEMPTS)); 169 170 return (EIB_E_FAILURE); 171 } 172 173 int 174 eib_fip_heartbeat(eib_t *ss, eib_vnic_t *vnic, int *err) 175 { 176 eib_wqe_t *swqe; 177 int ntries = 0; 178 int ret; 179 180 /* 181 * Even if we're running low on the wqe resource, we want to be 182 * able to grab a wqe to send the keepalive, to avoid getting 183 * logged out by the gateway, so we use EIB_WPRI_HI. 184 */ 185 if ((swqe = eib_rsrc_grab_swqe(ss, EIB_WPRI_HI)) == NULL) { 186 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_heartbeat: " 187 "no swqe available, not sending heartbeat"); 188 return (EIB_E_FAILURE); 189 } 190 191 while (ntries++ < EIB_MAX_KA_ATTEMPTS) { 192 ret = eib_fip_make_ka(ss, vnic, swqe, err); 193 if (ret != EIB_E_SUCCESS) 194 continue; 195 196 ret = eib_fip_send_ka(ss, vnic, swqe, err); 197 if (ret == EIB_E_SUCCESS) 198 break; 199 } 200 201 if (ret != EIB_E_SUCCESS) 202 eib_rsrc_return_swqe(ss, swqe, NULL); 203 204 return (ret); 205 } 206 207 int 208 eib_fip_logout(eib_t *ss, eib_vnic_t *vnic, int *err) 209 { 210 eib_wqe_t *swqe; 211 int ret; 212 213 /* 214 * This routine is only called after the vnic has successfully 215 * logged in to the gateway. If that's really the case, there 216 * is nothing in terms of resources we need to release: the swqe 217 * that was acquired during login has already been posted, the 218 * work has been completed and the swqe has also been reaped back 219 * into the free pool. The only thing we need to rollback is the 220 * fact that we're logged in to the gateway at all -- and the way 221 * to do this is to send a logout request. 222 */ 223 if ((swqe = eib_rsrc_grab_swqe(ss, EIB_WPRI_LO)) == NULL) { 224 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_logout: " 225 "no swqe available, not sending logout"); 226 return (EIB_E_FAILURE); 227 } 228 229 ret = eib_fip_make_logout(ss, vnic, swqe, err); 230 if (ret != EIB_E_SUCCESS) { 231 eib_rsrc_return_swqe(ss, swqe, NULL); 232 return (EIB_E_FAILURE); 233 } 234 235 ret = eib_fip_send_logout(ss, vnic, swqe, err); 236 if (ret != EIB_E_SUCCESS) { 237 eib_rsrc_return_swqe(ss, swqe, NULL); 238 return (EIB_E_FAILURE); 239 } 240 241 return (EIB_E_SUCCESS); 242 } 243 244 int 245 eib_fip_parse_login_ack(eib_t *ss, uint8_t *pkt, eib_login_data_t *ld) 246 { 247 fip_login_ack_t *ack; 248 fip_basic_hdr_t *hdr; 249 fip_desc_iba_t *iba; 250 fip_desc_vnic_login_t *login; 251 fip_desc_partition_t *partition; 252 ib_guid_t guid; 253 uint32_t syn_ctl_qpn; 254 uint16_t sl_portid; 255 uint16_t flags_vlan; 256 uint16_t opcode; 257 uint8_t subcode; 258 259 /* 260 * Note that 'pkt' is always atleast double-word aligned 261 * when it is passed to us, so we can cast it without any 262 * problems. 263 */ 264 ack = (fip_login_ack_t *)(void *)pkt; 265 hdr = &(ack->ak_fip_header); 266 267 /* 268 * Verify that the opcode is EoIB 269 */ 270 if ((opcode = ntohs(hdr->hd_opcode)) != FIP_OPCODE_EOIB) { 271 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_login_ack: " 272 "unsupported opcode 0x%x in login ack, ignoring", 273 opcode); 274 return (EIB_E_FAILURE); 275 } 276 277 /* 278 * The admin qp in the EoIB driver should receive only the login 279 * acknowledgements 280 */ 281 subcode = hdr->hd_subcode; 282 if (subcode != FIP_SUBCODE_G_VNIC_LOGIN_ACK) { 283 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_login_ack: " 284 "unexpected subcode 0x%x received by adm qp, ignoring", 285 subcode); 286 return (EIB_E_FAILURE); 287 } 288 289 /* 290 * Verify if the descriptor list length in the received packet is 291 * valid if the workaround to disable it explicitly is absent. 292 */ 293 if (!eib_wa_no_desc_list_len) { 294 uint_t pkt_data_sz; 295 296 pkt_data_sz = (ntohs(hdr->hd_desc_list_len) + 2) << 2; 297 if (pkt_data_sz < sizeof (fip_login_ack_t)) { 298 EIB_DPRINTF_WARN(ss->ei_instance, 299 "eib_fip_parse_login_ack: " 300 "login ack desc list len (0x%lx) too small " 301 "(min 0x%lx)", 302 pkt_data_sz, sizeof (fip_login_ack_t)); 303 return (EIB_E_FAILURE); 304 } 305 } 306 307 /* 308 * Validate all the header and descriptor types and lengths 309 */ 310 if (hdr->hd_type != FIP_DESC_TYPE_VENDOR_ID || 311 hdr->hd_len != FIP_DESC_LEN_VENDOR_ID) { 312 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_login_ack: " 313 "invalid type/len in basic hdr: expected (0x%x,0x%x), " 314 "got (0x%x,0x%x)", FIP_DESC_TYPE_VENDOR_ID, 315 FIP_DESC_LEN_VENDOR_ID, hdr->hd_type, hdr->hd_len); 316 return (EIB_E_FAILURE); 317 } 318 iba = &(ack->ak_iba); 319 if (iba->ia_type != FIP_DESC_TYPE_IBA || 320 iba->ia_len != FIP_DESC_LEN_IBA) { 321 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_login_ack: " 322 "invalid type/len in iba desc: expected (0x%x,0x%x), " 323 "got (0x%x,0x%x)", FIP_DESC_TYPE_IBA, FIP_DESC_LEN_IBA, 324 iba->ia_type, iba->ia_len); 325 return (EIB_E_FAILURE); 326 } 327 login = &(ack->ak_vnic_login); 328 if (login->vl_type != FIP_DESC_TYPE_VNIC_LOGIN || 329 login->vl_len != FIP_DESC_LEN_VNIC_LOGIN) { 330 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_login_ack: " 331 "invalid type/len in login desc: expected (0x%x,0x%x), " 332 "got (0x%x,0x%x)", FIP_DESC_TYPE_VNIC_LOGIN, 333 FIP_DESC_LEN_VNIC_LOGIN, login->vl_type, login->vl_len); 334 return (EIB_E_FAILURE); 335 } 336 partition = &(ack->ak_vhub_partition); 337 if (partition->pn_type != FIP_DESC_TYPE_PARTITION || 338 partition->pn_len != FIP_DESC_LEN_PARTITION) { 339 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_login_ack: " 340 "invalid type/len in partition desc: expected (0x%x,0x%x), " 341 "got (0x%x,0x%x)", FIP_DESC_TYPE_PARTITION, 342 FIP_DESC_LEN_PARTITION, partition->pn_type, 343 partition->pn_len); 344 return (EIB_E_FAILURE); 345 } 346 347 /* 348 * Note that we'll return the vnic id as-is. The msb is not actually 349 * part of the vnic id in our internal records, so we'll mask it out 350 * later before we do our searches. 351 */ 352 ld->ld_vnic_id = ntohs(login->vl_vnic_id); 353 354 syn_ctl_qpn = ntohl(login->vl_syndrome_ctl_qpn); 355 356 /* 357 * If the syndrome indicates a nack, we're done. No need to collect 358 * any more information 359 */ 360 ld->ld_syndrome = (uint8_t)((syn_ctl_qpn & FIP_VL_SYN_MASK) >> 361 FIP_VL_SYN_SHIFT); 362 if (ld->ld_syndrome) { 363 return (EIB_E_SUCCESS); 364 } 365 366 /* 367 * Let's get the rest of the information out of the login ack 368 */ 369 sl_portid = ntohs(iba->ia_sl_portid); 370 ld->ld_gw_port_id = sl_portid & FIP_IBA_PORTID_MASK; 371 ld->ld_gw_sl = (sl_portid & FIP_IBA_SL_MASK) >> FIP_IBA_SL_SHIFT; 372 373 ld->ld_gw_data_qpn = ntohl(iba->ia_qpn) & FIP_IBA_QPN_MASK; 374 ld->ld_gw_lid = ntohs(iba->ia_lid); 375 376 bcopy(iba->ia_guid, &guid, sizeof (ib_guid_t)); 377 ld->ld_gw_guid = ntohll(guid); 378 ld->ld_vhub_mtu = ntohs(login->vl_mtu); 379 bcopy(login->vl_mac, ld->ld_assigned_mac, ETHERADDRL); 380 bcopy(login->vl_gw_mgid_prefix, ld->ld_gw_mgid_prefix, 381 FIP_MGID_PREFIX_LEN); 382 ld->ld_n_rss_mcgid = login->vl_flags_rss & FIP_VL_N_RSS_MCGID_MASK; 383 ld->ld_n_mac_mcgid = login->vl_n_mac_mcgid & FIP_VL_N_MAC_MCGID_MASK; 384 ld->ld_gw_ctl_qpn = (syn_ctl_qpn & FIP_VL_CTL_QPN_MASK); 385 386 flags_vlan = ntohs(login->vl_flags_vlan); 387 ld->ld_assigned_vlan = flags_vlan & FIP_VL_VLAN_MASK; 388 ld->ld_vlan_in_packets = (flags_vlan & FIP_VL_FLAGS_VP) ? 1 : 0; 389 bcopy(login->vl_vnic_name, ld->ld_vnic_name, FIP_VNIC_NAME_LEN); 390 391 ld->ld_vhub_pkey = ntohs(partition->pn_pkey); 392 393 return (EIB_E_SUCCESS); 394 } 395 396 int 397 eib_fip_parse_ctl_pkt(uint8_t *pkt, eib_vnic_t *vnic) 398 { 399 eib_t *ss = vnic->vn_ss; 400 fip_vhub_pkt_t *vhb; 401 fip_basic_hdr_t *hdr; 402 uint16_t opcode; 403 uint8_t subcode; 404 uint_t vnic_state; 405 int ret = EIB_E_FAILURE; 406 407 /* 408 * Note that 'pkt' is always atleast double-word aligned when it is 409 * passed to us, so we can cast it without any problems. 410 */ 411 vhb = (fip_vhub_pkt_t *)(void *)pkt; 412 hdr = &(vhb->hb_fip_header); 413 414 /* 415 * Verify that the opcode is EoIB 416 */ 417 if ((opcode = ntohs(hdr->hd_opcode)) != FIP_OPCODE_EOIB) { 418 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_ctl_pkt: " 419 "unsupported opcode 0x%x in ctl pkt, ignoring", 420 opcode); 421 return (EIB_E_FAILURE); 422 } 423 424 mutex_enter(&vnic->vn_lock); 425 vnic_state = vnic->vn_state; 426 mutex_exit(&vnic->vn_lock); 427 428 /* 429 * The ctl qp in the EoIB driver should receive only vHUB messages 430 */ 431 subcode = hdr->hd_subcode; 432 if (subcode == FIP_SUBCODE_G_VHUB_UPDATE) { 433 if (vnic_state != EIB_LOGIN_TBL_WAIT && 434 vnic_state != EIB_LOGIN_TBL_INPROG && 435 vnic_state != EIB_LOGIN_TBL_DONE && 436 vnic_state != EIB_LOGIN_DONE) { 437 438 EIB_DPRINTF_WARN(ss->ei_instance, 439 "eib_fip_parse_ctl_pkt: unexpected vnic state " 440 "(0x%lx) for subcode (VHUB_UPDATE 0x%x)", 441 vnic_state, subcode); 442 return (EIB_E_FAILURE); 443 } 444 445 ret = eib_fip_parse_vhub_update(pkt, vnic); 446 447 } else if (subcode == FIP_SUBCODE_G_VHUB_TABLE) { 448 if ((vnic_state != EIB_LOGIN_TBL_WAIT) && 449 (vnic_state != EIB_LOGIN_TBL_INPROG)) { 450 451 EIB_DPRINTF_WARN(ss->ei_instance, 452 "eib_fip_parse_ctl_pkt: unexpected vnic state " 453 "(0x%lx) for subcode (VHUB_TABLE 0x%x)", 454 vnic_state, subcode); 455 return (EIB_E_FAILURE); 456 } 457 458 ret = eib_fip_parse_vhub_table(pkt, vnic); 459 460 } else { 461 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_ctl_pkt: " 462 "unexpected subcode 0x%x for ctl pkt", subcode); 463 } 464 465 if (ret == EIB_E_SUCCESS) { 466 /* 467 * Update last gateway heartbeat received time and 468 * gateway eport state. The eport state should only 469 * be updated if the vnic's vhub table has been fully 470 * constructed. 471 */ 472 mutex_enter(&ss->ei_vnic_lock); 473 ss->ei_gw_last_heartbeat = ddi_get_lbolt64(); 474 if (vnic_state == EIB_LOGIN_TBL_DONE || 475 vnic_state == EIB_LOGIN_DONE) { 476 ss->ei_gw_eport_state = 477 vnic->vn_vhub_table->tb_eport_state; 478 } 479 mutex_exit(&ss->ei_vnic_lock); 480 } 481 482 return (ret); 483 } 484 485 static int 486 eib_fip_make_login(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe, int *err) 487 { 488 fip_login_t *login; 489 fip_proto_t *proto; 490 fip_basic_hdr_t *hdr; 491 fip_desc_iba_t *iba; 492 fip_desc_vnic_login_t *vlg; 493 ib_gid_t port_gid; 494 ib_guid_t port_guid; 495 uint16_t sl_portid; 496 uint16_t flags_vlan; 497 498 uint16_t gw_portid = ss->ei_gw_props->pp_gw_portid; 499 uint16_t sl = ss->ei_gw_props->pp_gw_sl; 500 uint8_t *pkt = (uint8_t *)(uintptr_t)(swqe->qe_sgl.ds_va); 501 uint_t pktsz = swqe->qe_sgl.ds_len; 502 uint_t login_sz = sizeof (fip_login_t); 503 504 if (pktsz < login_sz) { 505 *err = EINVAL; 506 507 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_make_login: " 508 "send buffer size (0x%lx) too small to send" 509 "login request (min 0x%lx)", 510 pktsz, login_sz); 511 return (EIB_E_FAILURE); 512 } 513 514 /* 515 * Lint complains that there may be an alignment issue here, 516 * but we know that the "pkt" is atleast double-word aligned, 517 * so it's ok. 518 */ 519 login = (fip_login_t *)(void *)pkt; 520 bzero(pkt, login_sz); 521 522 /* 523 * Fill in the FIP protocol version 524 */ 525 proto = &login->lg_proto_version; 526 proto->pr_version = FIP_PROTO_VERSION; 527 528 /* 529 * Fill in the basic header 530 */ 531 hdr = &login->lg_fip_header; 532 hdr->hd_opcode = htons(FIP_OPCODE_EOIB); 533 hdr->hd_subcode = FIP_SUBCODE_H_VNIC_LOGIN; 534 hdr->hd_desc_list_len = htons((login_sz >> 2) - 2); 535 hdr->hd_flags = 0; 536 hdr->hd_type = FIP_DESC_TYPE_VENDOR_ID; 537 hdr->hd_len = FIP_DESC_LEN_VENDOR_ID; 538 bcopy(eib_vendor_mellanox, hdr->hd_vendor_id, FIP_VENDOR_LEN); 539 540 /* 541 * Fill in the Infiniband Address descriptor 542 */ 543 iba = &login->lg_iba; 544 iba->ia_type = FIP_DESC_TYPE_IBA; 545 iba->ia_len = FIP_DESC_LEN_IBA; 546 bcopy(eib_vendor_mellanox, iba->ia_vendor_id, FIP_VENDOR_LEN); 547 iba->ia_qpn = htonl(vnic->vn_data_chan->ch_qpn); 548 549 sl_portid = (gw_portid & FIP_IBA_PORTID_MASK) | 550 ((sl << FIP_IBA_SL_SHIFT) & FIP_IBA_SL_MASK); 551 iba->ia_sl_portid = htons(sl_portid); 552 553 iba->ia_lid = htons(ss->ei_props->ep_blid); 554 555 port_gid = ss->ei_props->ep_sgid; 556 port_guid = htonll(port_gid.gid_guid); 557 bcopy(&port_guid, iba->ia_guid, FIP_GUID_LEN); 558 559 /* 560 * Now, fill in the vNIC Login descriptor 561 */ 562 563 vlg = &login->lg_vnic_login; 564 vlg->vl_type = FIP_DESC_TYPE_VNIC_LOGIN; 565 vlg->vl_len = FIP_DESC_LEN_VNIC_LOGIN; 566 bcopy(eib_vendor_mellanox, vlg->vl_vendor_id, FIP_VENDOR_LEN); 567 568 /* 569 * Only for the physlink instance 0, we ask the gateway to assign 570 * the mac address and a VLAN (tagless, actually). For this vnic 571 * only, we do not set the H bit. All other vnics are created by 572 * Solaris admin and will have the H bit set. Note also that we 573 * need to clear the vnic id's most significant bit for those that 574 * are administered by the gateway, so vnic0's vnic_id's msb should 575 * be 0 as well. 576 */ 577 if (vnic->vn_instance == 0) { 578 vlg->vl_vnic_id = htons(vnic->vn_id); 579 flags_vlan = vnic->vn_vlan & FIP_VL_VLAN_MASK; 580 } else { 581 vlg->vl_vnic_id = htons(vnic->vn_id | FIP_VL_VNIC_ID_MSBIT); 582 flags_vlan = (vnic->vn_vlan & FIP_VL_VLAN_MASK) | 583 FIP_VL_FLAGS_H | FIP_VL_FLAGS_M; 584 585 if (vnic->vn_vlan & FIP_VL_VLAN_MASK) 586 flags_vlan |= (FIP_VL_FLAGS_V | FIP_VL_FLAGS_VP); 587 } 588 589 vlg->vl_flags_vlan = htons(flags_vlan); 590 bcopy(vnic->vn_macaddr, vlg->vl_mac, ETHERADDRL); 591 592 /* 593 * We aren't ready to enable rss, so we set the RSS bit and 594 * the n_rss_mcgid field to 0. Set the mac mcgid to 0 as well. 595 */ 596 vlg->vl_flags_rss = 0; 597 vlg->vl_n_mac_mcgid = 0; 598 599 /* 600 * Set the syndrome to 0 and pass the control qpn 601 */ 602 vlg->vl_syndrome_ctl_qpn = 603 htonl(vnic->vn_ctl_chan->ch_qpn & FIP_VL_CTL_QPN_MASK); 604 605 /* 606 * Try to set as unique a name as possible for this vnic 607 */ 608 (void) snprintf((char *)(vlg->vl_vnic_name), FIP_VNIC_NAME_LEN, 609 "eoib_%02x_%02x", ss->ei_instance, vnic->vn_instance); 610 611 /* 612 * Adjust the ds_len in the sgl to indicate the size of this 613 * request before returning 614 */ 615 swqe->qe_sgl.ds_len = login_sz; 616 617 return (EIB_E_SUCCESS); 618 } 619 620 static int 621 eib_fip_make_update(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe, int req, 622 int *err) 623 { 624 fip_keep_alive_t *ka; 625 fip_proto_t *proto; 626 fip_basic_hdr_t *hdr; 627 fip_desc_vnic_identity_t *vid; 628 ib_gid_t port_gid; 629 ib_guid_t port_guid; 630 uint32_t flags_vhub_id; 631 632 uint8_t *pkt = (uint8_t *)(uintptr_t)(swqe->qe_sgl.ds_va); 633 uint_t pktsz = swqe->qe_sgl.ds_len; 634 uint_t ka_sz = sizeof (fip_keep_alive_t); 635 636 if (pktsz < ka_sz) { 637 *err = EINVAL; 638 639 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_make_update: " 640 "send buffer size (0x%lx) too small to send" 641 "keepalive/update request (min 0x%lx)", 642 pktsz, ka_sz); 643 return (EIB_E_FAILURE); 644 } 645 646 /* 647 * Lint complains that there may be an alignment issue here, 648 * but we know that the "pkt" is atleast double-word aligned, 649 * so it's ok. 650 */ 651 ka = (fip_keep_alive_t *)(void *)pkt; 652 bzero(pkt, ka_sz); 653 654 /* 655 * Fill in the FIP protocol version 656 */ 657 proto = &ka->ka_proto_version; 658 proto->pr_version = FIP_PROTO_VERSION; 659 660 /* 661 * Fill in the basic header 662 */ 663 hdr = &ka->ka_fip_header; 664 hdr->hd_opcode = htons(FIP_OPCODE_EOIB); 665 hdr->hd_subcode = (req == EIB_UPD_REQ_LOGOUT) ? 666 FIP_SUBCODE_H_VNIC_LOGOUT : FIP_SUBCODE_H_KEEP_ALIVE; 667 hdr->hd_desc_list_len = htons((ka_sz >> 2) - 2); 668 hdr->hd_flags = 0; 669 hdr->hd_type = FIP_DESC_TYPE_VENDOR_ID; 670 hdr->hd_len = FIP_DESC_LEN_VENDOR_ID; 671 bcopy(eib_vendor_mellanox, hdr->hd_vendor_id, FIP_VENDOR_LEN); 672 673 /* 674 * Fill in the vNIC Identity descriptor 675 */ 676 vid = &ka->ka_vnic_identity; 677 678 vid->vi_type = FIP_DESC_TYPE_VNIC_IDENTITY; 679 vid->vi_len = FIP_DESC_LEN_VNIC_IDENTITY; 680 bcopy(eib_vendor_mellanox, vid->vi_vendor_id, FIP_VENDOR_LEN); 681 682 flags_vhub_id = vnic->vn_login_data.ld_vhub_id; 683 if (vnic->vn_login_data.ld_vlan_in_packets) { 684 flags_vhub_id |= FIP_VI_FLAG_VP; 685 } 686 if (req == EIB_UPD_REQ_TABLE) { 687 flags_vhub_id |= FIP_VI_FLAG_R; 688 } else if (req == EIB_UPD_REQ_KA) { 689 flags_vhub_id |= FIP_VI_FLAG_U; 690 } 691 vid->vi_flags_vhub_id = htonl(flags_vhub_id); 692 693 vid->vi_tusn = (req != EIB_UPD_REQ_LOGOUT) ? 694 htonl(vnic->vn_vhub_table->tb_tusn) : 0; 695 696 vid->vi_vnic_id = htons(vnic->vn_login_data.ld_vnic_id); 697 bcopy(vnic->vn_login_data.ld_assigned_mac, vid->vi_mac, ETHERADDRL); 698 699 port_gid = ss->ei_props->ep_sgid; 700 port_guid = htonll(port_gid.gid_guid); 701 bcopy(&port_guid, vid->vi_port_guid, FIP_GUID_LEN); 702 bcopy(vnic->vn_login_data.ld_vnic_name, vid->vi_vnic_name, 703 FIP_VNIC_NAME_LEN); 704 705 /* 706 * Adjust the ds_len in the sgl to indicate the size of this 707 * request before returning 708 */ 709 swqe->qe_sgl.ds_len = ka_sz; 710 711 return (EIB_E_SUCCESS); 712 } 713 714 static int 715 eib_fip_make_table(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe, int *err) 716 { 717 return (eib_fip_make_update(ss, vnic, swqe, EIB_UPD_REQ_TABLE, err)); 718 } 719 720 static int 721 eib_fip_make_ka(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe, int *err) 722 { 723 return (eib_fip_make_update(ss, vnic, swqe, EIB_UPD_REQ_KA, err)); 724 } 725 726 static int 727 eib_fip_make_logout(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe, int *err) 728 { 729 return (eib_fip_make_update(ss, vnic, swqe, EIB_UPD_REQ_LOGOUT, err)); 730 } 731 732 static int 733 eib_fip_send_login(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe, int *err) 734 { 735 eib_avect_t *av; 736 eib_chan_t *chan = ss->ei_admin_chan; 737 ibt_status_t ret; 738 739 /* 740 * Get an address vector for this destination 741 */ 742 if ((av = eib_ibt_hold_avect(ss, ss->ei_gw_props->pp_gw_lid, 743 ss->ei_gw_props->pp_gw_sl)) == NULL) { 744 *err = ENOMEM; 745 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_send_login: " 746 "eib_ibt_hold_avect(gw_lid=0x%x, sl=0x%x) failed", 747 ss->ei_gw_props->pp_gw_lid, ss->ei_gw_props->pp_gw_sl); 748 return (EIB_E_FAILURE); 749 } 750 751 /* 752 * Modify the UD destination handle to the gateway 753 */ 754 ret = ibt_modify_ud_dest(swqe->qe_dest, EIB_FIP_QKEY, 755 ss->ei_gw_props->pp_gw_ctrl_qpn, &av->av_vect); 756 757 eib_ibt_release_avect(ss, av); 758 if (ret != IBT_SUCCESS) { 759 *err = EINVAL; 760 761 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_send_login: " 762 "ibt_modify_ud_dest(gw_ctl_qpn=0x%lx, qkey=0x%lx) failed, " 763 "ret=%d", ss->ei_gw_props->pp_gw_ctrl_qpn, 764 EIB_FIP_QKEY, ret); 765 return (EIB_E_FAILURE); 766 } 767 768 /* 769 * Send the login packet to the destination gateway. Posting 770 * the login and setting the login state to wait-for-ack should 771 * ideally be atomic to avoid race. 772 */ 773 mutex_enter(&vnic->vn_lock); 774 ret = ibt_post_send(chan->ch_chan, &(swqe->qe_wr.send), 1, NULL); 775 if (ret != IBT_SUCCESS) { 776 mutex_exit(&vnic->vn_lock); 777 *err = EINVAL; 778 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_send_login: " 779 "ibt_post_send() failed for vnic id 0x%x, ret=%d", 780 vnic->vn_id, ret); 781 return (EIB_E_FAILURE); 782 } 783 vnic->vn_state = EIB_LOGIN_ACK_WAIT; 784 785 mutex_enter(&chan->ch_tx_lock); 786 chan->ch_tx_posted++; 787 mutex_exit(&chan->ch_tx_lock); 788 789 mutex_exit(&vnic->vn_lock); 790 791 return (EIB_E_SUCCESS); 792 } 793 794 static int 795 eib_fip_send_update(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe, 796 uint_t nxt_state, int *err) 797 { 798 eib_login_data_t *ld = &vnic->vn_login_data; 799 eib_chan_t *chan = vnic->vn_ctl_chan; 800 eib_avect_t *av; 801 ibt_status_t ret; 802 803 /* 804 * Get an address vector for this destination 805 */ 806 if ((av = eib_ibt_hold_avect(ss, ld->ld_gw_lid, 807 ld->ld_gw_sl)) == NULL) { 808 *err = ENOMEM; 809 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_send_update: " 810 "eib_ibt_hold_avect(gw_lid=0x%x, sl=0x%x) failed", 811 ld->ld_gw_lid, ld->ld_gw_sl); 812 return (EIB_E_FAILURE); 813 } 814 815 /* 816 * Modify the UD destination handle to the destination appropriately 817 */ 818 ret = ibt_modify_ud_dest(swqe->qe_dest, EIB_FIP_QKEY, 819 ld->ld_gw_ctl_qpn, &av->av_vect); 820 821 eib_ibt_release_avect(ss, av); 822 if (ret != IBT_SUCCESS) { 823 *err = EINVAL; 824 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_send_update: " 825 "ibt_modify_ud_dest(gw_ctl_qpn=0x%lx, qkey=0x%lx) failed, " 826 "ret=%d", ld->ld_gw_ctl_qpn, EIB_FIP_QKEY, ret); 827 return (EIB_E_FAILURE); 828 } 829 830 /* 831 * Send the update packet to the destination. Posting the update request 832 * and setting the login state to wait-for-vhub_table needs to be atomic 833 * to avoid race. 834 */ 835 mutex_enter(&vnic->vn_lock); 836 ret = ibt_post_send(chan->ch_chan, &(swqe->qe_wr.send), 1, NULL); 837 if (ret != IBT_SUCCESS) { 838 mutex_exit(&vnic->vn_lock); 839 *err = EINVAL; 840 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_send_update: " 841 "ibt_post_send() failed for vnic id 0x%x, ret=%d", 842 vnic->vn_id, ret); 843 return (EIB_E_FAILURE); 844 } 845 vnic->vn_state = nxt_state; 846 847 mutex_enter(&chan->ch_tx_lock); 848 chan->ch_tx_posted++; 849 mutex_exit(&chan->ch_tx_lock); 850 851 mutex_exit(&vnic->vn_lock); 852 853 return (EIB_E_SUCCESS); 854 } 855 856 static int 857 eib_fip_send_table(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe, int *err) 858 { 859 return (eib_fip_send_update(ss, vnic, swqe, EIB_LOGIN_TBL_WAIT, err)); 860 } 861 862 static int 863 eib_fip_send_ka(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe, int *err) 864 { 865 return (eib_fip_send_update(ss, vnic, swqe, EIB_LOGIN_DONE, err)); 866 } 867 868 static int 869 eib_fip_send_logout(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe, int *err) 870 { 871 return (eib_fip_send_update(ss, vnic, swqe, EIB_LOGOUT_DONE, err)); 872 } 873 874 static int 875 eib_fip_parse_vhub_table(uint8_t *pkt, eib_vnic_t *vnic) 876 { 877 fip_vhub_table_t *tbl; 878 fip_desc_vhub_table_t *desc_tbl; 879 fip_vhub_table_entry_t *entry; 880 fip_basic_hdr_t *hdr; 881 eib_t *ss = vnic->vn_ss; 882 eib_login_data_t *ld = &vnic->vn_login_data; 883 eib_vhub_table_t *etbl = vnic->vn_vhub_table; 884 eib_vhub_update_t *eupd = vnic->vn_vhub_update; 885 eib_vhub_map_t *newmap; 886 887 uint32_t *ipkt; 888 uint32_t init_checksum = 0; 889 uint32_t tusn; 890 uint32_t vhub_id; 891 uint_t entries_in_pkt; 892 uint_t ndx; 893 uint_t i; 894 895 /* 896 * If we're here receiving vhub table messages, we certainly should 897 * have the vhub table structure allocated and present at this point. 898 */ 899 if (etbl == NULL) { 900 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_table: " 901 "vhub table missing for vnic id 0x%x", vnic->vn_id); 902 return (EIB_E_FAILURE); 903 } 904 905 /* 906 * Note that 'pkt' is always atleast double-word aligned when it is 907 * passed to us, so we can cast it without any problems. 908 */ 909 ipkt = (uint32_t *)(void *)pkt; 910 tbl = (fip_vhub_table_t *)(void *)pkt; 911 hdr = &(tbl->vt_fip_header); 912 913 /* 914 * Validate all the header and descriptor types and lengths 915 */ 916 if (hdr->hd_type != FIP_DESC_TYPE_VENDOR_ID || 917 hdr->hd_len != FIP_DESC_LEN_VENDOR_ID) { 918 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_table: " 919 "invalid type/len in fip basic header, " 920 "exp (0x%x,0x%x), got (0x%x,0x%x)", 921 FIP_DESC_TYPE_VENDOR_ID, FIP_DESC_LEN_VENDOR_ID, 922 hdr->hd_type, hdr->hd_len); 923 return (EIB_E_FAILURE); 924 } 925 desc_tbl = &(tbl->vt_vhub_table); 926 if (desc_tbl->tb_type != FIP_DESC_TYPE_VHUB_TABLE) { 927 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_table: " 928 "invalid type in vhub desc, exp 0x%x, got 0x%x", 929 FIP_DESC_TYPE_VHUB_TABLE, desc_tbl->tb_type); 930 return (EIB_E_FAILURE); 931 } 932 933 /* 934 * Verify that the vhub id is ok for this vnic 935 */ 936 vhub_id = ntohl(desc_tbl->tb_flags_vhub_id) & FIP_TB_VHUB_ID_MASK; 937 if (vhub_id != ld->ld_vhub_id) { 938 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_table: " 939 "invalid vhub id in vhub table pkt: exp 0x%x, got 0x%x", 940 ld->ld_vhub_id, vhub_id); 941 return (EIB_E_FAILURE); 942 } 943 944 /* 945 * Count the number of vhub table entries in this packet 946 */ 947 entries_in_pkt = (desc_tbl->tb_len - FIP_DESC_VHUB_TABLE_WORDS) / 948 FIP_VHUB_TABLE_ENTRY_WORDS; 949 950 /* 951 * While we're here, also compute the 32-bit 2's complement carry- 952 * discarded checksum of the vHUB table descriptor in this packet 953 * till the first vhub table entry. 954 */ 955 for (i = 0; i < FIP_DESC_VHUB_TABLE_WORDS; i++) 956 init_checksum += ipkt[i]; 957 958 /* 959 * Initialize the vhub's Table Update Sequence Number (tusn), 960 * checksum and record the total number of entries in in the table 961 * if this is the first pkt of the table. 962 */ 963 tusn = ntohl(desc_tbl->tb_tusn); 964 if (desc_tbl->tb_hdr & FIP_TB_HDR_FIRST) { 965 etbl->tb_entries_in_table = ntohs(desc_tbl->tb_table_size); 966 etbl->tb_tusn = tusn; 967 etbl->tb_checksum = 0; 968 969 mutex_enter(&vnic->vn_lock); 970 vnic->vn_state = EIB_LOGIN_TBL_INPROG; 971 mutex_exit(&vnic->vn_lock); 972 } 973 974 /* 975 * First, middle or last, the current table TUSN we have must match this 976 * packet's TUSN. 977 */ 978 if (etbl->tb_tusn != tusn) { 979 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_table: " 980 "unexpected TUSN (0x%lx) during vhub table construction, " 981 "expected 0x%lx", etbl->tb_tusn, tusn); 982 goto vhub_table_fail; 983 } 984 985 /* 986 * See if we've overrun/underrun our original entries count 987 */ 988 if ((etbl->tb_entries_seen + entries_in_pkt) > 989 etbl->tb_entries_in_table) { 990 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_table: " 991 "vhub table overrun, total_exp=%d, so_far=%d, this_pkt=%d", 992 etbl->tb_entries_in_table, etbl->tb_entries_seen, 993 entries_in_pkt); 994 goto vhub_table_fail; 995 } else if (((etbl->tb_entries_seen + entries_in_pkt) < 996 etbl->tb_entries_in_table) && 997 (desc_tbl->tb_hdr & FIP_TB_HDR_LAST)) { 998 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_table: " 999 "vhub table underrun, total_exp=%d, so_far=%d, last_pkt=%d", 1000 etbl->tb_entries_in_table, etbl->tb_entries_seen, 1001 entries_in_pkt); 1002 goto vhub_table_fail; 1003 } 1004 1005 /* 1006 * Process and add the entries we have in this packet 1007 */ 1008 etbl->tb_checksum += init_checksum; 1009 entry = (fip_vhub_table_entry_t *)(void *) 1010 ((uint8_t *)desc_tbl + FIP_DESC_VHUB_TABLE_SZ); 1011 1012 for (ndx = 0; ndx < entries_in_pkt; ndx++, entry++) { 1013 /* 1014 * Allocate a eib_vhub_map_t, copy the current entry details 1015 * and chain it to the appropriate queue. 1016 */ 1017 if ((newmap = eib_fip_get_vhub_map()) == NULL) { 1018 EIB_DPRINTF_WARN(ss->ei_instance, 1019 "eib_fip_parse_vhub_table: no memory for vhub " 1020 "table entry, ignoring this vhub table packet"); 1021 goto vhub_table_fail; 1022 } 1023 1024 ASSERT((entry->te_v_rss_type & FIP_TE_VALID) == FIP_TE_VALID); 1025 newmap->mp_v_rss_type = entry->te_v_rss_type; 1026 bcopy(entry->te_mac, newmap->mp_mac, ETHERADDRL); 1027 newmap->mp_qpn = (ntohl(entry->te_qpn) & FIP_TE_QPN_MASK); 1028 newmap->mp_sl = (entry->te_sl & FIP_TE_SL_MASK); 1029 newmap->mp_lid = ntohs(entry->te_lid); 1030 newmap->mp_tusn = tusn; 1031 newmap->mp_next = NULL; 1032 1033 /* 1034 * The vhub table messages do not provide status on eport 1035 * state, so we'll simply assume that the eport is up. 1036 */ 1037 eib_fip_queue_tbl_entry(etbl, newmap, tusn, FIP_EPORT_UP); 1038 1039 /* 1040 * Update table checksum with this entry's computed checksum 1041 */ 1042 ipkt = (uint32_t *)entry; 1043 for (i = 0; i < FIP_VHUB_TABLE_ENTRY_WORDS; i++) 1044 etbl->tb_checksum += ipkt[i]; 1045 } 1046 etbl->tb_entries_seen += entries_in_pkt; 1047 1048 /* 1049 * If this is the last packet of this vhub table, complete vhub 1050 * table by verifying checksum and applying all the vhub updates 1051 * that may have come in while we were constructing this table. 1052 */ 1053 if (desc_tbl->tb_hdr & FIP_TB_HDR_LAST) { 1054 1055 ipkt = (uint32_t *)entry; 1056 if (!eib_wa_no_good_vhub_cksum) { 1057 if (*ipkt != etbl->tb_checksum) { 1058 EIB_DPRINTF_VERBOSE(ss->ei_instance, 1059 "eib_fip_parse_vhub_table: " 1060 "vhub table checksum invalid, " 1061 "computed=0x%lx, found=0x%lx", 1062 etbl->tb_checksum, *ipkt); 1063 } 1064 } 1065 1066 /* 1067 * Per the EoIB specification, the gateway is supposed to 1068 * include its address information for data messages in the 1069 * vhub table. But we've observed that it doesn't do this 1070 * (with the current version). If this is the case, we'll 1071 * hand-create and add a vhub map for the gateway from the 1072 * information we got in login ack. 1073 */ 1074 if (etbl->tb_gateway == NULL) 1075 eib_fip_queue_gw_entry(vnic, etbl, tusn, FIP_EPORT_UP); 1076 1077 /* 1078 * Apply pending vhub updates and reset table counters needed 1079 * during table construction. 1080 */ 1081 if (eib_fip_apply_updates(ss, etbl, eupd) != EIB_E_SUCCESS) 1082 goto vhub_table_fail; 1083 1084 etbl->tb_entries_seen = 0; 1085 etbl->tb_entries_in_table = 0; 1086 1087 eib_vnic_vhub_table_done(vnic, EIB_LOGIN_TBL_DONE); 1088 } 1089 1090 return (EIB_E_SUCCESS); 1091 1092 vhub_table_fail: 1093 eib_vnic_vhub_table_done(vnic, EIB_LOGIN_TBL_FAILED); 1094 return (EIB_E_FAILURE); 1095 } 1096 1097 static int 1098 eib_fip_parse_vhub_update(uint8_t *pkt, eib_vnic_t *vnic) 1099 { 1100 fip_vhub_update_t *upd; 1101 fip_desc_vhub_update_t *desc_upd; 1102 fip_vhub_table_entry_t *entry; 1103 fip_basic_hdr_t *hdr; 1104 eib_t *ss = vnic->vn_ss; 1105 eib_login_data_t *ld = &vnic->vn_login_data; 1106 eib_vhub_table_t *etbl = vnic->vn_vhub_table; 1107 eib_vhub_update_t *eupd = vnic->vn_vhub_update; 1108 eib_vhub_map_t *newmap; 1109 boolean_t vhub_tbl_done; 1110 uint32_t eport_vp_vhub_id; 1111 uint32_t vhub_id; 1112 uint32_t tusn; 1113 uint32_t prev_tusn; 1114 uint8_t eport_state; 1115 1116 /* 1117 * We should have the vhub table allocated as long as we're receiving 1118 * vhub control messages. 1119 */ 1120 if (etbl == NULL) { 1121 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_update: " 1122 "vhub table missing for vnic id 0x%x", vnic->vn_id); 1123 return (EIB_E_FAILURE); 1124 } 1125 1126 mutex_enter(&vnic->vn_lock); 1127 vhub_tbl_done = ((vnic->vn_state == EIB_LOGIN_TBL_DONE) || 1128 (vnic->vn_state == EIB_LOGIN_DONE)) ? B_TRUE : B_FALSE; 1129 mutex_exit(&vnic->vn_lock); 1130 1131 /* 1132 * Note that 'pkt' is always atleast double-word aligned when it is 1133 * passed to us, so we can cast it without any problems. 1134 */ 1135 upd = (fip_vhub_update_t *)(void *)pkt; 1136 hdr = &(upd->vu_fip_header); 1137 1138 /* 1139 * Validate all the header and descriptor types and lengths 1140 */ 1141 if (hdr->hd_type != FIP_DESC_TYPE_VENDOR_ID || 1142 hdr->hd_len != FIP_DESC_LEN_VENDOR_ID) { 1143 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_update: " 1144 "invalid type/len in fip basic header, " 1145 "exp (0x%x,0x%x), got (0x%x,0x%x)", 1146 FIP_DESC_TYPE_VENDOR_ID, FIP_DESC_LEN_VENDOR_ID, 1147 hdr->hd_type, hdr->hd_len); 1148 return (EIB_E_FAILURE); 1149 } 1150 desc_upd = &(upd->vu_vhub_update); 1151 if (desc_upd->up_type != FIP_DESC_TYPE_VHUB_UPDATE || 1152 desc_upd->up_len != FIP_DESC_LEN_VHUB_UPDATE) { 1153 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_update: " 1154 "invalid type/len in vhub update desc: " 1155 "exp (0x%x,0x%x), got (0x%x,0x%x)", 1156 FIP_DESC_TYPE_VHUB_UPDATE, FIP_DESC_LEN_VHUB_UPDATE, 1157 desc_upd->up_type, desc_upd->up_len); 1158 return (EIB_E_FAILURE); 1159 } 1160 1161 /* 1162 * Verify that the vhub id is ok for this vnic and save the eport state 1163 */ 1164 eport_vp_vhub_id = ntohl(desc_upd->up_eport_vp_vhub_id); 1165 1166 vhub_id = eport_vp_vhub_id & FIP_UP_VHUB_ID_MASK; 1167 if (vhub_id != ld->ld_vhub_id) { 1168 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_update: " 1169 "invalid vhub id in vhub update pkt: exp 0x%x, got 0x%x", 1170 ld->ld_vhub_id, vhub_id); 1171 return (EIB_E_FAILURE); 1172 } 1173 eport_state = (uint8_t)((eport_vp_vhub_id >> FIP_UP_EPORT_STATE_SHIFT) & 1174 FIP_UP_EPORT_STATE_MASK); 1175 1176 /* 1177 * If this is the first update we receive, any tusn is ok. Otherwise, 1178 * make sure the tusn we see in the packet is appropriate. 1179 */ 1180 tusn = ntohl(desc_upd->up_tusn); 1181 prev_tusn = vhub_tbl_done ? etbl->tb_tusn : eupd->up_tusn; 1182 1183 if (prev_tusn != 0) { 1184 if (tusn == prev_tusn) { 1185 eib_fip_update_eport_state(ss, etbl, eupd, 1186 vhub_tbl_done, eport_state); 1187 return (EIB_E_SUCCESS); 1188 } 1189 if (tusn != (prev_tusn + 1)) { 1190 EIB_DPRINTF_WARN(ss->ei_instance, 1191 "eib_fip_parse_vhub_update: " 1192 "out of order TUSN received (exp 0x%lx, " 1193 "got 0x%lx), dropping pkt", prev_tusn + 1, tusn); 1194 return (EIB_E_FAILURE); 1195 } 1196 } 1197 1198 /* 1199 * EoIB expects only type 0 (vnic address) entries to maintain the 1200 * context table 1201 */ 1202 entry = &(desc_upd->up_tbl_entry); 1203 ASSERT((entry->te_v_rss_type & FIP_TE_TYPE_MASK) == FIP_TE_TYPE_VNIC); 1204 1205 /* 1206 * If the vHUB table has already been fully constructed and if we've 1207 * now received a notice to remove a vnic entry from it, do it. 1208 */ 1209 if ((vhub_tbl_done) && 1210 ((entry->te_v_rss_type & FIP_TE_VALID) == 0)) { 1211 eib_fip_dequeue_tbl_entry(etbl, entry->te_mac, 1212 tusn, eport_state); 1213 1214 if (bcmp(entry->te_mac, ld->ld_assigned_mac, ETHERADDRL) == 0) { 1215 uint8_t *mymac; 1216 1217 mymac = entry->te_mac; 1218 EIB_DPRINTF_WARN(ss->ei_instance, 1219 "eib_fip_parse_vhub_update: " 1220 "vhub update pkt received to kill self " 1221 "(%x:%x:%x:%x:%x:%x)", mymac[0], mymac[1], mymac[2], 1222 mymac[3], mymac[4], mymac[5]); 1223 1224 return (EIB_E_FAILURE); 1225 } 1226 return (EIB_E_SUCCESS); 1227 } 1228 1229 /* 1230 * Otherwise, allocate a new eib_vhub_map_t and fill it in with 1231 * the details of the new entry 1232 */ 1233 if ((newmap = eib_fip_get_vhub_map()) == NULL) { 1234 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_update: " 1235 "no memory for vhub update entry, will be ignoring" 1236 "this vhub update packet"); 1237 return (EIB_E_FAILURE); 1238 } 1239 1240 newmap->mp_v_rss_type = entry->te_v_rss_type; 1241 bcopy(entry->te_mac, newmap->mp_mac, ETHERADDRL); 1242 newmap->mp_qpn = (ntohl(entry->te_qpn) & FIP_TE_QPN_MASK); 1243 newmap->mp_sl = (entry->te_sl & FIP_TE_SL_MASK); 1244 newmap->mp_lid = ntohs(entry->te_lid); 1245 newmap->mp_tusn = tusn; 1246 newmap->mp_next = NULL; 1247 1248 /* 1249 * Update the full vhub table or chain it to the list of pending 1250 * updates depending on if the vhub table construction is over 1251 * or not. 1252 */ 1253 if (vhub_tbl_done) { 1254 eib_fip_queue_tbl_entry(etbl, newmap, tusn, eport_state); 1255 } else { 1256 eib_fip_queue_upd_entry(eupd, newmap, tusn, eport_state); 1257 } 1258 1259 return (EIB_E_SUCCESS); 1260 } 1261 1262 static void 1263 eib_fip_update_eport_state(eib_t *ss, eib_vhub_table_t *tbl, 1264 eib_vhub_update_t *upd, boolean_t tbl_done, uint8_t eport_state) 1265 { 1266 if (tbl_done) { 1267 mutex_enter(&tbl->tb_lock); 1268 if (tbl->tb_eport_state != eport_state) { 1269 EIB_DPRINTF_DEBUG(ss->ei_instance, 1270 "eib_fip_update_eport_state: " 1271 "eport state changing from %d to %d", 1272 tbl->tb_eport_state, eport_state); 1273 tbl->tb_eport_state = eport_state; 1274 } 1275 mutex_exit(&tbl->tb_lock); 1276 } else { 1277 mutex_enter(&upd->up_lock); 1278 if (upd->up_eport_state != eport_state) { 1279 EIB_DPRINTF_DEBUG(ss->ei_instance, 1280 "eib_fip_update_eport_state: " 1281 "eport state changing from %d to %d", 1282 upd->up_eport_state, eport_state); 1283 upd->up_eport_state = eport_state; 1284 } 1285 mutex_exit(&upd->up_lock); 1286 } 1287 } 1288 1289 static void 1290 eib_fip_queue_tbl_entry(eib_vhub_table_t *tbl, eib_vhub_map_t *map, 1291 uint32_t tusn, uint8_t eport_state) 1292 { 1293 uint8_t bkt; 1294 1295 mutex_enter(&tbl->tb_lock); 1296 1297 switch (map->mp_v_rss_type & FIP_TE_TYPE_MASK) { 1298 case FIP_TE_TYPE_GATEWAY: 1299 if (tbl->tb_gateway) { 1300 kmem_free(tbl->tb_gateway, 1301 sizeof (eib_vhub_map_t)); 1302 } 1303 tbl->tb_gateway = map; 1304 break; 1305 1306 case FIP_TE_TYPE_UNICAST_MISS: 1307 if (tbl->tb_unicast_miss) { 1308 kmem_free(tbl->tb_unicast_miss, 1309 sizeof (eib_vhub_map_t)); 1310 } 1311 tbl->tb_unicast_miss = map; 1312 break; 1313 1314 case FIP_TE_TYPE_VHUB_MULTICAST: 1315 if (tbl->tb_vhub_multicast) { 1316 kmem_free(tbl->tb_vhub_multicast, 1317 sizeof (eib_vhub_map_t)); 1318 } 1319 tbl->tb_vhub_multicast = map; 1320 break; 1321 1322 case FIP_TE_TYPE_MULTICAST_ENTRY: 1323 /* 1324 * If multicast entry types are not to be specially 1325 * processed, treat them like regular vnic addresses. 1326 */ 1327 if (!eib_wa_no_mcast_entries) { 1328 bkt = (map->mp_mac[ETHERADDRL-1]) % EIB_TB_NBUCKETS; 1329 map->mp_next = tbl->tb_mcast_entry[bkt]; 1330 tbl->tb_mcast_entry[bkt] = map; 1331 break; 1332 } 1333 /*FALLTHROUGH*/ 1334 1335 case FIP_TE_TYPE_VNIC: 1336 bkt = (map->mp_mac[ETHERADDRL-1]) % EIB_TB_NBUCKETS; 1337 map->mp_next = tbl->tb_vnic_entry[bkt]; 1338 tbl->tb_vnic_entry[bkt] = map; 1339 break; 1340 } 1341 1342 tbl->tb_tusn = tusn; 1343 tbl->tb_eport_state = eport_state; 1344 1345 mutex_exit(&tbl->tb_lock); 1346 } 1347 1348 static void 1349 eib_fip_queue_upd_entry(eib_vhub_update_t *upd, eib_vhub_map_t *map, 1350 uint32_t tusn, uint8_t eport_state) 1351 { 1352 eib_vhub_map_t *tail; 1353 1354 /* 1355 * The eib_vhub_update_t list is only touched/traversed when the 1356 * control cq handler is parsing either update or table message, 1357 * or by the table cleanup routine when we aren't attached to any 1358 * control mcgs. Bottom line is that this list traversal is always 1359 * single-threaded and we could probably do away with the lock. 1360 */ 1361 mutex_enter(&upd->up_lock); 1362 for (tail = upd->up_vnic_entry; tail != NULL; tail = tail->mp_next) { 1363 if (tail->mp_next == NULL) 1364 break; 1365 } 1366 if (tail) { 1367 tail->mp_next = map; 1368 } else { 1369 upd->up_vnic_entry = map; 1370 } 1371 1372 upd->up_tusn = tusn; 1373 upd->up_eport_state = eport_state; 1374 1375 mutex_exit(&upd->up_lock); 1376 } 1377 1378 static void 1379 eib_fip_queue_gw_entry(eib_vnic_t *vnic, eib_vhub_table_t *tbl, uint32_t tusn, 1380 uint8_t eport_state) 1381 { 1382 eib_t *ss = vnic->vn_ss; 1383 eib_vhub_map_t *newmap; 1384 eib_login_data_t *ld = &vnic->vn_login_data; 1385 1386 if ((newmap = eib_fip_get_vhub_map()) == NULL) { 1387 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_queue_gw_entry: " 1388 "no memory to queue gw entry, transactions could fail"); 1389 return; 1390 } 1391 1392 newmap->mp_v_rss_type = FIP_TE_VALID | FIP_TE_TYPE_GATEWAY; 1393 bcopy(eib_zero_mac, newmap->mp_mac, ETHERADDRL); 1394 newmap->mp_qpn = ld->ld_gw_data_qpn; 1395 newmap->mp_sl = ld->ld_gw_sl; 1396 newmap->mp_lid = ld->ld_gw_lid; 1397 newmap->mp_tusn = tusn; 1398 newmap->mp_next = NULL; 1399 1400 eib_fip_queue_tbl_entry(tbl, newmap, tusn, eport_state); 1401 } 1402 1403 static int 1404 eib_fip_apply_updates(eib_t *ss, eib_vhub_table_t *tbl, eib_vhub_update_t *upd) 1405 { 1406 eib_vhub_map_t *list; 1407 eib_vhub_map_t *map; 1408 eib_vhub_map_t *nxt; 1409 uint32_t tbl_tusn = tbl->tb_tusn; 1410 1411 /* 1412 * Take the update list out 1413 */ 1414 mutex_enter(&upd->up_lock); 1415 list = upd->up_vnic_entry; 1416 upd->up_vnic_entry = NULL; 1417 mutex_exit(&upd->up_lock); 1418 1419 /* 1420 * Skip any updates with older/same tusn as our vhub table 1421 */ 1422 nxt = NULL; 1423 for (map = list; (map) && (map->mp_tusn <= tbl_tusn); map = nxt) { 1424 nxt = map->mp_next; 1425 kmem_free(map, sizeof (eib_vhub_map_t)); 1426 } 1427 1428 if (map == NULL) 1429 return (EIB_E_SUCCESS); 1430 1431 /* 1432 * If we missed any updates between table tusn and the first 1433 * update tusn we got, we need to fail. 1434 */ 1435 if (map->mp_tusn > (tbl_tusn + 1)) { 1436 EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_apply_updates: " 1437 "vhub update missed tusn(s), expected=0x%lx, got=0x%lx", 1438 (tbl_tusn + 1), map->mp_tusn); 1439 for (; map != NULL; map = nxt) { 1440 nxt = map->mp_next; 1441 kmem_free(map, sizeof (eib_vhub_map_t)); 1442 } 1443 return (EIB_E_FAILURE); 1444 } 1445 1446 /* 1447 * If everything is fine, apply all the updates we received 1448 */ 1449 for (; map != NULL; map = nxt) { 1450 nxt = map->mp_next; 1451 map->mp_next = NULL; 1452 1453 if (map->mp_v_rss_type & FIP_TE_VALID) { 1454 eib_fip_queue_tbl_entry(tbl, map, upd->up_tusn, 1455 upd->up_eport_state); 1456 } else { 1457 eib_fip_dequeue_tbl_entry(tbl, map->mp_mac, 1458 upd->up_tusn, upd->up_eport_state); 1459 kmem_free(map, sizeof (eib_vhub_map_t)); 1460 } 1461 } 1462 1463 return (EIB_E_SUCCESS); 1464 } 1465 1466 static void 1467 eib_fip_dequeue_tbl_entry(eib_vhub_table_t *tbl, uint8_t *mac, uint32_t tusn, 1468 uint8_t eport_state) 1469 { 1470 uint8_t bkt; 1471 eib_vhub_map_t *prev; 1472 eib_vhub_map_t *elem; 1473 1474 bkt = (mac[ETHERADDRL-1]) % EIB_TB_NBUCKETS; 1475 1476 mutex_enter(&tbl->tb_lock); 1477 1478 /* 1479 * Note that for EoIB, the vhub table is maintained using only 1480 * vnic entry updates 1481 */ 1482 prev = NULL; 1483 for (elem = tbl->tb_vnic_entry[bkt]; elem; elem = elem->mp_next) { 1484 if (bcmp(elem->mp_mac, mac, ETHERADDRL) == 0) 1485 break; 1486 prev = elem; 1487 } 1488 1489 if (prev && elem) { 1490 prev->mp_next = elem->mp_next; 1491 kmem_free(elem, sizeof (eib_vhub_map_t)); 1492 } 1493 1494 tbl->tb_tusn = tusn; 1495 tbl->tb_eport_state = eport_state; 1496 1497 mutex_exit(&tbl->tb_lock); 1498 } 1499 1500 static eib_vhub_map_t * 1501 eib_fip_get_vhub_map(void) 1502 { 1503 return (kmem_zalloc(sizeof (eib_vhub_map_t), KM_NOSLEEP)); 1504 } 1505