1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/kmem.h> 28 #include <sys/conf.h> 29 #include <sys/ddi.h> 30 #include <sys/sunddi.h> 31 #include <sys/sunndi.h> 32 #include <sys/ksynch.h> 33 #include <sys/callb.h> 34 #include <sys/ib/mgt/sm_attr.h> /* SM_INIT_TYPE_REPLY_... */ 35 36 #include <sys/ib/clients/eoib/enx_impl.h> 37 38 /* 39 * Static function declarations 40 */ 41 static void eibnx_gw_is_alive(eibnx_gw_info_t *); 42 static void eibnx_gw_is_aware(eibnx_thr_info_t *, eibnx_gw_info_t *, boolean_t); 43 static void eibnx_process_rx(eibnx_thr_info_t *, ibt_wc_t *, eibnx_wqe_t *); 44 static void eibnx_handle_wcerr(uint8_t, eibnx_wqe_t *, eibnx_thr_info_t *); 45 static void eibnx_handle_login_ack(eibnx_thr_info_t *, uint8_t *); 46 static void eibnx_handle_gw_rebirth(eibnx_thr_info_t *, uint16_t); 47 static void eibnx_handle_gw_info_update(eibnx_thr_info_t *, uint16_t, void *); 48 static int eibnx_replace_portinfo(eibnx_thr_info_t *, ibt_hca_portinfo_t *, 49 uint_t); 50 static void eibnx_handle_port_events(ibt_hca_hdl_t, uint8_t); 51 static void eibnx_handle_hca_attach(ib_guid_t); 52 static void eibnx_handle_hca_detach(ib_guid_t); 53 54 /* 55 * NDI event handle we need 56 */ 57 extern ndi_event_hdl_t enx_ndi_event_hdl; 58 59 /* 60 * SM's init type reply flags 61 */ 62 #define ENX_PORT_ATTR_LOADED(itr) \ 63 (((itr) & SM_INIT_TYPE_REPLY_NO_LOAD_REPLY) == 0) 64 #define ENX_PORT_ATTR_NOT_PRESERVED(itr) \ 65 (((itr) & SM_INIT_TYPE_PRESERVE_CONTENT_REPLY) == 0) 66 #define ENX_PORT_PRES_NOT_PRESERVED(itr) \ 67 (((itr) & SM_INIT_TYPE_PRESERVE_PRESENCE_REPLY) == 0) 68 69 /* 70 * Port monitor progress flags (all flag values should be non-zero) 71 */ 72 #define ENX_MON_LINKSTATE_UP 0x01 73 #define ENX_MON_FOUND_MCGS 0x02 74 #define ENX_MON_SETUP_CQ 0x04 75 #define ENX_MON_SETUP_UD_CHAN 0x08 76 #define ENX_MON_SETUP_BUFS 0x10 77 #define ENX_MON_SETUP_CQ_HDLR 0x20 78 #define ENX_MON_JOINED_MCGS 0x40 79 #define ENX_MON_MULTICAST_SLCT 0x80 80 #define ENX_MON_MAX 0xFF 81 82 /* 83 * Per-port thread to solicit, monitor and discover EoIB gateways 84 * and create the corresponding EoIB driver instances on the host. 85 */ 86 void 87 eibnx_port_monitor(eibnx_thr_info_t *info) 88 { 89 clock_t solicit_period_ticks; 90 clock_t deadline; 91 kmutex_t ci_lock; 92 callb_cpr_t ci; 93 char thr_name[MAXNAMELEN]; 94 95 (void) snprintf(thr_name, MAXNAMELEN, ENX_PORT_MONITOR, 96 info->ti_pi->p_port_num); 97 98 mutex_init(&ci_lock, NULL, MUTEX_DRIVER, NULL); 99 CALLB_CPR_INIT(&ci, &ci_lock, callb_generic_cpr, thr_name); 100 101 info->ti_progress = 0; 102 103 /* 104 * If the port is not active yet, wait for a port up event. The 105 * async handler, when it sees a port-up event, is expected to 106 * update the port_monitor's portinfo structure's p_linkstate 107 * and wake us up with ENX_EVENT_LINK_UP. 108 */ 109 while (info->ti_pi->p_linkstate != IBT_PORT_ACTIVE) { 110 mutex_enter(&info->ti_event_lock); 111 while ((info->ti_event & 112 (ENX_EVENT_LINK_UP | ENX_EVENT_DIE)) == 0) { 113 mutex_enter(&ci_lock); 114 CALLB_CPR_SAFE_BEGIN(&ci); 115 mutex_exit(&ci_lock); 116 117 cv_wait(&info->ti_event_cv, &info->ti_event_lock); 118 119 mutex_enter(&ci_lock); 120 CALLB_CPR_SAFE_END(&ci, &ci_lock); 121 mutex_exit(&ci_lock); 122 } 123 if (info->ti_event & ENX_EVENT_DIE) { 124 mutex_exit(&info->ti_event_lock); 125 goto port_monitor_exit; 126 } 127 info->ti_event &= (~ENX_EVENT_LINK_UP); 128 mutex_exit(&info->ti_event_lock); 129 } 130 info->ti_progress |= ENX_MON_LINKSTATE_UP; 131 132 /* 133 * Locate the multicast groups for sending solicit requests 134 * to the GW and receiving advertisements from the GW. If 135 * either of the mcg is not present, wait for them to be 136 * created by the GW. 137 */ 138 while (eibnx_find_mgroups(info) != ENX_E_SUCCESS) { 139 mutex_enter(&info->ti_event_lock); 140 while ((info->ti_event & 141 (ENX_EVENT_MCGS_AVAILABLE | ENX_EVENT_DIE)) == 0) { 142 mutex_enter(&ci_lock); 143 CALLB_CPR_SAFE_BEGIN(&ci); 144 mutex_exit(&ci_lock); 145 146 cv_wait(&info->ti_event_cv, &info->ti_event_lock); 147 148 mutex_enter(&ci_lock); 149 CALLB_CPR_SAFE_END(&ci, &ci_lock); 150 mutex_exit(&ci_lock); 151 } 152 if (info->ti_event & ENX_EVENT_DIE) { 153 mutex_exit(&info->ti_event_lock); 154 goto port_monitor_exit; 155 } 156 info->ti_event &= (~ENX_EVENT_MCGS_AVAILABLE); 157 mutex_exit(&info->ti_event_lock); 158 } 159 info->ti_progress |= ENX_MON_FOUND_MCGS; 160 161 /* 162 * Setup a shared CQ 163 */ 164 if (eibnx_setup_cq(info) != ENX_E_SUCCESS) { 165 ENX_DPRINTF_ERR("eibnx_setup_cq() failed, terminating " 166 "port monitor for (hca_guid=0x%llx, port_num=0x%x)", 167 info->ti_hca_guid, info->ti_pi->p_port_num); 168 goto port_monitor_exit; 169 } 170 info->ti_progress |= ENX_MON_SETUP_CQ; 171 172 /* 173 * Setup UD channel 174 */ 175 if (eibnx_setup_ud_channel(info) != ENX_E_SUCCESS) { 176 ENX_DPRINTF_ERR("eibnx_setup_ud_channel() failed, terminating " 177 "port monitor for (hca_guid=0x%llx, port_num=0x%x)", 178 info->ti_hca_guid, info->ti_pi->p_port_num); 179 goto port_monitor_exit; 180 } 181 info->ti_progress |= ENX_MON_SETUP_UD_CHAN; 182 183 /* 184 * Allocate/initialize any tx/rx buffers 185 */ 186 if (eibnx_setup_bufs(info) != ENX_E_SUCCESS) { 187 ENX_DPRINTF_ERR("eibnx_setup_bufs() failed, terminating " 188 "port monitor for (hca_guid=0x%llx, port_num=0x%x)", 189 info->ti_hca_guid, info->ti_pi->p_port_num); 190 goto port_monitor_exit; 191 } 192 info->ti_progress |= ENX_MON_SETUP_BUFS; 193 194 /* 195 * Setup completion handler 196 */ 197 if (eibnx_setup_cq_handler(info) != ENX_E_SUCCESS) { 198 ENX_DPRINTF_ERR("eibnx_setup_cq_handler() failed, terminating " 199 "port monitor for (hca_guid=0x%llx, port_num=0x%x)", 200 info->ti_hca_guid, info->ti_pi->p_port_num); 201 goto port_monitor_exit; 202 } 203 info->ti_progress |= ENX_MON_SETUP_CQ_HDLR; 204 205 /* 206 * Join EoIB multicast groups 207 */ 208 if (eibnx_join_mcgs(info) != ENX_E_SUCCESS) { 209 ENX_DPRINTF_ERR("eibnx_join_mcgs() failed, terminating ", 210 "port monitor for (hca_guid=0x%llx, port_num=0x%x)", 211 info->ti_hca_guid, info->ti_pi->p_port_num); 212 goto port_monitor_exit; 213 } 214 info->ti_progress |= ENX_MON_JOINED_MCGS; 215 216 /* 217 * Send SOLICIT pkt to the EoIB multicast group 218 */ 219 if (eibnx_fip_solicit_mcast(info) != ENX_E_SUCCESS) { 220 ENX_DPRINTF_ERR("eibnx_fip_solicit_mcast() failed, terminating " 221 "port monitor for (hca_guid=0x%llx, port_num=0x%x)", 222 info->ti_hca_guid, info->ti_pi->p_port_num); 223 goto port_monitor_exit; 224 } 225 info->ti_progress |= ENX_MON_MULTICAST_SLCT; 226 227 mutex_enter(&info->ti_event_lock); 228 229 solicit_period_ticks = drv_usectohz(ENX_DFL_SOLICIT_PERIOD_USEC); 230 231 periodic_solicit: 232 deadline = ddi_get_lbolt() + solicit_period_ticks; 233 while ((info->ti_event & (ENX_EVENT_TIMED_OUT | ENX_EVENT_DIE)) == 0) { 234 mutex_enter(&ci_lock); 235 CALLB_CPR_SAFE_BEGIN(&ci); 236 mutex_exit(&ci_lock); 237 238 if (cv_timedwait(&info->ti_event_cv, &info->ti_event_lock, 239 deadline) == -1) { 240 info->ti_event |= ENX_EVENT_TIMED_OUT; 241 } 242 243 mutex_enter(&ci_lock); 244 CALLB_CPR_SAFE_END(&ci, &ci_lock); 245 mutex_exit(&ci_lock); 246 } 247 248 if (info->ti_event & ENX_EVENT_DIE) { 249 mutex_exit(&info->ti_event_lock); 250 goto port_monitor_exit; 251 } 252 253 if (info->ti_event & ENX_EVENT_TIMED_OUT) { 254 if (eibnx_fip_solicit_ucast(info, 255 &solicit_period_ticks) != ENX_E_SUCCESS) { 256 ENX_DPRINTF_WARN("failed to send solicit ucast to " 257 "gateways (hca_guid=0x%llx, port_num=0x%x)", 258 info->ti_hca_guid, info->ti_pi->p_port_num); 259 } 260 info->ti_event &= ~ENX_EVENT_TIMED_OUT; 261 } 262 263 goto periodic_solicit; 264 265 port_monitor_exit: 266 if (info->ti_progress & ENX_MON_MULTICAST_SLCT) { 267 eibnx_cleanup_port_nodes(info); 268 info->ti_progress &= (~ENX_MON_MULTICAST_SLCT); 269 } 270 if (info->ti_progress & ENX_MON_JOINED_MCGS) { 271 eibnx_rb_join_mcgs(info); 272 info->ti_progress &= (~ENX_MON_JOINED_MCGS); 273 } 274 if (info->ti_progress & ENX_MON_SETUP_CQ_HDLR) { 275 eibnx_rb_setup_cq_handler(info); 276 info->ti_progress &= (~ENX_MON_SETUP_CQ_HDLR); 277 } 278 if (info->ti_progress & ENX_MON_SETUP_BUFS) { 279 eibnx_rb_setup_bufs(info); 280 info->ti_progress &= (~ENX_MON_SETUP_BUFS); 281 } 282 if (info->ti_progress & ENX_MON_SETUP_UD_CHAN) { 283 eibnx_rb_setup_ud_channel(info); 284 info->ti_progress &= (~ENX_MON_SETUP_UD_CHAN); 285 } 286 if (info->ti_progress & ENX_MON_SETUP_CQ) { 287 eibnx_rb_setup_cq(info); 288 info->ti_progress &= (~ENX_MON_SETUP_CQ); 289 } 290 if (info->ti_progress & ENX_MON_FOUND_MCGS) { 291 eibnx_rb_find_mgroups(info); 292 info->ti_progress &= (~ENX_MON_FOUND_MCGS); 293 } 294 295 mutex_enter(&ci_lock); 296 CALLB_CPR_EXIT(&ci); 297 mutex_destroy(&ci_lock); 298 } 299 300 /* 301 * Async subnet notices handler registered with IBTF 302 */ 303 /*ARGSUSED*/ 304 void 305 eibnx_subnet_notices_handler(void *arg, ib_gid_t gid, 306 ibt_subnet_event_code_t sn_evcode, ibt_subnet_event_t *sn_event) 307 { 308 eibnx_t *ss = enx_global_ss; 309 eibnx_thr_info_t *ti; 310 ib_gid_t notice_gid; 311 312 switch (sn_evcode) { 313 case IBT_SM_EVENT_MCG_CREATED: 314 notice_gid = sn_event->sm_notice_gid; 315 316 if ((notice_gid.gid_prefix == enx_solicit_mgid.gid_prefix && 317 notice_gid.gid_guid == enx_solicit_mgid.gid_guid) || 318 (notice_gid.gid_prefix == enx_advertise_mgid.gid_prefix && 319 notice_gid.gid_guid == enx_advertise_mgid.gid_guid)) { 320 321 mutex_enter(&ss->nx_lock); 322 for (ti = ss->nx_thr_info; ti; ti = ti->ti_next) { 323 mutex_enter(&ti->ti_event_lock); 324 ti->ti_event |= ENX_EVENT_MCGS_AVAILABLE; 325 cv_broadcast(&ti->ti_event_cv); 326 mutex_exit(&ti->ti_event_lock); 327 } 328 mutex_exit(&ss->nx_lock); 329 } 330 break; 331 332 case IBT_SM_EVENT_MCG_DELETED: 333 break; 334 335 default: 336 break; 337 } 338 } 339 340 /* 341 * Async event handler registered with IBTF 342 */ 343 /*ARGSUSED*/ 344 void 345 eibnx_async_handler(void *clnt_pvt, ibt_hca_hdl_t hca, 346 ibt_async_code_t code, ibt_async_event_t *event) 347 { 348 switch (code) { 349 case IBT_ERROR_CATASTROPHIC_CHAN: 350 case IBT_ERROR_INVALID_REQUEST_CHAN: 351 case IBT_ERROR_ACCESS_VIOLATION_CHAN: 352 case IBT_ERROR_CQ: 353 case IBT_ERROR_CATASTROPHIC_SRQ: 354 ENX_DPRINTF_ERR("ibt ERROR event 0x%x received " 355 "(hca_guid=0x%llx)", code, event->ev_hca_guid); 356 break; 357 358 case IBT_ERROR_PORT_DOWN: 359 ENX_DPRINTF_WARN("ibt PORT_DOWN event received " 360 "(hca_guid=0x%llx, port_num=0x%x)", 361 event->ev_hca_guid, event->ev_port); 362 break; 363 364 case IBT_EVENT_PORT_UP: 365 ENX_DPRINTF_WARN("ibt PORT_UP event received " 366 "(hca_guid=0x%llx, port_num=0x%x)", 367 event->ev_hca_guid, event->ev_port); 368 eibnx_handle_port_events(hca, event->ev_port); 369 break; 370 371 case IBT_PORT_CHANGE_EVENT: 372 ENX_DPRINTF_WARN("ibt PORT_CHANGE event received " 373 "(hca_guid=0x%llx, port_num=0x%x)", 374 event->ev_hca_guid, event->ev_port); 375 eibnx_handle_port_events(hca, event->ev_port); 376 break; 377 378 case IBT_CLNT_REREG_EVENT: 379 ENX_DPRINTF_WARN("ibt CLNT_REREG event received " 380 "(hca_guid=0x%llx, port_num=0x%x)", 381 event->ev_hca_guid, event->ev_port); 382 eibnx_handle_port_events(hca, event->ev_port); 383 break; 384 385 case IBT_HCA_ATTACH_EVENT: 386 ENX_DPRINTF_VERBOSE("ibt HCA_ATTACH event received " 387 "(new hca_guid=0x%llx)", event->ev_hca_guid); 388 eibnx_handle_hca_attach(event->ev_hca_guid); 389 break; 390 391 case IBT_HCA_DETACH_EVENT: 392 ENX_DPRINTF_VERBOSE("ibt HCA_DETACH event received " 393 "(target hca_guid=0x%llx)", event->ev_hca_guid); 394 eibnx_handle_hca_detach(event->ev_hca_guid); 395 break; 396 397 default: 398 ENX_DPRINTF_VERBOSE("ibt UNSUPPORTED event 0x%x received " 399 "(hca_guid=0x%llx)", code, event->ev_hca_guid); 400 break; 401 } 402 } 403 404 boolean_t 405 eibnx_is_gw_dead(eibnx_gw_info_t *gwi) 406 { 407 int64_t cur_lbolt; 408 409 cur_lbolt = ddi_get_lbolt64(); 410 411 mutex_enter(&gwi->gw_adv_lock); 412 if ((cur_lbolt - gwi->gw_adv_last_lbolt) > gwi->gw_adv_timeout_ticks) { 413 gwi->gw_adv_flag = ENX_GW_DEAD; 414 mutex_exit(&gwi->gw_adv_lock); 415 return (B_TRUE); 416 } 417 mutex_exit(&gwi->gw_adv_lock); 418 419 return (B_FALSE); 420 } 421 422 static void 423 eibnx_gw_is_alive(eibnx_gw_info_t *gwi) 424 { 425 /* 426 * We've just received a multicast advertisement from this 427 * gateway. Multicast or unicast, this means that the gateway 428 * is alive. Record this timestamp (in ticks). 429 */ 430 mutex_enter(&gwi->gw_adv_lock); 431 gwi->gw_adv_last_lbolt = ddi_get_lbolt64(); 432 if (gwi->gw_adv_flag == ENX_GW_DEAD) { 433 gwi->gw_adv_flag = ENX_GW_ALIVE; 434 } 435 mutex_exit(&gwi->gw_adv_lock); 436 } 437 438 static void 439 eibnx_gw_is_aware(eibnx_thr_info_t *info, eibnx_gw_info_t *gwi, 440 boolean_t gwi_changed) 441 { 442 eib_gw_info_t eib_gwi; 443 boolean_t post_rebirth_event = B_FALSE; 444 445 /* 446 * We're here when we receive a unicast advertisement from a 447 * gateway. If this gateway was discovered earlier but was in 448 * a dead state, this means it has come back alive and become 449 * aware of us. We may need to inform any EoIB children 450 * waiting for notification. Note that if this gateway is 451 * being discovered for the first time now, we wouldn't have 452 * created the binding eoib node for it (we will do that when 453 * we return from this routine), so the "rebirth" and "gw info 454 * update" event postings will be NOPs. 455 */ 456 mutex_enter(&gwi->gw_adv_lock); 457 gwi->gw_adv_last_lbolt = ddi_get_lbolt64(); 458 if (gwi->gw_adv_flag != ENX_GW_AWARE) { 459 post_rebirth_event = B_TRUE; 460 } 461 gwi->gw_adv_flag = ENX_GW_AWARE; 462 mutex_exit(&gwi->gw_adv_lock); 463 464 /* 465 * If we have a gateway information update event, we post that 466 * first, so any rebirth event processed later will have the 467 * correct gateway information. 468 */ 469 if (gwi_changed) { 470 eib_gwi.gi_system_guid = gwi->gw_system_guid; 471 eib_gwi.gi_guid = gwi->gw_guid; 472 eib_gwi.gi_sn_prefix = gwi->gw_addr.ga_gid.gid_prefix; 473 eib_gwi.gi_adv_period = gwi->gw_adv_period; 474 eib_gwi.gi_ka_period = gwi->gw_ka_period; 475 eib_gwi.gi_vnic_ka_period = gwi->gw_vnic_ka_period; 476 eib_gwi.gi_ctrl_qpn = gwi->gw_ctrl_qpn; 477 eib_gwi.gi_lid = gwi->gw_lid; 478 eib_gwi.gi_portid = gwi->gw_portid; 479 eib_gwi.gi_num_net_vnics = gwi->gw_num_net_vnics; 480 eib_gwi.gi_flag_available = gwi->gw_flag_available; 481 eib_gwi.gi_is_host_adm_vnics = gwi->gw_is_host_adm_vnics; 482 eib_gwi.gi_sl = gwi->gw_sl; 483 eib_gwi.gi_n_rss_qpn = gwi->gw_n_rss_qpn; 484 bcopy(gwi->gw_system_name, eib_gwi.gi_system_name, 485 EIB_GW_SYSNAME_LEN); 486 bcopy(gwi->gw_port_name, eib_gwi.gi_port_name, 487 EIB_GW_PORTNAME_LEN); 488 bcopy(gwi->gw_vendor_id, eib_gwi.gi_vendor_id, 489 EIB_GW_VENDOR_LEN); 490 491 eibnx_handle_gw_info_update(info, eib_gwi.gi_portid, &eib_gwi); 492 } 493 if (post_rebirth_event) { 494 eibnx_handle_gw_rebirth(info, gwi->gw_portid); 495 } 496 } 497 498 /* 499 * Thread to create eoib nodes and online instances 500 */ 501 void 502 eibnx_create_eoib_node(void) 503 { 504 eibnx_t *ss = enx_global_ss; 505 eibnx_nodeq_t *node; 506 kmutex_t ci_lock; 507 callb_cpr_t ci; 508 509 mutex_init(&ci_lock, NULL, MUTEX_DRIVER, NULL); 510 CALLB_CPR_INIT(&ci, &ci_lock, callb_generic_cpr, ENX_NODE_CREATOR); 511 512 wait_for_node_to_create: 513 mutex_enter(&ss->nx_nodeq_lock); 514 515 while ((ss->nx_nodeq == NULL) && (ss->nx_nodeq_thr_die == 0)) { 516 mutex_enter(&ci_lock); 517 CALLB_CPR_SAFE_BEGIN(&ci); 518 mutex_exit(&ci_lock); 519 520 cv_wait(&ss->nx_nodeq_cv, &ss->nx_nodeq_lock); 521 522 mutex_enter(&ci_lock); 523 CALLB_CPR_SAFE_END(&ci, &ci_lock); 524 mutex_exit(&ci_lock); 525 } 526 527 /* 528 * If this is not really a work item, but a request for us to 529 * die, throwaway all pending work requests and just die. 530 */ 531 if (ss->nx_nodeq_thr_die) { 532 while (ss->nx_nodeq) { 533 node = ss->nx_nodeq; 534 ss->nx_nodeq = node->nc_next; 535 node->nc_next = NULL; 536 537 kmem_free(node, sizeof (eibnx_nodeq_t)); 538 } 539 mutex_exit(&ss->nx_nodeq_lock); 540 541 mutex_enter(&ci_lock); 542 CALLB_CPR_EXIT(&ci); 543 mutex_destroy(&ci_lock); 544 545 return; 546 } 547 548 /* 549 * Grab the first node entry from the queue 550 */ 551 ASSERT(ss->nx_nodeq != NULL); 552 node = ss->nx_nodeq; 553 ss->nx_nodeq = node->nc_next; 554 node->nc_next = NULL; 555 556 mutex_exit(&ss->nx_nodeq_lock); 557 558 (void) eibnx_configure_node(node->nc_info, node->nc_gwi, NULL); 559 560 kmem_free(node, sizeof (eibnx_nodeq_t)); 561 goto wait_for_node_to_create; 562 563 /*NOTREACHED*/ 564 } 565 566 /* 567 * Tx and Rx completion interrupt handler. Guaranteed to be single 568 * threaded and nonreentrant for this CQ. 569 */ 570 void 571 eibnx_comp_intr(ibt_cq_hdl_t cq_hdl, void *arg) 572 { 573 eibnx_thr_info_t *info = arg; 574 575 if (info->ti_cq_hdl != cq_hdl) { 576 ENX_DPRINTF_DEBUG("eibnx_comp_intr: " 577 "cq_hdl(0x%llx) != info->ti_cq_hdl(0x%llx), " 578 "ignoring completion", cq_hdl, info->ti_cq_hdl); 579 return; 580 } 581 582 ASSERT(info->ti_softint_hdl != NULL); 583 584 (void) ddi_intr_trigger_softint(info->ti_softint_hdl, NULL); 585 } 586 587 /* 588 * Send and Receive completion handler functions for EoIB nexus 589 */ 590 591 /*ARGSUSED*/ 592 uint_t 593 eibnx_comp_handler(caddr_t arg1, caddr_t arg2) 594 { 595 eibnx_thr_info_t *info = (eibnx_thr_info_t *)arg1; 596 ibt_wc_t *wc; 597 eibnx_wqe_t *wqe; 598 ibt_status_t ret; 599 uint_t polled; 600 int i; 601 602 /* 603 * Make sure the port monitor isn't killed if we're in the completion 604 * handler. If the port monitor thread is already being killed, we'll 605 * stop processing completions. 606 */ 607 mutex_enter(&info->ti_event_lock); 608 if (info->ti_event & (ENX_EVENT_DIE | ENX_EVENT_COMPLETION)) { 609 mutex_exit(&info->ti_event_lock); 610 return ((uint_t)ENX_E_SUCCESS); 611 } 612 info->ti_event |= ENX_EVENT_COMPLETION; 613 mutex_exit(&info->ti_event_lock); 614 615 /* 616 * Re-arm the notification callback before we start polling 617 * the completion queue. There's nothing much we can do if the 618 * enable_cq_notify fails - we issue a warning and move on. 619 */ 620 ret = ibt_enable_cq_notify(info->ti_cq_hdl, IBT_NEXT_COMPLETION); 621 if (ret != IBT_SUCCESS) { 622 ENX_DPRINTF_WARN("ibt_enable_cq_notify(cq_hdl=0x%llx) " 623 "failed, ret=%d", info->ti_cq_hdl, ret); 624 } 625 626 /* 627 * Handle tx and rx completions 628 */ 629 while ((ret = ibt_poll_cq(info->ti_cq_hdl, info->ti_wc, info->ti_cq_sz, 630 &polled)) == IBT_SUCCESS) { 631 for (wc = info->ti_wc, i = 0; i < polled; i++, wc++) { 632 wqe = (eibnx_wqe_t *)(uintptr_t)wc->wc_id; 633 if (wc->wc_status != IBT_WC_SUCCESS) { 634 eibnx_handle_wcerr(wc->wc_status, wqe, info); 635 } else if (wqe->qe_type == ENX_QETYP_RWQE) { 636 eibnx_process_rx(info, wc, wqe); 637 eibnx_return_rwqe(info, wqe); 638 } else { 639 eibnx_return_swqe(wqe); 640 } 641 } 642 } 643 644 /* 645 * On the way out, make sure we wake up any pending death requestor 646 * for the port-monitor thread. Note that we need to do a cv_broadcast() 647 * here since there could be multiple threads sleeping on the event cv 648 * and we want to make sure all waiters get a chance to see if it's 649 * their turn. 650 */ 651 mutex_enter(&info->ti_event_lock); 652 info->ti_event &= (~ENX_EVENT_COMPLETION); 653 cv_broadcast(&info->ti_event_cv); 654 mutex_exit(&info->ti_event_lock); 655 656 return (DDI_INTR_CLAIMED); 657 } 658 659 /* 660 * Rx processing code 661 */ 662 static void 663 eibnx_process_rx(eibnx_thr_info_t *info, ibt_wc_t *wc, eibnx_wqe_t *wqe) 664 { 665 eibnx_gw_msg_t msg; 666 eibnx_gw_info_t *gwi; 667 eibnx_gw_info_t *orig_gwi; 668 eibnx_gw_info_t *new_gwi; 669 uint_t orig_gw_state; 670 uint8_t *pkt = (uint8_t *)(uintptr_t)(wqe->qe_sgl.ds_va); 671 boolean_t gwi_changed; 672 673 /* 674 * We'll simply drop any packet (including broadcast advertisements 675 * from gws) we receive before we've done our solicitation broadcast. 676 */ 677 if (info->ti_mcast_done == 0) { 678 return; 679 } 680 681 /* 682 * Skip the GRH and parse the message in the packet 683 */ 684 if (eibnx_fip_parse_pkt(pkt + ENX_GRH_SZ, &msg) != ENX_E_SUCCESS) { 685 return; 686 } 687 688 /* 689 * If it was a login ack for one of our children, we need to pass 690 * it on to the child 691 */ 692 if (msg.gm_type == FIP_VNIC_LOGIN_ACK) { 693 eibnx_handle_login_ack(info, pkt); 694 return; 695 } 696 697 /* 698 * Other than that, we only handle gateway advertisements 699 */ 700 if (msg.gm_type != FIP_GW_ADVERTISE_MCAST && 701 msg.gm_type != FIP_GW_ADVERTISE_UCAST) { 702 return; 703 } 704 705 gwi = &msg.u.gm_info; 706 707 /* 708 * State machine to create eoib instances. Whether this advertisement 709 * is from a new gateway or an old gateway that we already know about, 710 * if this was a unicast response to our earlier solicitation and it's 711 * the first time we're receiving it from this gateway, we're ready to 712 * login, so we create the EoIB instance for it. 713 */ 714 orig_gwi = eibnx_find_gw_in_gwlist(info, gwi); 715 if (orig_gwi == NULL) { 716 if (gwi->gw_flag_available == 0) { 717 gwi->gw_state = ENX_GW_STATE_UNAVAILABLE; 718 gwi->gw_adv_flag = ENX_GW_ALIVE; 719 (void) eibnx_add_gw_to_gwlist(info, gwi, wc, pkt); 720 } else if (gwi->gw_flag_ucast_advt == 0) { 721 gwi->gw_state = ENX_GW_STATE_AVAILABLE; 722 gwi->gw_adv_flag = ENX_GW_ALIVE; 723 (void) eibnx_add_gw_to_gwlist(info, gwi, wc, pkt); 724 } else { 725 gwi->gw_state = ENX_GW_STATE_READY_TO_LOGIN; 726 gwi->gw_adv_flag = ENX_GW_AWARE; 727 if ((new_gwi = eibnx_add_gw_to_gwlist(info, gwi, 728 wc, pkt)) != NULL) { 729 eibnx_queue_for_creation(info, new_gwi); 730 } 731 } 732 } else { 733 orig_gw_state = orig_gwi->gw_state; 734 if (gwi->gw_flag_available == 0) { 735 gwi->gw_state = ENX_GW_STATE_UNAVAILABLE; 736 eibnx_replace_gw_in_gwlist(info, orig_gwi, gwi, 737 wc, pkt, NULL); 738 eibnx_gw_is_alive(orig_gwi); 739 740 } else if (gwi->gw_flag_ucast_advt == 0) { 741 if (orig_gw_state == ENX_GW_STATE_UNAVAILABLE) { 742 gwi->gw_state = ENX_GW_STATE_AVAILABLE; 743 } else { 744 gwi->gw_state = orig_gw_state; 745 } 746 eibnx_replace_gw_in_gwlist(info, orig_gwi, gwi, 747 wc, pkt, NULL); 748 eibnx_gw_is_alive(orig_gwi); 749 750 } else { 751 gwi->gw_state = ENX_GW_STATE_READY_TO_LOGIN; 752 eibnx_replace_gw_in_gwlist(info, orig_gwi, gwi, 753 wc, pkt, &gwi_changed); 754 eibnx_gw_is_aware(info, orig_gwi, gwi_changed); 755 756 if (orig_gw_state != ENX_GW_STATE_READY_TO_LOGIN) 757 eibnx_queue_for_creation(info, orig_gwi); 758 } 759 } 760 } 761 762 /*ARGSUSED*/ 763 static void 764 eibnx_handle_wcerr(uint8_t wcerr, eibnx_wqe_t *wqe, eibnx_thr_info_t *info) 765 { 766 /* 767 * Currently, all we do is report 768 */ 769 switch (wcerr) { 770 case IBT_WC_WR_FLUSHED_ERR: 771 ENX_DPRINTF_VERBOSE("IBT_WC_WR_FLUSHED_ERR seen " 772 "(hca_guid=0x%llx, port_num=0x%x, wqe_type=0x%x)", 773 info->ti_hca_guid, info->ti_pi->p_port_num, wqe->qe_type); 774 break; 775 776 case IBT_WC_LOCAL_CHAN_OP_ERR: 777 ENX_DPRINTF_ERR("IBT_WC_LOCAL_CHAN_OP_ERR seen " 778 "(hca_guid=0x%llx, port_num=0x%x, wqe_type=0x%x)", 779 info->ti_hca_guid, info->ti_pi->p_port_num, wqe->qe_type); 780 break; 781 782 case IBT_WC_LOCAL_PROTECT_ERR: 783 ENX_DPRINTF_ERR("IBT_WC_LOCAL_PROTECT_ERR seen " 784 "(hca_guid=0x%llx, port_num=0x%x, wqe_type=0x%x)", 785 info->ti_hca_guid, info->ti_pi->p_port_num, wqe->qe_type); 786 break; 787 } 788 } 789 790 static void 791 eibnx_handle_login_ack(eibnx_thr_info_t *info, uint8_t *pkt) 792 { 793 eibnx_t *ss = enx_global_ss; 794 fip_login_ack_t *ack; 795 fip_desc_vnic_login_t *login; 796 ddi_eventcookie_t cookie; 797 dev_info_t *rdip; 798 uint16_t vnic_id; 799 uint16_t inst; 800 int ret; 801 802 /* 803 * When we get login acknowledgements, we simply invoke the 804 * appropriate EoIB driver callback to process it on behalf 805 * of the driver instance. We will let the callback do error 806 * checks. 807 */ 808 ack = (fip_login_ack_t *)(pkt + ENX_GRH_SZ); 809 login = &(ack->ak_vnic_login); 810 vnic_id = ntohs(login->vl_vnic_id); 811 inst = EIB_DEVI_INSTANCE(vnic_id); 812 813 if ((rdip = eibnx_find_child_dip_by_inst(info, inst)) == NULL) { 814 ENX_DPRINTF_DEBUG("no eoib child with instance 0x%x found " 815 "for (hca_guid=0x%llx, port_num=0x%x)", inst, 816 info->ti_hca_guid, info->ti_pi->p_port_num); 817 return; 818 } 819 820 ret = ndi_event_retrieve_cookie(enx_ndi_event_hdl, rdip, 821 EIB_NDI_EVENT_LOGIN_ACK, &cookie, NDI_EVENT_NOPASS); 822 if (ret != NDI_SUCCESS) { 823 ENX_DPRINTF_WARN("no login-ack cookie for (hca_guid=0x%llx, " 824 "port_num=0x%x, eoib_inst=0x%x), ret=%d", info->ti_hca_guid, 825 info->ti_pi->p_port_num, inst, ret); 826 return; 827 } 828 829 (void) ndi_post_event(ss->nx_dip, rdip, cookie, (void *)pkt); 830 } 831 832 static void 833 eibnx_handle_gw_rebirth(eibnx_thr_info_t *info, uint16_t portid) 834 { 835 eibnx_t *ss = enx_global_ss; 836 ddi_eventcookie_t cookie; 837 dev_info_t *rdip; 838 int ret; 839 840 if ((rdip = eibnx_find_child_dip_by_gw(info, portid)) == NULL) { 841 ENX_DPRINTF_WARN("no eoib child bound to gw portid 0x%x " 842 "found for (hca_guid=0x%llx, port_num=0x%x)", 843 portid, info->ti_hca_guid, info->ti_pi->p_port_num); 844 return; 845 } 846 847 ret = ndi_event_retrieve_cookie(enx_ndi_event_hdl, rdip, 848 EIB_NDI_EVENT_GW_AVAILABLE, &cookie, NDI_EVENT_NOPASS); 849 if (ret != NDI_SUCCESS) { 850 ENX_DPRINTF_WARN("no gw-available cookie for (hca_guid=0x%llx, " 851 "port_num=0x%x, gw_portid=0x%x), ret=%d", info->ti_hca_guid, 852 info->ti_pi->p_port_num, portid, ret); 853 return; 854 } 855 856 (void) ndi_post_event(ss->nx_dip, rdip, cookie, NULL); 857 } 858 859 static void 860 eibnx_handle_gw_info_update(eibnx_thr_info_t *info, uint16_t portid, 861 void *new_gw_info) 862 { 863 eibnx_t *ss = enx_global_ss; 864 ddi_eventcookie_t cookie; 865 dev_info_t *rdip; 866 int ret; 867 868 if ((rdip = eibnx_find_child_dip_by_gw(info, portid)) == NULL) { 869 ENX_DPRINTF_WARN("no eoib child bound to gw portid 0x%x " 870 "found for (hca_guid=0x%llx, port_num=0x%x)", 871 portid, info->ti_hca_guid, info->ti_pi->p_port_num); 872 return; 873 } 874 875 ret = ndi_event_retrieve_cookie(enx_ndi_event_hdl, rdip, 876 EIB_NDI_EVENT_GW_INFO_UPDATE, &cookie, NDI_EVENT_NOPASS); 877 if (ret != NDI_SUCCESS) { 878 ENX_DPRINTF_WARN("no gw-info-update cookie for " 879 "(hca_guid=0x%llx, port_num=0x%x, gw_portid=0x%x), " 880 "ret=%d", info->ti_hca_guid, info->ti_pi->p_port_num, 881 portid, ret); 882 return; 883 } 884 885 (void) ndi_post_event(ss->nx_dip, rdip, cookie, new_gw_info); 886 } 887 888 static int 889 eibnx_replace_portinfo(eibnx_thr_info_t *ti, ibt_hca_portinfo_t *new_pi, 890 uint_t new_size_pi) 891 { 892 eibnx_t *ss = enx_global_ss; 893 eibnx_hca_t *hca; 894 eibnx_port_t *port; 895 896 mutex_enter(&ss->nx_lock); 897 898 for (hca = ss->nx_hca; hca; hca = hca->hc_next) { 899 if (hca->hc_hdl == ti->ti_hca) 900 break; 901 } 902 903 if (hca == NULL) { 904 ENX_DPRINTF_WARN("hca hdl (0x%llx) not found in hca list", 905 ti->ti_hca); 906 mutex_exit(&ss->nx_lock); 907 return (ENX_E_FAILURE); 908 } 909 910 for (port = hca->hc_port; port; port = port->po_next) { 911 if (port->po_pi == ti->ti_pi) { 912 ibt_free_portinfo(port->po_pi, port->po_pi_size); 913 port->po_pi = new_pi; 914 port->po_pi_size = new_size_pi; 915 ti->ti_pi = port->po_pi; 916 break; 917 } 918 } 919 920 if (port == NULL) { 921 ENX_DPRINTF_WARN("portinfo (0x%llx) not found in hca list", 922 ti->ti_pi); 923 mutex_exit(&ss->nx_lock); 924 return (ENX_E_FAILURE); 925 } 926 927 mutex_exit(&ss->nx_lock); 928 929 return (ENX_E_SUCCESS); 930 } 931 932 static void 933 eibnx_handle_port_events(ibt_hca_hdl_t ev_hca, uint8_t ev_portnum) 934 { 935 eibnx_t *ss = enx_global_ss; 936 eibnx_thr_info_t *ti; 937 ibt_hca_portinfo_t *pi; 938 ibt_status_t ret; 939 uint_t num_pi; 940 uint_t size_pi; 941 uint8_t itr; 942 943 /* 944 * Find the port monitor thread that matches the event hca and 945 * portnum 946 */ 947 mutex_enter(&ss->nx_lock); 948 for (ti = ss->nx_thr_info; ti; ti = ti->ti_next) { 949 if ((ti->ti_hca == ev_hca) && 950 (ti->ti_pi->p_port_num == ev_portnum)) { 951 break; 952 } 953 } 954 mutex_exit(&ss->nx_lock); 955 956 if (ti == NULL) 957 return; 958 959 /* 960 * See if we need to rejoin the mcgs for this port and do so if true 961 */ 962 ret = ibt_query_hca_ports(ev_hca, ev_portnum, &pi, &num_pi, &size_pi); 963 if (ret != IBT_SUCCESS) { 964 ENX_DPRINTF_WARN("ibt_query_hca_ports() failed with %d", ret); 965 return; 966 } else if (num_pi != 1 || pi->p_linkstate != IBT_PORT_ACTIVE) { 967 ENX_DPRINTF_WARN("ibt_query_hca_ports(port_num=%d) failed, " 968 "num_pi=%d, linkstate=0x%x", ev_portnum, num_pi, 969 pi->p_linkstate); 970 ibt_free_portinfo(pi, size_pi); 971 return; 972 } 973 974 itr = pi->p_init_type_reply; 975 if (ENX_PORT_ATTR_LOADED(itr) && ENX_PORT_ATTR_NOT_PRESERVED(itr)) { 976 /* 977 * If our port's base lid has changed, we need to replace 978 * the saved portinfo in our lists with the new one before 979 * going further. 980 */ 981 if (ti->ti_pi->p_base_lid != pi->p_base_lid) { 982 if (eibnx_replace_portinfo(ti, pi, size_pi) == 983 ENX_E_SUCCESS) { 984 pi = NULL; 985 size_pi = 0; 986 } 987 } 988 } 989 990 /* 991 * If the port monitor was stuck waiting for the link to come up, 992 * let it know that it is up now. 993 */ 994 mutex_enter(&ti->ti_event_lock); 995 if ((ti->ti_progress & ENX_MON_LINKSTATE_UP) != ENX_MON_LINKSTATE_UP) { 996 ti->ti_pi->p_linkstate = IBT_PORT_ACTIVE; 997 ti->ti_event |= ENX_EVENT_LINK_UP; 998 cv_broadcast(&ti->ti_event_cv); 999 } 1000 mutex_exit(&ti->ti_event_lock); 1001 1002 if (ENX_PORT_PRES_NOT_PRESERVED(itr)) { 1003 if (ti->ti_progress & ENX_MON_JOINED_MCGS) 1004 (void) eibnx_rejoin_mcgs(ti); 1005 } 1006 1007 if (pi != NULL) 1008 ibt_free_portinfo(pi, size_pi); 1009 } 1010 1011 static void 1012 eibnx_handle_hca_attach(ib_guid_t new_hca_guid) 1013 { 1014 eibnx_t *ss = enx_global_ss; 1015 eibnx_thr_info_t *ti; 1016 eibnx_hca_t *hca; 1017 eibnx_port_t *port; 1018 1019 /* 1020 * All we need to do is to start a port monitor for all the ports 1021 * on the new HCA. To do this, go through our current port monitors 1022 * and see if we already have a monitor for this HCA - if so, print 1023 * a warning and return. 1024 */ 1025 mutex_enter(&ss->nx_lock); 1026 for (ti = ss->nx_thr_info; ti; ti = ti->ti_next) { 1027 if (ti->ti_hca_guid == new_hca_guid) { 1028 ENX_DPRINTF_VERBOSE("hca (guid=0x%llx) already " 1029 "attached", new_hca_guid); 1030 mutex_exit(&ss->nx_lock); 1031 return; 1032 } 1033 } 1034 mutex_exit(&ss->nx_lock); 1035 1036 /* 1037 * If we don't have it in our list, process the HCA and start the 1038 * port monitors 1039 */ 1040 if ((hca = eibnx_prepare_hca(new_hca_guid)) != NULL) { 1041 mutex_enter(&ss->nx_lock); 1042 1043 hca->hc_next = ss->nx_hca; 1044 ss->nx_hca = hca; 1045 1046 for (port = hca->hc_port; port; port = port->po_next) { 1047 ti = eibnx_start_port_monitor(hca, port); 1048 1049 ti->ti_next = ss->nx_thr_info; 1050 ss->nx_thr_info = ti; 1051 } 1052 mutex_exit(&ss->nx_lock); 1053 } 1054 } 1055 1056 static void 1057 eibnx_handle_hca_detach(ib_guid_t del_hca_guid) 1058 { 1059 eibnx_t *ss = enx_global_ss; 1060 eibnx_thr_info_t *ti; 1061 eibnx_thr_info_t *ti_stop_list = NULL; 1062 eibnx_thr_info_t *ti_prev; 1063 eibnx_thr_info_t *ti_next; 1064 eibnx_hca_t *hca; 1065 eibnx_hca_t *hca_prev; 1066 1067 /* 1068 * We need to locate all monitor threads for this HCA and stop them 1069 */ 1070 mutex_enter(&ss->nx_lock); 1071 ti_prev = NULL; 1072 for (ti = ss->nx_thr_info; ti; ti = ti_next) { 1073 ti_next = ti->ti_next; 1074 1075 if (ti->ti_hca_guid != del_hca_guid) { 1076 ti_prev = ti; 1077 } else { 1078 /* 1079 * Take it out from the good list 1080 */ 1081 if (ti_prev) 1082 ti_prev->ti_next = ti_next; 1083 else 1084 ss->nx_thr_info = ti_next; 1085 1086 /* 1087 * And put it in the to-stop list 1088 */ 1089 ti->ti_next = ti_stop_list; 1090 ti_stop_list = ti; 1091 } 1092 } 1093 mutex_exit(&ss->nx_lock); 1094 1095 /* 1096 * Ask all the port_monitor threads to die. 1097 */ 1098 for (ti = ti_stop_list; ti; ti = ti_next) { 1099 ti_next = ti->ti_next; 1100 eibnx_stop_port_monitor(ti); 1101 } 1102 1103 /* 1104 * Now, locate the HCA in our list and release all HCA related 1105 * resources. 1106 */ 1107 mutex_enter(&ss->nx_lock); 1108 hca_prev = NULL; 1109 for (hca = ss->nx_hca; hca; hca = hca->hc_next) { 1110 if (hca->hc_guid != del_hca_guid) { 1111 hca_prev = hca; 1112 } else { 1113 if (hca_prev) { 1114 hca_prev->hc_next = hca->hc_next; 1115 } else { 1116 ss->nx_hca = hca->hc_next; 1117 } 1118 hca->hc_next = NULL; 1119 break; 1120 } 1121 } 1122 mutex_exit(&ss->nx_lock); 1123 1124 if (hca) { 1125 (void) eibnx_cleanup_hca(hca); 1126 } 1127 } 1128