/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. */ #include #include #include #include #include #include #include #include #include /* SM_INIT_TYPE_REPLY_... */ #include /* * Static function declarations */ static void eibnx_gw_is_alive(eibnx_gw_info_t *); static void eibnx_gw_is_aware(eibnx_thr_info_t *, eibnx_gw_info_t *, boolean_t); static void eibnx_process_rx(eibnx_thr_info_t *, ibt_wc_t *, eibnx_wqe_t *); static void eibnx_handle_wcerr(uint8_t, eibnx_wqe_t *, eibnx_thr_info_t *); static void eibnx_handle_login_ack(eibnx_thr_info_t *, uint8_t *); static void eibnx_handle_gw_rebirth(eibnx_thr_info_t *, uint16_t); static void eibnx_handle_gw_info_update(eibnx_thr_info_t *, uint16_t, void *); static int eibnx_replace_portinfo(eibnx_thr_info_t *, ibt_hca_portinfo_t *, uint_t); static void eibnx_handle_port_events(ibt_hca_hdl_t, uint8_t); static void eibnx_handle_hca_attach(ib_guid_t); static void eibnx_handle_hca_detach(ib_guid_t); /* * NDI event handle we need */ extern ndi_event_hdl_t enx_ndi_event_hdl; /* * SM's init type reply flags */ #define ENX_PORT_ATTR_LOADED(itr) \ (((itr) & SM_INIT_TYPE_REPLY_NO_LOAD_REPLY) == 0) #define ENX_PORT_ATTR_NOT_PRESERVED(itr) \ (((itr) & SM_INIT_TYPE_PRESERVE_CONTENT_REPLY) == 0) #define ENX_PORT_PRES_NOT_PRESERVED(itr) \ (((itr) & SM_INIT_TYPE_PRESERVE_PRESENCE_REPLY) == 0) /* * Port monitor progress flags (all flag values should be non-zero) */ #define ENX_MON_LINKSTATE_UP 0x01 #define ENX_MON_FOUND_MCGS 0x02 #define ENX_MON_SETUP_CQ 0x04 #define ENX_MON_SETUP_UD_CHAN 0x08 #define ENX_MON_SETUP_BUFS 0x10 #define ENX_MON_SETUP_CQ_HDLR 0x20 #define ENX_MON_JOINED_MCGS 0x40 #define ENX_MON_MULTICAST_SLCT 0x80 #define ENX_MON_MAX 0xFF /* * Per-port thread to solicit, monitor and discover EoIB gateways * and create the corresponding EoIB driver instances on the host. */ void eibnx_port_monitor(eibnx_thr_info_t *info) { clock_t solicit_period_ticks; clock_t deadline; kmutex_t ci_lock; callb_cpr_t ci; char thr_name[MAXNAMELEN]; (void) snprintf(thr_name, MAXNAMELEN, ENX_PORT_MONITOR, info->ti_pi->p_port_num); mutex_init(&ci_lock, NULL, MUTEX_DRIVER, NULL); CALLB_CPR_INIT(&ci, &ci_lock, callb_generic_cpr, thr_name); info->ti_progress = 0; /* * If the port is not active yet, wait for a port up event. The * async handler, when it sees a port-up event, is expected to * update the port_monitor's portinfo structure's p_linkstate * and wake us up with ENX_EVENT_LINK_UP. */ while (info->ti_pi->p_linkstate != IBT_PORT_ACTIVE) { mutex_enter(&info->ti_event_lock); while ((info->ti_event & (ENX_EVENT_LINK_UP | ENX_EVENT_DIE)) == 0) { mutex_enter(&ci_lock); CALLB_CPR_SAFE_BEGIN(&ci); mutex_exit(&ci_lock); cv_wait(&info->ti_event_cv, &info->ti_event_lock); mutex_enter(&ci_lock); CALLB_CPR_SAFE_END(&ci, &ci_lock); mutex_exit(&ci_lock); } if (info->ti_event & ENX_EVENT_DIE) { mutex_exit(&info->ti_event_lock); goto port_monitor_exit; } info->ti_event &= (~ENX_EVENT_LINK_UP); mutex_exit(&info->ti_event_lock); } info->ti_progress |= ENX_MON_LINKSTATE_UP; /* * Locate the multicast groups for sending solicit requests * to the GW and receiving advertisements from the GW. If * either of the mcg is not present, wait for them to be * created by the GW. */ while (eibnx_find_mgroups(info) != ENX_E_SUCCESS) { mutex_enter(&info->ti_event_lock); while ((info->ti_event & (ENX_EVENT_MCGS_AVAILABLE | ENX_EVENT_DIE)) == 0) { mutex_enter(&ci_lock); CALLB_CPR_SAFE_BEGIN(&ci); mutex_exit(&ci_lock); cv_wait(&info->ti_event_cv, &info->ti_event_lock); mutex_enter(&ci_lock); CALLB_CPR_SAFE_END(&ci, &ci_lock); mutex_exit(&ci_lock); } if (info->ti_event & ENX_EVENT_DIE) { mutex_exit(&info->ti_event_lock); goto port_monitor_exit; } info->ti_event &= (~ENX_EVENT_MCGS_AVAILABLE); mutex_exit(&info->ti_event_lock); } info->ti_progress |= ENX_MON_FOUND_MCGS; /* * Setup a shared CQ */ if (eibnx_setup_cq(info) != ENX_E_SUCCESS) { ENX_DPRINTF_ERR("eibnx_setup_cq() failed, terminating " "port monitor for (hca_guid=0x%llx, port_num=0x%x)", info->ti_hca_guid, info->ti_pi->p_port_num); goto port_monitor_exit; } info->ti_progress |= ENX_MON_SETUP_CQ; /* * Setup UD channel */ if (eibnx_setup_ud_channel(info) != ENX_E_SUCCESS) { ENX_DPRINTF_ERR("eibnx_setup_ud_channel() failed, terminating " "port monitor for (hca_guid=0x%llx, port_num=0x%x)", info->ti_hca_guid, info->ti_pi->p_port_num); goto port_monitor_exit; } info->ti_progress |= ENX_MON_SETUP_UD_CHAN; /* * Allocate/initialize any tx/rx buffers */ if (eibnx_setup_bufs(info) != ENX_E_SUCCESS) { ENX_DPRINTF_ERR("eibnx_setup_bufs() failed, terminating " "port monitor for (hca_guid=0x%llx, port_num=0x%x)", info->ti_hca_guid, info->ti_pi->p_port_num); goto port_monitor_exit; } info->ti_progress |= ENX_MON_SETUP_BUFS; /* * Setup completion handler */ if (eibnx_setup_cq_handler(info) != ENX_E_SUCCESS) { ENX_DPRINTF_ERR("eibnx_setup_cq_handler() failed, terminating " "port monitor for (hca_guid=0x%llx, port_num=0x%x)", info->ti_hca_guid, info->ti_pi->p_port_num); goto port_monitor_exit; } info->ti_progress |= ENX_MON_SETUP_CQ_HDLR; /* * Join EoIB multicast groups */ if (eibnx_join_mcgs(info) != ENX_E_SUCCESS) { ENX_DPRINTF_ERR("eibnx_join_mcgs() failed, terminating ", "port monitor for (hca_guid=0x%llx, port_num=0x%x)", info->ti_hca_guid, info->ti_pi->p_port_num); goto port_monitor_exit; } info->ti_progress |= ENX_MON_JOINED_MCGS; /* * Send SOLICIT pkt to the EoIB multicast group */ if (eibnx_fip_solicit_mcast(info) != ENX_E_SUCCESS) { ENX_DPRINTF_ERR("eibnx_fip_solicit_mcast() failed, terminating " "port monitor for (hca_guid=0x%llx, port_num=0x%x)", info->ti_hca_guid, info->ti_pi->p_port_num); goto port_monitor_exit; } info->ti_progress |= ENX_MON_MULTICAST_SLCT; mutex_enter(&info->ti_event_lock); solicit_period_ticks = drv_usectohz(ENX_DFL_SOLICIT_PERIOD_USEC); periodic_solicit: deadline = ddi_get_lbolt() + solicit_period_ticks; while ((info->ti_event & (ENX_EVENT_TIMED_OUT | ENX_EVENT_DIE)) == 0) { mutex_enter(&ci_lock); CALLB_CPR_SAFE_BEGIN(&ci); mutex_exit(&ci_lock); if (cv_timedwait(&info->ti_event_cv, &info->ti_event_lock, deadline) == -1) { info->ti_event |= ENX_EVENT_TIMED_OUT; } mutex_enter(&ci_lock); CALLB_CPR_SAFE_END(&ci, &ci_lock); mutex_exit(&ci_lock); } if (info->ti_event & ENX_EVENT_DIE) { mutex_exit(&info->ti_event_lock); goto port_monitor_exit; } if (info->ti_event & ENX_EVENT_TIMED_OUT) { if (eibnx_fip_solicit_ucast(info, &solicit_period_ticks) != ENX_E_SUCCESS) { ENX_DPRINTF_WARN("failed to send solicit ucast to " "gateways (hca_guid=0x%llx, port_num=0x%x)", info->ti_hca_guid, info->ti_pi->p_port_num); } info->ti_event &= ~ENX_EVENT_TIMED_OUT; } goto periodic_solicit; port_monitor_exit: if (info->ti_progress & ENX_MON_MULTICAST_SLCT) { eibnx_cleanup_port_nodes(info); info->ti_progress &= (~ENX_MON_MULTICAST_SLCT); } if (info->ti_progress & ENX_MON_JOINED_MCGS) { eibnx_rb_join_mcgs(info); info->ti_progress &= (~ENX_MON_JOINED_MCGS); } if (info->ti_progress & ENX_MON_SETUP_CQ_HDLR) { eibnx_rb_setup_cq_handler(info); info->ti_progress &= (~ENX_MON_SETUP_CQ_HDLR); } if (info->ti_progress & ENX_MON_SETUP_BUFS) { eibnx_rb_setup_bufs(info); info->ti_progress &= (~ENX_MON_SETUP_BUFS); } if (info->ti_progress & ENX_MON_SETUP_UD_CHAN) { eibnx_rb_setup_ud_channel(info); info->ti_progress &= (~ENX_MON_SETUP_UD_CHAN); } if (info->ti_progress & ENX_MON_SETUP_CQ) { eibnx_rb_setup_cq(info); info->ti_progress &= (~ENX_MON_SETUP_CQ); } if (info->ti_progress & ENX_MON_FOUND_MCGS) { eibnx_rb_find_mgroups(info); info->ti_progress &= (~ENX_MON_FOUND_MCGS); } mutex_enter(&ci_lock); CALLB_CPR_EXIT(&ci); mutex_destroy(&ci_lock); } /* * Async subnet notices handler registered with IBTF */ /*ARGSUSED*/ void eibnx_subnet_notices_handler(void *arg, ib_gid_t gid, ibt_subnet_event_code_t sn_evcode, ibt_subnet_event_t *sn_event) { eibnx_t *ss = enx_global_ss; eibnx_thr_info_t *ti; ib_gid_t notice_gid; switch (sn_evcode) { case IBT_SM_EVENT_MCG_CREATED: notice_gid = sn_event->sm_notice_gid; if ((notice_gid.gid_prefix == enx_solicit_mgid.gid_prefix && notice_gid.gid_guid == enx_solicit_mgid.gid_guid) || (notice_gid.gid_prefix == enx_advertise_mgid.gid_prefix && notice_gid.gid_guid == enx_advertise_mgid.gid_guid)) { mutex_enter(&ss->nx_lock); for (ti = ss->nx_thr_info; ti; ti = ti->ti_next) { mutex_enter(&ti->ti_event_lock); ti->ti_event |= ENX_EVENT_MCGS_AVAILABLE; cv_broadcast(&ti->ti_event_cv); mutex_exit(&ti->ti_event_lock); } mutex_exit(&ss->nx_lock); } break; case IBT_SM_EVENT_MCG_DELETED: break; default: break; } } /* * Async event handler registered with IBTF */ /*ARGSUSED*/ void eibnx_async_handler(void *clnt_pvt, ibt_hca_hdl_t hca, ibt_async_code_t code, ibt_async_event_t *event) { switch (code) { case IBT_ERROR_CATASTROPHIC_CHAN: case IBT_ERROR_INVALID_REQUEST_CHAN: case IBT_ERROR_ACCESS_VIOLATION_CHAN: case IBT_ERROR_CQ: case IBT_ERROR_CATASTROPHIC_SRQ: ENX_DPRINTF_ERR("ibt ERROR event 0x%x received " "(hca_guid=0x%llx)", code, event->ev_hca_guid); break; case IBT_ERROR_PORT_DOWN: ENX_DPRINTF_WARN("ibt PORT_DOWN event received " "(hca_guid=0x%llx, port_num=0x%x)", event->ev_hca_guid, event->ev_port); break; case IBT_EVENT_PORT_UP: ENX_DPRINTF_WARN("ibt PORT_UP event received " "(hca_guid=0x%llx, port_num=0x%x)", event->ev_hca_guid, event->ev_port); eibnx_handle_port_events(hca, event->ev_port); break; case IBT_PORT_CHANGE_EVENT: ENX_DPRINTF_WARN("ibt PORT_CHANGE event received " "(hca_guid=0x%llx, port_num=0x%x)", event->ev_hca_guid, event->ev_port); eibnx_handle_port_events(hca, event->ev_port); break; case IBT_CLNT_REREG_EVENT: ENX_DPRINTF_WARN("ibt CLNT_REREG event received " "(hca_guid=0x%llx, port_num=0x%x)", event->ev_hca_guid, event->ev_port); eibnx_handle_port_events(hca, event->ev_port); break; case IBT_HCA_ATTACH_EVENT: ENX_DPRINTF_VERBOSE("ibt HCA_ATTACH event received " "(new hca_guid=0x%llx)", event->ev_hca_guid); eibnx_handle_hca_attach(event->ev_hca_guid); break; case IBT_HCA_DETACH_EVENT: ENX_DPRINTF_VERBOSE("ibt HCA_DETACH event received " "(target hca_guid=0x%llx)", event->ev_hca_guid); eibnx_handle_hca_detach(event->ev_hca_guid); break; default: ENX_DPRINTF_VERBOSE("ibt UNSUPPORTED event 0x%x received " "(hca_guid=0x%llx)", code, event->ev_hca_guid); break; } } boolean_t eibnx_is_gw_dead(eibnx_gw_info_t *gwi) { int64_t cur_lbolt; cur_lbolt = ddi_get_lbolt64(); mutex_enter(&gwi->gw_adv_lock); if ((cur_lbolt - gwi->gw_adv_last_lbolt) > gwi->gw_adv_timeout_ticks) { gwi->gw_adv_flag = ENX_GW_DEAD; mutex_exit(&gwi->gw_adv_lock); return (B_TRUE); } mutex_exit(&gwi->gw_adv_lock); return (B_FALSE); } static void eibnx_gw_is_alive(eibnx_gw_info_t *gwi) { /* * We've just received a multicast advertisement from this * gateway. Multicast or unicast, this means that the gateway * is alive. Record this timestamp (in ticks). */ mutex_enter(&gwi->gw_adv_lock); gwi->gw_adv_last_lbolt = ddi_get_lbolt64(); if (gwi->gw_adv_flag == ENX_GW_DEAD) { gwi->gw_adv_flag = ENX_GW_ALIVE; } mutex_exit(&gwi->gw_adv_lock); } static void eibnx_gw_is_aware(eibnx_thr_info_t *info, eibnx_gw_info_t *gwi, boolean_t gwi_changed) { eib_gw_info_t eib_gwi; boolean_t post_rebirth_event = B_FALSE; /* * We're here when we receive a unicast advertisement from a * gateway. If this gateway was discovered earlier but was in * a dead state, this means it has come back alive and become * aware of us. We may need to inform any EoIB children * waiting for notification. Note that if this gateway is * being discovered for the first time now, we wouldn't have * created the binding eoib node for it (we will do that when * we return from this routine), so the "rebirth" and "gw info * update" event postings will be NOPs. */ mutex_enter(&gwi->gw_adv_lock); gwi->gw_adv_last_lbolt = ddi_get_lbolt64(); if (gwi->gw_adv_flag != ENX_GW_AWARE) { post_rebirth_event = B_TRUE; } gwi->gw_adv_flag = ENX_GW_AWARE; mutex_exit(&gwi->gw_adv_lock); /* * If we have a gateway information update event, we post that * first, so any rebirth event processed later will have the * correct gateway information. */ if (gwi_changed) { eib_gwi.gi_system_guid = gwi->gw_system_guid; eib_gwi.gi_guid = gwi->gw_guid; eib_gwi.gi_sn_prefix = gwi->gw_addr.ga_gid.gid_prefix; eib_gwi.gi_adv_period = gwi->gw_adv_period; eib_gwi.gi_ka_period = gwi->gw_ka_period; eib_gwi.gi_vnic_ka_period = gwi->gw_vnic_ka_period; eib_gwi.gi_ctrl_qpn = gwi->gw_ctrl_qpn; eib_gwi.gi_lid = gwi->gw_lid; eib_gwi.gi_portid = gwi->gw_portid; eib_gwi.gi_num_net_vnics = gwi->gw_num_net_vnics; eib_gwi.gi_flag_available = gwi->gw_flag_available; eib_gwi.gi_is_host_adm_vnics = gwi->gw_is_host_adm_vnics; eib_gwi.gi_sl = gwi->gw_sl; eib_gwi.gi_n_rss_qpn = gwi->gw_n_rss_qpn; bcopy(gwi->gw_system_name, eib_gwi.gi_system_name, EIB_GW_SYSNAME_LEN); bcopy(gwi->gw_port_name, eib_gwi.gi_port_name, EIB_GW_PORTNAME_LEN); bcopy(gwi->gw_vendor_id, eib_gwi.gi_vendor_id, EIB_GW_VENDOR_LEN); eibnx_handle_gw_info_update(info, eib_gwi.gi_portid, &eib_gwi); } if (post_rebirth_event) { eibnx_handle_gw_rebirth(info, gwi->gw_portid); } } /* * Thread to create eoib nodes and online instances */ void eibnx_create_eoib_node(void) { eibnx_t *ss = enx_global_ss; eibnx_nodeq_t *node; kmutex_t ci_lock; callb_cpr_t ci; mutex_init(&ci_lock, NULL, MUTEX_DRIVER, NULL); CALLB_CPR_INIT(&ci, &ci_lock, callb_generic_cpr, ENX_NODE_CREATOR); wait_for_node_to_create: mutex_enter(&ss->nx_nodeq_lock); while ((ss->nx_nodeq == NULL) && (ss->nx_nodeq_thr_die == 0)) { mutex_enter(&ci_lock); CALLB_CPR_SAFE_BEGIN(&ci); mutex_exit(&ci_lock); cv_wait(&ss->nx_nodeq_cv, &ss->nx_nodeq_lock); mutex_enter(&ci_lock); CALLB_CPR_SAFE_END(&ci, &ci_lock); mutex_exit(&ci_lock); } /* * If this is not really a work item, but a request for us to * die, throwaway all pending work requests and just die. */ if (ss->nx_nodeq_thr_die) { while (ss->nx_nodeq) { node = ss->nx_nodeq; ss->nx_nodeq = node->nc_next; node->nc_next = NULL; kmem_free(node, sizeof (eibnx_nodeq_t)); } mutex_exit(&ss->nx_nodeq_lock); mutex_enter(&ci_lock); CALLB_CPR_EXIT(&ci); mutex_destroy(&ci_lock); return; } /* * Grab the first node entry from the queue */ ASSERT(ss->nx_nodeq != NULL); node = ss->nx_nodeq; ss->nx_nodeq = node->nc_next; node->nc_next = NULL; mutex_exit(&ss->nx_nodeq_lock); (void) eibnx_configure_node(node->nc_info, node->nc_gwi, NULL); kmem_free(node, sizeof (eibnx_nodeq_t)); goto wait_for_node_to_create; /*NOTREACHED*/ } /* * Tx and Rx completion interrupt handler. Guaranteed to be single * threaded and nonreentrant for this CQ. */ void eibnx_comp_intr(ibt_cq_hdl_t cq_hdl, void *arg) { eibnx_thr_info_t *info = arg; if (info->ti_cq_hdl != cq_hdl) { ENX_DPRINTF_DEBUG("eibnx_comp_intr: " "cq_hdl(0x%llx) != info->ti_cq_hdl(0x%llx), " "ignoring completion", cq_hdl, info->ti_cq_hdl); return; } ASSERT(info->ti_softint_hdl != NULL); (void) ddi_intr_trigger_softint(info->ti_softint_hdl, NULL); } /* * Send and Receive completion handler functions for EoIB nexus */ /*ARGSUSED*/ uint_t eibnx_comp_handler(caddr_t arg1, caddr_t arg2) { eibnx_thr_info_t *info = (eibnx_thr_info_t *)arg1; ibt_wc_t *wc; eibnx_wqe_t *wqe; ibt_status_t ret; uint_t polled; int i; /* * Make sure the port monitor isn't killed if we're in the completion * handler. If the port monitor thread is already being killed, we'll * stop processing completions. */ mutex_enter(&info->ti_event_lock); if (info->ti_event & (ENX_EVENT_DIE | ENX_EVENT_COMPLETION)) { mutex_exit(&info->ti_event_lock); return ((uint_t)ENX_E_SUCCESS); } info->ti_event |= ENX_EVENT_COMPLETION; mutex_exit(&info->ti_event_lock); /* * Re-arm the notification callback before we start polling * the completion queue. There's nothing much we can do if the * enable_cq_notify fails - we issue a warning and move on. */ ret = ibt_enable_cq_notify(info->ti_cq_hdl, IBT_NEXT_COMPLETION); if (ret != IBT_SUCCESS) { ENX_DPRINTF_WARN("ibt_enable_cq_notify(cq_hdl=0x%llx) " "failed, ret=%d", info->ti_cq_hdl, ret); } /* * Handle tx and rx completions */ while ((ret = ibt_poll_cq(info->ti_cq_hdl, info->ti_wc, info->ti_cq_sz, &polled)) == IBT_SUCCESS) { for (wc = info->ti_wc, i = 0; i < polled; i++, wc++) { wqe = (eibnx_wqe_t *)(uintptr_t)wc->wc_id; if (wc->wc_status != IBT_WC_SUCCESS) { eibnx_handle_wcerr(wc->wc_status, wqe, info); } else if (wqe->qe_type == ENX_QETYP_RWQE) { eibnx_process_rx(info, wc, wqe); eibnx_return_rwqe(info, wqe); } else { eibnx_return_swqe(wqe); } } } /* * On the way out, make sure we wake up any pending death requestor * for the port-monitor thread. Note that we need to do a cv_broadcast() * here since there could be multiple threads sleeping on the event cv * and we want to make sure all waiters get a chance to see if it's * their turn. */ mutex_enter(&info->ti_event_lock); info->ti_event &= (~ENX_EVENT_COMPLETION); cv_broadcast(&info->ti_event_cv); mutex_exit(&info->ti_event_lock); return (DDI_INTR_CLAIMED); } /* * Rx processing code */ static void eibnx_process_rx(eibnx_thr_info_t *info, ibt_wc_t *wc, eibnx_wqe_t *wqe) { eibnx_gw_msg_t msg; eibnx_gw_info_t *gwi; eibnx_gw_info_t *orig_gwi; eibnx_gw_info_t *new_gwi; uint_t orig_gw_state; uint8_t *pkt = (uint8_t *)(uintptr_t)(wqe->qe_sgl.ds_va); boolean_t gwi_changed; /* * We'll simply drop any packet (including broadcast advertisements * from gws) we receive before we've done our solicitation broadcast. */ if (info->ti_mcast_done == 0) { return; } /* * Skip the GRH and parse the message in the packet */ if (eibnx_fip_parse_pkt(pkt + ENX_GRH_SZ, &msg) != ENX_E_SUCCESS) { return; } /* * If it was a login ack for one of our children, we need to pass * it on to the child */ if (msg.gm_type == FIP_VNIC_LOGIN_ACK) { eibnx_handle_login_ack(info, pkt); return; } /* * Other than that, we only handle gateway advertisements */ if (msg.gm_type != FIP_GW_ADVERTISE_MCAST && msg.gm_type != FIP_GW_ADVERTISE_UCAST) { return; } gwi = &msg.u.gm_info; /* * State machine to create eoib instances. Whether this advertisement * is from a new gateway or an old gateway that we already know about, * if this was a unicast response to our earlier solicitation and it's * the first time we're receiving it from this gateway, we're ready to * login, so we create the EoIB instance for it. */ orig_gwi = eibnx_find_gw_in_gwlist(info, gwi); if (orig_gwi == NULL) { if (gwi->gw_flag_available == 0) { gwi->gw_state = ENX_GW_STATE_UNAVAILABLE; gwi->gw_adv_flag = ENX_GW_ALIVE; (void) eibnx_add_gw_to_gwlist(info, gwi, wc, pkt); } else if (gwi->gw_flag_ucast_advt == 0) { gwi->gw_state = ENX_GW_STATE_AVAILABLE; gwi->gw_adv_flag = ENX_GW_ALIVE; (void) eibnx_add_gw_to_gwlist(info, gwi, wc, pkt); } else { gwi->gw_state = ENX_GW_STATE_READY_TO_LOGIN; gwi->gw_adv_flag = ENX_GW_AWARE; if ((new_gwi = eibnx_add_gw_to_gwlist(info, gwi, wc, pkt)) != NULL) { eibnx_queue_for_creation(info, new_gwi); } } } else { orig_gw_state = orig_gwi->gw_state; if (gwi->gw_flag_available == 0) { gwi->gw_state = ENX_GW_STATE_UNAVAILABLE; eibnx_replace_gw_in_gwlist(info, orig_gwi, gwi, wc, pkt, NULL); eibnx_gw_is_alive(orig_gwi); } else if (gwi->gw_flag_ucast_advt == 0) { if (orig_gw_state == ENX_GW_STATE_UNAVAILABLE) { gwi->gw_state = ENX_GW_STATE_AVAILABLE; } else { gwi->gw_state = orig_gw_state; } eibnx_replace_gw_in_gwlist(info, orig_gwi, gwi, wc, pkt, NULL); eibnx_gw_is_alive(orig_gwi); } else { gwi->gw_state = ENX_GW_STATE_READY_TO_LOGIN; eibnx_replace_gw_in_gwlist(info, orig_gwi, gwi, wc, pkt, &gwi_changed); eibnx_gw_is_aware(info, orig_gwi, gwi_changed); if (orig_gw_state != ENX_GW_STATE_READY_TO_LOGIN) eibnx_queue_for_creation(info, orig_gwi); } } } /*ARGSUSED*/ static void eibnx_handle_wcerr(uint8_t wcerr, eibnx_wqe_t *wqe, eibnx_thr_info_t *info) { /* * Currently, all we do is report */ switch (wcerr) { case IBT_WC_WR_FLUSHED_ERR: ENX_DPRINTF_VERBOSE("IBT_WC_WR_FLUSHED_ERR seen " "(hca_guid=0x%llx, port_num=0x%x, wqe_type=0x%x)", info->ti_hca_guid, info->ti_pi->p_port_num, wqe->qe_type); break; case IBT_WC_LOCAL_CHAN_OP_ERR: ENX_DPRINTF_ERR("IBT_WC_LOCAL_CHAN_OP_ERR seen " "(hca_guid=0x%llx, port_num=0x%x, wqe_type=0x%x)", info->ti_hca_guid, info->ti_pi->p_port_num, wqe->qe_type); break; case IBT_WC_LOCAL_PROTECT_ERR: ENX_DPRINTF_ERR("IBT_WC_LOCAL_PROTECT_ERR seen " "(hca_guid=0x%llx, port_num=0x%x, wqe_type=0x%x)", info->ti_hca_guid, info->ti_pi->p_port_num, wqe->qe_type); break; } } static void eibnx_handle_login_ack(eibnx_thr_info_t *info, uint8_t *pkt) { eibnx_t *ss = enx_global_ss; fip_login_ack_t *ack; fip_desc_vnic_login_t *login; ddi_eventcookie_t cookie; dev_info_t *rdip; uint16_t vnic_id; uint16_t inst; int ret; /* * When we get login acknowledgements, we simply invoke the * appropriate EoIB driver callback to process it on behalf * of the driver instance. We will let the callback do error * checks. */ ack = (fip_login_ack_t *)(pkt + ENX_GRH_SZ); login = &(ack->ak_vnic_login); vnic_id = ntohs(login->vl_vnic_id); inst = EIB_DEVI_INSTANCE(vnic_id); if ((rdip = eibnx_find_child_dip_by_inst(info, inst)) == NULL) { ENX_DPRINTF_DEBUG("no eoib child with instance 0x%x found " "for (hca_guid=0x%llx, port_num=0x%x)", inst, info->ti_hca_guid, info->ti_pi->p_port_num); return; } ret = ndi_event_retrieve_cookie(enx_ndi_event_hdl, rdip, EIB_NDI_EVENT_LOGIN_ACK, &cookie, NDI_EVENT_NOPASS); if (ret != NDI_SUCCESS) { ENX_DPRINTF_WARN("no login-ack cookie for (hca_guid=0x%llx, " "port_num=0x%x, eoib_inst=0x%x), ret=%d", info->ti_hca_guid, info->ti_pi->p_port_num, inst, ret); return; } (void) ndi_post_event(ss->nx_dip, rdip, cookie, (void *)pkt); } static void eibnx_handle_gw_rebirth(eibnx_thr_info_t *info, uint16_t portid) { eibnx_t *ss = enx_global_ss; ddi_eventcookie_t cookie; dev_info_t *rdip; int ret; if ((rdip = eibnx_find_child_dip_by_gw(info, portid)) == NULL) { ENX_DPRINTF_WARN("no eoib child bound to gw portid 0x%x " "found for (hca_guid=0x%llx, port_num=0x%x)", portid, info->ti_hca_guid, info->ti_pi->p_port_num); return; } ret = ndi_event_retrieve_cookie(enx_ndi_event_hdl, rdip, EIB_NDI_EVENT_GW_AVAILABLE, &cookie, NDI_EVENT_NOPASS); if (ret != NDI_SUCCESS) { ENX_DPRINTF_WARN("no gw-available cookie for (hca_guid=0x%llx, " "port_num=0x%x, gw_portid=0x%x), ret=%d", info->ti_hca_guid, info->ti_pi->p_port_num, portid, ret); return; } (void) ndi_post_event(ss->nx_dip, rdip, cookie, NULL); } static void eibnx_handle_gw_info_update(eibnx_thr_info_t *info, uint16_t portid, void *new_gw_info) { eibnx_t *ss = enx_global_ss; ddi_eventcookie_t cookie; dev_info_t *rdip; int ret; if ((rdip = eibnx_find_child_dip_by_gw(info, portid)) == NULL) { ENX_DPRINTF_WARN("no eoib child bound to gw portid 0x%x " "found for (hca_guid=0x%llx, port_num=0x%x)", portid, info->ti_hca_guid, info->ti_pi->p_port_num); return; } ret = ndi_event_retrieve_cookie(enx_ndi_event_hdl, rdip, EIB_NDI_EVENT_GW_INFO_UPDATE, &cookie, NDI_EVENT_NOPASS); if (ret != NDI_SUCCESS) { ENX_DPRINTF_WARN("no gw-info-update cookie for " "(hca_guid=0x%llx, port_num=0x%x, gw_portid=0x%x), " "ret=%d", info->ti_hca_guid, info->ti_pi->p_port_num, portid, ret); return; } (void) ndi_post_event(ss->nx_dip, rdip, cookie, new_gw_info); } static int eibnx_replace_portinfo(eibnx_thr_info_t *ti, ibt_hca_portinfo_t *new_pi, uint_t new_size_pi) { eibnx_t *ss = enx_global_ss; eibnx_hca_t *hca; eibnx_port_t *port; mutex_enter(&ss->nx_lock); for (hca = ss->nx_hca; hca; hca = hca->hc_next) { if (hca->hc_hdl == ti->ti_hca) break; } if (hca == NULL) { ENX_DPRINTF_WARN("hca hdl (0x%llx) not found in hca list", ti->ti_hca); mutex_exit(&ss->nx_lock); return (ENX_E_FAILURE); } for (port = hca->hc_port; port; port = port->po_next) { if (port->po_pi == ti->ti_pi) { ibt_free_portinfo(port->po_pi, port->po_pi_size); port->po_pi = new_pi; port->po_pi_size = new_size_pi; ti->ti_pi = port->po_pi; break; } } if (port == NULL) { ENX_DPRINTF_WARN("portinfo (0x%llx) not found in hca list", ti->ti_pi); mutex_exit(&ss->nx_lock); return (ENX_E_FAILURE); } mutex_exit(&ss->nx_lock); return (ENX_E_SUCCESS); } static void eibnx_handle_port_events(ibt_hca_hdl_t ev_hca, uint8_t ev_portnum) { eibnx_t *ss = enx_global_ss; eibnx_thr_info_t *ti; ibt_hca_portinfo_t *pi; ibt_status_t ret; uint_t num_pi; uint_t size_pi; uint8_t itr; /* * Find the port monitor thread that matches the event hca and * portnum */ mutex_enter(&ss->nx_lock); for (ti = ss->nx_thr_info; ti; ti = ti->ti_next) { if ((ti->ti_hca == ev_hca) && (ti->ti_pi->p_port_num == ev_portnum)) { break; } } mutex_exit(&ss->nx_lock); if (ti == NULL) return; /* * See if we need to rejoin the mcgs for this port and do so if true */ ret = ibt_query_hca_ports(ev_hca, ev_portnum, &pi, &num_pi, &size_pi); if (ret != IBT_SUCCESS) { ENX_DPRINTF_WARN("ibt_query_hca_ports() failed with %d", ret); return; } else if (num_pi != 1 || pi->p_linkstate != IBT_PORT_ACTIVE) { ENX_DPRINTF_WARN("ibt_query_hca_ports(port_num=%d) failed, " "num_pi=%d, linkstate=0x%x", ev_portnum, num_pi, pi->p_linkstate); ibt_free_portinfo(pi, size_pi); return; } itr = pi->p_init_type_reply; if (ENX_PORT_ATTR_LOADED(itr) && ENX_PORT_ATTR_NOT_PRESERVED(itr)) { /* * If our port's base lid has changed, we need to replace * the saved portinfo in our lists with the new one before * going further. */ if (ti->ti_pi->p_base_lid != pi->p_base_lid) { if (eibnx_replace_portinfo(ti, pi, size_pi) == ENX_E_SUCCESS) { pi = NULL; size_pi = 0; } } } /* * If the port monitor was stuck waiting for the link to come up, * let it know that it is up now. */ mutex_enter(&ti->ti_event_lock); if ((ti->ti_progress & ENX_MON_LINKSTATE_UP) != ENX_MON_LINKSTATE_UP) { ti->ti_pi->p_linkstate = IBT_PORT_ACTIVE; ti->ti_event |= ENX_EVENT_LINK_UP; cv_broadcast(&ti->ti_event_cv); } mutex_exit(&ti->ti_event_lock); if (ENX_PORT_PRES_NOT_PRESERVED(itr)) { if (ti->ti_progress & ENX_MON_JOINED_MCGS) (void) eibnx_rejoin_mcgs(ti); } if (pi != NULL) ibt_free_portinfo(pi, size_pi); } static void eibnx_handle_hca_attach(ib_guid_t new_hca_guid) { eibnx_t *ss = enx_global_ss; eibnx_thr_info_t *ti; eibnx_hca_t *hca; eibnx_port_t *port; /* * All we need to do is to start a port monitor for all the ports * on the new HCA. To do this, go through our current port monitors * and see if we already have a monitor for this HCA - if so, print * a warning and return. */ mutex_enter(&ss->nx_lock); for (ti = ss->nx_thr_info; ti; ti = ti->ti_next) { if (ti->ti_hca_guid == new_hca_guid) { ENX_DPRINTF_VERBOSE("hca (guid=0x%llx) already " "attached", new_hca_guid); mutex_exit(&ss->nx_lock); return; } } mutex_exit(&ss->nx_lock); /* * If we don't have it in our list, process the HCA and start the * port monitors */ if ((hca = eibnx_prepare_hca(new_hca_guid)) != NULL) { mutex_enter(&ss->nx_lock); hca->hc_next = ss->nx_hca; ss->nx_hca = hca; for (port = hca->hc_port; port; port = port->po_next) { ti = eibnx_start_port_monitor(hca, port); ti->ti_next = ss->nx_thr_info; ss->nx_thr_info = ti; } mutex_exit(&ss->nx_lock); } } static void eibnx_handle_hca_detach(ib_guid_t del_hca_guid) { eibnx_t *ss = enx_global_ss; eibnx_thr_info_t *ti; eibnx_thr_info_t *ti_stop_list = NULL; eibnx_thr_info_t *ti_prev; eibnx_thr_info_t *ti_next; eibnx_hca_t *hca; eibnx_hca_t *hca_prev; /* * We need to locate all monitor threads for this HCA and stop them */ mutex_enter(&ss->nx_lock); ti_prev = NULL; for (ti = ss->nx_thr_info; ti; ti = ti_next) { ti_next = ti->ti_next; if (ti->ti_hca_guid != del_hca_guid) { ti_prev = ti; } else { /* * Take it out from the good list */ if (ti_prev) ti_prev->ti_next = ti_next; else ss->nx_thr_info = ti_next; /* * And put it in the to-stop list */ ti->ti_next = ti_stop_list; ti_stop_list = ti; } } mutex_exit(&ss->nx_lock); /* * Ask all the port_monitor threads to die. */ for (ti = ti_stop_list; ti; ti = ti_next) { ti_next = ti->ti_next; eibnx_stop_port_monitor(ti); } /* * Now, locate the HCA in our list and release all HCA related * resources. */ mutex_enter(&ss->nx_lock); hca_prev = NULL; for (hca = ss->nx_hca; hca; hca = hca->hc_next) { if (hca->hc_guid != del_hca_guid) { hca_prev = hca; } else { if (hca_prev) { hca_prev->hc_next = hca->hc_next; } else { ss->nx_hca = hca->hc_next; } hca->hc_next = NULL; break; } } mutex_exit(&ss->nx_lock); if (hca) { (void) eibnx_cleanup_hca(hca); } }