/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include #include #include #include #include #include /* * ibtl_handlers.c */ /* * What's in this file? * * This file started as an implementation of Asynchronous Event/Error * handling and Completion Queue handling. As the implementation * evolved, code has been added for other ibc_* interfaces (resume, * predetach, etc.) that use the same mechanisms as used for asyncs. * * Async and CQ handling at interrupt level. * * CQ handling is normally done at interrupt level using the CQ callback * handler to call the appropriate IBT Client (owner of the CQ). For * clients that would prefer a fully flexible non-interrupt context to * do their CQ handling, a CQ can be created so that its handler is * called from a non-interrupt thread. CQ handling is done frequently * whereas Async handling is expected to occur very infrequently. * * Async handling is done by marking (or'ing in of an async_code of) the * pertinent IBTL data structure, and then notifying the async_thread(s) * that the data structure has async work to be done. 
The notification * occurs by linking the data structure through its async_link onto a * list of like data structures and waking up an async_thread. This * list append is not done if there is already async work pending on * this data structure (IBTL_ASYNC_PENDING). * * Async Mutex and CQ Mutex * * The global ibtl_async_mutex is "the" mutex used to control access * to all the data needed by ibc_async_handler. All the threads that * use this mutex are written so that the mutex is held for very short * periods of time, and never held while making calls to functions * that may block. * * The global ibtl_cq_mutex is used similarly by ibc_cq_handler and * the ibtl_cq_thread(s). * * Mutex hierarchy * * The ibtl_clnt_list_mutex is above the ibtl_async_mutex. * ibtl_clnt_list_mutex protects all of the various lists. * The ibtl_async_mutex is below this in the hierarchy. * * The ibtl_cq_mutex is independent of the above mutexes. * * Threads * * There are "ibtl_cq_threads" number of threads created for handling * Completion Queues in threads. If this feature really gets used, * then we will want to do some suitable tuning. Similarly, we may * want to tune the number of "ibtl_async_thread_init". * * The function ibtl_cq_thread is the main loop for handling a CQ in a * thread. There can be multiple threads executing this same code. * The code sleeps when there is no work to be done (list is empty), * otherwise it pulls the first CQ structure off the list and performs * the CQ handler callback to the client. After that returns, a check * is made, and if another ibc_cq_handler call was made for this CQ, * the client is called again. * * The function ibtl_async_thread is the main loop for handling async * events/errors. There can be multiple threads executing this same code. * The code sleeps when there is no work to be done (lists are empty), * otherwise it pulls the first structure off one of the lists and * performs the async callback(s) to the client(s). 
Note that HCA
 *	async handling is done by calling each of the clients using the HCA.
 *	When the async handling completes, the data structure having the async
 *	event/error is checked for more work before it's considered "done".
 *
 * Taskq
 *
 *	The async_taskq is used here for allowing async handler callbacks to
 *	occur simultaneously to multiple clients of an HCA.  This taskq could
 *	be used for other purposes, e.g., if all the async_threads are in
 *	use, but this is deemed as overkill since asyncs should occur rarely.
 */

/* Globals */

/* Module tag passed as the first argument of every IBTF_DPRINTF_L* call. */
static char ibtf_handlers[] = "ibtl_handlers";

/* priority for IBTL threads (async, cq, and taskq) */
static pri_t ibtl_pri = MAXCLSYSPRI - 1; /* maybe override in /etc/system */

/* taskq used for HCA asyncs */
#define	ibtl_async_taskq system_taskq

/* data for async handling by threads */
static kmutex_t ibtl_async_mutex;	/* protects most *_async_* data */
static kcondvar_t ibtl_async_cv;	/* async_threads wait on this */
static kcondvar_t ibtl_clnt_cv;		/* ibt_detach might wait on this */
static void ibtl_dec_clnt_async_cnt(ibtl_clnt_t *clntp);
static void ibtl_inc_clnt_async_cnt(ibtl_clnt_t *clntp);

static kt_did_t *ibtl_async_did;	/* for thread_join() */
int ibtl_async_thread_init = 4;	/* total # of async_threads to create */
static int ibtl_async_thread_exit = 0;	/* set if/when thread(s) should exit */

/*
 * async lists for various structures
 *
 * Each list is singly linked through the object's *_async_link field.
 * ibc_async_handler() appends at *_end and ibtl_async_thread() removes
 * from *_start; both pointers are NULL while the list is empty.
 */
static ibtl_hca_devinfo_t *ibtl_async_hca_list_start, *ibtl_async_hca_list_end;
static ibtl_eec_t *ibtl_async_eec_list_start, *ibtl_async_eec_list_end;
static ibtl_qp_t *ibtl_async_qp_list_start, *ibtl_async_qp_list_end;
static ibtl_cq_t *ibtl_async_cq_list_start, *ibtl_async_cq_list_end;
static ibtl_srq_t *ibtl_async_srq_list_start, *ibtl_async_srq_list_end;

/* data for CQ completion handling by threads */
static kmutex_t ibtl_cq_mutex;	/* protects the cv and the list below */
static kcondvar_t ibtl_cq_cv;
/* CQs queued for ibtl_cq_thread(), linked through cq_link */
static ibtl_cq_t *ibtl_cq_list_start, *ibtl_cq_list_end;

static int ibtl_cq_threads = 0;
/* total # of cq threads */
static int ibtl_cqs_using_threads = 0;	/* total # of cqs using threads */
static int ibtl_cq_thread_exit = 0;	/* set if/when thread(s) should exit */

/* value used to tell IBTL threads to exit */
#define	IBTL_THREAD_EXIT 0x1b7fdead	/* IBTF DEAD */

/* Non-zero makes ibc_async_handler() drop every EEC event/error. */
int ibtl_eec_not_supported = 1;

char *ibtl_last_client_name;	/* may help debugging */

_NOTE(LOCK_ORDER(ibtl_clnt_list_mutex ibtl_async_mutex))

/*
 * ibc_async_handler()
 *
 * Asynchronous Event/Error Handler.
 *
 *	This is the function called by HCA drivers to post the various async
 *	events and errors mentioned in the IB architecture spec.  See
 *	ibtl_types.h for additional details of this.
 *
 *	This function marks the pertinent IBTF object with the async_code,
 *	and queues the object for handling by an ibtl_async_thread.  If
 *	the object is NOT already marked for async processing, it is added
 *	to the associated list for that type of object, and an
 *	ibtl_async_thread is signaled to finish the async work.
 *
 *	hca_devp	The HCA the event/error was reported for.
 *	code		The single async code being posted.
 *	event_p		Driver-supplied details: the affected object handle
 *			(ev_qp_hdl, ev_cq_hdl, ev_srq_hdl or ev_eec_hdl),
 *			ev_port for port events, and ev_fma_ena.
 *
 *	All of the work is done under ibtl_async_mutex; nothing here blocks
 *	beyond taking that mutex.  A NULL object handle, a disabled EEC
 *	event, an out-of-range port number, or an unknown code is logged
 *	and otherwise ignored.
 */
void
ibc_async_handler(ibc_clnt_hdl_t hca_devp, ibt_async_code_t code,
    ibc_async_event_t *event_p)
{
	ibtl_qp_t	*ibtl_qp;
	ibtl_cq_t	*ibtl_cq;
	ibtl_srq_t	*ibtl_srq;
	ibtl_eec_t	*ibtl_eec;
	uint8_t		port_minus1;

	IBTF_DPRINTF_L2(ibtf_handlers, "ibc_async_handler(%p, 0x%x, %p)",
	    hca_devp, code, event_p);

	mutex_enter(&ibtl_async_mutex);

	switch (code) {
	/* QP (channel) events and errors */
	case IBT_EVENT_PATH_MIGRATED_QP:
	case IBT_EVENT_SQD:
	case IBT_ERROR_CATASTROPHIC_QP:
	case IBT_ERROR_PATH_MIGRATE_REQ_QP:
	case IBT_EVENT_COM_EST_QP:
	case IBT_ERROR_INVALID_REQUEST_QP:
	case IBT_ERROR_ACCESS_VIOLATION_QP:
	case IBT_EVENT_EMPTY_QP:
		ibtl_qp = event_p->ev_qp_hdl;
		if (ibtl_qp == NULL) {
			IBTF_DPRINTF_L2(ibtf_handlers, "ibc_async_handler: "
			    "bad qp handle");
			break;
		}
		/*
		 * Each QP error code has its own slot for the FMA ENA so
		 * that several pending errors do not overwrite one another.
		 * The plain events carry no ENA and fall through the switch.
		 */
		switch (code) {
		case IBT_ERROR_CATASTROPHIC_QP:
			ibtl_qp->qp_cat_fma_ena = event_p->ev_fma_ena; break;
		case IBT_ERROR_PATH_MIGRATE_REQ_QP:
			ibtl_qp->qp_pth_fma_ena = event_p->ev_fma_ena; break;
		case IBT_ERROR_INVALID_REQUEST_QP:
			ibtl_qp->qp_inv_fma_ena = event_p->ev_fma_ena; break;
		case IBT_ERROR_ACCESS_VIOLATION_QP:
			ibtl_qp->qp_acc_fma_ena = event_p->ev_fma_ena; break;
		}

		ibtl_qp->qp_async_codes |= code;
		/* queue the QP only if it is not already on the list */
		if ((ibtl_qp->qp_async_flags & IBTL_ASYNC_PENDING) == 0) {
			ibtl_qp->qp_async_flags |= IBTL_ASYNC_PENDING;
			ibtl_qp->qp_async_link = NULL;
			if (ibtl_async_qp_list_end == NULL)
				ibtl_async_qp_list_start = ibtl_qp;
			else
				ibtl_async_qp_list_end->qp_async_link = ibtl_qp;
			ibtl_async_qp_list_end = ibtl_qp;
			cv_signal(&ibtl_async_cv);
		}
		break;

	/* CQ error */
	case IBT_ERROR_CQ:
		ibtl_cq = event_p->ev_cq_hdl;
		if (ibtl_cq == NULL) {
			IBTF_DPRINTF_L2(ibtf_handlers, "ibc_async_handler: "
			    "bad cq handle");
			break;
		}
		ibtl_cq->cq_async_codes |= code;
		ibtl_cq->cq_fma_ena = event_p->ev_fma_ena;
		/* queue the CQ only if it is not already on the list */
		if ((ibtl_cq->cq_async_flags & IBTL_ASYNC_PENDING) == 0) {
			ibtl_cq->cq_async_flags |= IBTL_ASYNC_PENDING;
			ibtl_cq->cq_async_link = NULL;
			if (ibtl_async_cq_list_end == NULL)
				ibtl_async_cq_list_start = ibtl_cq;
			else
				ibtl_async_cq_list_end->cq_async_link = ibtl_cq;
			ibtl_async_cq_list_end = ibtl_cq;
			cv_signal(&ibtl_async_cv);
		}
		break;

	/* SRQ event and error; both share the single srq_fma_ena slot */
	case IBT_ERROR_CATASTROPHIC_SRQ:
	case IBT_EVENT_LIMIT_REACHED_SRQ:
		ibtl_srq = event_p->ev_srq_hdl;
		if (ibtl_srq == NULL) {
			IBTF_DPRINTF_L2(ibtf_handlers, "ibc_async_handler: "
			    "bad srq handle");
			break;
		}
		ibtl_srq->srq_async_codes |= code;
		ibtl_srq->srq_fma_ena = event_p->ev_fma_ena;
		/* queue the SRQ only if it is not already on the list */
		if ((ibtl_srq->srq_async_flags & IBTL_ASYNC_PENDING) == 0) {
			ibtl_srq->srq_async_flags |= IBTL_ASYNC_PENDING;
			ibtl_srq->srq_async_link = NULL;
			if (ibtl_async_srq_list_end == NULL)
				ibtl_async_srq_list_start = ibtl_srq;
			else
				ibtl_async_srq_list_end->srq_async_link =
				    ibtl_srq;
			ibtl_async_srq_list_end = ibtl_srq;
			cv_signal(&ibtl_async_cv);
		}
		break;

	/* EEC events and errors; dropped while ibtl_eec_not_supported is set */
	case IBT_EVENT_PATH_MIGRATED_EEC:
	case IBT_ERROR_PATH_MIGRATE_REQ_EEC:
	case IBT_ERROR_CATASTROPHIC_EEC:
	case IBT_EVENT_COM_EST_EEC:
		if (ibtl_eec_not_supported) {
			IBTF_DPRINTF_L2(ibtf_handlers, "ibc_async_handler: "
			    "EEC events are disabled.");
			break;
		}
		ibtl_eec = event_p->ev_eec_hdl;
		if (ibtl_eec == NULL) {
			IBTF_DPRINTF_L2(ibtf_handlers, "ibc_async_handler: "
			    "bad eec handle");
			break;
		}
		/* only the two EEC error codes record an FMA ENA */
		switch (code) {
		case IBT_ERROR_PATH_MIGRATE_REQ_EEC:
			ibtl_eec->eec_pth_fma_ena = event_p->ev_fma_ena; break;
		case IBT_ERROR_CATASTROPHIC_EEC:
			ibtl_eec->eec_cat_fma_ena = event_p->ev_fma_ena; break;
		}
		ibtl_eec->eec_async_codes |= code;
		/* queue the EEC only if it is not already on the list */
		if ((ibtl_eec->eec_async_flags & IBTL_ASYNC_PENDING) == 0) {
			ibtl_eec->eec_async_flags |= IBTL_ASYNC_PENDING;
			ibtl_eec->eec_async_link = NULL;
			if (ibtl_async_eec_list_end == NULL)
				ibtl_async_eec_list_start = ibtl_eec;
			else
				ibtl_async_eec_list_end->eec_async_link =
				    ibtl_eec;
			ibtl_async_eec_list_end = ibtl_eec;
			cv_signal(&ibtl_async_cv);
		}
		break;

	/* HCA-wide error: record it, then share the queueing code below */
	case IBT_ERROR_LOCAL_CATASTROPHIC:
		hca_devp->hd_async_codes |= code;
		hca_devp->hd_fma_ena = event_p->ev_fma_ena;
		/* FALLTHROUGH */

	case IBT_EVENT_PORT_UP:
	case IBT_ERROR_PORT_DOWN:
		/* skipped when we fell through from LOCAL_CATASTROPHIC */
		if ((code == IBT_EVENT_PORT_UP) ||
		    (code == IBT_ERROR_PORT_DOWN)) {
			/*
			 * port_minus1 is a uint8_t, so an ev_port of 0 wraps
			 * to 255 here instead of going negative; that value
			 * is rejected by the check as long as hca_nports
			 * does not exceed 255.
			 */
			if ((port_minus1 = event_p->ev_port - 1) >=
			    hca_devp->hd_hca_attr->hca_nports) {
				IBTF_DPRINTF_L2(ibtf_handlers,
				    "ibc_async_handler: bad port #: %d",
				    event_p->ev_port);
				break;
			}
			/*
			 * Remember the latest state of this port and flag it
			 * as changed; ibtl_do_hca_asyncs() scans for the
			 * flagged ports when it processes the code.
			 */
			hca_devp->hd_async_port[port_minus1] =
			    ((code == IBT_EVENT_PORT_UP) ? IBTL_HCA_PORT_UP :
			    IBTL_HCA_PORT_DOWN) | IBTL_HCA_PORT_CHANGED;
			hca_devp->hd_async_codes |= code;
		}

		/* queue the HCA only if it is not already on the list */
		if ((hca_devp->hd_async_flags & IBTL_ASYNC_PENDING) == 0) {
			hca_devp->hd_async_flags |= IBTL_ASYNC_PENDING;
			hca_devp->hd_async_link = NULL;
			if (ibtl_async_hca_list_end == NULL)
				ibtl_async_hca_list_start = hca_devp;
			else
				ibtl_async_hca_list_end->hd_async_link =
				    hca_devp;
			ibtl_async_hca_list_end = hca_devp;
			cv_signal(&ibtl_async_cv);
		}

		break;

	default:
		IBTF_DPRINTF_L1(ibtf_handlers, "ibc_async_handler: "
		    "invalid code (0x%x)", code);
	}

	mutex_exit(&ibtl_async_mutex);
}

/* Finally, make the async call to the client.
*/ static void ibtl_async_client_call(ibtl_hca_t *ibt_hca, ibt_async_code_t code, ibt_async_event_t *event_p) { ibtl_clnt_t *clntp; void *client_private; ibt_async_handler_t async_handler; char *client_name; IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_async_client_call(%p, 0x%x, %p)", ibt_hca, code, event_p); clntp = ibt_hca->ha_clnt_devp; _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibtl_last_client_name)) /* Record who is being called (just a debugging aid) */ ibtl_last_client_name = client_name = clntp->clnt_name; _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibtl_last_client_name)) client_private = clntp->clnt_private; async_handler = clntp->clnt_modinfop->mi_async_handler; if (code & (IBT_EVENT_COM_EST_QP | IBT_EVENT_COM_EST_EEC)) { mutex_enter(&ibtl_clnt_list_mutex); async_handler = ibtl_cm_async_handler; client_private = ibtl_cm_clnt_private; mutex_exit(&ibtl_clnt_list_mutex); ibt_hca = NULL; IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_async_client_call: " "calling CM for COM_EST"); } else { IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_async_client_call: " "calling client '%s'", client_name); } if (async_handler != NULL) async_handler(client_private, ibt_hca, code, event_p); else IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_async_client_call: " "client '%s' has no async handler", client_name); } /* * Inform CM or DM about HCA events. * * We use taskqs to allow simultaneous notification, with sleeping. * Since taskqs only allow one argument, we define a structure * because we need to pass in more than one argument. */ struct ibtl_mgr_s { ibtl_hca_devinfo_t *mgr_hca_devp; ibt_async_handler_t mgr_async_handler; void *mgr_clnt_private; }; /* * Asyncs of HCA level events for CM and DM. Call CM or DM and tell them * about the HCA for the event recorded in the ibtl_hca_devinfo_t. 
*/ static void ibtl_do_mgr_async_task(void *arg) { struct ibtl_mgr_s *mgrp = (struct ibtl_mgr_s *)arg; ibtl_hca_devinfo_t *hca_devp = mgrp->mgr_hca_devp; IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_do_mgr_async_task(0x%x)", hca_devp->hd_async_code); mgrp->mgr_async_handler(mgrp->mgr_clnt_private, NULL, hca_devp->hd_async_code, &hca_devp->hd_async_event); kmem_free(mgrp, sizeof (*mgrp)); mutex_enter(&ibtl_clnt_list_mutex); if (--hca_devp->hd_async_task_cnt == 0) cv_signal(&hca_devp->hd_async_task_cv); mutex_exit(&ibtl_clnt_list_mutex); } static void ibtl_tell_mgr(ibtl_hca_devinfo_t *hca_devp, ibt_async_handler_t async_handler, void *clnt_private) { struct ibtl_mgr_s *mgrp; if (async_handler == NULL) return; _NOTE(NO_COMPETING_THREADS_NOW) mgrp = kmem_alloc(sizeof (*mgrp), KM_SLEEP); mgrp->mgr_hca_devp = hca_devp; mgrp->mgr_async_handler = async_handler; mgrp->mgr_clnt_private = clnt_private; hca_devp->hd_async_task_cnt++; (void) taskq_dispatch(ibtl_async_taskq, ibtl_do_mgr_async_task, mgrp, TQ_SLEEP); #ifndef lint _NOTE(COMPETING_THREADS_NOW) #endif } /* * Per client-device asyncs for HCA level events. Call each client that is * using the HCA for the event recorded in the ibtl_hca_devinfo_t. 
*/
/*
 * ibtl_hca_client_async_task()
 *
 *	Taskq entry point; "arg" is the ibtl_hca_t of one client that has
 *	the HCA open.  The client is called with a private copy of the
 *	HCA's recorded event.  Afterwards three counts are dropped:
 *
 *	 - ha_async_cnt (under ibtl_async_mutex); when it reaches zero and
 *	   the handle was marked IBTL_ASYNC_FREE_OBJECT, the handle is
 *	   freed here;
 *	 - hd_async_task_cnt (under ibtl_clnt_list_mutex); the dispatcher
 *	   is signaled when it reaches zero;
 *	 - clnt_async_cnt (under ibtl_clnt_list_mutex); waiters on
 *	   ibtl_clnt_cv are woken when it reaches zero.
 */
static void
ibtl_hca_client_async_task(void *arg)
{
	ibtl_hca_t		*hca_hdl = arg;
	/* Captured up front: hca_hdl may be freed before they are used. */
	ibtl_hca_devinfo_t	*devp = hca_hdl->ha_hca_devp;
	ibtl_clnt_t		*client = hca_hdl->ha_clnt_devp;
	ibt_async_event_t	event_copy;
	int			release_hdl;

	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_hca_client_async_task(%p, 0x%x)",
	    hca_hdl, devp->hd_async_code);

	bcopy(&devp->hd_async_event, &event_copy, sizeof (event_copy));
	ibtl_async_client_call(hca_hdl, devp->hd_async_code, &event_copy);

	/* Decide under the lock, free (if needed) after dropping it. */
	mutex_enter(&ibtl_async_mutex);
	hca_hdl->ha_async_cnt--;
	release_hdl = (hca_hdl->ha_async_cnt == 0 &&
	    (hca_hdl->ha_async_flags & IBTL_ASYNC_FREE_OBJECT) != 0);
	mutex_exit(&ibtl_async_mutex);
	if (release_hdl)
		kmem_free(hca_hdl, sizeof (ibtl_hca_t));

	mutex_enter(&ibtl_clnt_list_mutex);
	devp->hd_async_task_cnt--;
	if (devp->hd_async_task_cnt == 0)
		cv_signal(&devp->hd_async_task_cv);
	client->clnt_async_cnt--;
	if (client->clnt_async_cnt == 0)
		cv_broadcast(&ibtl_clnt_cv);
	mutex_exit(&ibtl_clnt_list_mutex);
}

/*
 * Asyncs for HCA level events.
 *
 * The function continues to run until there are no more async
 * events/errors for this HCA.  An event is chosen for dispatch
 * to all clients of this HCA.  This thread dispatches them via
 * the ibtl_async_taskq, then sleeps until all tasks are done.
 *
 * This thread records the async_code and async_event in the
 * ibtl_hca_devinfo_t for all client taskq threads to reference.
 *
 * This is called from an async or taskq thread with ibtl_async_mutex held.
*/
/*
 * Locking notes for ibtl_do_hca_asyncs(), as the code below shows:
 *
 *	- ibtl_async_mutex is dropped on entry so that ibtl_clnt_list_mutex
 *	  can be taken first, then ibtl_async_mutex is re-acquired.
 *	- hd_async_busy admits one thread at a time per HCA; later arrivals
 *	  wait on hd_async_busy_cv.
 *	- ibtl_clnt_list_mutex stays held for the whole loop (cv_wait drops
 *	  it while sleeping); ibtl_async_mutex is held only while the
 *	  pending codes and port status are examined.
 *	- On return ibtl_clnt_list_mutex has been released and
 *	  ibtl_async_mutex is held again.
 */
static void
ibtl_do_hca_asyncs(ibtl_hca_devinfo_t *hca_devp)
{
	ibtl_hca_t			*ibt_hca;
	ibt_async_code_t		code;
	ibtl_async_port_status_t  	temp;
	uint8_t				nports;
	uint8_t				port_minus1;
	ibtl_async_port_status_t	*portp;

	mutex_exit(&ibtl_async_mutex);

	mutex_enter(&ibtl_clnt_list_mutex);
	/* serialize HCA async processing for this HCA */
	while (hca_devp->hd_async_busy)
		cv_wait(&hca_devp->hd_async_busy_cv, &ibtl_clnt_list_mutex);
	hca_devp->hd_async_busy = 1;
	mutex_enter(&ibtl_async_mutex);

	bzero(&hca_devp->hd_async_event, sizeof (hca_devp->hd_async_event));
	for (;;) {

		hca_devp->hd_async_event.ev_fma_ena = 0;

		/*
		 * Pick exactly one pending code, in priority order:
		 * local catastrophic, then port down, then port up.
		 * Any other bits are discarded.
		 */
		code = hca_devp->hd_async_codes;
		if (code & IBT_ERROR_LOCAL_CATASTROPHIC) {
			code = IBT_ERROR_LOCAL_CATASTROPHIC;
			hca_devp->hd_async_event.ev_fma_ena =
			    hca_devp->hd_fma_ena;
		} else if (code & IBT_ERROR_PORT_DOWN)
			code = IBT_ERROR_PORT_DOWN;
		else if (code & IBT_EVENT_PORT_UP)
			code = IBT_EVENT_PORT_UP;
		else {
			hca_devp->hd_async_codes = 0;
			code = 0;
		}

		/* nothing left: allow the HCA to be queued again and stop */
		if (code == 0) {
			hca_devp->hd_async_flags &= ~IBTL_ASYNC_PENDING;
			break;
		}
		hca_devp->hd_async_codes &= ~code;

		if ((code == IBT_EVENT_PORT_UP) ||
		    (code == IBT_ERROR_PORT_DOWN)) {
			/* PORT_UP or PORT_DOWN */
			/*
			 * Find the first port whose recorded status is
			 * exactly this state with the "changed" flag set.
			 */
			portp = hca_devp->hd_async_port;
			nports = hca_devp->hd_hca_attr->hca_nports;
			for (port_minus1 = 0; port_minus1 < nports;
			    port_minus1++) {
				temp = ((code == IBT_EVENT_PORT_UP) ?
				    IBTL_HCA_PORT_UP : IBTL_HCA_PORT_DOWN) |
				    IBTL_HCA_PORT_CHANGED;
				if (portp[port_minus1] == temp)
					break;
			}
			if (port_minus1 >= nports) {
				/* we checked again, but found nothing */
				continue;
			}
			IBTF_DPRINTF_L4(ibtf_handlers, "ibtl_do_hca_asyncs: "
			    "async: port# %x code %x", port_minus1 + 1, code);
			/* mark it to check for other ports after we're done */
			hca_devp->hd_async_codes |= code;

			hca_devp->hd_async_event.ev_port = port_minus1 + 1;
			hca_devp->hd_async_port[port_minus1] &=
			    ~IBTL_HCA_PORT_CHANGED;

			/* called without ibtl_async_mutex held */
			mutex_exit(&ibtl_async_mutex);
			ibtl_reinit_hca_portinfo(hca_devp, port_minus1 + 1);
			mutex_enter(&ibtl_async_mutex);
		}

		/* publish the code and event for the taskq tasks to read */
		hca_devp->hd_async_code = code;
		hca_devp->hd_async_event.ev_hca_guid =
		    hca_devp->hd_hca_attr->hca_node_guid;
		mutex_exit(&ibtl_async_mutex);

		/*
		 * Make sure to inform CM, DM, and IBMA if we know of them.
		 * Also, make sure not to inform them a second time, which
		 * would occur if they have the HCA open.
		 */

		/* IBMA is told first, and on its own */
		if (ibtl_ibma_async_handler)
			ibtl_tell_mgr(hca_devp, ibtl_ibma_async_handler,
			    ibtl_ibma_clnt_private);
		/* wait for all tasks to complete */
		while (hca_devp->hd_async_task_cnt != 0)
			cv_wait(&hca_devp->hd_async_task_cv,
			    &ibtl_clnt_list_mutex);

		/* then DM and CM, which may run concurrently */
		if (ibtl_dm_async_handler)
			ibtl_tell_mgr(hca_devp, ibtl_dm_async_handler,
			    ibtl_dm_clnt_private);
		if (ibtl_cm_async_handler)
			ibtl_tell_mgr(hca_devp, ibtl_cm_async_handler,
			    ibtl_cm_clnt_private);
		/* wait for all tasks to complete */
		while (hca_devp->hd_async_task_cnt != 0)
			cv_wait(&hca_devp->hd_async_task_cv,
			    &ibtl_clnt_list_mutex);

		/* finally every remaining client of this HCA, in parallel */
		for (ibt_hca = hca_devp->hd_clnt_list;
		    ibt_hca != NULL;
		    ibt_hca = ibt_hca->ha_clnt_link) {

			/* Managers are handled above */
			if (IBTL_HCA2MODI_P(ibt_hca)->mi_async_handler ==
			    ibtl_cm_async_handler)
				continue;
			if (IBTL_HCA2MODI_P(ibt_hca)->mi_async_handler ==
			    ibtl_dm_async_handler)
				continue;
			if (IBTL_HCA2MODI_P(ibt_hca)->mi_async_handler ==
			    ibtl_ibma_async_handler)
				continue;
			/*
			 * These three counts are dropped again by
			 * ibtl_hca_client_async_task() when it finishes.
			 */
			++ibt_hca->ha_clnt_devp->clnt_async_cnt;

			mutex_enter(&ibtl_async_mutex);
			ibt_hca->ha_async_cnt++;
			mutex_exit(&ibtl_async_mutex);
			hca_devp->hd_async_task_cnt++;
			(void) taskq_dispatch(ibtl_async_taskq,
			    ibtl_hca_client_async_task, ibt_hca, TQ_SLEEP);
		}

		/* wait for all tasks to complete */
		while (hca_devp->hd_async_task_cnt != 0)
			cv_wait(&hca_devp->hd_async_task_cv,
			    &ibtl_clnt_list_mutex);

		/* re-take the lock the top of the loop expects */
		mutex_enter(&ibtl_async_mutex);
	}
	/* done: let the next waiter (if any) process this HCA */
	hca_devp->hd_async_code = 0;
	hca_devp->hd_async_busy = 0;
	cv_broadcast(&hca_devp->hd_async_busy_cv);
	mutex_exit(&ibtl_clnt_list_mutex);
}

/*
 * Asyncs for QP objects.
 *
 * The function continues to run until there are no more async
 * events/errors for this object.
 *
 * Entered and exited with ibtl_async_mutex held; the mutex is dropped
 * around each client callback.  If the QP was marked
 * IBTL_ASYNC_FREE_OBJECT, no further callbacks are made and the channel
 * is destroyed and freed here instead of clearing IBTL_ASYNC_PENDING.
 */
static void
ibtl_do_qp_asyncs(ibtl_qp_t *ibtl_qp)
{
	ibt_async_code_t	code;
	ibt_async_event_t	async_event;

	ASSERT(MUTEX_HELD(&ibtl_async_mutex));
	bzero(&async_event, sizeof (async_event));
	async_event.ev_chan_hdl = IBTL_QP2CHAN(ibtl_qp);

	while ((code = ibtl_qp->qp_async_codes) != 0) {
		async_event.ev_fma_ena = 0;
		/*
		 * Reduce the pending mask to a single code, errors first;
		 * each error picks up the ENA saved for it by
		 * ibc_async_handler().
		 */
		if (ibtl_qp->qp_async_flags & IBTL_ASYNC_FREE_OBJECT)
			code = 0;	/* fallthrough to "kmem_free" */
		else if (code & IBT_ERROR_CATASTROPHIC_QP) {
			code = IBT_ERROR_CATASTROPHIC_QP;
			async_event.ev_fma_ena = ibtl_qp->qp_cat_fma_ena;
		} else if (code & IBT_ERROR_INVALID_REQUEST_QP) {
			code = IBT_ERROR_INVALID_REQUEST_QP;
			async_event.ev_fma_ena = ibtl_qp->qp_inv_fma_ena;
		} else if (code & IBT_ERROR_ACCESS_VIOLATION_QP) {
			code = IBT_ERROR_ACCESS_VIOLATION_QP;
			async_event.ev_fma_ena = ibtl_qp->qp_acc_fma_ena;
		} else if (code & IBT_ERROR_PATH_MIGRATE_REQ_QP) {
			code = IBT_ERROR_PATH_MIGRATE_REQ_QP;
			async_event.ev_fma_ena = ibtl_qp->qp_pth_fma_ena;
		} else if (code & IBT_EVENT_PATH_MIGRATED_QP)
			code = IBT_EVENT_PATH_MIGRATED_QP;
		else if (code & IBT_EVENT_SQD)
			code = IBT_EVENT_SQD;
		else if (code & IBT_EVENT_COM_EST_QP)
			code = IBT_EVENT_COM_EST_QP;
		else if (code & IBT_EVENT_EMPTY_QP)
			code = IBT_EVENT_EMPTY_QP;
		else {
			IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_do_qp_asyncs: "
			    "async: unexpected QP async code 0x%x", code);
			ibtl_qp->qp_async_codes = 0;
			code = 0;
		}
		/* consume the chosen code (a no-op when code is 0) */
		ibtl_qp->qp_async_codes &= ~code;

		if (code) {
			mutex_exit(&ibtl_async_mutex);
			ibtl_async_client_call(ibtl_qp->qp_hca,
			    code, &async_event);
			mutex_enter(&ibtl_async_mutex);
		}

		/* the free may have been requested during the callback */
		if (ibtl_qp->qp_async_flags & IBTL_ASYNC_FREE_OBJECT) {
			mutex_exit(&ibtl_async_mutex);
			cv_destroy(&(IBTL_QP2CHAN(ibtl_qp))->ch_cm_cv);
			mutex_destroy(&(IBTL_QP2CHAN(ibtl_qp))->ch_cm_mutex);
			kmem_free(IBTL_QP2CHAN(ibtl_qp),
			    sizeof (ibtl_channel_t));
			mutex_enter(&ibtl_async_mutex);
			return;
		}
	}
	ibtl_qp->qp_async_flags &= ~IBTL_ASYNC_PENDING;
}

/*
 * Asyncs for SRQ objects.
 *
 * The function continues to run until there are no more async
 * events/errors for this object.
 *
 * Entered and exited with ibtl_async_mutex held; the mutex is dropped
 * around each client callback.  If the SRQ was marked
 * IBTL_ASYNC_FREE_OBJECT, no further callbacks are made and it is freed
 * here instead of clearing IBTL_ASYNC_PENDING.
 */
static void
ibtl_do_srq_asyncs(ibtl_srq_t *ibtl_srq)
{
	ibt_async_code_t	code;
	ibt_async_event_t	async_event;

	ASSERT(MUTEX_HELD(&ibtl_async_mutex));
	bzero(&async_event, sizeof (async_event));
	async_event.ev_srq_hdl = ibtl_srq;
	/* sampled once, before the loop; both SRQ codes share this ENA */
	async_event.ev_fma_ena = ibtl_srq->srq_fma_ena;

	while ((code = ibtl_srq->srq_async_codes) != 0) {

		if (ibtl_srq->srq_async_flags & IBTL_ASYNC_FREE_OBJECT)
			code = 0;	/* fallthrough to "kmem_free" */
		else if (code & IBT_ERROR_CATASTROPHIC_SRQ)
			code = IBT_ERROR_CATASTROPHIC_SRQ;
		else if (code & IBT_EVENT_LIMIT_REACHED_SRQ)
			code = IBT_EVENT_LIMIT_REACHED_SRQ;
		else {
			IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_do_srq_asyncs: "
			    "async: unexpected SRQ async code 0x%x", code);
			ibtl_srq->srq_async_codes = 0;
			code = 0;
		}

		/* consume the chosen code (a no-op when code is 0) */
		ibtl_srq->srq_async_codes &= ~code;

		if (code) {
			mutex_exit(&ibtl_async_mutex);
			ibtl_async_client_call(ibtl_srq->srq_hca,
			    code, &async_event);
			mutex_enter(&ibtl_async_mutex);
		}

		/* the free may have been requested during the callback */
		if (ibtl_srq->srq_async_flags & IBTL_ASYNC_FREE_OBJECT) {
			mutex_exit(&ibtl_async_mutex);
			kmem_free(ibtl_srq, sizeof (struct ibtl_srq_s));
			mutex_enter(&ibtl_async_mutex);
			return;
		}
	}
	ibtl_srq->srq_async_flags &= ~IBTL_ASYNC_PENDING;
}

/*
 * Asyncs for CQ objects.
 *
 * The function continues to run until there are no more async
 * events/errors for this object.
*/
/*
 * ibtl_do_cq_asyncs()
 *
 *	ibtl_cq		CQ taken off the async CQ list by the caller.
 *
 *	Entered and exited with ibtl_async_mutex held; the mutex is
 *	released around the client callback.  IBT_ERROR_CQ is the only code
 *	delivered; any other pending bits are logged and discarded.  If the
 *	CQ has been marked IBTL_ASYNC_FREE_OBJECT, no callback is made and
 *	the CQ is destroyed and freed here, in which case
 *	IBTL_ASYNC_PENDING is never cleared.
 */
static void
ibtl_do_cq_asyncs(ibtl_cq_t *ibtl_cq)
{
	ibt_async_event_t	event;
	ibt_async_code_t	pending;
	ibt_async_code_t	deliver;

	ASSERT(MUTEX_HELD(&ibtl_async_mutex));

	bzero(&event, sizeof (event));
	event.ev_cq_hdl = ibtl_cq;
	event.ev_fma_ena = ibtl_cq->cq_fma_ena;

	for (;;) {
		pending = ibtl_cq->cq_async_codes;
		if (pending == 0)
			break;

		/* Choose what (if anything) to hand to the client. */
		if ((ibtl_cq->cq_async_flags & IBTL_ASYNC_FREE_OBJECT) != 0) {
			deliver = 0;	/* go straight to the free below */
		} else if ((pending & IBT_ERROR_CQ) != 0) {
			deliver = IBT_ERROR_CQ;
		} else {
			IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_do_cq_asyncs: "
			    "async: unexpected CQ async code 0x%x", pending);
			ibtl_cq->cq_async_codes = 0;
			deliver = 0;
		}
		ibtl_cq->cq_async_codes &= ~deliver;

		if (deliver != 0) {
			mutex_exit(&ibtl_async_mutex);
			ibtl_async_client_call(ibtl_cq->cq_hca, deliver,
			    &event);
			mutex_enter(&ibtl_async_mutex);
		}

		/* A free may have been requested while we were unlocked. */
		if ((ibtl_cq->cq_async_flags & IBTL_ASYNC_FREE_OBJECT) != 0) {
			mutex_exit(&ibtl_async_mutex);
			mutex_destroy(&ibtl_cq->cq_mutex);
			kmem_free(ibtl_cq, sizeof (struct ibtl_cq_s));
			mutex_enter(&ibtl_async_mutex);
			return;
		}
	}

	ibtl_cq->cq_async_flags &= ~IBTL_ASYNC_PENDING;
}

/*
 * Asyncs for EEC objects.
 *
 * The function continues to run until there are no more async
 * events/errors for this object.
*/ static void ibtl_do_eec_asyncs(ibtl_eec_t *ibtl_eec) { ibt_async_code_t code; ibt_async_event_t async_event; ASSERT(MUTEX_HELD(&ibtl_async_mutex)); bzero(&async_event, sizeof (async_event)); async_event.ev_chan_hdl = ibtl_eec->eec_channel; while ((code = ibtl_eec->eec_async_codes) != 0) { async_event.ev_fma_ena = 0; if (ibtl_eec->eec_async_flags & IBTL_ASYNC_FREE_OBJECT) code = 0; /* fallthrough to "kmem_free" */ else if (code & IBT_ERROR_CATASTROPHIC_EEC) { code = IBT_ERROR_CATASTROPHIC_CHAN; async_event.ev_fma_ena = ibtl_eec->eec_cat_fma_ena; } else if (code & IBT_ERROR_PATH_MIGRATE_REQ_EEC) { code = IBT_ERROR_PATH_MIGRATE_REQ; async_event.ev_fma_ena = ibtl_eec->eec_pth_fma_ena; } else if (code & IBT_EVENT_PATH_MIGRATED_EEC) code = IBT_EVENT_PATH_MIGRATED; else if (code & IBT_EVENT_COM_EST_EEC) code = IBT_EVENT_COM_EST; else { IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_do_eec_asyncs: " "async: unexpected code 0x%x", code); ibtl_eec->eec_async_codes = 0; code = 0; } ibtl_eec->eec_async_codes &= ~code; if (code) { mutex_exit(&ibtl_async_mutex); ibtl_async_client_call(ibtl_eec->eec_hca, code, &async_event); mutex_enter(&ibtl_async_mutex); } if (ibtl_eec->eec_async_flags & IBTL_ASYNC_FREE_OBJECT) { mutex_exit(&ibtl_async_mutex); kmem_free(ibtl_eec, sizeof (struct ibtl_eec_s)); mutex_enter(&ibtl_async_mutex); return; } } ibtl_eec->eec_async_flags &= ~IBTL_ASYNC_PENDING; } #ifdef __lock_lint kmutex_t cpr_mutex; #endif /* * Loop forever, calling async_handlers until all of the async lists * are empty. 
*/ static void ibtl_async_thread(void) { #ifndef __lock_lint kmutex_t cpr_mutex; #endif callb_cpr_t cprinfo; _NOTE(MUTEX_PROTECTS_DATA(cpr_mutex, cprinfo)) _NOTE(NO_COMPETING_THREADS_NOW) mutex_init(&cpr_mutex, NULL, MUTEX_DRIVER, NULL); CALLB_CPR_INIT(&cprinfo, &cpr_mutex, callb_generic_cpr, "ibtl_async_thread"); #ifndef lint _NOTE(COMPETING_THREADS_NOW) #endif mutex_enter(&ibtl_async_mutex); for (;;) { if (ibtl_async_hca_list_start) { ibtl_hca_devinfo_t *hca_devp; /* remove first entry from list */ hca_devp = ibtl_async_hca_list_start; ibtl_async_hca_list_start = hca_devp->hd_async_link; hca_devp->hd_async_link = NULL; if (ibtl_async_hca_list_start == NULL) ibtl_async_hca_list_end = NULL; ibtl_do_hca_asyncs(hca_devp); } else if (ibtl_async_qp_list_start) { ibtl_qp_t *ibtl_qp; /* remove from list */ ibtl_qp = ibtl_async_qp_list_start; ibtl_async_qp_list_start = ibtl_qp->qp_async_link; ibtl_qp->qp_async_link = NULL; if (ibtl_async_qp_list_start == NULL) ibtl_async_qp_list_end = NULL; ibtl_do_qp_asyncs(ibtl_qp); } else if (ibtl_async_srq_list_start) { ibtl_srq_t *ibtl_srq; /* remove from list */ ibtl_srq = ibtl_async_srq_list_start; ibtl_async_srq_list_start = ibtl_srq->srq_async_link; ibtl_srq->srq_async_link = NULL; if (ibtl_async_srq_list_start == NULL) ibtl_async_srq_list_end = NULL; ibtl_do_srq_asyncs(ibtl_srq); } else if (ibtl_async_eec_list_start) { ibtl_eec_t *ibtl_eec; /* remove from list */ ibtl_eec = ibtl_async_eec_list_start; ibtl_async_eec_list_start = ibtl_eec->eec_async_link; ibtl_eec->eec_async_link = NULL; if (ibtl_async_eec_list_start == NULL) ibtl_async_eec_list_end = NULL; ibtl_do_eec_asyncs(ibtl_eec); } else if (ibtl_async_cq_list_start) { ibtl_cq_t *ibtl_cq; /* remove from list */ ibtl_cq = ibtl_async_cq_list_start; ibtl_async_cq_list_start = ibtl_cq->cq_async_link; ibtl_cq->cq_async_link = NULL; if (ibtl_async_cq_list_start == NULL) ibtl_async_cq_list_end = NULL; ibtl_do_cq_asyncs(ibtl_cq); } else { if (ibtl_async_thread_exit == 
IBTL_THREAD_EXIT) break; mutex_enter(&cpr_mutex); CALLB_CPR_SAFE_BEGIN(&cprinfo); mutex_exit(&cpr_mutex); cv_wait(&ibtl_async_cv, &ibtl_async_mutex); mutex_exit(&ibtl_async_mutex); mutex_enter(&cpr_mutex); CALLB_CPR_SAFE_END(&cprinfo, &cpr_mutex); mutex_exit(&cpr_mutex); mutex_enter(&ibtl_async_mutex); } } mutex_exit(&ibtl_async_mutex); #ifndef __lock_lint mutex_enter(&cpr_mutex); CALLB_CPR_EXIT(&cprinfo); #endif mutex_destroy(&cpr_mutex); } void ibtl_free_qp_async_check(ibtl_qp_t *ibtl_qp) { IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_free_qp_async_check(%p)", ibtl_qp); mutex_enter(&ibtl_async_mutex); /* * If there is an active async, mark this object to be freed * by the async_thread when it's done. */ if (ibtl_qp->qp_async_flags & IBTL_ASYNC_PENDING) { ibtl_qp->qp_async_flags |= IBTL_ASYNC_FREE_OBJECT; mutex_exit(&ibtl_async_mutex); } else { /* free the object now */ mutex_exit(&ibtl_async_mutex); cv_destroy(&(IBTL_QP2CHAN(ibtl_qp))->ch_cm_cv); mutex_destroy(&(IBTL_QP2CHAN(ibtl_qp))->ch_cm_mutex); kmem_free(IBTL_QP2CHAN(ibtl_qp), sizeof (ibtl_channel_t)); } } void ibtl_free_cq_async_check(ibtl_cq_t *ibtl_cq) { IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_free_cq_async_check(%p)", ibtl_cq); mutex_enter(&ibtl_async_mutex); /* if there is an active async, mark this object to be freed */ if (ibtl_cq->cq_async_flags & IBTL_ASYNC_PENDING) { ibtl_cq->cq_async_flags |= IBTL_ASYNC_FREE_OBJECT; mutex_exit(&ibtl_async_mutex); } else { /* free the object now */ mutex_exit(&ibtl_async_mutex); mutex_destroy(&ibtl_cq->cq_mutex); kmem_free(ibtl_cq, sizeof (struct ibtl_cq_s)); } } void ibtl_free_srq_async_check(ibtl_srq_t *ibtl_srq) { IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_free_srq_async_check(%p)", ibtl_srq); mutex_enter(&ibtl_async_mutex); /* if there is an active async, mark this object to be freed */ if (ibtl_srq->srq_async_flags & IBTL_ASYNC_PENDING) { ibtl_srq->srq_async_flags |= IBTL_ASYNC_FREE_OBJECT; mutex_exit(&ibtl_async_mutex); } else { /* free the object now */ 
mutex_exit(&ibtl_async_mutex); kmem_free(ibtl_srq, sizeof (struct ibtl_srq_s)); } } void ibtl_free_eec_async_check(ibtl_eec_t *ibtl_eec) { IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_free_eec_async_check(%p)", ibtl_eec); mutex_enter(&ibtl_async_mutex); /* if there is an active async, mark this object to be freed */ if (ibtl_eec->eec_async_flags & IBTL_ASYNC_PENDING) { ibtl_eec->eec_async_flags |= IBTL_ASYNC_FREE_OBJECT; mutex_exit(&ibtl_async_mutex); } else { /* free the object now */ mutex_exit(&ibtl_async_mutex); kmem_free(ibtl_eec, sizeof (struct ibtl_eec_s)); } } /* * This function differs from above in that we assume this is called * from non-interrupt context, and never called from the async_thread. */ void ibtl_free_hca_async_check(ibtl_hca_t *ibt_hca) { IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_free_hca_async_check(%p)", ibt_hca); mutex_enter(&ibtl_async_mutex); /* if there is an active async, mark this object to be freed */ if (ibt_hca->ha_async_cnt > 0) { ibt_hca->ha_async_flags |= IBTL_ASYNC_FREE_OBJECT; mutex_exit(&ibtl_async_mutex); } else { /* free the object now */ mutex_exit(&ibtl_async_mutex); kmem_free(ibt_hca, sizeof (ibtl_hca_t)); } } /* * Completion Queue Handling. * * A completion queue can be handled through a simple callback * at interrupt level, or it may be queued for an ibtl_cq_thread * to handle. The latter is chosen during ibt_alloc_cq when the * IBTF_CQ_HANDLER_IN_THREAD is specified. */ static void ibtl_cq_handler_call(ibtl_cq_t *ibtl_cq) { ibt_cq_handler_t cq_handler; void *arg; IBTF_DPRINTF_L4(ibtf_handlers, "ibtl_cq_handler_call(%p)", ibtl_cq); _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ibtl_cq)) cq_handler = ibtl_cq->cq_comp_handler; arg = ibtl_cq->cq_arg; if (cq_handler != NULL) cq_handler(ibtl_cq, arg); else IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_cq_handler_call: " "no cq_handler for cq %p", ibtl_cq); } /* * Before ibt_free_cq can continue, we need to ensure no more cq_handler * callbacks can occur. 
When we get the mutex, we know there are no
 * outstanding cq_handler callbacks.  We set the cq_handler to NULL to
 * prohibit future callbacks.
 *
 * For a CQ that is handled in a thread, this also decrements
 * ibtl_cqs_using_threads and then, for as long as IBTL_CQ_PENDING is
 * set, clears IBTL_CQ_CALL_CLIENT (so no further client call is made),
 * sets IBTL_CQ_FREE, and sleeps on ibtl_cq_cv.  ibtl_cq_thread()
 * broadcasts ibtl_cq_cv once it has cleared IBTL_CQ_PENDING on a CQ
 * that carries IBTL_CQ_FREE.
 *
 * NOTE(review): ibtl_cq_handler_call() reads cq_comp_handler without
 * taking cq_mutex in the code visible here - confirm which mutex
 * actually excludes an in-progress interrupt-level callback.
 */
void
ibtl_free_cq_check(ibtl_cq_t *ibtl_cq)
{
	mutex_enter(&ibtl_cq->cq_mutex);
	ibtl_cq->cq_comp_handler = NULL;
	mutex_exit(&ibtl_cq->cq_mutex);
	if (ibtl_cq->cq_in_thread) {
		mutex_enter(&ibtl_cq_mutex);
		--ibtl_cqs_using_threads;
		/* re-assert the flags on every wakeup until the thread is done */
		while (ibtl_cq->cq_impl_flags & IBTL_CQ_PENDING) {
			ibtl_cq->cq_impl_flags &= ~IBTL_CQ_CALL_CLIENT;
			ibtl_cq->cq_impl_flags |= IBTL_CQ_FREE;
			cv_wait(&ibtl_cq_cv, &ibtl_cq_mutex);
		}
		mutex_exit(&ibtl_cq_mutex);
	}
}

/*
 * Main loop of a CQ handler thread.
 *
 * With ibtl_cq_mutex held, the thread repeatedly takes the CQ at the head
 * of the ibtl_cq_list_start/ibtl_cq_list_end list and calls its handler
 * (with the mutex dropped) for as long as IBTL_CQ_CALL_CLIENT keeps being
 * set again.  When the list is empty it either exits, if
 * ibtl_cq_thread_exit == IBTL_THREAD_EXIT, or sleeps on ibtl_cq_cv.
 */
static void
ibtl_cq_thread(void)
{
#ifndef __lock_lint
	kmutex_t cpr_mutex;
#endif
	callb_cpr_t	cprinfo;

	_NOTE(MUTEX_PROTECTS_DATA(cpr_mutex, cprinfo))
	_NOTE(NO_COMPETING_THREADS_NOW)
	mutex_init(&cpr_mutex, NULL, MUTEX_DRIVER, NULL);
	CALLB_CPR_INIT(&cprinfo, &cpr_mutex, callb_generic_cpr,
	    "ibtl_cq_thread");
#ifndef lint
	_NOTE(COMPETING_THREADS_NOW)
#endif

	mutex_enter(&ibtl_cq_mutex);

	for (;;) {
		if (ibtl_cq_list_start) {
			ibtl_cq_t *ibtl_cq;

			/* unlink the first CQ from the singly linked list */
			ibtl_cq = ibtl_cq_list_start;
			ibtl_cq_list_start = ibtl_cq->cq_link;
			ibtl_cq->cq_link = NULL;
			if (ibtl_cq == ibtl_cq_list_end)
				ibtl_cq_list_end = NULL;

			/*
			 * ibc_cq_handler() may set IBTL_CQ_CALL_CLIENT again
			 * while the mutex is dropped for the callback, so
			 * keep calling until the flag stays clear.
			 */
			while (ibtl_cq->cq_impl_flags & IBTL_CQ_CALL_CLIENT) {
				ibtl_cq->cq_impl_flags &= ~IBTL_CQ_CALL_CLIENT;
				mutex_exit(&ibtl_cq_mutex);
				ibtl_cq_handler_call(ibtl_cq);
				mutex_enter(&ibtl_cq_mutex);
			}
			ibtl_cq->cq_impl_flags &= ~IBTL_CQ_PENDING;
			/* wake ibtl_free_cq_check(), which waits on this cv */
			if (ibtl_cq->cq_impl_flags & IBTL_CQ_FREE)
				cv_broadcast(&ibtl_cq_cv);
		} else {
			/* the exit request is honored only with an empty list */
			if (ibtl_cq_thread_exit == IBTL_THREAD_EXIT)
				break;
			mutex_enter(&cpr_mutex);
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			mutex_exit(&cpr_mutex);

			cv_wait(&ibtl_cq_cv, &ibtl_cq_mutex);

			/*
			 * ibtl_cq_mutex is released before cpr_mutex is
			 * taken, so the two are never held together here.
			 */
			mutex_exit(&ibtl_cq_mutex);
			mutex_enter(&cpr_mutex);
			CALLB_CPR_SAFE_END(&cprinfo, &cpr_mutex);
			mutex_exit(&cpr_mutex);
			mutex_enter(&ibtl_cq_mutex);
		}
	}

	mutex_exit(&ibtl_cq_mutex);
#ifndef __lock_lint
	/*
	 * NOTE(review): there is no matching mutex_exit(&cpr_mutex) below;
	 * presumably CALLB_CPR_EXIT() drops it - confirm.
	 */
	mutex_enter(&cpr_mutex);
	CALLB_CPR_EXIT(&cprinfo);
#endif
	mutex_destroy(&cpr_mutex);
}


/*
 * ibc_cq_handler()
 *
 *    Completion Queue Notification Handler.
 *
 *	ibc_hdl		Unused, apart from the debug message.
 *
 *	ibtl_cq		The CQ being notified.
 *
 *    If the CQ is marked cq_in_thread, IBTL_CQ_CALL_CLIENT is set and,
 *    unless the CQ is already IBTL_CQ_PENDING, the CQ is appended to the
 *    list serviced by ibtl_cq_thread() and one waiter on ibtl_cq_cv is
 *    signalled.  Otherwise the client's handler is called directly from
 *    this context.
 */
/*ARGSUSED*/
void
ibc_cq_handler(ibc_clnt_hdl_t ibc_hdl, ibt_cq_hdl_t ibtl_cq)
{
	IBTF_DPRINTF_L4(ibtf_handlers, "ibc_cq_handler(%p, %p)",
	    ibc_hdl, ibtl_cq);

	if (ibtl_cq->cq_in_thread) {
		mutex_enter(&ibtl_cq_mutex);
		ibtl_cq->cq_impl_flags |= IBTL_CQ_CALL_CLIENT;
		/* a PENDING CQ is already queued or being serviced */
		if ((ibtl_cq->cq_impl_flags & IBTL_CQ_PENDING) == 0) {
			ibtl_cq->cq_impl_flags |= IBTL_CQ_PENDING;
			ibtl_cq->cq_link = NULL;
			/* append at the tail of the list */
			if (ibtl_cq_list_end == NULL)
				ibtl_cq_list_start = ibtl_cq;
			else
				ibtl_cq_list_end->cq_link = ibtl_cq;
			ibtl_cq_list_end = ibtl_cq;
			cv_signal(&ibtl_cq_cv);
		}
		mutex_exit(&ibtl_cq_mutex);
		return;
	} else
		ibtl_cq_handler_call(ibtl_cq);
}


/*
 * ibt_enable_cq_notify()
 *      Enable Notification requests on the specified CQ.
 *
 *      ibt_cq          The CQ handle.
 *
 *      notify_type     Enable notifications for all (IBT_NEXT_COMPLETION)
 *                      completions, or the next Solicited completion
 *                      (IBT_NEXT_SOLICITED) only.
 *
 *	Returns the status of the HCA driver's ibc_notify_cq() entry point,
 *	to which the request is forwarded unchanged.
 *
 *	Completion notifications are disabled by setting the completion
 *	handler to NULL by calling ibt_set_cq_handler().
 */
ibt_status_t
ibt_enable_cq_notify(ibt_cq_hdl_t ibtl_cq, ibt_cq_notify_flags_t notify_type)
{
	IBTF_DPRINTF_L3(ibtf_handlers, "ibt_enable_cq_notify(%p, %d)",
	    ibtl_cq, notify_type);

	return (IBTL_CQ2CIHCAOPS_P(ibtl_cq)->ibc_notify_cq(
	    IBTL_CQ2CIHCA(ibtl_cq), ibtl_cq->cq_ibc_cq_hdl, notify_type));
}


/*
 * ibt_set_cq_handler()
 *      Register a work request completion handler with the IBTF.
 *
 *      ibt_cq                  The CQ handle.
 *
 *      completion_handler      The completion handler.
 *
 *      arg                     The IBTF client private argument to be passed
 *                              back to the client when calling the CQ
 *                              completion handler.
 *
 *	Completion notifications are disabled by setting the completion
 *	handler to NULL.  When setting the handler to NULL, no additional
 *	calls to the previous CQ handler will be initiated, but there may
 *	be one in progress.
 *
 *      This function does not otherwise change the state of previous
 *      calls to ibt_enable_cq_notify().
*/ void ibt_set_cq_handler(ibt_cq_hdl_t ibtl_cq, ibt_cq_handler_t completion_handler, void *arg) { IBTF_DPRINTF_L3(ibtf_handlers, "ibt_set_cq_handler(%p, %p, %p)", ibtl_cq, completion_handler, arg); _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ibtl_cq)) ibtl_cq->cq_comp_handler = completion_handler; ibtl_cq->cq_arg = arg; } /* * Inform IBT clients about New HCAs. * * We use taskqs to allow simultaneous notification, with sleeping. * Since taskqs only allow one argument, we define a structure * because we need to pass in two arguments. */ struct ibtl_new_hca_s { ibtl_clnt_t *nh_clntp; ibtl_hca_devinfo_t *nh_hca_devp; ibt_async_code_t nh_code; }; static void ibtl_tell_client_about_new_hca(void *arg) { struct ibtl_new_hca_s *new_hcap = (struct ibtl_new_hca_s *)arg; ibtl_clnt_t *clntp = new_hcap->nh_clntp; ibt_async_event_t async_event; ibtl_hca_devinfo_t *hca_devp = new_hcap->nh_hca_devp; bzero(&async_event, sizeof (async_event)); async_event.ev_hca_guid = hca_devp->hd_hca_attr->hca_node_guid; clntp->clnt_modinfop->mi_async_handler( clntp->clnt_private, NULL, new_hcap->nh_code, &async_event); kmem_free(new_hcap, sizeof (*new_hcap)); #ifdef __lock_lint { ibt_hca_hdl_t hca_hdl; (void) ibt_open_hca(clntp, 0ULL, &hca_hdl); } #endif mutex_enter(&ibtl_clnt_list_mutex); if (--hca_devp->hd_async_task_cnt == 0) cv_signal(&hca_devp->hd_async_task_cv); if (--clntp->clnt_async_cnt == 0) cv_broadcast(&ibtl_clnt_cv); mutex_exit(&ibtl_clnt_list_mutex); } /* * ibtl_announce_new_hca: * * o First attach these clients in the given order * IBMA * IBCM * * o Next attach all other clients in parallel. * * NOTE: Use the taskq to simultaneously notify all clients of the new HCA. * Retval from clients is ignored. 
*/ void ibtl_announce_new_hca(ibtl_hca_devinfo_t *hca_devp) { ibtl_clnt_t *clntp; struct ibtl_new_hca_s *new_hcap; IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_announce_new_hca(%p, %llX)", hca_devp, hca_devp->hd_hca_attr->hca_node_guid); mutex_enter(&ibtl_clnt_list_mutex); clntp = ibtl_clnt_list; while (clntp != NULL) { if (clntp->clnt_modinfop->mi_clnt_class == IBT_IBMA) { IBTF_DPRINTF_L4(ibtf_handlers, "ibtl_announce_new_hca: calling IBMF"); if (clntp->clnt_modinfop->mi_async_handler) { _NOTE(NO_COMPETING_THREADS_NOW) new_hcap = kmem_alloc(sizeof (*new_hcap), KM_SLEEP); new_hcap->nh_clntp = clntp; new_hcap->nh_hca_devp = hca_devp; new_hcap->nh_code = IBT_HCA_ATTACH_EVENT; #ifndef lint _NOTE(COMPETING_THREADS_NOW) #endif clntp->clnt_async_cnt++; hca_devp->hd_async_task_cnt++; (void) taskq_dispatch(ibtl_async_taskq, ibtl_tell_client_about_new_hca, new_hcap, TQ_SLEEP); } break; } clntp = clntp->clnt_list_link; } if (clntp != NULL) while (clntp->clnt_async_cnt > 0) cv_wait(&ibtl_clnt_cv, &ibtl_clnt_list_mutex); clntp = ibtl_clnt_list; while (clntp != NULL) { if (clntp->clnt_modinfop->mi_clnt_class == IBT_DM) { IBTF_DPRINTF_L4(ibtf_handlers, "ibtl_announce_new_hca: " "calling %s", clntp->clnt_modinfop->mi_clnt_name); if (clntp->clnt_modinfop->mi_async_handler) { _NOTE(NO_COMPETING_THREADS_NOW) new_hcap = kmem_alloc(sizeof (*new_hcap), KM_SLEEP); new_hcap->nh_clntp = clntp; new_hcap->nh_hca_devp = hca_devp; new_hcap->nh_code = IBT_HCA_ATTACH_EVENT; #ifndef lint _NOTE(COMPETING_THREADS_NOW) #endif clntp->clnt_async_cnt++; hca_devp->hd_async_task_cnt++; mutex_exit(&ibtl_clnt_list_mutex); (void) ibtl_tell_client_about_new_hca( new_hcap); mutex_enter(&ibtl_clnt_list_mutex); } break; } clntp = clntp->clnt_list_link; } clntp = ibtl_clnt_list; while (clntp != NULL) { if (clntp->clnt_modinfop->mi_clnt_class == IBT_CM) { IBTF_DPRINTF_L4(ibtf_handlers, "ibtl_announce_new_hca: " "calling %s", clntp->clnt_modinfop->mi_clnt_name); if (clntp->clnt_modinfop->mi_async_handler) { 
_NOTE(NO_COMPETING_THREADS_NOW) new_hcap = kmem_alloc(sizeof (*new_hcap), KM_SLEEP); new_hcap->nh_clntp = clntp; new_hcap->nh_hca_devp = hca_devp; new_hcap->nh_code = IBT_HCA_ATTACH_EVENT; #ifndef lint _NOTE(COMPETING_THREADS_NOW) #endif clntp->clnt_async_cnt++; hca_devp->hd_async_task_cnt++; (void) taskq_dispatch(ibtl_async_taskq, ibtl_tell_client_about_new_hca, new_hcap, TQ_SLEEP); } break; } clntp = clntp->clnt_list_link; } if (clntp != NULL) while (clntp->clnt_async_cnt > 0) cv_wait(&ibtl_clnt_cv, &ibtl_clnt_list_mutex); clntp = ibtl_clnt_list; while (clntp != NULL) { if ((clntp->clnt_modinfop->mi_clnt_class != IBT_DM) && (clntp->clnt_modinfop->mi_clnt_class != IBT_CM) && (clntp->clnt_modinfop->mi_clnt_class != IBT_IBMA)) { IBTF_DPRINTF_L4(ibtf_handlers, "ibtl_announce_new_hca: Calling %s ", clntp->clnt_modinfop->mi_clnt_name); if (clntp->clnt_modinfop->mi_async_handler) { _NOTE(NO_COMPETING_THREADS_NOW) new_hcap = kmem_alloc(sizeof (*new_hcap), KM_SLEEP); new_hcap->nh_clntp = clntp; new_hcap->nh_hca_devp = hca_devp; new_hcap->nh_code = IBT_HCA_ATTACH_EVENT; #ifndef lint _NOTE(COMPETING_THREADS_NOW) #endif clntp->clnt_async_cnt++; hca_devp->hd_async_task_cnt++; (void) taskq_dispatch(ibtl_async_taskq, ibtl_tell_client_about_new_hca, new_hcap, TQ_SLEEP); } } clntp = clntp->clnt_list_link; } /* wait for all tasks to complete */ while (hca_devp->hd_async_task_cnt != 0) cv_wait(&hca_devp->hd_async_task_cv, &ibtl_clnt_list_mutex); /* wakeup thread that may be waiting to send an HCA async */ ASSERT(hca_devp->hd_async_busy == 1); hca_devp->hd_async_busy = 0; cv_broadcast(&hca_devp->hd_async_busy_cv); mutex_exit(&ibtl_clnt_list_mutex); } /* * ibtl_detach_all_clients: * * Return value - 0 for Success, 1 for Failure * * o First detach general clients. 
*
 *	o Next detach these clients, in this order
 *		IBDM	(called directly from this thread)
 *		IBCM	(through ibtl_tell_mgr())
 *
 *	o Finally, detach this client
 *		IBMA
 *
 *	The caller must hold ibtl_clnt_list_mutex; it is held again on
 *	return, though it is dropped temporarily inside.  On failure the
 *	HCA state is reset to IBTL_HCA_DEV_ATTACHED and the HCA is
 *	announced to the clients again.
 */
int
ibtl_detach_all_clients(ibtl_hca_devinfo_t *hca_devp)
{
	ib_guid_t		hcaguid = hca_devp->hd_hca_attr->hca_node_guid;
	ibtl_hca_t		*ibt_hca;
	ibtl_clnt_t		*clntp;
	int			retval;

	IBTF_DPRINTF_L2(ibtf_handlers, "ibtl_detach_all_clients(%llX)",
	    hcaguid);

	ASSERT(MUTEX_HELD(&ibtl_clnt_list_mutex));

	/* claim the per-HCA async "busy" token before using hd_async_* */
	while (hca_devp->hd_async_busy)
		cv_wait(&hca_devp->hd_async_busy_cv, &ibtl_clnt_list_mutex);
	hca_devp->hd_async_busy = 1;

	/* First inform general clients asynchronously */
	hca_devp->hd_async_event.ev_hca_guid = hcaguid;
	hca_devp->hd_async_event.ev_fma_ena = 0;
	hca_devp->hd_async_event.ev_chan_hdl = NULL;
	hca_devp->hd_async_event.ev_cq_hdl = NULL;
	hca_devp->hd_async_code = IBT_HCA_DETACH_EVENT;

	ibt_hca = hca_devp->hd_clnt_list;
	while (ibt_hca != NULL) {
		clntp = ibt_hca->ha_clnt_devp;
		if (IBTL_GENERIC_CLIENT(clntp)) {
			/* account for the task in all three counters */
			++ibt_hca->ha_clnt_devp->clnt_async_cnt;
			mutex_enter(&ibtl_async_mutex);
			ibt_hca->ha_async_cnt++;
			mutex_exit(&ibtl_async_mutex);
			hca_devp->hd_async_task_cnt++;

			(void) taskq_dispatch(ibtl_async_taskq,
			    ibtl_hca_client_async_task, ibt_hca, TQ_SLEEP);
		}
		ibt_hca = ibt_hca->ha_clnt_link;
	}

	/* wait for all clients to complete */
	while (hca_devp->hd_async_task_cnt != 0) {
		cv_wait(&hca_devp->hd_async_task_cv, &ibtl_clnt_list_mutex);
	}

	/*
	 * Go thru the clients and check if any have not closed this HCA.
	 * A generic client still on hd_clnt_list counts as a failure
	 * (presumably closing the HCA unlinks the client - confirm).
	 */
	retval = 0;
	ibt_hca = hca_devp->hd_clnt_list;
	while (ibt_hca != NULL) {
		clntp = ibt_hca->ha_clnt_devp;
		if (IBTL_GENERIC_CLIENT(clntp)) {
			IBTF_DPRINTF_L2(ibtf_handlers,
			    "ibtl_detach_all_clients: "
			    "client '%s' failed to close the HCA.",
			    ibt_hca->ha_clnt_devp->clnt_modinfop->mi_clnt_name);
			retval = 1;
		}
		ibt_hca = ibt_hca->ha_clnt_link;
	}
	if (retval == 1)
		goto bailout;

	/*
	 * Next inform IBDM.  Unlike the other classes, its task function
	 * is called directly from this thread, with the list mutex dropped
	 * around the call.  Only the first IBT_DM entry is handled.
	 */
	ibt_hca = hca_devp->hd_clnt_list;
	while (ibt_hca != NULL) {
		clntp = ibt_hca->ha_clnt_devp;
		if (clntp->clnt_modinfop->mi_clnt_class == IBT_DM) {
			++ibt_hca->ha_clnt_devp->clnt_async_cnt;
			mutex_enter(&ibtl_async_mutex);
			ibt_hca->ha_async_cnt++;
			mutex_exit(&ibtl_async_mutex);
			hca_devp->hd_async_task_cnt++;

			mutex_exit(&ibtl_clnt_list_mutex);
			ibtl_hca_client_async_task(ibt_hca);
			mutex_enter(&ibtl_clnt_list_mutex);
			break;
		}
		ibt_hca = ibt_hca->ha_clnt_link;
	}

	/*
	 * Next inform IBCM.
	 * As IBCM doesn't perform ibt_open_hca(), IBCM will not be
	 * accessible via hca_devp->hd_clnt_list.
	 * ibtl_cm_async_handler will NOT be NULL, if IBCM is registered.
	 */
	if (ibtl_cm_async_handler) {
		ibtl_tell_mgr(hca_devp, ibtl_cm_async_handler,
		    ibtl_cm_clnt_private);

		/* wait for all tasks to complete */
		while (hca_devp->hd_async_task_cnt != 0)
			cv_wait(&hca_devp->hd_async_task_cv,
			    &ibtl_clnt_list_mutex);
	}

	/*
	 * Go thru the clients and check if any have not closed this HCA.
	 * At this point only IBT_IBMA entries may remain on the list.
	 */
	retval = 0;
	ibt_hca = hca_devp->hd_clnt_list;
	while (ibt_hca != NULL) {
		clntp = ibt_hca->ha_clnt_devp;
		if (clntp->clnt_modinfop->mi_clnt_class != IBT_IBMA) {
			IBTF_DPRINTF_L2(ibtf_handlers,
			    "ibtl_detach_all_clients: "
			    "client '%s' failed to close the HCA.",
			    ibt_hca->ha_clnt_devp->clnt_modinfop->mi_clnt_name);
			retval = 1;
		}
		ibt_hca = ibt_hca->ha_clnt_link;
	}
	if (retval == 1)
		goto bailout;

	/* Finally, inform IBMA */
	ibt_hca = hca_devp->hd_clnt_list;
	while (ibt_hca != NULL) {
		clntp = ibt_hca->ha_clnt_devp;
		if (clntp->clnt_modinfop->mi_clnt_class == IBT_IBMA) {
			++ibt_hca->ha_clnt_devp->clnt_async_cnt;
			mutex_enter(&ibtl_async_mutex);
			ibt_hca->ha_async_cnt++;
			mutex_exit(&ibtl_async_mutex);
			hca_devp->hd_async_task_cnt++;

			(void) taskq_dispatch(ibtl_async_taskq,
			    ibtl_hca_client_async_task, ibt_hca, TQ_SLEEP);
		} else
			IBTF_DPRINTF_L2(ibtf_handlers,
			    "ibtl_detach_all_clients: "
			    "client '%s' is unexpectedly on the client list",
			    ibt_hca->ha_clnt_devp->clnt_modinfop->mi_clnt_name);
		ibt_hca = ibt_hca->ha_clnt_link;
	}

	/* wait for IBMA to complete */
	while (hca_devp->hd_async_task_cnt != 0) {
		cv_wait(&hca_devp->hd_async_task_cv, &ibtl_clnt_list_mutex);
	}

	/* Check if this HCA's client list is empty. */
	ibt_hca = hca_devp->hd_clnt_list;
	if (ibt_hca != NULL) {
		IBTF_DPRINTF_L2(ibtf_handlers,
		    "ibtl_detach_all_clients: "
		    "client '%s' failed to close the HCA.",
		    ibt_hca->ha_clnt_devp->clnt_modinfop->mi_clnt_name);
		retval = 1;
	} else
		retval = 0;

bailout:
	if (retval) {
		/*
		 * Failure: hd_async_busy is deliberately left set here;
		 * ibtl_announce_new_hca() asserts that it is 1 and is the
		 * one that clears it and wakes the waiters.
		 */
		hca_devp->hd_state = IBTL_HCA_DEV_ATTACHED; /* fix hd_state */
		mutex_exit(&ibtl_clnt_list_mutex);
		ibtl_announce_new_hca(hca_devp);
		mutex_enter(&ibtl_clnt_list_mutex);
	} else {
		/* Success: release the busy token ourselves. */
		hca_devp->hd_async_busy = 0;
		cv_broadcast(&hca_devp->hd_async_busy_cv);
	}

	return (retval);
}

/*
 * Block until the client has no asyncs outstanding (clnt_async_cnt == 0).
 * The caller must hold ibtl_clnt_list_mutex; cv_wait() is given that
 * mutex while sleeping on ibtl_clnt_cv.
 */
void
ibtl_free_clnt_async_check(ibtl_clnt_t *clntp)
{
	IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_free_clnt_async_check(%p)", clntp);

	ASSERT(MUTEX_HELD(&ibtl_clnt_list_mutex));

	/* wait for all asyncs based on "ibtl_clnt_list" to complete */
	while (clntp->clnt_async_cnt != 0) {
		cv_wait(&ibtl_clnt_cv, &ibtl_clnt_list_mutex);
	}
}

/*
 * Drop one outstanding-async reference on the client, taking
 * ibtl_clnt_list_mutex to do so.  When the count reaches zero, every
 * thread waiting on ibtl_clnt_cv is woken.
 */
static void
ibtl_dec_clnt_async_cnt(ibtl_clnt_t *clntp)
{
	mutex_enter(&ibtl_clnt_list_mutex);
	if (--clntp->clnt_async_cnt == 0) {
		cv_broadcast(&ibtl_clnt_cv);
	}
	mutex_exit(&ibtl_clnt_list_mutex);
}

/*
 * Add one outstanding-async reference on the client, taking
 * ibtl_clnt_list_mutex to do so.
 */
static void
ibtl_inc_clnt_async_cnt(ibtl_clnt_t *clntp)
{
	mutex_enter(&ibtl_clnt_list_mutex);
	++clntp->clnt_async_cnt;
	mutex_exit(&ibtl_clnt_list_mutex);
}


/*
 * Functions and data structures to inform clients that a notification
 * has occurred about Multicast Groups that might interest them.
*/ struct ibtl_sm_notice { ibt_clnt_hdl_t np_ibt_hdl; ib_gid_t np_sgid; ibt_subnet_event_code_t np_code; ibt_subnet_event_t np_event; }; static void ibtl_sm_notice_task(void *arg) { struct ibtl_sm_notice *noticep = (struct ibtl_sm_notice *)arg; ibt_clnt_hdl_t ibt_hdl = noticep->np_ibt_hdl; ibt_sm_notice_handler_t sm_notice_handler; sm_notice_handler = ibt_hdl->clnt_sm_trap_handler; if (sm_notice_handler != NULL) sm_notice_handler(ibt_hdl->clnt_sm_trap_handler_arg, noticep->np_sgid, noticep->np_code, ¬icep->np_event); kmem_free(noticep, sizeof (*noticep)); ibtl_dec_clnt_async_cnt(ibt_hdl); } /* * Inform the client that MCG notices are not working at this time. */ void ibtl_cm_sm_notice_init_failure(ibtl_cm_sm_init_fail_t *ifail) { ibt_clnt_hdl_t ibt_hdl = ifail->smf_ibt_hdl; struct ibtl_sm_notice *noticep; ib_gid_t *sgidp = &ifail->smf_sgid[0]; int i; for (i = 0; i < ifail->smf_num_sgids; i++) { _NOTE(NO_COMPETING_THREADS_NOW) noticep = kmem_zalloc(sizeof (*noticep), KM_SLEEP); noticep->np_ibt_hdl = ibt_hdl; noticep->np_sgid = *sgidp++; noticep->np_code = IBT_SM_EVENT_UNAVAILABLE; #ifndef lint _NOTE(COMPETING_THREADS_NOW) #endif ibtl_inc_clnt_async_cnt(ibt_hdl); (void) taskq_dispatch(ibtl_async_taskq, ibtl_sm_notice_task, noticep, TQ_SLEEP); } } /* * Inform all clients of the event. 
*/ void ibtl_cm_sm_notice_handler(ib_gid_t sgid, ibt_subnet_event_code_t code, ibt_subnet_event_t *event) { _NOTE(NO_COMPETING_THREADS_NOW) struct ibtl_sm_notice *noticep; ibtl_clnt_t *clntp; mutex_enter(&ibtl_clnt_list_mutex); clntp = ibtl_clnt_list; while (clntp != NULL) { if (clntp->clnt_sm_trap_handler) { noticep = kmem_zalloc(sizeof (*noticep), KM_SLEEP); noticep->np_ibt_hdl = clntp; noticep->np_sgid = sgid; noticep->np_code = code; noticep->np_event = *event; ++clntp->clnt_async_cnt; (void) taskq_dispatch(ibtl_async_taskq, ibtl_sm_notice_task, noticep, TQ_SLEEP); } clntp = clntp->clnt_list_link; } mutex_exit(&ibtl_clnt_list_mutex); #ifndef lint _NOTE(COMPETING_THREADS_NOW) #endif } /* * Record the handler for this client. */ void ibtl_cm_set_sm_notice_handler(ibt_clnt_hdl_t ibt_hdl, ibt_sm_notice_handler_t sm_notice_handler, void *private) { _NOTE(NO_COMPETING_THREADS_NOW) ibt_hdl->clnt_sm_trap_handler = sm_notice_handler; ibt_hdl->clnt_sm_trap_handler_arg = private; #ifndef lint _NOTE(COMPETING_THREADS_NOW) #endif } /* * ibtl_another_cq_handler_in_thread() * * Conditionally increase the number of cq_threads. * The number of threads grows, based on the number of cqs using threads. 
* * The table below controls the number of threads as follows: * * Number of CQs Number of cq_threads * 0 0 * 1 1 * 2-3 2 * 4-5 3 * 6-9 4 * 10-15 5 * 16-23 6 * 24-31 7 * 32+ 8 */ #define IBTL_CQ_MAXTHREADS 8 static uint8_t ibtl_cq_scaling[IBTL_CQ_MAXTHREADS] = { 1, 2, 4, 6, 10, 16, 24, 32 }; static kt_did_t ibtl_cq_did[IBTL_CQ_MAXTHREADS]; void ibtl_another_cq_handler_in_thread(void) { kthread_t *t; int my_idx; mutex_enter(&ibtl_cq_mutex); if ((ibtl_cq_threads == IBTL_CQ_MAXTHREADS) || (++ibtl_cqs_using_threads < ibtl_cq_scaling[ibtl_cq_threads])) { mutex_exit(&ibtl_cq_mutex); return; } my_idx = ibtl_cq_threads++; mutex_exit(&ibtl_cq_mutex); t = thread_create(NULL, 0, ibtl_cq_thread, NULL, 0, &p0, TS_RUN, ibtl_pri - 1); _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibtl_cq_did)) ibtl_cq_did[my_idx] = t->t_did; /* save for thread_join() */ _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibtl_cq_did)) } void ibtl_thread_init(void) { IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_thread_init()"); mutex_init(&ibtl_async_mutex, NULL, MUTEX_DEFAULT, NULL); cv_init(&ibtl_async_cv, NULL, CV_DEFAULT, NULL); cv_init(&ibtl_clnt_cv, NULL, CV_DEFAULT, NULL); mutex_init(&ibtl_cq_mutex, NULL, MUTEX_DEFAULT, NULL); cv_init(&ibtl_cq_cv, NULL, CV_DEFAULT, NULL); } void ibtl_thread_init2(void) { int i; static int initted = 0; kthread_t *t; mutex_enter(&ibtl_async_mutex); if (initted == 1) { mutex_exit(&ibtl_async_mutex); return; } initted = 1; mutex_exit(&ibtl_async_mutex); _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibtl_async_did)) ibtl_async_did = kmem_zalloc(ibtl_async_thread_init * sizeof (kt_did_t), KM_SLEEP); IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_thread_init2()"); for (i = 0; i < ibtl_async_thread_init; i++) { t = thread_create(NULL, 0, ibtl_async_thread, NULL, 0, &p0, TS_RUN, ibtl_pri - 1); ibtl_async_did[i] = t->t_did; /* thread_join() */ } _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibtl_async_did)) _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibtl_cq_threads)) for (i = 0; i < ibtl_cq_threads; i++) { t = thread_create(NULL, 0, 
ibtl_cq_thread, NULL, 0, &p0, TS_RUN, ibtl_pri - 1); _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibtl_cq_did)) ibtl_cq_did[i] = t->t_did; /* save for thread_join() */ _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibtl_cq_did)) } _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibtl_cq_threads)) } void ibtl_thread_fini(void) { int i; IBTF_DPRINTF_L3(ibtf_handlers, "ibtl_thread_fini()"); /* undo the work done by ibtl_thread_init() */ mutex_enter(&ibtl_cq_mutex); ibtl_cq_thread_exit = IBTL_THREAD_EXIT; cv_broadcast(&ibtl_cq_cv); mutex_exit(&ibtl_cq_mutex); mutex_enter(&ibtl_async_mutex); ibtl_async_thread_exit = IBTL_THREAD_EXIT; cv_broadcast(&ibtl_async_cv); mutex_exit(&ibtl_async_mutex); _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(ibtl_cq_threads)) for (i = 0; i < ibtl_cq_threads; i++) thread_join(ibtl_cq_did[i]); _NOTE(NOW_VISIBLE_TO_OTHER_THREADS(ibtl_cq_threads)) if (ibtl_async_did) { for (i = 0; i < ibtl_async_thread_init; i++) thread_join(ibtl_async_did[i]); kmem_free(ibtl_async_did, ibtl_async_thread_init * sizeof (kt_did_t)); } mutex_destroy(&ibtl_cq_mutex); cv_destroy(&ibtl_cq_cv); mutex_destroy(&ibtl_async_mutex); cv_destroy(&ibtl_async_cv); cv_destroy(&ibtl_clnt_cv); }