/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * RDMA channel interface for Solaris SCSI RDMA Protocol Target (SRP)
 * transport port provider module for the COMSTAR framework.
 */

#include <sys/cpuvar.h>
#include <sys/types.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/sysmacros.h>
#include <sys/sdt.h>
#include <sys/taskq.h>
#include <sys/scsi/scsi.h>
#include <sys/ib/ibtl/ibti.h>

#include <sys/stmf.h>
#include <sys/stmf_ioctl.h>
#include <sys/portif.h>

#include "srp.h"
#include "srpt_impl.h"
#include "srpt_ioc.h"
#include "srpt_stp.h"
#include "srpt_ch.h"

extern srpt_ctxt_t	*srpt_ctxt;
extern uint16_t		srpt_send_msg_depth;

/*
 * Prototypes.
 */
static void srpt_ch_scq_hdlr(ibt_cq_hdl_t cq_hdl, void *arg);
static void srpt_ch_rcq_hdlr(ibt_cq_hdl_t cq_hdl, void *arg);
static void srpt_ch_process_iu(srpt_channel_t *ch, srpt_iu_t *iu);

/*
 * srpt_ch_alloc()
 */
srpt_channel_t *
srpt_ch_alloc(srpt_target_port_t *tgt, uint8_t port)
{
	ibt_status_t			status;
	srpt_channel_t			*ch;
	ibt_cq_attr_t			cq_attr;
	ibt_rc_chan_alloc_args_t	ch_args;
	uint32_t			cq_real_size;
	srpt_ioc_t			*ioc;

	ASSERT(tgt != NULL);
	ioc = tgt->tp_ioc;
	ASSERT(ioc != NULL);

	ch = kmem_zalloc(sizeof (*ch), KM_SLEEP);
	rw_init(&ch->ch_rwlock, NULL, RW_DRIVER, NULL);
	mutex_init(&ch->ch_reflock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&ch->ch_cv_complete, NULL, CV_DRIVER, NULL);
	ch->ch_refcnt = 1;
	ch->ch_cv_waiters = 0;

	ch->ch_state = SRPT_CHANNEL_CONNECTING;
	ch->ch_tgt = tgt;
	ch->ch_req_lim_delta = 0;
	ch->ch_ti_iu_len = 0;

	cq_attr.cq_size	 = srpt_send_msg_depth * 2;
	cq_attr.cq_sched = 0;
	cq_attr.cq_flags = IBT_CQ_NO_FLAGS;

	status = ibt_alloc_cq(ioc->ioc_ibt_hdl, &cq_attr, &ch->ch_scq_hdl,
	    &cq_real_size);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("ch_alloc, send CQ alloc error (%d)",
		    status);
		goto scq_alloc_err;
	}

	cq_attr.cq_size	 = srpt_send_msg_depth + 1;
	cq_attr.cq_sched = 0;
	cq_attr.cq_flags = IBT_CQ_NO_FLAGS;

	status = ibt_alloc_cq(ioc->ioc_ibt_hdl, &cq_attr, &ch->ch_rcq_hdl,
	    &cq_real_size);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L2("ch_alloc, receive CQ alloc error (%d)",
		    status);
		goto rcq_alloc_err;
	}

	ibt_set_cq_handler(ch->ch_scq_hdl, srpt_ch_scq_hdlr, ch);
	ibt_set_cq_handler(ch->ch_rcq_hdl, srpt_ch_rcq_hdlr, ch);
	(void) ibt_enable_cq_notify(ch->ch_scq_hdl, IBT_NEXT_COMPLETION);
	(void) ibt_enable_cq_notify(ch->ch_rcq_hdl, IBT_NEXT_COMPLETION);

	ch_args.rc_flags = IBT_WR_SIGNALED;

	/* Make certain the initiator cannot read/write our memory */
	ch_args.rc_control = 0;

	ch_args.rc_hca_port_num = port;

	/*
	 * Any SRP IU can result in a number of STMF data buffer transfers
	 * and those transfers themselves could span multiple initiator
	 * buffers.  Therefore, the number of send WQE's actually required
	 * can vary.  Here we assume that on average an I/O will require
	 * no more than SRPT_MAX_OUT_IO_PER_CMD send WQE's.  In practice
	 * this will prevent send work queue overrun, but we will also
	 * inform STMF to throttle I/O should the work queue become full.
	 *
	 * If the HCA tells us the max outstanding WRs for a channel is
	 * lower than our default, use the HCA value.
	 */
	ch_args.rc_sizes.cs_sq = min(ioc->ioc_attr.hca_max_chan_sz,
	    (srpt_send_msg_depth * SRPT_MAX_OUT_IO_PER_CMD));
	ch_args.rc_sizes.cs_rq = 0;
	ch_args.rc_sizes.cs_sq_sgl = 2;
	ch_args.rc_sizes.cs_rq_sgl = 0;

	ch_args.rc_scq = ch->ch_scq_hdl;
	ch_args.rc_rcq = ch->ch_rcq_hdl;
	ch_args.rc_pd  = ioc->ioc_pd_hdl;
	ch_args.rc_clone_chan = NULL;
	ch_args.rc_srq = ioc->ioc_srq_hdl;

	status = ibt_alloc_rc_channel(ioc->ioc_ibt_hdl, IBT_ACHAN_USES_SRQ,
	    &ch_args, &ch->ch_chan_hdl, &ch->ch_sizes);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L2("ch_alloc, IBT channel alloc error (%d)",
		    status);
		goto qp_alloc_err;
	}

	/*
	 * Create pool of send WQE entries to map send wqe work IDs
	 * to various types (specifically in error cases where OP
	 * is not known).
	 */
	ch->ch_num_swqe = ch->ch_sizes.cs_sq;
	SRPT_DPRINTF_L2("ch_alloc, number of SWQEs = %u", ch->ch_num_swqe);
	ch->ch_swqe = kmem_zalloc(sizeof (srpt_swqe_t) * ch->ch_num_swqe,
	    KM_SLEEP);
	if (ch->ch_swqe == NULL) {
		SRPT_DPRINTF_L2("ch_alloc, SWQE alloc error");
		(void) ibt_free_channel(ch->ch_chan_hdl);
		goto qp_alloc_err;
	}
	mutex_init(&ch->ch_swqe_lock, NULL, MUTEX_DRIVER, NULL);
	ch->ch_head = 1;
	for (ch->ch_tail = 1; ch->ch_tail < ch->ch_num_swqe - 1;
	    ch->ch_tail++) {
		ch->ch_swqe[ch->ch_tail].sw_next = ch->ch_tail + 1;
	}
	ch->ch_swqe[ch->ch_tail].sw_next = 0;

	ibt_set_chan_private(ch->ch_chan_hdl, ch);
	return (ch);

qp_alloc_err:
	(void) ibt_free_cq(ch->ch_rcq_hdl);

rcq_alloc_err:
	(void) ibt_free_cq(ch->ch_scq_hdl);

scq_alloc_err:
	cv_destroy(&ch->ch_cv_complete);
	mutex_destroy(&ch->ch_reflock);
	rw_destroy(&ch->ch_rwlock);
	kmem_free(ch, sizeof (*ch));

	return (NULL);
}

/*
 * srpt_ch_add_ref()
 */
void
srpt_ch_add_ref(srpt_channel_t *ch)
{
	mutex_enter(&ch->ch_reflock);
	ch->ch_refcnt++;
	SRPT_DPRINTF_L4("ch_add_ref, ch (%p), refcnt (%d)",
	    (void *)ch, ch->ch_refcnt);
	ASSERT(ch->ch_refcnt != 0);
	mutex_exit(&ch->ch_reflock);
}
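
/*
 * Illustrative sketch, not part of the driver: the hold/release
 * discipline the rest of this file follows with srpt_ch_add_ref()
 * above and srpt_ch_release_ref() below.  Any path that may use the
 * channel outside of a lock takes a hold first and drops it when
 * done; the final release tears the channel down.  The function name
 * and the SRPT_EXAMPLE_SKETCHES guard are hypothetical, present only
 * so the example does not enter a normal build.
 */
#ifdef SRPT_EXAMPLE_SKETCHES
static void
srpt_example_ch_hold_pattern(srpt_channel_t *ch)
{
	srpt_ch_add_ref(ch);		/* hold across the work below */

	/* ... operate on the channel without holding ch_reflock ... */

	srpt_ch_release_ref(ch, 0);	/* non-blocking release */
}
#endif	/* SRPT_EXAMPLE_SKETCHES */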
/*
 * srpt_ch_release_ref()
 *
 * A non-zero value for wait causes thread to block until all references
 * to channel are released.
 */
void
srpt_ch_release_ref(srpt_channel_t *ch, uint_t wait)
{
	mutex_enter(&ch->ch_reflock);

	SRPT_DPRINTF_L4("ch_release_ref, ch (%p), refcnt (%d), wait (%d)",
	    (void *)ch, ch->ch_refcnt, wait);

	ASSERT(ch->ch_refcnt != 0);

	ch->ch_refcnt--;

	if (ch->ch_refcnt != 0) {
		if (wait) {
			ch->ch_cv_waiters++;
			while (ch->ch_refcnt != 0) {
				cv_wait(&ch->ch_cv_complete, &ch->ch_reflock);
			}
			ch->ch_cv_waiters--;
		} else {
			mutex_exit(&ch->ch_reflock);
			return;
		}
	}

	/*
	 * Last thread out frees the IB resources, locks/conditions and memory
	 */
	if (ch->ch_cv_waiters > 0) {
		/* we're not last, wake someone else up */
		cv_signal(&ch->ch_cv_complete);
		mutex_exit(&ch->ch_reflock);
		return;
	}

	SRPT_DPRINTF_L3("ch_release_ref - release resources");
	if (ch->ch_chan_hdl) {
		SRPT_DPRINTF_L3("ch_release_ref - free channel");
		(void) ibt_free_channel(ch->ch_chan_hdl);
	}

	if (ch->ch_scq_hdl) {
		(void) ibt_free_cq(ch->ch_scq_hdl);
	}

	if (ch->ch_rcq_hdl) {
		(void) ibt_free_cq(ch->ch_rcq_hdl);
	}

	/*
	 * There should be no IU's associated with this
	 * channel on the SCSI session.
	 */
	if (ch->ch_session != NULL) {
		ASSERT(list_is_empty(&ch->ch_session->ss_task_list));

		/*
		 * Currently only have one channel per session, we will
		 * need to release a reference when support is added
		 * for multi-channel target login.
		 */
		srpt_stp_free_session(ch->ch_session);
		ch->ch_session = NULL;
	}

	kmem_free(ch->ch_swqe, sizeof (srpt_swqe_t) * ch->ch_num_swqe);
	mutex_destroy(&ch->ch_swqe_lock);
	mutex_exit(&ch->ch_reflock);
	mutex_destroy(&ch->ch_reflock);
	rw_destroy(&ch->ch_rwlock);
	kmem_free(ch, sizeof (srpt_channel_t));
}

/*
 * srpt_ch_disconnect()
 */
void
srpt_ch_disconnect(srpt_channel_t *ch)
{
	ibt_status_t		status;

	SRPT_DPRINTF_L3("ch_disconnect, invoked for ch (%p)",
	    (void *)ch);

	rw_enter(&ch->ch_rwlock, RW_WRITER);

	/*
	 * If we are already in the process of disconnecting then
	 * nothing need be done, CM will call-back into us when done.
	 */
	if (ch->ch_state == SRPT_CHANNEL_DISCONNECTING) {
		SRPT_DPRINTF_L2("ch_disconnect, called when"
		    " disconnect in progress");
		rw_exit(&ch->ch_rwlock);
		return;
	}
	ch->ch_state = SRPT_CHANNEL_DISCONNECTING;
	rw_exit(&ch->ch_rwlock);

	/*
	 * Initiate the sending of the CM DREQ message; the private data
	 * should be the SRP Target logout IU.  We don't really care about
	 * the remote CM DREP message returned.  We issue this in an
	 * asynchronous manner and will cleanup when called back by CM.
	 */
	status = ibt_close_rc_channel(ch->ch_chan_hdl, IBT_NONBLOCKING,
	    NULL, 0, NULL, NULL, 0);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L2("ch_disconnect, close RC channel"
		    " err(%d)", status);
	}
}

/*
 * srpt_ch_cleanup()
 */
void
srpt_ch_cleanup(srpt_channel_t *ch)
{
	srpt_iu_t		*iu;
	srpt_iu_t		*next;
	ibt_wc_t		wc;
	srpt_target_port_t	*tgt;
	srpt_channel_t		*tgt_ch;
	scsi_task_t		*iutask;

	SRPT_DPRINTF_L3("ch_cleanup, invoked for ch(%p), state(%d)",
	    (void *)ch, ch->ch_state);

	/* add a ref for the channel until we're done */
	srpt_ch_add_ref(ch);

	tgt = ch->ch_tgt;
	ASSERT(tgt != NULL);

	/*
	 * Make certain the channel is in the target port's list of
	 * known channels and remove it (releasing the target
	 * port's reference to the channel).
	 */
	mutex_enter(&tgt->tp_ch_list_lock);
	tgt_ch = list_head(&tgt->tp_ch_list);
	while (tgt_ch != NULL) {
		if (tgt_ch == ch) {
			list_remove(&tgt->tp_ch_list, tgt_ch);
			srpt_ch_release_ref(tgt_ch, 0);
			break;
		}
		tgt_ch = list_next(&tgt->tp_ch_list, tgt_ch);
	}
	mutex_exit(&tgt->tp_ch_list_lock);

	if (tgt_ch == NULL) {
		SRPT_DPRINTF_L2("ch_cleanup, target channel no"
		    " longer known to target");
		srpt_ch_release_ref(ch, 0);
		return;
	}

	rw_enter(&ch->ch_rwlock, RW_WRITER);
	ch->ch_state = SRPT_CHANNEL_DISCONNECTING;
	rw_exit(&ch->ch_rwlock);

	/*
	 * Don't accept any further incoming requests, and clean
	 * up the receive queue.  The send queue is left alone
	 * so tasks can finish and clean up (whether normally
	 * or via abort).
	 */
	if (ch->ch_rcq_hdl) {
		ibt_set_cq_handler(ch->ch_rcq_hdl, NULL, NULL);

		while (ibt_poll_cq(ch->ch_rcq_hdl, &wc, 1, NULL) ==
		    IBT_SUCCESS) {
			iu = (srpt_iu_t *)(uintptr_t)wc.wc_id;
			SRPT_DPRINTF_L4("ch_cleanup, recovering"
			    " outstanding RX iu(%p)", (void *)iu);
			mutex_enter(&iu->iu_lock);
			srpt_ioc_repost_recv_iu(iu->iu_ioc, iu);
			/*
			 * Channel reference has not yet been added for this
			 * IU, so do not decrement.
			 */
			mutex_exit(&iu->iu_lock);
		}
	}

	/*
	 * Go through the list of outstanding IU for the channel's SCSI
	 * session and for each either abort or complete an abort.
	 */
	rw_enter(&ch->ch_rwlock, RW_READER);
	if (ch->ch_session != NULL) {
		rw_enter(&ch->ch_session->ss_rwlock, RW_READER);
		iu = list_head(&ch->ch_session->ss_task_list);
		while (iu != NULL) {
			next = list_next(&ch->ch_session->ss_task_list, iu);

			mutex_enter(&iu->iu_lock);
			if (ch == iu->iu_ch) {
				if (iu->iu_stmf_task == NULL) {
					cmn_err(CE_NOTE,
					    "ch_cleanup, NULL stmf task");
					ASSERT(0);
				}
				iutask = iu->iu_stmf_task;
			} else {
				iutask = NULL;
			}
			mutex_exit(&iu->iu_lock);

			if (iutask != NULL) {
				SRPT_DPRINTF_L4("ch_cleanup, aborting "
				    "task(%p)", (void *)iutask);
				stmf_abort(STMF_QUEUE_TASK_ABORT, iutask,
				    STMF_ABORTED, NULL);
			}
			iu = next;
		}
		rw_exit(&ch->ch_session->ss_rwlock);
	}
	rw_exit(&ch->ch_rwlock);

	srpt_ch_release_ref(ch, 0);
}

/*
 * srpt_ch_rsp_comp()
 *
 * Process a completion for an IB SEND message.  A SEND completion
 * is for an SRP response packet sent back to the initiator.  It
 * will not have a STMF SCSI task associated with it if it was
 * sent for a rejected IU, or was a task management abort response.
 */
static void
srpt_ch_rsp_comp(srpt_channel_t *ch, srpt_iu_t *iu,
    ibt_wc_status_t wc_status)
{
	stmf_status_t	st = STMF_SUCCESS;

	ASSERT(iu->iu_ch == ch);

	/*
	 * Process the completion regardless whether it's a failure or
	 * success.  At this point, we've processed as far as we can and
	 * just need to complete the associated task.
	 */
	if (wc_status != IBT_SUCCESS) {
		SRPT_DPRINTF_L2("ch_rsp_comp, WC status err(%d)", wc_status);
		st = STMF_FAILURE;

		if (wc_status != IBT_WC_WR_FLUSHED_ERR) {
			srpt_ch_disconnect(ch);
		}
	}

	/*
	 * If the IU response completion is not associated with
	 * a SCSI task, release the IU to return the resource
	 * and the reference to the channel it holds.
	 */
	mutex_enter(&iu->iu_lock);
	atomic_dec_32(&iu->iu_sq_posted_cnt);

	if (iu->iu_stmf_task == NULL) {
		srpt_ioc_repost_recv_iu(iu->iu_ioc, iu);
		mutex_exit(&iu->iu_lock);
		srpt_ch_release_ref(ch, 0);
		return;
	}

	/*
	 * We should not get a SEND completion where the task has already
	 * completed aborting and STMF has been informed.
	 */
	ASSERT((iu->iu_flags & SRPT_IU_ABORTED) == 0);

	/*
	 * Let STMF know we are done.
	 */
	mutex_exit(&iu->iu_lock);

	stmf_send_status_done(iu->iu_stmf_task, st, STMF_IOF_LPORT_DONE);
}
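
/*
 * Illustrative sketch, not part of the driver: the work-completion
 * error policy shared by srpt_ch_rsp_comp() above and
 * srpt_ch_data_comp() below.  A flushed WR means the channel is
 * already being torn down, so only a non-flush error initiates a new
 * (asynchronous) disconnect.  The helper name and the
 * SRPT_EXAMPLE_SKETCHES guard are hypothetical.
 */
#ifdef SRPT_EXAMPLE_SKETCHES
static boolean_t
srpt_example_wc_failed(srpt_channel_t *ch, ibt_wc_status_t wc_status)
{
	if (wc_status == IBT_WC_SUCCESS) {
		return (B_FALSE);
	}
	if (wc_status != IBT_WC_WR_FLUSHED_ERR) {
		srpt_ch_disconnect(ch);	/* flush errors need no action */
	}
	return (B_TRUE);
}
#endif	/* SRPT_EXAMPLE_SKETCHES */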
/*
 * srpt_ch_data_comp()
 *
 * Process an IB completion for an RDMA operation.  This completion
 * should be associated with the last RDMA operation for any
 * data buffer transfer.
 */
static void
srpt_ch_data_comp(srpt_channel_t *ch, stmf_data_buf_t *stmf_dbuf,
    ibt_wc_status_t wc_status)
{
	srpt_ds_dbuf_t		*dbuf;
	srpt_iu_t		*iu;
	stmf_status_t		status;

	ASSERT(stmf_dbuf != NULL);

	dbuf = (srpt_ds_dbuf_t *)stmf_dbuf->db_port_private;

	ASSERT(dbuf != NULL);

	iu = dbuf->db_iu;

	ASSERT(iu != NULL);
	ASSERT(iu->iu_ch == ch);

	/*
	 * If work completion indicates non-flush failure, then
	 * start a channel disconnect (asynchronous) and release
	 * the reference to the IU.  The task will be cleaned
	 * up with STMF during channel shutdown processing.
	 */
	if (wc_status != IBT_SUCCESS) {
		SRPT_DPRINTF_L2("ch_data_comp, WC status err(%d)", wc_status);
		if (wc_status != IBT_WC_WR_FLUSHED_ERR) {
			srpt_ch_disconnect(ch);
		}
		atomic_dec_32(&iu->iu_sq_posted_cnt);
		return;
	}

	/*
	 * If STMF has requested this task be aborted, then if this is the
	 * last I/O operation outstanding, notify STMF the task has been
	 * aborted and ignore the completion.
	 */
	mutex_enter(&iu->iu_lock);
	atomic_dec_32(&iu->iu_sq_posted_cnt);

	if ((iu->iu_flags & SRPT_IU_STMF_ABORTING) != 0) {
		scsi_task_t	*abort_task = iu->iu_stmf_task;

		mutex_exit(&iu->iu_lock);
		stmf_abort(STMF_REQUEUE_TASK_ABORT_LPORT, abort_task,
		    STMF_ABORTED, NULL);
		return;
	}

	/*
	 * We should not get an RDMA completion where the task has already
	 * completed aborting and STMF has been informed.
	 */
	ASSERT((iu->iu_flags & SRPT_IU_ABORTED) == 0);

	/*
	 * Good completion for last RDMA op associated with a data buffer
	 * I/O; if specified, initiate status, otherwise let STMF know we
	 * are done.
	 */
	stmf_dbuf->db_xfer_status = STMF_SUCCESS;
	mutex_exit(&iu->iu_lock);

	DTRACE_SRP_8(xfer__done, srpt_channel_t, ch,
	    ibt_wr_ds_t, &(dbuf->db_sge), srpt_iu_t, iu,
	    ibt_send_wr_t, 0, uint32_t, stmf_dbuf->db_data_size,
	    uint32_t, 0, uint32_t, 0,
	    uint32_t, (stmf_dbuf->db_flags & DB_DIRECTION_TO_RPORT) ? 1 : 0);

	if ((stmf_dbuf->db_flags & DB_SEND_STATUS_GOOD) != 0) {
		status = srpt_stp_send_status(dbuf->db_iu->iu_stmf_task, 0);
		if (status == STMF_SUCCESS) {
			return;
		}
		stmf_dbuf->db_xfer_status = STMF_FAILURE;
	}
	stmf_data_xfer_done(dbuf->db_iu->iu_stmf_task, stmf_dbuf, 0);
}

/*
 * srpt_ch_scq_hdlr()
 */
static void
srpt_ch_scq_hdlr(ibt_cq_hdl_t cq_hdl, void *arg)
{
	ibt_status_t		status;
	srpt_channel_t		*ch = arg;
	ibt_wc_t		wc[SRPT_SEND_WC_POLL_SIZE];
	ibt_wc_t		*wcp;
	int			i;
	uint32_t		cq_rearmed = 0;
	uint32_t		entries;
	srpt_swqe_t		*swqe;

	ASSERT(ch != NULL);

	/* Reference channel for the duration of this call */
	srpt_ch_add_ref(ch);

	for (;;) {
		status = ibt_poll_cq(cq_hdl, &wc[0], SRPT_SEND_WC_POLL_SIZE,
		    &entries);

		if (status != IBT_SUCCESS) {
			if (status != IBT_CQ_EMPTY) {
				/*
				 * This error should not happen.  It indicates
				 * something abnormal has gone wrong and means
				 * either a hardware or programming logic
				 * error.
				 */
				SRPT_DPRINTF_L2(
				    "ch_scq_hdlr, unexpected CQ err(%d)",
				    status);
				srpt_ch_disconnect(ch);
			}

			/*
			 * If we have not rearmed the CQ do so now and poll to
			 * eliminate race; otherwise we are done.
			 */
			if (cq_rearmed == 0) {
				(void) ibt_enable_cq_notify(ch->ch_scq_hdl,
				    IBT_NEXT_COMPLETION);
				cq_rearmed = 1;
				continue;
			} else {
				break;
			}
		}

		for (wcp = wc, i = 0; i < entries; i++, wcp++) {

			/*
			 * A zero work ID indicates this CQE is associated
			 * with an intermediate post of a RDMA data transfer
			 * operation.  Since intermediate data requests are
			 * unsignaled, we should only get these if there was
			 * an error.  No action is required.
			 */
			if (wcp->wc_id == 0) {
				continue;
			}
			swqe = ch->ch_swqe + wcp->wc_id;

			switch (swqe->sw_type) {
			case SRPT_SWQE_TYPE_RESP:
				srpt_ch_rsp_comp(ch, (srpt_iu_t *)
				    swqe->sw_addr, wcp->wc_status);
				break;

			case SRPT_SWQE_TYPE_DATA:
				srpt_ch_data_comp(ch, (stmf_data_buf_t *)
				    swqe->sw_addr, wcp->wc_status);
				break;

			default:
				SRPT_DPRINTF_L2("ch_scq_hdlr, bad type(%d)",
				    swqe->sw_type);
				ASSERT(0);
			}

			srpt_ch_free_swqe_wrid(ch, wcp->wc_id);
		}
	}

	srpt_ch_release_ref(ch, 0);
}

/*
 * srpt_ch_rcq_hdlr()
 */
static void
srpt_ch_rcq_hdlr(ibt_cq_hdl_t cq_hdl, void *arg)
{
	ibt_status_t		status;
	srpt_channel_t		*ch = arg;
	ibt_wc_t		wc[SRPT_RECV_WC_POLL_SIZE];
	ibt_wc_t		*wcp;
	int			i;
	uint32_t		entries;
	srpt_iu_t		*iu;
	uint_t			cq_rearmed = 0;

	/*
	 * The channel object will exist while the CQ handler call-back
	 * is installed.
	 */
	ASSERT(ch != NULL);
	srpt_ch_add_ref(ch);

	/*
	 * If we know a channel disconnect has started do nothing
	 * and let channel cleanup code recover resources from the CQ.
	 * We are not concerned about races with the state transition
	 * since the code will do the correct thing either way.  This
	 * is simply to circumvent rearming the CQ, and it will
	 * catch the state next time.
	 */
	rw_enter(&ch->ch_rwlock, RW_READER);
	if (ch->ch_state == SRPT_CHANNEL_DISCONNECTING) {
		SRPT_DPRINTF_L2("ch_rcq_hdlr, channel disconnecting");
		rw_exit(&ch->ch_rwlock);
		srpt_ch_release_ref(ch, 0);
		return;
	}
	rw_exit(&ch->ch_rwlock);

	for (;;) {
		status = ibt_poll_cq(cq_hdl, &wc[0], SRPT_RECV_WC_POLL_SIZE,
		    &entries);

		if (status != IBT_SUCCESS) {
			if (status != IBT_CQ_EMPTY) {
				/*
				 * This error should not happen.  It indicates
				 * something abnormal has gone wrong and means
				 * either a hardware or programming logic
				 * error.
				 */
				SRPT_DPRINTF_L2(
				    "ch_rcq_hdlr, unexpected CQ err(%d)",
				    status);
				srpt_ch_disconnect(ch);
				break;
			}

			/*
			 * If we have not rearmed the CQ do so now and poll to
			 * eliminate race; otherwise we are done.
			 */
			if (cq_rearmed == 0) {
				(void) ibt_enable_cq_notify(ch->ch_rcq_hdl,
				    IBT_NEXT_COMPLETION);
				cq_rearmed = 1;
				continue;
			} else {
				break;
			}
		}

		for (wcp = wc, i = 0; i < entries; i++, wcp++) {

			/*
			 * Check wc_status before proceeding.  If the
			 * status indicates a channel problem, stop
			 * processing.
			 */
			if (wcp->wc_status != IBT_WC_SUCCESS) {
				if (wcp->wc_status == IBT_WC_WR_FLUSHED_ERR) {
					SRPT_DPRINTF_L2(
					    "ch_rcq, unexpected"
					    " wc_status err(%d)",
					    wcp->wc_status);
					srpt_ch_disconnect(ch);
					goto done;
				} else {
					/* skip IUs with errors */
					SRPT_DPRINTF_L2(
					    "ch_rcq, ERROR comp(%d)",
					    wcp->wc_status);
					/* XXX - verify not leaking IUs */
					continue;
				}
			}

			iu = (srpt_iu_t *)(uintptr_t)wcp->wc_id;
			ASSERT(iu != NULL);

			/*
			 * Process the IU.
			 */
			ASSERT(wcp->wc_type == IBT_WRC_RECV);
			srpt_ch_process_iu(ch, iu);
		}
	}

done:
	srpt_ch_release_ref(ch, 0);
}
/*
 * srpt_ch_srp_cmd()
 */
static int
srpt_ch_srp_cmd(srpt_channel_t *ch, srpt_iu_t *iu)
{
	srp_cmd_req_t		*cmd = (srp_cmd_req_t *)iu->iu_buf;
	srp_indirect_desc_t	*i_desc;
	uint_t			i_di_cnt;
	uint_t			i_do_cnt;
	uint8_t			do_fmt;
	uint8_t			di_fmt;
	uint32_t		*cur_desc_off;
	int			i;
	ibt_status_t		status;
	uint8_t			addlen;

	DTRACE_SRP_2(task__command, srpt_channel_t, ch, srp_cmd_req_t, cmd);
	iu->iu_ch  = ch;
	iu->iu_tag = cmd->cr_tag;

	/*
	 * The SRP specification and SAM require support for bi-directional
	 * data transfer, so we create a single buffer descriptor list in
	 * the IU buffer that covers the data-in and data-out buffers.
	 * In practice we will just see unidirectional transfers with either
	 * data-in or data-out descriptors.  If we were to take that as
	 * fact, we could reduce overhead slightly.
	 */

	/*
	 * The additional CDB length is a 6-bit count of 4-byte words, so
	 * multiply by 4 to get the length in bytes.
	 */
	addlen = cmd->cr_add_cdb_len & 0x3f;	/* mask off 6 bits */

	cur_desc_off = (uint32_t *)(void *)&cmd->cr_add_data;
	cur_desc_off += addlen;			/* 32-bit arithmetic */
	iu->iu_num_rdescs = 0;
	iu->iu_rdescs = (srp_direct_desc_t *)(void *)cur_desc_off;

	/*
	 * Examine buffer description for Data In (i.e. data flows
	 * to the initiator).
	 */
	i_do_cnt = i_di_cnt = 0;
	di_fmt = cmd->cr_buf_fmt >> 4;
	if (di_fmt == SRP_DATA_DESC_DIRECT) {
		iu->iu_num_rdescs = 1;
		cur_desc_off = (uint32_t *)(void *)&iu->iu_rdescs[1];
	} else if (di_fmt == SRP_DATA_DESC_INDIRECT) {
		i_desc = (srp_indirect_desc_t *)iu->iu_rdescs;
		i_di_cnt = b2h32(i_desc->id_table.dd_len) /
		    sizeof (srp_direct_desc_t);

		/*
		 * Some initiators like OFED occasionally use the wrong
		 * counts, so check total to allow for this.  NOTE: we do
		 * not support reading of the descriptor table from the
		 * initiator, so if not all descriptors are in the IU we
		 * drop the task.
		 */
		if (i_di_cnt > (cmd->cr_dicnt + cmd->cr_docnt)) {
			SRPT_DPRINTF_L2("ch_srp_cmd, remote RDMA of"
			    " descriptors not supported");
			SRPT_DPRINTF_L2("ch_srp_cmd, sizeof entry (%d),"
			    " i_di_cnt(%d), cr_dicnt(%d)",
			    (uint_t)sizeof (srp_direct_desc_t),
			    i_di_cnt, cmd->cr_dicnt);
			iu->iu_rdescs = NULL;
			return (1);
		}

		bcopy(&i_desc->id_desc[0], iu->iu_rdescs,
		    sizeof (srp_direct_desc_t) * i_di_cnt);
		iu->iu_num_rdescs += i_di_cnt;
		cur_desc_off = (uint32_t *)(void *)&i_desc->id_desc[i_di_cnt];
	}

	/*
	 * Examine buffer description for Data Out (i.e. data flows
	 * from the initiator).
	 */
	do_fmt = cmd->cr_buf_fmt & 0x0F;
	if (do_fmt == SRP_DATA_DESC_DIRECT) {
		if (di_fmt == SRP_DATA_DESC_DIRECT) {
			bcopy(cur_desc_off,
			    &iu->iu_rdescs[iu->iu_num_rdescs],
			    sizeof (srp_direct_desc_t));
		}
		iu->iu_num_rdescs++;
	} else if (do_fmt == SRP_DATA_DESC_INDIRECT) {
		i_desc = (srp_indirect_desc_t *)cur_desc_off;
		i_do_cnt = b2h32(i_desc->id_table.dd_len) /
		    sizeof (srp_direct_desc_t);

		/*
		 * Some initiators like OFED occasionally use the wrong
		 * counts, so check total to allow for this.  NOTE: we do
		 * not support reading of the descriptor table from the
		 * initiator, so if not all descriptors are in the IU we
		 * drop the task.
		 */
		if ((i_di_cnt + i_do_cnt) > (cmd->cr_dicnt + cmd->cr_docnt)) {
			SRPT_DPRINTF_L2("ch_srp_cmd, remote RDMA of"
			    " descriptors not supported");
			SRPT_DPRINTF_L2("ch_srp_cmd, sizeof entry (%d),"
			    " i_do_cnt(%d), cr_docnt(%d)",
			    (uint_t)sizeof (srp_direct_desc_t),
			    i_do_cnt, cmd->cr_docnt);
			iu->iu_rdescs = NULL;
			return (1);
		}

		bcopy(&i_desc->id_desc[0], &iu->iu_rdescs[iu->iu_num_rdescs],
		    sizeof (srp_direct_desc_t) * i_do_cnt);
		iu->iu_num_rdescs += i_do_cnt;
	}

	iu->iu_tot_xfer_len = 0;
	for (i = 0; i < iu->iu_num_rdescs; i++) {
		iu->iu_rdescs[i].dd_vaddr = b2h64(iu->iu_rdescs[i].dd_vaddr);
		iu->iu_rdescs[i].dd_hdl   = b2h32(iu->iu_rdescs[i].dd_hdl);
		iu->iu_rdescs[i].dd_len   = b2h32(iu->iu_rdescs[i].dd_len);
		iu->iu_tot_xfer_len += iu->iu_rdescs[i].dd_len;
	}

#ifdef DEBUG
	if (srpt_errlevel >= SRPT_LOG_L4) {
		SRPT_DPRINTF_L4("ch_srp_cmd, iu->iu_tot_xfer_len (%d)",
		    iu->iu_tot_xfer_len);
		for (i = 0; i < iu->iu_num_rdescs; i++) {
			SRPT_DPRINTF_L4("ch_srp_cmd, rdescs[%d].dd_vaddr"
			    " (0x%08llx)",
			    i, (u_longlong_t)iu->iu_rdescs[i].dd_vaddr);
			SRPT_DPRINTF_L4("ch_srp_cmd, rdescs[%d].dd_hdl"
			    " (0x%08x)", i, iu->iu_rdescs[i].dd_hdl);
			SRPT_DPRINTF_L4("ch_srp_cmd, rdescs[%d].dd_len (%d)",
			    i, iu->iu_rdescs[i].dd_len);
		}
		SRPT_DPRINTF_L4("ch_srp_cmd, LUN (0x%08lx)",
		    (unsigned long int) *((uint64_t *)(void *) cmd->cr_lun));
	}
#endif
	rw_enter(&ch->ch_rwlock, RW_READER);

	if (ch->ch_state == SRPT_CHANNEL_DISCONNECTING) {
		/*
		 * The channel has begun disconnecting, so ignore the
		 * command, returning the IU resources.
		 */
		rw_exit(&ch->ch_rwlock);
		return (1);
	}

	/*
	 * Once a SCSI task is allocated and assigned to the IU, it
	 * owns those IU resources, which will be held until STMF
	 * is notified the task is done (from a lport perspective).
	 */
	iu->iu_stmf_task = stmf_task_alloc(ch->ch_tgt->tp_lport,
	    ch->ch_session->ss_ss, cmd->cr_lun,
	    SRP_CDB_SIZE + (addlen * 4), 0);
	if (iu->iu_stmf_task == NULL) {
		/*
		 * Could not allocate, return status to the initiator
		 * indicating that we are temporarily unable to process
		 * commands.  If unable to send, immediately return IU
		 * resource.
		 */
		SRPT_DPRINTF_L2("ch_srp_cmd, SCSI task allocation failure");
		rw_exit(&ch->ch_rwlock);
		mutex_enter(&iu->iu_lock);
		status = srpt_stp_send_response(iu, STATUS_BUSY, 0, 0, 0,
		    NULL, SRPT_NO_FENCE_SEND);
		mutex_exit(&iu->iu_lock);
		if (status != IBT_SUCCESS) {
			SRPT_DPRINTF_L2("ch_srp_cmd, error(%d) posting error"
			    " response", status);
			return (1);
		} else {
			return (0);
		}
	}

	iu->iu_stmf_task->task_port_private = iu;
	iu->iu_stmf_task->task_flags = 0;

	if (di_fmt != 0) {
		iu->iu_stmf_task->task_flags |= TF_WRITE_DATA;
	}
	if (do_fmt != 0) {
		iu->iu_stmf_task->task_flags |= TF_READ_DATA;
	}

	switch (cmd->cr_task_attr) {
	case SRP_TSK_ATTR_QTYPE_SIMPLE:
		iu->iu_stmf_task->task_flags |= TF_ATTR_SIMPLE_QUEUE;
		break;

	case SRP_TSK_ATTR_QTYPE_HEAD_OF_Q:
		iu->iu_stmf_task->task_flags |= TF_ATTR_HEAD_OF_QUEUE;
		break;

	case SRP_TSK_ATTR_QTYPE_ORDERED:
		iu->iu_stmf_task->task_flags |= TF_ATTR_ORDERED_QUEUE;
		break;

	case SRP_TSK_ATTR_QTYPE_ACA_Q_TAG:
		iu->iu_stmf_task->task_flags |= TF_ATTR_ACA;
		break;

	default:
		SRPT_DPRINTF_L2("ch_srp_cmd, reserved task attr (%d)",
		    cmd->cr_task_attr);
		iu->iu_stmf_task->task_flags |= TF_ATTR_ORDERED_QUEUE;
		break;
	}
	iu->iu_stmf_task->task_additional_flags = 0;
	iu->iu_stmf_task->task_priority = 0;
	iu->iu_stmf_task->task_mgmt_function = TM_NONE;
	iu->iu_stmf_task->task_max_nbufs = STMF_BUFS_MAX;
	iu->iu_stmf_task->task_expected_xfer_length = iu->iu_tot_xfer_len;
	iu->iu_stmf_task->task_csn_size = 0;

	bcopy(cmd->cr_cdb, iu->iu_stmf_task->task_cdb,
	    SRP_CDB_SIZE);
	if (addlen != 0) {
		bcopy(&cmd->cr_add_data,
		    iu->iu_stmf_task->task_cdb + SRP_CDB_SIZE,
		    addlen * 4);
	}

	/*
	 * Add the IU/task to the session and post to STMF.  The task will
	 * remain in the session's list until STMF is informed by SRP that
	 * it is done with the task.
	 */
	DTRACE_SRP_3(scsi__command, srpt_channel_t, iu->iu_ch,
	    scsi_task_t, iu->iu_stmf_task, srp_cmd_req_t, cmd);
	srpt_stp_add_task(ch->ch_session, iu);

	SRPT_DPRINTF_L3("ch_srp_cmd, new task (%p) posted",
	    (void *)iu->iu_stmf_task);
	stmf_post_task(iu->iu_stmf_task, NULL);
	rw_exit(&ch->ch_rwlock);

	return (0);
}
*/ SRPT_DPRINTF_L3("ch_task_mgmt_abort, task found"); ss_iu->iu_flags |= SRPT_IU_SRP_ABORTING; mutex_exit(&ss_iu->iu_lock); stmf_abort(STMF_QUEUE_TASK_ABORT, ss_iu->iu_stmf_task, STMF_ABORTED, NULL); send_mgmt_resp: mutex_enter(&iu->iu_lock); status = srpt_stp_send_mgmt_response(iu, SRP_TM_SUCCESS, SRPT_FENCE_SEND); mutex_exit(&iu->iu_lock); if (status != IBT_SUCCESS) { SRPT_DPRINTF_L2("ch_task_mgmt_abort, err(%d)" " posting abort response", status); } return (status); } /* * srpt_ch_srp_task_mgmt() */ static int srpt_ch_srp_task_mgmt(srpt_channel_t *ch, srpt_iu_t *iu) { srp_tsk_mgmt_t *tsk = (srp_tsk_mgmt_t *)iu->iu_buf; uint8_t tm_fn; ibt_status_t status; SRPT_DPRINTF_L3("ch_srp_task_mgmt, SRP TASK MGMT func(%d)", tsk->tm_function); /* * Both tag and lun fileds have the same corresponding offsets * in both srp_tsk_mgmt_t and srp_cmd_req_t structures. The * casting will allow us to use the same dtrace translator. */ DTRACE_SRP_2(task__command, srpt_channel_t, ch, srp_cmd_req_t, (srp_cmd_req_t *)tsk); iu->iu_ch = ch; iu->iu_tag = tsk->tm_tag; /* * Task management aborts are processed directly by the SRP driver; * all other task management requests are handed off to STMF. */ switch (tsk->tm_function) { case SRP_TSK_MGMT_ABORT_TASK: /* * Initiate SCSI transport protocol specific task abort * logic. */ status = srpt_ch_task_mgmt_abort(ch, iu, tsk->tm_task_tag); if (status != IBT_SUCCESS) { /* repost this IU */ return (1); } else { return (0); } case SRP_TSK_MGMT_ABORT_TASK_SET: tm_fn = TM_ABORT_TASK_SET; break; case SRP_TSK_MGMT_CLEAR_TASK_SET: tm_fn = TM_CLEAR_TASK_SET; break; case SRP_TSK_MGMT_LUN_RESET: tm_fn = TM_LUN_RESET; break; case SRP_TSK_MGMT_CLEAR_ACA: tm_fn = TM_CLEAR_ACA; break; default: /* * SRP does not support the requested task management * function; return a not supported status in the response. */ SRPT_DPRINTF_L2("ch_srp_task_mgmt, SRP task mgmt fn(%d)" " not supported", tsk->tm_function); mutex_enter(&iu->iu_lock); status = srpt_stp_send_mgmt_response(iu, SRP_TM_NOT_SUPPORTED, SRPT_NO_FENCE_SEND); mutex_exit(&iu->iu_lock); if (status != IBT_SUCCESS) { SRPT_DPRINTF_L2("ch_srp_task_mgmt, err(%d) posting" " response", status); return (1); } return (0); } rw_enter(&ch->ch_rwlock, RW_READER); if (ch->ch_state == SRPT_CHANNEL_DISCONNECTING) { /* * The channel has begun disconnecting, so ignore the * the command returning the IU resources. */ rw_exit(&ch->ch_rwlock); return (1); } /* * Once a SCSI mgmt task is allocated and assigned to the IU, it * owns those IU resources, which will be held until we inform * STMF that we are done with the task (from an lports perspective). */ iu->iu_stmf_task = stmf_task_alloc(ch->ch_tgt->tp_lport, ch->ch_session->ss_ss, tsk->tm_lun, 0, STMF_TASK_EXT_NONE); if (iu->iu_stmf_task == NULL) { /* * Could not allocate, return status to the initiator * indicating that we are temporarily unable to process * commands. If unable to send, immediately return IU * resource. 
*/ SRPT_DPRINTF_L2("ch_srp_task_mgmt, SCSI task allocation" " failure"); rw_exit(&ch->ch_rwlock); mutex_enter(&iu->iu_lock); status = srpt_stp_send_response(iu, STATUS_BUSY, 0, 0, 0, NULL, SRPT_NO_FENCE_SEND); mutex_exit(&iu->iu_lock); if (status != IBT_SUCCESS) { SRPT_DPRINTF_L2("ch_srp_task_mgmt, err(%d) posting" "busy response", status); /* repost the IU */ return (1); } return (0); } iu->iu_stmf_task->task_port_private = iu; iu->iu_stmf_task->task_flags = 0; iu->iu_stmf_task->task_additional_flags = TASK_AF_NO_EXPECTED_XFER_LENGTH; iu->iu_stmf_task->task_priority = 0; iu->iu_stmf_task->task_mgmt_function = tm_fn; iu->iu_stmf_task->task_max_nbufs = STMF_BUFS_MAX; iu->iu_stmf_task->task_expected_xfer_length = 0; iu->iu_stmf_task->task_csn_size = 0; /* * Add the IU/task to the session and post to STMF. The task will * remain in the session's list until STMF is informed by SRP that * it is done with the task. */ srpt_stp_add_task(ch->ch_session, iu); SRPT_DPRINTF_L3("ch_srp_task_mgmt, new mgmt task(%p) posted", (void *)iu->iu_stmf_task); stmf_post_task(iu->iu_stmf_task, NULL); rw_exit(&ch->ch_rwlock); return (0); } /* * srpt_ch_process_iu() */ static void srpt_ch_process_iu(srpt_channel_t *ch, srpt_iu_t *iu) { srpt_iu_data_t *iud; int status = 1; /* * IU adds reference to channel which will represent a * a reference by STMF. If for whatever reason the IU * is not handed off to STMF, then this reference will be * released. Otherwise, the reference will be released when * SRP informs STMF that the associated SCSI task is done. */ srpt_ch_add_ref(ch); /* * Validate login RC channel state. Normally active, if * not active then we need to handle a possible race between the * receipt of a implied RTU and CM calling back to notify of the * state transition. */ rw_enter(&ch->ch_rwlock, RW_READER); if (ch->ch_state == SRPT_CHANNEL_DISCONNECTING) { rw_exit(&ch->ch_rwlock); goto repost_iu; } rw_exit(&ch->ch_rwlock); iud = iu->iu_buf; switch (iud->rx_iu.srp_op) { case SRP_IU_CMD: status = srpt_ch_srp_cmd(ch, iu); break; case SRP_IU_TASK_MGMT: status = srpt_ch_srp_task_mgmt(ch, iu); return; case SRP_IU_I_LOGOUT: SRPT_DPRINTF_L3("ch_process_iu, SRP INITIATOR LOGOUT"); /* * Initiators should logout by issuing a CM disconnect * request (DREQ) with the logout IU in the private data; * however some initiators have been known to send the * IU in-band, if this happens just initiate the logout. * Note that we do not return a response as per the * specification. */ srpt_stp_logout(ch); break; case SRP_IU_AER_RSP: case SRP_IU_CRED_RSP: default: /* * We don't send asynchronous events or ask for credit * adjustments, so nothing need be done. Log we got an * unexpected IU but then just repost the IU to the SRQ. 
*/ SRPT_DPRINTF_L2("ch_process_iu, invalid IU from initiator," " IU opcode(%d)", iud->rx_iu.srp_op); break; } if (status == 0) { return; } repost_iu: SRPT_DPRINTF_L4("process_iu: reposting iu %p", (void *)iu); mutex_enter(&iu->iu_lock); srpt_ioc_repost_recv_iu(iu->iu_ioc, iu); mutex_exit(&iu->iu_lock); srpt_ch_release_ref(ch, 0); } /* * srpt_ch_post_send */ ibt_status_t srpt_ch_post_send(srpt_channel_t *ch, srpt_iu_t *iu, uint32_t len, uint_t fence) { ibt_status_t status; ibt_send_wr_t wr; ibt_wr_ds_t ds; uint_t posted; ASSERT(ch != NULL); ASSERT(iu != NULL); ASSERT(mutex_owned(&iu->iu_lock)); rw_enter(&ch->ch_rwlock, RW_READER); if (ch->ch_state == SRPT_CHANNEL_DISCONNECTING) { rw_exit(&ch->ch_rwlock); SRPT_DPRINTF_L2("ch_post_send, bad ch state (%d)", ch->ch_state); return (IBT_FAILURE); } rw_exit(&ch->ch_rwlock); wr.wr_id = srpt_ch_alloc_swqe_wrid(ch, SRPT_SWQE_TYPE_RESP, (void *)iu); if (wr.wr_id == 0) { SRPT_DPRINTF_L2("ch_post_send, queue full"); return (IBT_FAILURE); } atomic_inc_32(&iu->iu_sq_posted_cnt); wr.wr_flags = IBT_WR_SEND_SIGNAL; if (fence == SRPT_FENCE_SEND) { wr.wr_flags |= IBT_WR_SEND_FENCE; } wr.wr_opcode = IBT_WRC_SEND; wr.wr_trans = IBT_RC_SRV; wr.wr_nds = 1; wr.wr_sgl = &ds; ds.ds_va = iu->iu_sge.ds_va; ds.ds_key = iu->iu_sge.ds_key; ds.ds_len = len; SRPT_DPRINTF_L4("ch_post_send, posting SRP response to channel" " ds.ds_va (0x%16llx), ds.ds_key (0x%08x), " " ds.ds_len (%d)", (u_longlong_t)ds.ds_va, ds.ds_key, ds.ds_len); status = ibt_post_send(ch->ch_chan_hdl, &wr, 1, &posted); if (status != IBT_SUCCESS) { SRPT_DPRINTF_L2("ch_post_send, post_send failed (%d)", status); atomic_dec_32(&iu->iu_sq_posted_cnt); srpt_ch_free_swqe_wrid(ch, wr.wr_id); return (status); } return (IBT_SUCCESS); } /* * srpt_ch_alloc_swqe_wrid() */ ibt_wrid_t srpt_ch_alloc_swqe_wrid(srpt_channel_t *ch, srpt_swqe_type_t wqe_type, void *addr) { ibt_wrid_t wrid; mutex_enter(&ch->ch_swqe_lock); if (ch->ch_head == ch->ch_tail) { mutex_exit(&ch->ch_swqe_lock); return ((ibt_wrid_t)0); } wrid = (ibt_wrid_t)ch->ch_head; ch->ch_swqe[ch->ch_head].sw_type = wqe_type; ch->ch_swqe[ch->ch_head].sw_addr = addr; ch->ch_head = ch->ch_swqe[ch->ch_head].sw_next; ch->ch_swqe_posted++; mutex_exit(&ch->ch_swqe_lock); return (wrid); } /* * srpt_ch_free_swqe_wrid() */ void srpt_ch_free_swqe_wrid(srpt_channel_t *ch, ibt_wrid_t id) { mutex_enter(&ch->ch_swqe_lock); ch->ch_swqe[ch->ch_tail].sw_next = id; ch->ch_tail = (uint32_t)id; ch->ch_swqe_posted--; mutex_exit(&ch->ch_swqe_lock); }