/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include "dapl.h"
#include "dapl_adapter_util.h"
#include "dapl_evd_util.h"
#include "dapl_cr_util.h"
#include "dapl_lmr_util.h"
#include "dapl_rmr_util.h"
#include "dapl_cookie.h"
#include "dapl_tavor_ibtf_impl.h"
#include "dapl_hash.h"

/* Function prototypes */
extern DAT_RETURN dapls_tavor_wrid_init(ib_qp_handle_t);
extern DAT_RETURN dapls_tavor_srq_wrid_init(ib_srq_handle_t);
extern void dapls_tavor_srq_wrid_free(ib_srq_handle_t);
extern DAT_BOOLEAN dapls_tavor_srq_wrid_resize(ib_srq_handle_t, uint32_t);

static DAT_RETURN dapli_ib_srq_add_ep(IN ib_srq_handle_t srq_ptr,
    IN uint32_t qpnum, IN DAPL_EP *ep_ptr);
static void dapli_ib_srq_remove_ep(IN ib_srq_handle_t srq_ptr,
    IN uint32_t qpnum);
static DAT_RETURN dapli_ib_srq_resize_internal(IN DAPL_SRQ *srq_ptr,
    IN DAT_COUNT srqlen);

/*
 * dapli_get_dto_cq
 *
 * Obtain the cq_handle for a DTO EVD. If the EVD is NULL, use the
 * null_ib_cq_handle. If it hasn't been created yet, create it now in
 * the HCA structure. It will be cleaned up in dapls_ib_cqd_destroy().
 *
 * This is strictly internal to IB. DAPL allows a NULL DTO EVD handle,
 * but IB does not. So we create a CQ under the hood and make sure
 * an error is generated if the user ever tries to post, by
 * setting the WQ length to 0 in ep_create and/or ep_modify.
 *
 * Input:
 *	ia_ptr		IA whose HCA caches the dummy CQ
 *	evd_ptr		DTO EVD, or DAT_HANDLE_NULL
 *
 * Returns:
 *	A valid CQ handle, or IB_INVALID_HANDLE if the dummy CQ could
 *	not be allocated or created in the driver.
 */
static ib_cq_handle_t
dapli_get_dto_cq(
	IN  DAPL_IA	*ia_ptr,
	IN  DAPL_EVD	*evd_ptr)
{
	dapl_evd_create_t	create_msg;
	ib_cq_handle_t		cq_handle;
	int			ia_fd;
	int			retval;
	mlnx_umap_cq_data_out_t	*mcq;

	if (evd_ptr != DAT_HANDLE_NULL) {
		cq_handle = evd_ptr->ib_cq_handle;
	} else if (ia_ptr->hca_ptr->null_ib_cq_handle != IB_INVALID_HANDLE) {
		cq_handle = ia_ptr->hca_ptr->null_ib_cq_handle;
	} else {
		cq_handle = (ib_cq_handle_t)
		    dapl_os_alloc(sizeof (struct dapls_ib_cq_handle));
		if (cq_handle == NULL) {
			dapl_dbg_log(DAPL_DBG_TYPE_ERR,
			    "dapli_get_dto_cq: cq malloc failed\n");
			ia_ptr->hca_ptr->null_ib_cq_handle = IB_INVALID_HANDLE;
			return (IB_INVALID_HANDLE);
		}

		/*
		 * create a fake CQ, we don't bother to mmap this CQ
		 * since nobody knows about it to reap events from it.
		 */
		(void) dapl_os_memzero(&create_msg, sizeof (create_msg));
		create_msg.evd_flags = DAT_EVD_DTO_FLAG;
		mcq = (mlnx_umap_cq_data_out_t *)create_msg.evd_cq_data_out;

		ia_fd = ia_ptr->hca_ptr->ib_hca_handle->ia_fd;

		/* call into driver to allocate cq */
		retval = ioctl(ia_fd, DAPL_EVD_CREATE, &create_msg);
		if (retval != 0) {
			dapl_dbg_log(DAPL_DBG_TYPE_ERR,
			    "dapli_get_dto_cq: DAPL_EVD_CREATE failed\n");
			dapl_os_free(cq_handle,
			    sizeof (struct dapls_ib_cq_handle));
			ia_ptr->hca_ptr->null_ib_cq_handle = IB_INVALID_HANDLE;
			return (IB_INVALID_HANDLE);
		}

		(void) dapl_os_memzero(cq_handle,
		    sizeof (struct dapls_ib_cq_handle));
		dapl_os_lock_init(&cq_handle->cq_wrid_wqhdr_lock);
		cq_handle->evd_hkey = create_msg.evd_hkey;
		cq_handle->cq_addr = NULL;
		cq_handle->cq_map_offset = mcq->mcq_mapoffset;
		cq_handle->cq_map_len = mcq->mcq_maplen;
		cq_handle->cq_num = mcq->mcq_cqnum;
		cq_handle->cq_size = create_msg.evd_cq_real_size;
		cq_handle->cq_cqesz = mcq->mcq_cqesz;
		cq_handle->cq_iauar = ia_ptr->hca_ptr->ib_hca_handle->ia_uar;

		dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
		    "dapli_get_dto_cq: cq 0x%p created, hkey 0x%016llx\n",
		    cq_handle, create_msg.evd_hkey);

		/* save this dummy CQ handle into the hca */
		ia_ptr->hca_ptr->null_ib_cq_handle = cq_handle;
	}
	return (cq_handle);
}


/*
 * dapls_ib_qp_alloc
 *
 * Alloc a QP: create the EP in the driver, map its work queues (and
 * doorbell records where the HCA uses them) and fill in a new qp handle
 * that is stored in ep_ptr->qp_handle.
 *
 * Input:
 *	ia_ptr		IA the EP belongs to
 *	ep_ptr		EP whose parameters describe the QP; receives the
 *			qp handle, qpn and qp_state
 *	ep_ctx_ptr	EP pointer handed to the driver as the cookie
 *
 * Output:
 *	none
 *
 * Returns:
 *	DAT_SUCCESS
 *	DAT_INSUFFICIENT_RESOURCES
 *	DAT_INVALID_PARAMETER
 *	the result of dapls_convert_error() when the driver call or a
 *	mapping fails
 *
 */
DAT_RETURN
dapls_ib_qp_alloc(
	IN  DAPL_IA	*ia_ptr,
	IN  DAPL_EP	*ep_ptr,
	IN  DAPL_EP	*ep_ctx_ptr)
{
	dapl_ep_create_t	ep_args;
	dapl_ep_free_t		epf_args;
	ib_qp_handle_t		qp_p;
	DAPL_SRQ		*srq_p = NULL;
	ib_cq_handle_t		cq_recv;
	ib_cq_handle_t		cq_send;
	DAPL_PZ			*pz_handle;
	DAPL_EVD		*evd_handle;
	uint32_t		mpt_mask;
	size_t			premev_size;
	uint32_t		i;
	int			ia_fd;
	int			hca_fd;
	DAT_RETURN		dat_status;
	int			retval;
	int			saved_errno;
	mlnx_umap_qp_data_out_t	*mqp;

	/* check parameters */
	if (ia_ptr->hca_ptr->ib_hca_handle == NULL) {
		dapl_dbg_log(DAPL_DBG_TYPE_EP,
		    "qp_alloc: hca_handle == NULL\n");
		return (DAT_INVALID_PARAMETER);
	}

	ia_fd = ia_ptr->hca_ptr->ib_hca_handle->ia_fd;
	hca_fd = ia_ptr->hca_ptr->ib_hca_handle->hca_fd;
	dapl_os_assert(ep_ptr->param.pz_handle != NULL);
	dapl_os_assert(ep_ptr->param.connect_evd_handle != NULL);

	/* fill in args for ep_create */
	(void) dapl_os_memzero(&ep_args, sizeof (ep_args));
	mqp = (mlnx_umap_qp_data_out_t *)ep_args.ep_qp_data_out;
	pz_handle = (DAPL_PZ *)ep_ptr->param.pz_handle;
	ep_args.ep_pd_hkey = pz_handle->pd_handle->pd_hkey;

	/*
	 * dapli_get_dto_cq() returns IB_INVALID_HANDLE when the dummy CQ
	 * for a NULL DTO EVD cannot be created; don't dereference it.
	 */
	cq_recv = dapli_get_dto_cq(ia_ptr,
	    (DAPL_EVD *)ep_ptr->param.recv_evd_handle);
	if (cq_recv == IB_INVALID_HANDLE) {
		dapl_dbg_log(DAPL_DBG_TYPE_EP,
		    "qp_alloc: os_alloc failed\n");
		return (DAT_ERROR(DAT_INSUFFICIENT_RESOURCES,
		    DAT_RESOURCE_MEMORY));
	}
	ep_args.ep_rcv_evd_hkey = cq_recv->evd_hkey;

	cq_send = dapli_get_dto_cq(ia_ptr,
	    (DAPL_EVD *)ep_ptr->param.request_evd_handle);
	if (cq_send == IB_INVALID_HANDLE) {
		dapl_dbg_log(DAPL_DBG_TYPE_EP,
		    "qp_alloc: os_alloc failed\n");
		return (DAT_ERROR(DAT_INSUFFICIENT_RESOURCES,
		    DAT_RESOURCE_MEMORY));
	}
	ep_args.ep_snd_evd_hkey = cq_send->evd_hkey;

	evd_handle = (DAPL_EVD *)ep_ptr->param.connect_evd_handle;
	ep_args.ep_conn_evd_hkey = evd_handle->ib_cq_handle->evd_hkey;

	ep_args.ep_ch_sizes.dcs_sq = ep_ptr->param.ep_attr.max_request_dtos;
	ep_args.ep_ch_sizes.dcs_sq_sgl = ep_ptr->param.ep_attr.max_request_iov;

	qp_p = (ib_qp_handle_t)dapl_os_alloc(
	    sizeof (struct dapls_ib_qp_handle));
	if (qp_p == NULL) {
		dapl_dbg_log(DAPL_DBG_TYPE_EP,
		    "qp_alloc: os_alloc failed\n");
		return (DAT_ERROR(DAT_INSUFFICIENT_RESOURCES,
		    DAT_RESOURCE_MEMORY));
	}

	(void) dapl_os_memzero(qp_p, sizeof (*qp_p));

	if (ep_ptr->param.srq_handle == NULL) {
		premev_size = ep_ptr->param.ep_attr.max_recv_dtos *
		    sizeof (ib_work_completion_t);
		if (premev_size != 0) {
			qp_p->qp_premature_events = (ib_work_completion_t *)
			    dapl_os_alloc(premev_size);
			if (qp_p->qp_premature_events == NULL) {
				dapl_dbg_log(DAPL_DBG_TYPE_EP,
				    "qp_alloc:alloc premature_events failed\n");
				dapl_os_free(qp_p, sizeof (*qp_p));
				return (DAT_ERROR(DAT_INSUFFICIENT_RESOURCES,
				    DAT_RESOURCE_MEMORY));
			}
		}
		qp_p->qp_num_premature_events = 0;
		ep_args.ep_srq_hkey = 0;
		ep_args.ep_srq_attached = 0;
		ep_args.ep_ch_sizes.dcs_rq =
		    ep_ptr->param.ep_attr.max_recv_dtos;
		ep_args.ep_ch_sizes.dcs_rq_sgl =
		    ep_ptr->param.ep_attr.max_recv_iov;
	} else {
		/* premev_size == 0 marks the array below as not ours */
		premev_size = 0;
		srq_p = (DAPL_SRQ *)ep_ptr->param.srq_handle;
		/* premature events for EPs with SRQ sit on the SRQ */
		qp_p->qp_premature_events = srq_p->srq_handle->
		    srq_premature_events;
		qp_p->qp_num_premature_events = 0;
		ep_args.ep_srq_hkey = srq_p->srq_handle->srq_hkey;
		ep_args.ep_srq_attached = 1;
		ep_args.ep_ch_sizes.dcs_rq = 0;
		ep_args.ep_ch_sizes.dcs_rq_sgl = 0;
	}

	/*
	 * there are cases when ep_ptr is a dummy container ep, and the orig
	 * ep pointer is passed in ep_ctx_ptr. eg - dapl_ep_modify does this.
	 * ep_cookie should be the actual ep pointer, not the dummy container
	 * ep since the kernel returns this via events and the CM callback
	 * routines
	 */
	ep_args.ep_cookie = (uintptr_t)ep_ctx_ptr;

	dapl_dbg_log(DAPL_DBG_TYPE_EP,
	    "qp_alloc: ep_ptr 0x%p, pz 0x%p (0x%llx), rcv_evd 0x%p (0x%llx)\n"
	    " snd_evd 0x%p (0x%llx), conn_evd 0x%p (0x%llx)\n"
	    " srq_hdl 0x%p (0x%llx)\n"
	    " sq_sz %d, rq_sz %d, sq_sgl_sz %d, rq_sgl_sz %d\n",
	    ep_ptr, pz_handle, ep_args.ep_pd_hkey,
	    ep_ptr->param.recv_evd_handle, ep_args.ep_rcv_evd_hkey,
	    ep_ptr->param.request_evd_handle, ep_args.ep_snd_evd_hkey,
	    ep_ptr->param.connect_evd_handle, ep_args.ep_conn_evd_hkey,
	    ep_ptr->param.srq_handle, ep_args.ep_srq_hkey,
	    ep_args.ep_ch_sizes.dcs_sq, ep_args.ep_ch_sizes.dcs_rq,
	    ep_args.ep_ch_sizes.dcs_sq_sgl, ep_args.ep_ch_sizes.dcs_rq_sgl);

	/* The next line is only needed for backward compatibility */
	mqp->mqp_rev = MLNX_UMAP_IF_VERSION;
	retval = ioctl(ia_fd, DAPL_EP_CREATE, &ep_args);
	if (retval != 0 || mqp->mqp_rev != MLNX_UMAP_IF_VERSION) {
		/* capture errno before logging/freeing can overwrite it */
		saved_errno = errno;
		dapl_dbg_log(DAPL_DBG_TYPE_EP,
		    "qp_alloc: ep_create failed errno %d, retval %d\n",
		    saved_errno, retval);
		if (premev_size != 0) {
			dapl_os_free(qp_p->qp_premature_events, premev_size);
		}
		dapl_os_free(qp_p, sizeof (*qp_p));
		return (dapls_convert_error(saved_errno, retval));
	}

	/* In the case of Arbel or Hermon */
	if (mqp->mqp_sdbr_mapoffset != 0 || mqp->mqp_sdbr_maplen != 0)
		qp_p->qp_sq_dbp = dapls_ib_get_dbp(mqp->mqp_sdbr_maplen,
		    hca_fd, mqp->mqp_sdbr_mapoffset, mqp->mqp_sdbr_offset);
	if (mqp->mqp_rdbr_mapoffset != 0 || mqp->mqp_rdbr_maplen != 0)
		qp_p->qp_rq_dbp = dapls_ib_get_dbp(mqp->mqp_rdbr_maplen,
		    hca_fd, mqp->mqp_rdbr_mapoffset, mqp->mqp_rdbr_offset);

	qp_p->qp_addr = mmap64((void *)0, mqp->mqp_maplen,
	    (PROT_READ | PROT_WRITE), MAP_SHARED, hca_fd,
	    mqp->mqp_mapoffset);

	if (qp_p->qp_addr == MAP_FAILED ||
	    qp_p->qp_sq_dbp == MAP_FAILED ||
	    qp_p->qp_rq_dbp == MAP_FAILED) {
		/* the cleanup calls below may overwrite errno */
		saved_errno = errno;
		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
		    "qp_alloc: mmap failed(%d)\n", saved_errno);
		/* don't leak the QP mapping if only a doorbell map failed */
		if (qp_p->qp_addr != MAP_FAILED) {
			(void) munmap((void *)qp_p->qp_addr, mqp->mqp_maplen);
		}
		epf_args.epf_hkey = ep_args.ep_hkey;
		retval = ioctl(ia_fd, DAPL_EP_FREE, &epf_args);
		if (retval != 0) {
			dapl_dbg_log(DAPL_DBG_TYPE_ERR,
			    "qp_alloc: EP_FREE err:%d\n", errno);
		}
		if (premev_size != 0) {
			dapl_os_free(qp_p->qp_premature_events, premev_size);
		}
		dapl_os_free(qp_p, sizeof (*qp_p));
		return (dapls_convert_error(saved_errno, 0));
	}

	qp_p->qp_map_len = mqp->mqp_maplen;
	qp_p->qp_num = mqp->mqp_qpnum;
	qp_p->qp_iauar = ia_ptr->hca_ptr->ib_hca_handle->ia_uar;
	qp_p->qp_ia_bf = ia_ptr->hca_ptr->ib_hca_handle->ia_bf;
	qp_p->qp_ia_bf_toggle = ia_ptr->hca_ptr->ib_hca_handle->ia_bf_toggle;

	/*
	 * Use the CQ handles resolved above rather than dereferencing the
	 * EVD handles again: a DTO EVD handle may legitimately be NULL, in
	 * which case cq_send/cq_recv refer to the HCA's dummy CQ.
	 */
	qp_p->qp_sq_cqhdl = cq_send;
	qp_p->qp_sq_lastwqeaddr = NULL;
	qp_p->qp_sq_wqhdr = NULL;
	qp_p->qp_sq_buf = (caddr_t)(qp_p->qp_addr + mqp->mqp_sq_off);
	qp_p->qp_sq_desc_addr = mqp->mqp_sq_desc_addr;
	qp_p->qp_sq_numwqe = mqp->mqp_sq_numwqe;
	qp_p->qp_sq_wqesz = mqp->mqp_sq_wqesz;
	qp_p->qp_sq_sgl = ep_ptr->param.ep_attr.max_request_iov;
	qp_p->qp_sq_inline = ia_ptr->hca_ptr->max_inline_send;
	qp_p->qp_sq_headroom = mqp->mqp_sq_headroomwqes;

	qp_p->qp_rq_cqhdl = cq_recv;
	qp_p->qp_rq_lastwqeaddr = NULL;
	qp_p->qp_rq_wqhdr = NULL;
	qp_p->qp_rq_buf = (caddr_t)(qp_p->qp_addr + mqp->mqp_rq_off);
	qp_p->qp_rq_desc_addr = mqp->mqp_rq_desc_addr;
	qp_p->qp_rq_numwqe = mqp->mqp_rq_numwqe;
	qp_p->qp_rq_wqesz = mqp->mqp_rq_wqesz;
	qp_p->qp_rq_sgl = ep_ptr->param.ep_attr.max_recv_iov;

	dapl_dbg_log(DAPL_DBG_TYPE_EP,
	    "qp_alloc: created, qp_sq_buf %p, qp_rq_buf %p\n",
	    qp_p->qp_sq_buf, qp_p->qp_rq_buf);
	dapl_dbg_log(DAPL_DBG_TYPE_EP,
	    "qp_alloc: created, sq numwqe %x wqesz %x, rq numwqe %x wqesz %x\n",
	    qp_p->qp_sq_numwqe, qp_p->qp_sq_wqesz,
	    qp_p->qp_rq_numwqe, qp_p->qp_rq_wqesz);
	dapl_dbg_log(DAPL_DBG_TYPE_EP,
	    "qp_alloc: created, qp_sq_desc_addr %x, qp_rq_desc_addr %x\n",
	    mqp->mqp_sq_desc_addr, mqp->mqp_rq_desc_addr);
	dapl_dbg_log(DAPL_DBG_TYPE_EP,
	    "qp_alloc: created, ep_ptr 0x%p, ep_hkey 0x%016llx\n\n",
	    ep_ptr, ep_args.ep_hkey);

	qp_p->ep_hkey = ep_args.ep_hkey;

	/*
	 * Calculate the number of bits in max_rmrs - this is indirectly
	 * the max number of entries in the MPT table (defaults to 512K
	 * but is configurable). This value is used while creating new
	 * rkeys in bind processing (see dapl_tavor_hw.c).
	 * Stash this value in the qp handle, don't want to do this math
	 * for every bind
	 */
	mpt_mask = (uint32_t)ia_ptr->hca_ptr->ia_attr.max_rmrs - 1;
	for (i = 0; mpt_mask > 0; mpt_mask = (mpt_mask >> 1), i++)
		;
	qp_p->qp_num_mpt_shift = (uint32_t)i;

	ep_ptr->qpn = qp_p->qp_num;
	/* update the qp handle in the ep ptr */
	ep_ptr->qp_handle = qp_p;
	/*
	 * ibt_alloc_rc_channel transitions the qp state to INIT.
	 * hence we directly transition from UNATTACHED to INIT
	 */
	ep_ptr->qp_state = IBT_STATE_INIT;

	if (ep_ptr->param.srq_handle) {
		/* insert ep into the SRQ's ep_table */
		dat_status = dapli_ib_srq_add_ep(srq_p->srq_handle,
		    qp_p->qp_num, ep_ptr);
		if (dat_status != DAT_SUCCESS) {
			dapl_dbg_log(DAPL_DBG_TYPE_EP,
			    "qp_alloc: srq_add_ep failed ep_ptr 0x%p, 0x%x\n",
			    ep_ptr, dat_status);
			/*
			 * qp_srq is not set yet, so dapls_ib_qp_free() would
			 * take qp_premature_events for a private array and
			 * free it.  The array is borrowed from the SRQ; drop
			 * our reference so the SRQ's storage stays intact.
			 */
			qp_p->qp_premature_events = NULL;
			(void) dapls_ib_qp_free(ia_ptr, ep_ptr);
			return (DAT_INVALID_PARAMETER);
		}
		qp_p->qp_srq_enabled = 1;
		qp_p->qp_srq = srq_p->srq_handle;
	} else {
		qp_p->qp_srq_enabled = 0;
		qp_p->qp_srq = NULL;
	}
	DAPL_INIT_QP(ia_ptr)(qp_p);

	if (dapls_tavor_wrid_init(qp_p) != DAT_SUCCESS) {
		(void) dapls_ib_qp_free(ia_ptr, ep_ptr);
		return (DAT_ERROR(DAT_INSUFFICIENT_RESOURCES,
		    DAT_RESOURCE_MEMORY));
	}

	return (DAT_SUCCESS);
}


/*
 * dapls_ib_qp_free
 *
 * Free a QP: unmap its work queues, free the EP in the driver and
 * release the qp handle. Nothing is done if the EP has no qp handle or
 * is still in the UNATTACHED state. Failures of munmap/ioctl are logged
 * only.
 *
 * Input:
 *	ia_ptr		IA the EP belongs to
 *	ep_ptr		EP whose qp handle is to be freed
 *
 * Output:
 *	none
 *
 * Returns:
 *	DAT_SUCCESS
 *
 */
DAT_RETURN
dapls_ib_qp_free(IN DAPL_IA *ia_ptr, IN DAPL_EP *ep_ptr)
{
	ib_qp_handle_t	qp_p = ep_ptr->qp_handle;
	ib_hca_handle_t	ib_hca_handle = ia_ptr->hca_ptr->ib_hca_handle;
	dapl_ep_free_t	args;
	int		retval;

	if ((ep_ptr->qp_handle != IB_INVALID_HANDLE) &&
	    (ep_ptr->qp_state != DAPL_QP_STATE_UNATTACHED)) {
		if (munmap((void *)qp_p->qp_addr, qp_p->qp_map_len) < 0) {
			dapl_dbg_log(DAPL_DBG_TYPE_ERR,
			    "qp_free: munmap failed(%d)\n", errno);
		}
		args.epf_hkey = qp_p->ep_hkey;
		retval = ioctl(ib_hca_handle->ia_fd, DAPL_EP_FREE, &args);
		if (retval != 0) {
			dapl_dbg_log(DAPL_DBG_TYPE_EP,
			    "qp_free: ioctl errno = %d, retval = %d\n",
			    errno, retval);
		}
		dapl_dbg_log(DAPL_DBG_TYPE_EP,
		    "qp_free: freed, ep_ptr 0x%p, ep_hkey 0x%016llx\n",
		    ep_ptr, qp_p->ep_hkey);

		if (qp_p->qp_srq) {
			/* premature events belong to the SRQ, don't free */
			dapli_ib_srq_remove_ep(qp_p->qp_srq, qp_p->qp_num);
		} else {
			if (qp_p->qp_premature_events) {
				dapl_os_free(qp_p->qp_premature_events,
				    ep_ptr->param.ep_attr.max_recv_dtos *
				    sizeof (ib_work_completion_t));
			}
		}
		dapl_os_free(qp_p, sizeof (*qp_p));
		ep_ptr->qp_handle = NULL;
	}
	return (DAT_SUCCESS);
}


/*
 * dapls_ib_qp_modify
 *
 * Set the QP to the parameters specified in an EP_PARAM
 *
 * The EP_PARAM structure that is provided has been sanitized such that
 * only non-zero values are valid. Only max_rdma_read_in and
 * max_rdma_read_out result in a driver call here; if neither is set
 * the routine returns DAT_SUCCESS without calling the driver.
 *
 * Input:
 *	ia_ptr		IA the EP belongs to
 *	ep_ptr		EP whose qp handle is to be modified
 *	ep_attr		Sanitized EP Params
 *
 * Output:
 *	none
 *
 * Returns:
 *	DAT_SUCCESS
 *	DAT_INVALID_PARAMETER
 *	the result of dapls_convert_error() when the driver call fails
 *
 */
DAT_RETURN
dapls_ib_qp_modify(IN DAPL_IA *ia_ptr, IN DAPL_EP *ep_ptr,
    IN DAT_EP_ATTR *ep_attr)
{
	dapl_ep_modify_t	epm_args;
	boolean_t		epm_needed;
	int			ia_fd;
	int			retval;

	if (ep_ptr->qp_handle == NULL) {
		dapl_dbg_log(DAPL_DBG_TYPE_EP,
		    "qp_modify: qp_handle == NULL\n");
		return (DAT_INVALID_PARAMETER);
	}
	if (ia_ptr->hca_ptr->ib_hca_handle == NULL) {
		dapl_dbg_log(DAPL_DBG_TYPE_EP,
		    "qp_modify: hca_handle == NULL\n");
		return (DAT_INVALID_PARAMETER);
	}

	epm_needed = B_FALSE;

	/*
	 * NOTE: ep_attr->max_mtu_size indicates the maximum message
	 * size, which is always 2GB for IB. Nothing to do with the IB
	 * implementation, nothing to set up.
	 */

	if (ep_attr->max_rdma_size > 0) {
		if (ep_attr->max_rdma_size > DAPL_IB_MAX_MESSAGE_SIZE) {
			return (DAT_ERROR(DAT_INVALID_PARAMETER, 0));
		}
	}

	(void) memset((void *)&epm_args, 0, sizeof (epm_args));
	/*
	 * The following parameters are dealt by creating a new qp
	 * in dapl_ep_modify.
	 *	- max_recv_dtos
	 *	- max_request_dtos
	 *	- max_recv_iov
	 *	- max_request_iov
	 */

	if (ep_attr->max_rdma_read_in > 0) {
		epm_args.epm_flags |= IBT_CEP_SET_RDMARA_IN;
		epm_args.epm_rdma_ra_in = ep_attr->max_rdma_read_in;
		epm_needed = B_TRUE;
	}
	if (ep_attr->max_rdma_read_out > 0) {
		epm_args.epm_flags |= IBT_CEP_SET_RDMARA_OUT;
		epm_args.epm_rdma_ra_out = ep_attr->max_rdma_read_out;
		epm_needed = B_TRUE;
	}

	if (!epm_needed) {
		dapl_dbg_log(DAPL_DBG_TYPE_EP,
		    "qp_modify: ep_hkey = %016llx nothing to do\n",
		    ep_ptr->qp_handle->ep_hkey);
		return (DAT_SUCCESS);
	}

	epm_args.epm_hkey = ep_ptr->qp_handle->ep_hkey;

	ia_fd = ia_ptr->hca_ptr->ib_hca_handle->ia_fd;

	retval = ioctl(ia_fd, DAPL_EP_MODIFY, &epm_args);
	if (retval != 0) {
		dapl_dbg_log(DAPL_DBG_TYPE_EP,
		    "qp_modify: ioctl failed errno %d, retval %d\n",
		    errno, retval);
		return (dapls_convert_error(errno, retval));
	}

	dapl_dbg_log(DAPL_DBG_TYPE_EP,
	    "qp_modify: ep_hkey = %016llx\n", ep_ptr->qp_handle->ep_hkey);
	return (DAT_SUCCESS);
}

/*
 * dapls_ib_srq_alloc
 *
 * Allocate the srq data structure as well as the kernel resource
 * corresponding to it. On success the new handle is stored in
 * srq_ptr->srq_handle; on failure srq_ptr->srq_handle is left NULL.
 *
 * Input:
 *	ia_ptr		IA the SRQ belongs to
 *	srq_ptr		SRQ whose parameters describe the kernel SRQ
 *
 * Returns:
 *	DAT_SUCCESS
 *	DAT_INVALID_PARAMETER
 *	DAT_INSUFFICIENT_RESOURCES
 *	the result of dapls_convert_error() or dapls_hash_create() when
 *	those fail
 */
DAT_RETURN
dapls_ib_srq_alloc(IN DAPL_IA *ia_ptr, IN DAPL_SRQ *srq_ptr)
{
	dapl_srq_create_t	srqc_args;
	dapl_srq_free_t		srqf_args;
	ib_srq_handle_t		ibsrq_p;
	DAPL_PZ			*pz_handle;
	uint32_t		i;
	size_t			premev_size;
	size_t			freeev_size;
	int			ia_fd;
	int			hca_fd;
	int			retval;
	int			saved_errno;
	mlnx_umap_srq_data_out_t *msrq;

	/* check parameters */
	if (ia_ptr->hca_ptr->ib_hca_handle == NULL) {
		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
		    "srq_alloc: hca_handle == NULL\n");
		return (DAT_ERROR(DAT_INVALID_PARAMETER, DAT_INVALID_ARG1));
	}

	ia_fd = ia_ptr->hca_ptr->ib_hca_handle->ia_fd;
	hca_fd = ia_ptr->hca_ptr->ib_hca_handle->hca_fd;
	dapl_os_assert(srq_ptr->param.pz_handle != NULL);

	/* fill in args for srq_create */
	pz_handle = (DAPL_PZ *)srq_ptr->param.pz_handle;

	ibsrq_p = (ib_srq_handle_t)dapl_os_alloc(sizeof (*ibsrq_p));
	if (ibsrq_p == NULL) {
		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
		    "srq_alloc: os_alloc failed\n");
		return (DAT_ERROR(DAT_INSUFFICIENT_RESOURCES,
		    DAT_RESOURCE_MEMORY));
	}
	(void) dapl_os_memzero(ibsrq_p, sizeof (*ibsrq_p));

	(void) dapl_os_memzero(&srqc_args, sizeof (srqc_args));
	msrq = (mlnx_umap_srq_data_out_t *)srqc_args.srqc_data_out;
	srqc_args.srqc_pd_hkey = pz_handle->pd_handle->pd_hkey;
	srqc_args.srqc_sizes.srqs_sz = srq_ptr->param.max_recv_dtos;
	srqc_args.srqc_sizes.srqs_sgl = srq_ptr->param.max_recv_iov;

	dapl_dbg_log(DAPL_DBG_TYPE_EP,
	    "srq_alloc: srq_ptr 0x%p, pz 0x%p (0x%llx), srq_sz %d"
	    " srq_sgl %d\n",
	    srq_ptr, pz_handle, srqc_args.srqc_pd_hkey,
	    srqc_args.srqc_sizes.srqs_sz, srqc_args.srqc_sizes.srqs_sgl);

	/* The next line is only needed for backward compatibility */
	msrq->msrq_rev = MLNX_UMAP_IF_VERSION;
	retval = ioctl(ia_fd, DAPL_SRQ_CREATE, &srqc_args);
	if (retval != 0 || msrq->msrq_rev != MLNX_UMAP_IF_VERSION) {
		/* capture errno before logging/freeing can overwrite it */
		saved_errno = errno;
		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
		    "srq_alloc: srq_create failed errno %d, retval %d\n",
		    saved_errno, retval);
		dapl_os_free(ibsrq_p, sizeof (*ibsrq_p));
		return (dapls_convert_error(saved_errno, retval));
	}

	/* In the case of Arbel or Hermon */
	if (msrq->msrq_rdbr_mapoffset != 0 || msrq->msrq_rdbr_maplen != 0)
		ibsrq_p->srq_dbp = dapls_ib_get_dbp(
		    msrq->msrq_rdbr_maplen, hca_fd,
		    msrq->msrq_rdbr_mapoffset, msrq->msrq_rdbr_offset);

	ibsrq_p->srq_addr = mmap64((void *)0,
	    msrq->msrq_maplen, (PROT_READ | PROT_WRITE),
	    MAP_SHARED, hca_fd, msrq->msrq_mapoffset);

	if (ibsrq_p->srq_addr == MAP_FAILED ||
	    ibsrq_p->srq_dbp == MAP_FAILED) {
		/* the cleanup calls below may overwrite errno */
		saved_errno = errno;
		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
		    "srq_alloc: mmap failed(%d)\n", saved_errno);
		/* don't leak the SRQ mapping if only the doorbell failed */
		if (ibsrq_p->srq_addr != MAP_FAILED) {
			(void) munmap((void *)ibsrq_p->srq_addr,
			    msrq->msrq_maplen);
		}
		srqf_args.srqf_hkey = srqc_args.srqc_hkey;
		retval = ioctl(ia_fd, DAPL_SRQ_FREE, &srqf_args);
		if (retval != 0) {
			dapl_dbg_log(DAPL_DBG_TYPE_ERR,
			    "srq_alloc: SRQ_FREE err:%d\n", errno);
		}
		dapl_os_free(ibsrq_p, sizeof (*ibsrq_p));
		return (dapls_convert_error(saved_errno, 0));
	}

	ibsrq_p->srq_hkey = srqc_args.srqc_hkey;
	ibsrq_p->srq_map_len = msrq->msrq_maplen;
	ibsrq_p->srq_map_offset = msrq->msrq_mapoffset;
	ibsrq_p->srq_num = msrq->msrq_srqnum;
	ibsrq_p->srq_iauar = ia_ptr->hca_ptr->ib_hca_handle->ia_uar;
	/* since 0 is a valid index, -1 indicates invalid value */
	ibsrq_p->srq_wq_lastwqeindex = -1;
	ibsrq_p->srq_wq_desc_addr = msrq->msrq_desc_addr;
	ibsrq_p->srq_wq_numwqe = msrq->msrq_numwqe;
	ibsrq_p->srq_wq_wqesz = msrq->msrq_wqesz;
	ibsrq_p->srq_wq_sgl = srqc_args.srqc_real_sizes.srqs_sgl;

	/*
	 * update the srq handle in the srq ptr, this is needed since from
	 * here on cleanup is done by calling dapls_ib_srq_free()
	 */
	srq_ptr->srq_handle = ibsrq_p;

	premev_size = ibsrq_p->srq_wq_numwqe * sizeof (ib_work_completion_t);
	ibsrq_p->srq_premature_events = (ib_work_completion_t *)
	    dapl_os_alloc(premev_size);
	if (ibsrq_p->srq_premature_events == NULL) {
		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
		    "srq_alloc: os_alloc premature_events failed\n");
		dapls_ib_srq_free(ia_ptr, srq_ptr);
		srq_ptr->srq_handle = NULL;
		return (DAT_ERROR(DAT_INSUFFICIENT_RESOURCES,
		    DAT_RESOURCE_MEMORY));
	}

	freeev_size = ibsrq_p->srq_wq_numwqe * sizeof (uint32_t);
	ibsrq_p->srq_freepr_events = (uint32_t *)dapl_os_alloc(freeev_size);
	if (ibsrq_p->srq_freepr_events == NULL) {
		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
		    "srq_alloc: os_alloc freepr_events failed\n");
		dapls_ib_srq_free(ia_ptr, srq_ptr);
		srq_ptr->srq_handle = NULL;
		return (DAT_ERROR(DAT_INSUFFICIENT_RESOURCES,
		    DAT_RESOURCE_MEMORY));
	}
	ibsrq_p->srq_freepr_head = 0;
	ibsrq_p->srq_freepr_tail = 0;
	ibsrq_p->srq_freepr_num_events = ibsrq_p->srq_wq_numwqe;

	/* initialize the free list of premature events */
	for (i = 0; i < ibsrq_p->srq_freepr_num_events; i++) {
		ibsrq_p->srq_freepr_events[i] = i;
		/*
		 * wc_res_hash field is used to mark entries in the premature
		 * events list
		 */
		DAPL_SET_CQE_INVALID(&(ibsrq_p->srq_premature_events[i]));
	}

	dapl_dbg_log(DAPL_DBG_TYPE_EP,
	    "srq_alloc: created, srq_ptr 0x%p, srq_hkey 0x%016llx\n",
	    srq_ptr, srqc_args.srqc_hkey);

	DAPL_INIT_SRQ(ia_ptr)(ibsrq_p);

	if (dapls_tavor_srq_wrid_init(ibsrq_p) != DAT_SUCCESS) {
		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
		    "srq_alloc: wridlist alloc failed\n");
		dapls_ib_srq_free(ia_ptr, srq_ptr);
		srq_ptr->srq_handle = NULL;
		return (DAT_ERROR(DAT_INSUFFICIENT_RESOURCES,
		    DAT_RESOURCE_MEMORY));
	}
	ibsrq_p->srq_ep_table = NULL;
	/* allocate a hash table to store EPs */
	retval = dapls_hash_create(DAPL_HASH_TABLE_DEFAULT_CAPACITY,
	    DAT_FALSE, &ibsrq_p->srq_ep_table);
	if (retval != DAT_SUCCESS) {
		dapl_dbg_log(DAPL_DBG_TYPE_ERR, "dapls_ib_srq_alloc hash "
		    "create failed %d\n", retval);
		dapls_ib_srq_free(ia_ptr, srq_ptr);
		srq_ptr->srq_handle = NULL;
		return (retval);
	}

	return (DAT_SUCCESS);
}


/*
 * dapls_ib_srq_free
 *
 * SRQ Free routine: unmap the SRQ, free it in the driver and release
 * every piece of memory hanging off the srq handle. Does nothing if
 * the SRQ has no handle. munmap/ioctl failures are logged only.
 *
 * Input:
 *	ia_handle	IA the SRQ belongs to
 *	srq_ptr		SRQ whose handle is to be freed; srq_handle is
 *			set to NULL on return
 */
void
dapls_ib_srq_free(IN DAPL_IA *ia_handle, IN DAPL_SRQ *srq_ptr)
{
	ib_srq_handle_t	srq_handle = srq_ptr->srq_handle;
	ib_hca_handle_t	ib_hca_handle = ia_handle->hca_ptr->ib_hca_handle;
	dapl_srq_free_t	srqf_args;
	int		retval;

	if (srq_handle == IB_INVALID_HANDLE) {
		return; /* nothing to do */
	}

	if (munmap((void *)srq_handle->srq_addr, srq_handle->srq_map_len) < 0) {
		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
		    "srq_free: munmap failed(%d)\n", errno);
	}
	srqf_args.srqf_hkey = srq_handle->srq_hkey;
	retval = ioctl(ib_hca_handle->ia_fd, DAPL_SRQ_FREE, &srqf_args);
	if (retval != 0) {
		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
		    "srq_free: ioctl errno = %d, retval = %d\n", errno, retval);
	}
	dapl_dbg_log(DAPL_DBG_TYPE_EP,
	    "srq_free: freed, srq_ptr 0x%p, srq_hkey 0x%016llx\n",
	    srq_ptr, srq_handle->srq_hkey);
	if (srq_handle->srq_ep_table) {
		(void) dapls_hash_free(srq_handle->srq_ep_table);
	}
	if (srq_handle->srq_wridlist) {
		dapls_tavor_srq_wrid_free(srq_handle);
	}
	/*
	 * The sizes passed to dapl_os_free() must match the allocations in
	 * dapls_ib_srq_alloc(): the free list holds uint32_t indices, the
	 * premature events list holds ib_work_completion_t entries.
	 */
	if (srq_handle->srq_freepr_events) {
		dapl_os_free(srq_handle->srq_freepr_events,
		    srq_handle->srq_wq_numwqe * sizeof (uint32_t));
	}
	if (srq_handle->srq_premature_events) {
		dapl_os_free(srq_handle->srq_premature_events,
		    srq_handle->srq_wq_numwqe * sizeof (ib_work_completion_t));
	}
	dapl_os_free(srq_handle, sizeof (*srq_handle));
	srq_ptr->srq_handle = NULL;
}

/*
 * Adds EP to a hashtable in SRQ, keyed by the EP's QP number.
 * Returns the result of dapls_hash_insert().
 */
static DAT_RETURN
dapli_ib_srq_add_ep(IN ib_srq_handle_t srq_ptr, IN uint32_t qp_num,
    IN DAPL_EP *ep_ptr)
{
	DAPL_HASH_TABLE	*htable;
	DAPL_HASH_KEY	key;

	dapl_os_assert(srq_ptr);

	htable = srq_ptr->srq_ep_table;
	key = qp_num;
	dapl_dbg_log(DAPL_DBG_TYPE_EP,
	    "srq_insert_ep:%p %p %llx\n", srq_ptr, htable, key);
	return (dapls_hash_insert(htable, key, ep_ptr));
}

/*
 * Removes an EP from the hashtable in SRQ. A failed removal is only
 * logged.
 */
static void
dapli_ib_srq_remove_ep(IN ib_srq_handle_t srq_ptr, IN uint32_t qp_num)
{
	DAPL_HASH_TABLE	*htable;
	DAPL_HASH_KEY	key;
	DAPL_EP		*epp;
	DAT_RETURN	retval;

	dapl_os_assert(srq_ptr);

	htable = srq_ptr->srq_ep_table;
	key = qp_num;

	retval = dapls_hash_remove(htable, key, (DAPL_HASH_DATA *)&epp);
	if (retval != DAT_SUCCESS) {
		dapl_dbg_log(DAPL_DBG_TYPE_EP,
		    "srq_remove_ep(%d): %p %llx\n", retval, htable, key);
	}
}

/*
 * Lookup an EP from the hashtable in SRQ, using the QP number carried
 * in the given CQE as the key. Returns NULL if the search does not set
 * a result.
 */
DAPL_EP *
dapls_ib_srq_lookup_ep(IN DAPL_SRQ *srq_ptr, IN ib_work_completion_t *cqe_ptr)
{
	DAPL_HASH_TABLE	*htable;
	DAPL_HASH_KEY	key;
	DAPL_EP		*epp;
	DAT_RETURN	retval;

	dapl_os_assert(srq_ptr && srq_ptr->srq_handle);

	htable = srq_ptr->srq_handle->srq_ep_table;
	key = DAPL_GET_CQE_QPN(cqe_ptr);
	epp = NULL;

	retval = dapls_hash_search(htable, key, (DAPL_HASH_DATA *)&epp);
	if (retval != DAT_SUCCESS) {
		dapl_dbg_log(DAPL_DBG_TYPE_EP,
		    "srq_lookup_ep(%x): %p %llx\n", retval, htable, key);
	}
	return (epp);
}


/*
 * dapls_ib_srq_resize
 *
 * Resize an SRQ. If the resize fails for lack of resources an attempt
 * is made to resize back to the current size, and the original failure
 * is reported to the caller.
 *
 * Input:
 *	srq_ptr		pointer to SRQ struct
 *	srqlen		new length of the SRQ
 * Output:
 *	none
 *
 * Returns:
 *	DAT_SUCCESS
 *	DAT_INVALID_HANDLE
 *	DAT_INTERNAL_ERROR
 *	DAT_INSUFFICIENT_RESOURCES
 *
 */
DAT_RETURN
dapls_ib_srq_resize(
	IN  DAPL_SRQ		*srq_ptr,
	IN  DAT_COUNT		srqlen)
{
	ib_srq_handle_t	srq_handle;
	DAT_RETURN	dat_status;
	DAT_RETURN	restore_status;

	dat_status = dapli_ib_srq_resize_internal(srq_ptr, srqlen);
	if (DAT_INSUFFICIENT_RESOURCES == DAT_GET_TYPE(dat_status)) {
		srq_handle = srq_ptr->srq_handle;
		/* attempt to resize back to the current size */
		restore_status = dapli_ib_srq_resize_internal(srq_ptr,
		    srq_handle->srq_wq_numwqe);
		if (DAT_SUCCESS != restore_status) {
			/*
			 * XXX this is catastrophic need to post an event
			 * to the async evd
			 */
			return (DAT_INTERNAL_ERROR);
		}
		/*
		 * The SRQ is usable again at its old size, but the
		 * requested resize did not happen: fall through and
		 * report the original failure, not the restore's success.
		 */
	}

	return (dat_status);
}

/*
 * dapli_ib_srq_resize_internal
 *
 * An internal routine to resize a SRQ: unmap the current SRQ, resize
 * it in the driver, map the resized SRQ and move the premature events
 * bookkeeping over to arrays of the new size.
 *
 * Input:
 *	srq_ptr		pointer to SRQ struct
 *	srqlen		new length of the srq
 * Output:
 *	none
 *
 * Returns:
 *	DAT_SUCCESS
 *	DAT_INVALID_HANDLE
 *	DAT_INSUFFICIENT_RESOURCES
 *
 */
static DAT_RETURN
dapli_ib_srq_resize_internal(
	IN  DAPL_SRQ		*srq_ptr,
	IN  DAT_COUNT		srqlen)
{
	ib_srq_handle_t		srq_handle;
	dapl_srq_resize_t	resize_msg;
	int			ia_fd;
	int			hca_fd;
	/* both start out NULL so that "bail" only frees what was allocated */
	ib_work_completion_t	*new_premature_events = NULL;
	ib_work_completion_t	*old_premature_events;
	uint32_t		*new_freepr_events = NULL;
	uint32_t		*old_freepr_events;
	size_t			old_premature_size;
	size_t			old_freepr_size;
	size_t			new_premature_size;
	size_t			new_freepr_size;
	int			idx, i;
	int			retval;
	int			saved_errno;
	mlnx_umap_srq_data_out_t *msrq;

	dapl_dbg_log(DAPL_DBG_TYPE_EP,
	    "dapls_ib_srq_resize: srq 0x%p srq_hdl 0x%p "
	    "srq_hkey 0x%016llx srqlen %d\n",
	    srq_ptr, (void *)srq_ptr->srq_handle,
	    srq_ptr->srq_handle->srq_hkey, srqlen);

	srq_handle = srq_ptr->srq_handle;
	/*
	 * Since SRQs are created in powers of 2 its possible that the
	 * previously allocated SRQ has sufficient entries. If the current
	 * SRQ is big enough and it is mapped we are done.
	 */
	if ((srqlen < srq_handle->srq_wq_numwqe) && (srq_handle->srq_addr)) {
		return (DAT_SUCCESS);
	}

	/* unmap the SRQ before resizing it */
	if ((srq_handle->srq_addr) && (munmap((char *)srq_handle->srq_addr,
	    srq_handle->srq_map_len) < 0)) {
		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
		    "srq_resize: munmap(%p:0x%llx) failed(%d)\n",
		    srq_handle->srq_addr, srq_handle->srq_map_len, errno);
		return (DAT_ERROR(DAT_INVALID_HANDLE, DAT_INVALID_HANDLE_SRQ));
	}
	/* srq_addr is unmapped and no longer valid */
	srq_handle->srq_addr = NULL;

	ia_fd = srq_ptr->header.owner_ia->hca_ptr->ib_hca_handle->ia_fd;
	hca_fd = srq_ptr->header.owner_ia->hca_ptr->ib_hca_handle->hca_fd;

	(void) dapl_os_memzero(&resize_msg, sizeof (resize_msg));
	resize_msg.srqr_hkey = srq_handle->srq_hkey;
	resize_msg.srqr_new_size = srqlen;
	msrq = (mlnx_umap_srq_data_out_t *)resize_msg.srqr_data_out;

	/* The next line is only needed for backward compatibility */
	msrq->msrq_rev = MLNX_UMAP_IF_VERSION;
	retval = ioctl(ia_fd, DAPL_SRQ_RESIZE, &resize_msg);
	if (retval != 0 || msrq->msrq_rev != MLNX_UMAP_IF_VERSION) {
		/* logging may overwrite errno, test the saved value */
		saved_errno = errno;
		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
		    "dapls_ib_srq_resize: srq 0x%p, err: %s\n",
		    srq_ptr, strerror(saved_errno));
		if (saved_errno == EINVAL) { /* Couldn't find this srq */
			return (DAT_ERROR(DAT_INVALID_HANDLE,
			    DAT_INVALID_HANDLE_SRQ));
		} else { /* Need to retry resize with a smaller qlen */
			return (DAT_ERROR(DAT_INSUFFICIENT_RESOURCES,
			    DAT_RESOURCE_SRQ));
		}
	}

	dapl_os_assert(srq_handle->srq_num == msrq->msrq_srqnum);

	/* In the case of Arbel or Hermon */
	if (msrq->msrq_rdbr_mapoffset != 0 ||
	    msrq->msrq_rdbr_maplen != 0)
		srq_handle->srq_dbp = dapls_ib_get_dbp(
		    msrq->msrq_rdbr_maplen,
		    hca_fd, msrq->msrq_rdbr_mapoffset,
		    msrq->msrq_rdbr_offset);

	srq_handle->srq_addr = mmap64((void *)0,
	    msrq->msrq_maplen, (PROT_READ | PROT_WRITE),
	    MAP_SHARED, hca_fd, msrq->msrq_mapoffset);

	if (srq_handle->srq_addr == MAP_FAILED ||
	    srq_handle->srq_dbp == MAP_FAILED) {
		srq_handle->srq_addr = NULL;
		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
		    "srq_resize: mmap failed(%d)\n", errno);
		/* Need to retry resize with a smaller qlen */
		return (DAT_ERROR(DAT_INSUFFICIENT_RESOURCES,
		    DAT_RESOURCE_MEMORY));
	}

	/*
	 * srq_addr now refers to the new mapping, so the fields describing
	 * the mapping must follow right away: if we bail below, the next
	 * resize attempt has to munmap srq_addr with the matching length.
	 */
	srq_handle->srq_map_offset = msrq->msrq_mapoffset;
	srq_handle->srq_map_len = msrq->msrq_maplen;

	old_freepr_size = srq_handle->srq_wq_numwqe * sizeof (uint32_t);
	old_premature_size = srq_handle->srq_wq_numwqe *
	    sizeof (ib_work_completion_t);

	old_freepr_events = srq_handle->srq_freepr_events;
	old_premature_events = srq_handle->srq_premature_events;

	new_freepr_size = resize_msg.srqr_real_size * sizeof (uint32_t);
	new_premature_size = resize_msg.srqr_real_size *
	    sizeof (ib_work_completion_t);

	new_freepr_events = (uint32_t *)dapl_os_alloc(new_freepr_size);
	if (new_freepr_events == NULL) {
		goto bail;
	}
	new_premature_events = (ib_work_completion_t *)dapl_os_alloc(
	    new_premature_size);
	if (new_premature_events == NULL) {
		goto bail;
	}
	if (!dapls_tavor_srq_wrid_resize(srq_handle,
	    resize_msg.srqr_real_size)) {
		goto bail;
	}
	idx = 0;
	/* copy valid premature events */
	for (i = 0; i < srq_handle->srq_wq_numwqe; i++) {
		if (!DAPL_CQE_IS_VALID(&old_premature_events[i])) {
			continue;
		}
		(void) dapl_os_memcpy(&new_premature_events[idx],
		    &old_premature_events[i], sizeof (ib_work_completion_t));
		idx++;
	}
	dapl_os_assert(srq_handle->srq_wq_numwqe - idx ==
	    srq_handle->srq_freepr_num_events);

	/*
	 * Initialize free events lists.  The slots behind the copied
	 * events are fresh memory; mark them invalid as is done in
	 * dapls_ib_srq_alloc() so that a later resize does not mistake
	 * them for valid premature events.
	 */
	for (i = 0; i < resize_msg.srqr_real_size - idx; i++) {
		new_freepr_events[i] = idx + i;
		DAPL_SET_CQE_INVALID(&new_premature_events[idx + i]);
	}

	srq_handle->srq_freepr_events = new_freepr_events;
	srq_handle->srq_premature_events = new_premature_events;
	srq_handle->srq_freepr_num_events = resize_msg.srqr_real_size - idx;
	srq_handle->srq_freepr_head = 0;
	/* a full freepr list has tail at 0 */
	if (idx == 0) {
		srq_handle->srq_freepr_tail = 0;
	} else {
		srq_handle->srq_freepr_tail =
		    srq_handle->srq_freepr_num_events;
	}

	if (old_freepr_events) {
		dapl_os_free(old_freepr_events, old_freepr_size);
	}
	if (old_premature_events) {
		dapl_os_free(old_premature_events, old_premature_size);
	}

	/*
	 * update the remaining srq fields,
	 * note: the srq_wq_lastwqeindex doesn't change since the old
	 * work queue is copied as a whole into the new work queue.
	 */
	srq_handle->srq_wq_desc_addr = msrq->msrq_desc_addr;
	srq_handle->srq_wq_numwqe = msrq->msrq_numwqe;
	srq_handle->srq_wq_wqesz = msrq->msrq_wqesz;

	return (DAT_SUCCESS);
bail:
	if (new_freepr_events) {
		dapl_os_free(new_freepr_events, new_freepr_size);
	}
	if (new_premature_events) {
		dapl_os_free(new_premature_events, new_premature_size);
	}
	return (DAT_ERROR(DAT_INSUFFICIENT_RESOURCES, DAT_RESOURCE_MEMORY));
}