/*
 * Copyright (c) 2012 Mellanox Technologies, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <config.h>

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <pthread.h>
#include <errno.h>
#include <limits.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>

#include "mlx5.h"
#include "mlx5-abi.h"
#include "wqe.h"

int mlx5_single_threaded = 0;

static inline int is_xrc_tgt(int type)
{
	return type == IBV_QPT_XRC_RECV;
}

int mlx5_query_device(struct ibv_context *context, struct ibv_device_attr *attr)
{
	struct ibv_query_device cmd;
	uint64_t raw_fw_ver;
	unsigned major, minor, sub_minor;
	int ret;

	ret = ibv_cmd_query_device(context, attr, &raw_fw_ver, &cmd, sizeof cmd);
	if (ret)
		return ret;

	major = (raw_fw_ver >> 32) & 0xffff;
	minor = (raw_fw_ver >> 16) & 0xffff;
	sub_minor = raw_fw_ver & 0xffff;

	snprintf(attr->fw_ver, sizeof attr->fw_ver,
		 "%d.%d.%04d", major, minor, sub_minor);

	return 0;
}

#define READL(ptr) (*((uint32_t *)(ptr)))
static int mlx5_read_clock(struct ibv_context *context, uint64_t *cycles)
{
	unsigned int clockhi, clocklo, clockhi1;
	int i;
	struct mlx5_context *ctx = to_mctx(context);

	if (!ctx->hca_core_clock)
		return -EOPNOTSUPP;

	/* Handle wraparound */
	for (i = 0; i < 2; i++) {
		clockhi = be32toh(READL(ctx->hca_core_clock));
		clocklo = be32toh(READL(ctx->hca_core_clock + 4));
		clockhi1 = be32toh(READL(ctx->hca_core_clock));
		if (clockhi == clockhi1)
			break;
	}

	*cycles = (uint64_t)clockhi << 32 | (uint64_t)clocklo;

	return 0;
}

int mlx5_query_rt_values(struct ibv_context *context,
			 struct ibv_values_ex *values)
{
	uint32_t comp_mask = 0;
	int err = 0;

	if (values->comp_mask & IBV_VALUES_MASK_RAW_CLOCK) {
		uint64_t cycles;

		err = mlx5_read_clock(context, &cycles);
		if (!err) {
			values->raw_clock.tv_sec = 0;
			values->raw_clock.tv_nsec = cycles;
			comp_mask |= IBV_VALUES_MASK_RAW_CLOCK;
		}
	}

	values->comp_mask = comp_mask;

	return err;
}

int mlx5_query_port(struct ibv_context *context, uint8_t port,
		    struct ibv_port_attr *attr)
{
	struct ibv_query_port cmd;

	return ibv_cmd_query_port(context, port, attr, &cmd, sizeof cmd);
}

struct ibv_pd *mlx5_alloc_pd(struct ibv_context *context)
{
	struct ibv_alloc_pd cmd;
	struct mlx5_alloc_pd_resp resp;
	struct mlx5_pd *pd;

	pd = calloc(1, sizeof *pd);
	if (!pd)
		return NULL;

	if (ibv_cmd_alloc_pd(context, &pd->ibv_pd, &cmd, sizeof cmd,
			     &resp.ibv_resp, sizeof resp)) {
		free(pd);
		return NULL;
	}

	pd->pdn =
resp.pdn; return &pd->ibv_pd; } int mlx5_free_pd(struct ibv_pd *pd) { int ret; ret = ibv_cmd_dealloc_pd(pd); if (ret) return ret; free(to_mpd(pd)); return 0; } struct ibv_mr *mlx5_reg_mr(struct ibv_pd *pd, void *addr, size_t length, int acc) { struct mlx5_mr *mr; struct ibv_reg_mr cmd; int ret; enum ibv_access_flags access = (enum ibv_access_flags)acc; struct ibv_reg_mr_resp resp; mr = calloc(1, sizeof(*mr)); if (!mr) return NULL; ret = ibv_cmd_reg_mr(pd, addr, length, (uintptr_t)addr, access, &(mr->ibv_mr), &cmd, sizeof(cmd), &resp, sizeof resp); if (ret) { mlx5_free_buf(&(mr->buf)); free(mr); return NULL; } mr->alloc_flags = acc; return &mr->ibv_mr; } int mlx5_rereg_mr(struct ibv_mr *ibmr, int flags, struct ibv_pd *pd, void *addr, size_t length, int access) { struct ibv_rereg_mr cmd; struct ibv_rereg_mr_resp resp; if (flags & IBV_REREG_MR_KEEP_VALID) return ENOTSUP; return ibv_cmd_rereg_mr(ibmr, flags, addr, length, (uintptr_t)addr, access, pd, &cmd, sizeof(cmd), &resp, sizeof(resp)); } int mlx5_dereg_mr(struct ibv_mr *ibmr) { int ret; struct mlx5_mr *mr = to_mmr(ibmr); ret = ibv_cmd_dereg_mr(ibmr); if (ret) return ret; free(mr); return 0; } struct ibv_mw *mlx5_alloc_mw(struct ibv_pd *pd, enum ibv_mw_type type) { struct ibv_mw *mw; struct ibv_alloc_mw cmd; struct ibv_alloc_mw_resp resp; int ret; mw = malloc(sizeof(*mw)); if (!mw) return NULL; memset(mw, 0, sizeof(*mw)); ret = ibv_cmd_alloc_mw(pd, type, mw, &cmd, sizeof(cmd), &resp, sizeof(resp)); if (ret) { free(mw); return NULL; } return mw; } int mlx5_dealloc_mw(struct ibv_mw *mw) { int ret; struct ibv_dealloc_mw cmd; ret = ibv_cmd_dealloc_mw(mw, &cmd, sizeof(cmd)); if (ret) return ret; free(mw); return 0; } int mlx5_round_up_power_of_two(long long sz) { long long ret; for (ret = 1; ret < sz; ret <<= 1) ; /* nothing */ if (ret > INT_MAX) { fprintf(stderr, "%s: roundup overflow\n", __func__); return -ENOMEM; } return (int)ret; } static int align_queue_size(long long req) { return mlx5_round_up_power_of_two(req); } static int get_cqe_size(void) { char *env; int size = 64; env = getenv("MLX5_CQE_SIZE"); if (env) size = atoi(env); switch (size) { case 64: case 128: return size; default: return -EINVAL; } } static int use_scatter_to_cqe(void) { char *env; env = getenv("MLX5_SCATTER_TO_CQE"); if (env && !strcmp(env, "0")) return 0; return 1; } static int srq_sig_enabled(void) { char *env; env = getenv("MLX5_SRQ_SIGNATURE"); if (env) return 1; return 0; } static int qp_sig_enabled(void) { char *env; env = getenv("MLX5_QP_SIGNATURE"); if (env) return 1; return 0; } enum { CREATE_CQ_SUPPORTED_WC_FLAGS = IBV_WC_STANDARD_FLAGS | IBV_WC_EX_WITH_COMPLETION_TIMESTAMP | IBV_WC_EX_WITH_CVLAN | IBV_WC_EX_WITH_FLOW_TAG }; enum { CREATE_CQ_SUPPORTED_COMP_MASK = IBV_CQ_INIT_ATTR_MASK_FLAGS }; enum { CREATE_CQ_SUPPORTED_FLAGS = IBV_CREATE_CQ_ATTR_SINGLE_THREADED }; static struct ibv_cq_ex *create_cq(struct ibv_context *context, const struct ibv_cq_init_attr_ex *cq_attr, int cq_alloc_flags, struct mlx5dv_cq_init_attr *mlx5cq_attr) { struct mlx5_create_cq cmd; struct mlx5_create_cq_resp resp; struct mlx5_cq *cq; int cqe_sz; int ret; int ncqe; struct mlx5_context *mctx = to_mctx(context); FILE *fp = to_mctx(context)->dbg_fp; if (!cq_attr->cqe) { mlx5_dbg(fp, MLX5_DBG_CQ, "CQE invalid\n"); errno = EINVAL; return NULL; } if (cq_attr->comp_mask & ~CREATE_CQ_SUPPORTED_COMP_MASK) { mlx5_dbg(fp, MLX5_DBG_CQ, "Unsupported comp_mask for create_cq\n"); errno = EINVAL; return NULL; } if (cq_attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_FLAGS && cq_attr->flags & 
~CREATE_CQ_SUPPORTED_FLAGS) { mlx5_dbg(fp, MLX5_DBG_CQ, "Unsupported creation flags requested for create_cq\n"); errno = EINVAL; return NULL; } if (cq_attr->wc_flags & ~CREATE_CQ_SUPPORTED_WC_FLAGS) { mlx5_dbg(fp, MLX5_DBG_CQ, "\n"); errno = ENOTSUP; return NULL; } cq = calloc(1, sizeof *cq); if (!cq) { mlx5_dbg(fp, MLX5_DBG_CQ, "\n"); return NULL; } memset(&cmd, 0, sizeof cmd); cq->cons_index = 0; if (mlx5_spinlock_init(&cq->lock)) goto err; ncqe = align_queue_size(cq_attr->cqe + 1); if ((ncqe > (1 << 24)) || (ncqe < (cq_attr->cqe + 1))) { mlx5_dbg(fp, MLX5_DBG_CQ, "ncqe %d\n", ncqe); errno = EINVAL; goto err_spl; } cqe_sz = get_cqe_size(); if (cqe_sz < 0) { mlx5_dbg(fp, MLX5_DBG_CQ, "\n"); errno = -cqe_sz; goto err_spl; } if (mlx5_alloc_cq_buf(to_mctx(context), cq, &cq->buf_a, ncqe, cqe_sz)) { mlx5_dbg(fp, MLX5_DBG_CQ, "\n"); goto err_spl; } cq->dbrec = mlx5_alloc_dbrec(to_mctx(context)); if (!cq->dbrec) { mlx5_dbg(fp, MLX5_DBG_CQ, "\n"); goto err_buf; } cq->dbrec[MLX5_CQ_SET_CI] = 0; cq->dbrec[MLX5_CQ_ARM_DB] = 0; cq->arm_sn = 0; cq->cqe_sz = cqe_sz; cq->flags = cq_alloc_flags; if (cq_attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_FLAGS && cq_attr->flags & IBV_CREATE_CQ_ATTR_SINGLE_THREADED) cq->flags |= MLX5_CQ_FLAGS_SINGLE_THREADED; cmd.buf_addr = (uintptr_t) cq->buf_a.buf; cmd.db_addr = (uintptr_t) cq->dbrec; cmd.cqe_size = cqe_sz; if (mlx5cq_attr) { if (mlx5cq_attr->comp_mask & ~(MLX5DV_CQ_INIT_ATTR_MASK_RESERVED - 1)) { mlx5_dbg(fp, MLX5_DBG_CQ, "Unsupported vendor comp_mask for create_cq\n"); errno = EINVAL; goto err_db; } if (mlx5cq_attr->comp_mask & MLX5DV_CQ_INIT_ATTR_MASK_COMPRESSED_CQE) { if (mctx->cqe_comp_caps.max_num && (mlx5cq_attr->cqe_comp_res_format & mctx->cqe_comp_caps.supported_format)) { cmd.cqe_comp_en = 1; cmd.cqe_comp_res_format = mlx5cq_attr->cqe_comp_res_format; } else { mlx5_dbg(fp, MLX5_DBG_CQ, "CQE Compression is not supported\n"); errno = EINVAL; goto err_db; } } } ret = ibv_cmd_create_cq(context, ncqe - 1, cq_attr->channel, cq_attr->comp_vector, ibv_cq_ex_to_cq(&cq->ibv_cq), &cmd.ibv_cmd, sizeof(cmd), &resp.ibv_resp, sizeof(resp)); if (ret) { mlx5_dbg(fp, MLX5_DBG_CQ, "ret %d\n", ret); goto err_db; } cq->active_buf = &cq->buf_a; cq->resize_buf = NULL; cq->cqn = resp.cqn; cq->stall_enable = to_mctx(context)->stall_enable; cq->stall_adaptive_enable = to_mctx(context)->stall_adaptive_enable; cq->stall_cycles = to_mctx(context)->stall_cycles; if (cq_alloc_flags & MLX5_CQ_FLAGS_EXTENDED) mlx5_cq_fill_pfns(cq, cq_attr); return &cq->ibv_cq; err_db: mlx5_free_db(to_mctx(context), cq->dbrec); err_buf: mlx5_free_cq_buf(to_mctx(context), &cq->buf_a); err_spl: mlx5_spinlock_destroy(&cq->lock); err: free(cq); return NULL; } struct ibv_cq *mlx5_create_cq(struct ibv_context *context, int cqe, struct ibv_comp_channel *channel, int comp_vector) { struct ibv_cq_ex *cq; struct ibv_cq_init_attr_ex cq_attr = {.cqe = cqe, .channel = channel, .comp_vector = comp_vector, .wc_flags = IBV_WC_STANDARD_FLAGS}; if (cqe <= 0) { errno = EINVAL; return NULL; } cq = create_cq(context, &cq_attr, 0, NULL); return cq ? 
ibv_cq_ex_to_cq(cq) : NULL; } struct ibv_cq_ex *mlx5_create_cq_ex(struct ibv_context *context, struct ibv_cq_init_attr_ex *cq_attr) { return create_cq(context, cq_attr, MLX5_CQ_FLAGS_EXTENDED, NULL); } struct ibv_cq_ex *mlx5dv_create_cq(struct ibv_context *context, struct ibv_cq_init_attr_ex *cq_attr, struct mlx5dv_cq_init_attr *mlx5_cq_attr) { struct ibv_cq_ex *cq; int err = 0; cq = create_cq(context, cq_attr, MLX5_CQ_FLAGS_EXTENDED, mlx5_cq_attr); if (!cq) return NULL; err = verbs_init_cq(ibv_cq_ex_to_cq(cq), context, cq_attr->channel, cq_attr->cq_context); if (err) goto err; return cq; err: context->ops.destroy_cq(ibv_cq_ex_to_cq(cq)); return NULL; } int mlx5_resize_cq(struct ibv_cq *ibcq, int cqe) { struct mlx5_cq *cq = to_mcq(ibcq); struct mlx5_resize_cq_resp resp; struct mlx5_resize_cq cmd; struct mlx5_context *mctx = to_mctx(ibcq->context); int err; if (cqe < 0) { errno = EINVAL; return errno; } memset(&cmd, 0, sizeof(cmd)); memset(&resp, 0, sizeof(resp)); if (((long long)cqe * 64) > INT_MAX) return EINVAL; mlx5_spin_lock(&cq->lock); cq->active_cqes = cq->ibv_cq.cqe; if (cq->active_buf == &cq->buf_a) cq->resize_buf = &cq->buf_b; else cq->resize_buf = &cq->buf_a; cqe = align_queue_size(cqe + 1); if (cqe == ibcq->cqe + 1) { cq->resize_buf = NULL; err = 0; goto out; } /* currently we don't change cqe size */ cq->resize_cqe_sz = cq->cqe_sz; cq->resize_cqes = cqe; err = mlx5_alloc_cq_buf(mctx, cq, cq->resize_buf, cq->resize_cqes, cq->resize_cqe_sz); if (err) { cq->resize_buf = NULL; errno = ENOMEM; goto out; } cmd.buf_addr = (uintptr_t)cq->resize_buf->buf; cmd.cqe_size = cq->resize_cqe_sz; err = ibv_cmd_resize_cq(ibcq, cqe - 1, &cmd.ibv_cmd, sizeof(cmd), &resp.ibv_resp, sizeof(resp)); if (err) goto out_buf; mlx5_cq_resize_copy_cqes(cq); mlx5_free_cq_buf(mctx, cq->active_buf); cq->active_buf = cq->resize_buf; cq->ibv_cq.cqe = cqe - 1; mlx5_spin_unlock(&cq->lock); cq->resize_buf = NULL; return 0; out_buf: mlx5_free_cq_buf(mctx, cq->resize_buf); cq->resize_buf = NULL; out: mlx5_spin_unlock(&cq->lock); return err; } int mlx5_destroy_cq(struct ibv_cq *cq) { int ret; struct mlx5_cq *mcq = to_mcq(cq); ret = ibv_cmd_destroy_cq(cq); if (ret) return ret; verbs_cleanup_cq(cq); mlx5_free_db(to_mctx(cq->context), to_mcq(cq)->dbrec); mlx5_free_cq_buf(to_mctx(cq->context), to_mcq(cq)->active_buf); mlx5_spinlock_destroy(&mcq->lock); free(to_mcq(cq)); return 0; } struct ibv_srq *mlx5_create_srq(struct ibv_pd *pd, struct ibv_srq_init_attr *attr) { struct mlx5_create_srq cmd; struct mlx5_create_srq_resp resp; struct mlx5_srq *srq; int ret; struct mlx5_context *ctx; int max_sge; struct ibv_srq *ibsrq; ctx = to_mctx(pd->context); srq = calloc(1, sizeof *srq); if (!srq) { fprintf(stderr, "%s-%d:\n", __func__, __LINE__); return NULL; } ibsrq = &srq->vsrq.srq; memset(&cmd, 0, sizeof cmd); if (mlx5_spinlock_init(&srq->lock)) { fprintf(stderr, "%s-%d:\n", __func__, __LINE__); goto err; } if (attr->attr.max_wr > ctx->max_srq_recv_wr) { fprintf(stderr, "%s-%d:max_wr %d, max_srq_recv_wr %d\n", __func__, __LINE__, attr->attr.max_wr, ctx->max_srq_recv_wr); errno = EINVAL; goto err_spl; } /* * this calculation does not consider required control segments. The * final calculation is done again later. 
This is done so to avoid * overflows of variables */ max_sge = ctx->max_rq_desc_sz / sizeof(struct mlx5_wqe_data_seg); if (attr->attr.max_sge > max_sge) { fprintf(stderr, "%s-%d:max_wr %d, max_srq_recv_wr %d\n", __func__, __LINE__, attr->attr.max_wr, ctx->max_srq_recv_wr); errno = EINVAL; goto err_spl; } srq->max = align_queue_size(attr->attr.max_wr + 1); srq->max_gs = attr->attr.max_sge; srq->counter = 0; if (mlx5_alloc_srq_buf(pd->context, srq)) { fprintf(stderr, "%s-%d:\n", __func__, __LINE__); goto err_spl; } srq->db = mlx5_alloc_dbrec(to_mctx(pd->context)); if (!srq->db) { fprintf(stderr, "%s-%d:\n", __func__, __LINE__); goto err_free; } *srq->db = 0; cmd.buf_addr = (uintptr_t) srq->buf.buf; cmd.db_addr = (uintptr_t) srq->db; srq->wq_sig = srq_sig_enabled(); if (srq->wq_sig) cmd.flags = MLX5_SRQ_FLAG_SIGNATURE; attr->attr.max_sge = srq->max_gs; pthread_mutex_lock(&ctx->srq_table_mutex); ret = ibv_cmd_create_srq(pd, ibsrq, attr, &cmd.ibv_cmd, sizeof(cmd), &resp.ibv_resp, sizeof(resp)); if (ret) goto err_db; ret = mlx5_store_srq(ctx, resp.srqn, srq); if (ret) goto err_destroy; pthread_mutex_unlock(&ctx->srq_table_mutex); srq->srqn = resp.srqn; srq->rsc.rsn = resp.srqn; srq->rsc.type = MLX5_RSC_TYPE_SRQ; return ibsrq; err_destroy: ibv_cmd_destroy_srq(ibsrq); err_db: pthread_mutex_unlock(&ctx->srq_table_mutex); mlx5_free_db(to_mctx(pd->context), srq->db); err_free: free(srq->wrid); mlx5_free_buf(&srq->buf); err_spl: mlx5_spinlock_destroy(&srq->lock); err: free(srq); return NULL; } int mlx5_modify_srq(struct ibv_srq *srq, struct ibv_srq_attr *attr, int attr_mask) { struct ibv_modify_srq cmd; return ibv_cmd_modify_srq(srq, attr, attr_mask, &cmd, sizeof cmd); } int mlx5_query_srq(struct ibv_srq *srq, struct ibv_srq_attr *attr) { struct ibv_query_srq cmd; return ibv_cmd_query_srq(srq, attr, &cmd, sizeof cmd); } int mlx5_destroy_srq(struct ibv_srq *srq) { int ret; struct mlx5_srq *msrq = to_msrq(srq); struct mlx5_context *ctx = to_mctx(srq->context); ret = ibv_cmd_destroy_srq(srq); if (ret) return ret; if (ctx->cqe_version && msrq->rsc.type == MLX5_RSC_TYPE_XSRQ) mlx5_clear_uidx(ctx, msrq->rsc.rsn); else mlx5_clear_srq(ctx, msrq->srqn); mlx5_free_db(ctx, msrq->db); mlx5_free_buf(&msrq->buf); free(msrq->wrid); mlx5_spinlock_destroy(&msrq->lock); free(msrq); return 0; } static int sq_overhead(enum ibv_qp_type qp_type) { size_t size = 0; size_t mw_bind_size = sizeof(struct mlx5_wqe_umr_ctrl_seg) + sizeof(struct mlx5_wqe_mkey_context_seg) + max_t(size_t, sizeof(struct mlx5_wqe_umr_klm_seg), 64); switch (qp_type) { case IBV_QPT_RC: size += sizeof(struct mlx5_wqe_ctrl_seg) + max(sizeof(struct mlx5_wqe_atomic_seg) + sizeof(struct mlx5_wqe_raddr_seg), mw_bind_size); break; case IBV_QPT_UC: size = sizeof(struct mlx5_wqe_ctrl_seg) + max(sizeof(struct mlx5_wqe_raddr_seg), mw_bind_size); break; case IBV_QPT_UD: size = sizeof(struct mlx5_wqe_ctrl_seg) + sizeof(struct mlx5_wqe_datagram_seg); break; case IBV_QPT_XRC_SEND: size = sizeof(struct mlx5_wqe_ctrl_seg) + mw_bind_size; SWITCH_FALLTHROUGH; case IBV_QPT_XRC_RECV: size = max(size, sizeof(struct mlx5_wqe_ctrl_seg) + sizeof(struct mlx5_wqe_xrc_seg) + sizeof(struct mlx5_wqe_raddr_seg)); break; case IBV_QPT_RAW_PACKET: size = sizeof(struct mlx5_wqe_ctrl_seg) + sizeof(struct mlx5_wqe_eth_seg); break; default: return -EINVAL; } return size; } static int mlx5_calc_send_wqe(struct mlx5_context *ctx, struct ibv_qp_init_attr_ex *attr, struct mlx5_qp *qp) { int size; int inl_size = 0; int max_gather; int tot_size; size = sq_overhead(attr->qp_type); if (size < 0) 
return size; if (attr->cap.max_inline_data) { inl_size = size + align(sizeof(struct mlx5_wqe_inl_data_seg) + attr->cap.max_inline_data, 16); } if (attr->comp_mask & IBV_QP_INIT_ATTR_MAX_TSO_HEADER) { size += align(attr->max_tso_header, 16); qp->max_tso_header = attr->max_tso_header; } max_gather = (ctx->max_sq_desc_sz - size) / sizeof(struct mlx5_wqe_data_seg); if (attr->cap.max_send_sge > max_gather) return -EINVAL; size += attr->cap.max_send_sge * sizeof(struct mlx5_wqe_data_seg); tot_size = max_int(size, inl_size); if (tot_size > ctx->max_sq_desc_sz) return -EINVAL; return align(tot_size, MLX5_SEND_WQE_BB); } static int mlx5_calc_rcv_wqe(struct mlx5_context *ctx, struct ibv_qp_init_attr_ex *attr, struct mlx5_qp *qp) { uint32_t size; int num_scatter; if (attr->srq) return 0; num_scatter = max_t(uint32_t, attr->cap.max_recv_sge, 1); size = sizeof(struct mlx5_wqe_data_seg) * num_scatter; if (qp->wq_sig) size += sizeof(struct mlx5_rwqe_sig); if (size > ctx->max_rq_desc_sz) return -EINVAL; size = mlx5_round_up_power_of_two(size); return size; } static int mlx5_calc_sq_size(struct mlx5_context *ctx, struct ibv_qp_init_attr_ex *attr, struct mlx5_qp *qp) { int wqe_size; int wq_size; FILE *fp = ctx->dbg_fp; if (!attr->cap.max_send_wr) return 0; wqe_size = mlx5_calc_send_wqe(ctx, attr, qp); if (wqe_size < 0) { mlx5_dbg(fp, MLX5_DBG_QP, "\n"); return wqe_size; } if (wqe_size > ctx->max_sq_desc_sz) { mlx5_dbg(fp, MLX5_DBG_QP, "\n"); return -EINVAL; } qp->max_inline_data = wqe_size - sq_overhead(attr->qp_type) - sizeof(struct mlx5_wqe_inl_data_seg); attr->cap.max_inline_data = qp->max_inline_data; /* * to avoid overflow, we limit max_send_wr so * that the multiplication will fit in int */ if (attr->cap.max_send_wr > 0x7fffffff / ctx->max_sq_desc_sz) { mlx5_dbg(fp, MLX5_DBG_QP, "\n"); return -EINVAL; } wq_size = mlx5_round_up_power_of_two(attr->cap.max_send_wr * wqe_size); qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB; if (qp->sq.wqe_cnt > ctx->max_send_wqebb) { mlx5_dbg(fp, MLX5_DBG_QP, "\n"); return -EINVAL; } qp->sq.wqe_shift = mlx5_ilog2(MLX5_SEND_WQE_BB); qp->sq.max_gs = attr->cap.max_send_sge; qp->sq.max_post = wq_size / wqe_size; return wq_size; } static int mlx5_calc_rwq_size(struct mlx5_context *ctx, struct mlx5_rwq *rwq, struct ibv_wq_init_attr *attr) { size_t wqe_size; int wq_size; uint32_t num_scatter; int scat_spc; if (!attr->max_wr) return -EINVAL; /* TBD: check caps for RQ */ num_scatter = max_t(uint32_t, attr->max_sge, 1); wqe_size = sizeof(struct mlx5_wqe_data_seg) * num_scatter; if (rwq->wq_sig) wqe_size += sizeof(struct mlx5_rwqe_sig); if (wqe_size <= 0 || wqe_size > ctx->max_rq_desc_sz) return -EINVAL; wqe_size = mlx5_round_up_power_of_two(wqe_size); wq_size = mlx5_round_up_power_of_two(attr->max_wr) * wqe_size; wq_size = max(wq_size, MLX5_SEND_WQE_BB); rwq->rq.wqe_cnt = wq_size / wqe_size; rwq->rq.wqe_shift = mlx5_ilog2(wqe_size); rwq->rq.max_post = 1 << mlx5_ilog2(wq_size / wqe_size); scat_spc = wqe_size - ((rwq->wq_sig) ? 
sizeof(struct mlx5_rwqe_sig) : 0); rwq->rq.max_gs = scat_spc / sizeof(struct mlx5_wqe_data_seg); return wq_size; } static int mlx5_calc_rq_size(struct mlx5_context *ctx, struct ibv_qp_init_attr_ex *attr, struct mlx5_qp *qp) { int wqe_size; int wq_size; int scat_spc; FILE *fp = ctx->dbg_fp; if (!attr->cap.max_recv_wr) return 0; if (attr->cap.max_recv_wr > ctx->max_recv_wr) { mlx5_dbg(fp, MLX5_DBG_QP, "\n"); return -EINVAL; } wqe_size = mlx5_calc_rcv_wqe(ctx, attr, qp); if (wqe_size < 0 || wqe_size > ctx->max_rq_desc_sz) { mlx5_dbg(fp, MLX5_DBG_QP, "\n"); return -EINVAL; } wq_size = mlx5_round_up_power_of_two(attr->cap.max_recv_wr) * wqe_size; if (wqe_size) { wq_size = max(wq_size, MLX5_SEND_WQE_BB); qp->rq.wqe_cnt = wq_size / wqe_size; qp->rq.wqe_shift = mlx5_ilog2(wqe_size); qp->rq.max_post = 1 << mlx5_ilog2(wq_size / wqe_size); scat_spc = wqe_size - (qp->wq_sig ? sizeof(struct mlx5_rwqe_sig) : 0); qp->rq.max_gs = scat_spc / sizeof(struct mlx5_wqe_data_seg); } else { qp->rq.wqe_cnt = 0; qp->rq.wqe_shift = 0; qp->rq.max_post = 0; qp->rq.max_gs = 0; } return wq_size; } static int mlx5_calc_wq_size(struct mlx5_context *ctx, struct ibv_qp_init_attr_ex *attr, struct mlx5_qp *qp) { int ret; int result; ret = mlx5_calc_sq_size(ctx, attr, qp); if (ret < 0) return ret; result = ret; ret = mlx5_calc_rq_size(ctx, attr, qp); if (ret < 0) return ret; result += ret; qp->sq.offset = ret; qp->rq.offset = 0; return result; } static void map_uuar(struct ibv_context *context, struct mlx5_qp *qp, int uuar_index) { struct mlx5_context *ctx = to_mctx(context); qp->bf = &ctx->bfs[uuar_index]; } static const char *qptype2key(enum ibv_qp_type type) { switch (type) { case IBV_QPT_RC: return "HUGE_RC"; case IBV_QPT_UC: return "HUGE_UC"; case IBV_QPT_UD: return "HUGE_UD"; case IBV_QPT_RAW_PACKET: return "HUGE_RAW_ETH"; default: return "HUGE_NA"; } } static int mlx5_alloc_qp_buf(struct ibv_context *context, struct ibv_qp_init_attr_ex *attr, struct mlx5_qp *qp, int size) { int err; enum mlx5_alloc_type alloc_type; enum mlx5_alloc_type default_alloc_type = MLX5_ALLOC_TYPE_ANON; const char *qp_huge_key; if (qp->sq.wqe_cnt) { qp->sq.wrid = malloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wrid)); if (!qp->sq.wrid) { errno = ENOMEM; err = -1; return err; } qp->sq.wr_data = malloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wr_data)); if (!qp->sq.wr_data) { errno = ENOMEM; err = -1; goto ex_wrid; } } qp->sq.wqe_head = malloc(qp->sq.wqe_cnt * sizeof(*qp->sq.wqe_head)); if (!qp->sq.wqe_head) { errno = ENOMEM; err = -1; goto ex_wrid; } if (qp->rq.wqe_cnt) { qp->rq.wrid = malloc(qp->rq.wqe_cnt * sizeof(uint64_t)); if (!qp->rq.wrid) { errno = ENOMEM; err = -1; goto ex_wrid; } } /* compatibility support */ qp_huge_key = qptype2key(qp->ibv_qp->qp_type); if (mlx5_use_huge(qp_huge_key)) default_alloc_type = MLX5_ALLOC_TYPE_HUGE; mlx5_get_alloc_type(MLX5_QP_PREFIX, &alloc_type, default_alloc_type); err = mlx5_alloc_prefered_buf(to_mctx(context), &qp->buf, align(qp->buf_size, to_mdev (context->device)->page_size), to_mdev(context->device)->page_size, alloc_type, MLX5_QP_PREFIX); if (err) { err = -ENOMEM; goto ex_wrid; } memset(qp->buf.buf, 0, qp->buf_size); if (attr->qp_type == IBV_QPT_RAW_PACKET) { size_t aligned_sq_buf_size = align(qp->sq_buf_size, to_mdev(context->device)->page_size); /* For Raw Packet QP, allocate a separate buffer for the SQ */ err = mlx5_alloc_prefered_buf(to_mctx(context), &qp->sq_buf, aligned_sq_buf_size, to_mdev(context->device)->page_size, alloc_type, MLX5_QP_PREFIX); if (err) { err = -ENOMEM; goto rq_buf; } memset(qp->sq_buf.buf, 
0, aligned_sq_buf_size); } return 0; rq_buf: mlx5_free_actual_buf(to_mctx(qp->verbs_qp.qp.context), &qp->buf); ex_wrid: if (qp->rq.wrid) free(qp->rq.wrid); if (qp->sq.wqe_head) free(qp->sq.wqe_head); if (qp->sq.wr_data) free(qp->sq.wr_data); if (qp->sq.wrid) free(qp->sq.wrid); return err; } static void mlx5_free_qp_buf(struct mlx5_qp *qp) { struct mlx5_context *ctx = to_mctx(qp->ibv_qp->context); mlx5_free_actual_buf(ctx, &qp->buf); if (qp->sq_buf.buf) mlx5_free_actual_buf(ctx, &qp->sq_buf); if (qp->rq.wrid) free(qp->rq.wrid); if (qp->sq.wqe_head) free(qp->sq.wqe_head); if (qp->sq.wrid) free(qp->sq.wrid); if (qp->sq.wr_data) free(qp->sq.wr_data); } static int mlx5_cmd_create_rss_qp(struct ibv_context *context, struct ibv_qp_init_attr_ex *attr, struct mlx5_qp *qp) { struct mlx5_create_qp_ex_rss cmd_ex_rss = {}; struct mlx5_create_qp_resp_ex resp = {}; int ret; if (attr->rx_hash_conf.rx_hash_key_len > sizeof(cmd_ex_rss.rx_hash_key)) { errno = EINVAL; return errno; } cmd_ex_rss.rx_hash_fields_mask = attr->rx_hash_conf.rx_hash_fields_mask; cmd_ex_rss.rx_hash_function = attr->rx_hash_conf.rx_hash_function; cmd_ex_rss.rx_key_len = attr->rx_hash_conf.rx_hash_key_len; memcpy(cmd_ex_rss.rx_hash_key, attr->rx_hash_conf.rx_hash_key, attr->rx_hash_conf.rx_hash_key_len); ret = ibv_cmd_create_qp_ex2(context, &qp->verbs_qp, sizeof(qp->verbs_qp), attr, &cmd_ex_rss.ibv_cmd, sizeof(cmd_ex_rss.ibv_cmd), sizeof(cmd_ex_rss), &resp.ibv_resp, sizeof(resp.ibv_resp), sizeof(resp)); if (ret) return ret; qp->rss_qp = 1; return 0; } static int mlx5_cmd_create_qp_ex(struct ibv_context *context, struct ibv_qp_init_attr_ex *attr, struct mlx5_create_qp *cmd, struct mlx5_qp *qp, struct mlx5_create_qp_resp_ex *resp) { struct mlx5_create_qp_ex cmd_ex; int ret; memset(&cmd_ex, 0, sizeof(cmd_ex)); memcpy(&cmd_ex.ibv_cmd.base, &cmd->ibv_cmd.user_handle, offsetof(typeof(cmd->ibv_cmd), is_srq) + sizeof(cmd->ibv_cmd.is_srq) - offsetof(typeof(cmd->ibv_cmd), user_handle)); memcpy(&cmd_ex.drv_ex, &cmd->buf_addr, offsetof(typeof(*cmd), sq_buf_addr) + sizeof(cmd->sq_buf_addr) - sizeof(cmd->ibv_cmd)); ret = ibv_cmd_create_qp_ex2(context, &qp->verbs_qp, sizeof(qp->verbs_qp), attr, &cmd_ex.ibv_cmd, sizeof(cmd_ex.ibv_cmd), sizeof(cmd_ex), &resp->ibv_resp, sizeof(resp->ibv_resp), sizeof(*resp)); return ret; } enum { MLX5_CREATE_QP_SUP_COMP_MASK = (IBV_QP_INIT_ATTR_PD | IBV_QP_INIT_ATTR_XRCD | IBV_QP_INIT_ATTR_CREATE_FLAGS | IBV_QP_INIT_ATTR_MAX_TSO_HEADER | IBV_QP_INIT_ATTR_IND_TABLE | IBV_QP_INIT_ATTR_RX_HASH), }; enum { MLX5_CREATE_QP_EX2_COMP_MASK = (IBV_QP_INIT_ATTR_CREATE_FLAGS | IBV_QP_INIT_ATTR_MAX_TSO_HEADER | IBV_QP_INIT_ATTR_IND_TABLE | IBV_QP_INIT_ATTR_RX_HASH), }; static struct ibv_qp *create_qp(struct ibv_context *context, struct ibv_qp_init_attr_ex *attr) { struct mlx5_create_qp cmd; struct mlx5_create_qp_resp resp; struct mlx5_create_qp_resp_ex resp_ex; struct mlx5_qp *qp; int ret; struct mlx5_context *ctx = to_mctx(context); struct ibv_qp *ibqp; int32_t usr_idx = 0; uint32_t uuar_index; FILE *fp = ctx->dbg_fp; if (attr->comp_mask & ~MLX5_CREATE_QP_SUP_COMP_MASK) return NULL; if ((attr->comp_mask & IBV_QP_INIT_ATTR_MAX_TSO_HEADER) && (attr->qp_type != IBV_QPT_RAW_PACKET)) return NULL; qp = calloc(1, sizeof(*qp)); if (!qp) { mlx5_dbg(fp, MLX5_DBG_QP, "\n"); return NULL; } ibqp = (struct ibv_qp *)&qp->verbs_qp; qp->ibv_qp = ibqp; memset(&cmd, 0, sizeof(cmd)); memset(&resp, 0, sizeof(resp)); memset(&resp_ex, 0, sizeof(resp_ex)); if (attr->comp_mask & IBV_QP_INIT_ATTR_RX_HASH) { ret = mlx5_cmd_create_rss_qp(context, attr, qp); 
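	/*
	 * An RSS QP (requested via IBV_QP_INIT_ATTR_RX_HASH) owns no send or
	 * receive queue in user space; received traffic is steered to the WQs
	 * of the associated indirection table instead. On success we therefore
	 * return right here and skip the WQ buffer, doorbell record and UAR
	 * mapping that the regular QP path below sets up.
	 */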
if (ret) goto err; return ibqp; } qp->wq_sig = qp_sig_enabled(); if (qp->wq_sig) cmd.flags |= MLX5_QP_FLAG_SIGNATURE; if (use_scatter_to_cqe()) cmd.flags |= MLX5_QP_FLAG_SCATTER_CQE; ret = mlx5_calc_wq_size(ctx, attr, qp); if (ret < 0) { errno = -ret; goto err; } if (attr->qp_type == IBV_QPT_RAW_PACKET) { qp->buf_size = qp->sq.offset; qp->sq_buf_size = ret - qp->buf_size; qp->sq.offset = 0; } else { qp->buf_size = ret; qp->sq_buf_size = 0; } if (mlx5_alloc_qp_buf(context, attr, qp, ret)) { mlx5_dbg(fp, MLX5_DBG_QP, "\n"); goto err; } if (attr->qp_type == IBV_QPT_RAW_PACKET) { qp->sq_start = qp->sq_buf.buf; qp->sq.qend = qp->sq_buf.buf + (qp->sq.wqe_cnt << qp->sq.wqe_shift); } else { qp->sq_start = qp->buf.buf + qp->sq.offset; qp->sq.qend = qp->buf.buf + qp->sq.offset + (qp->sq.wqe_cnt << qp->sq.wqe_shift); } mlx5_init_qp_indices(qp); if (mlx5_spinlock_init(&qp->sq.lock)) goto err_free_qp_buf; if (mlx5_spinlock_init(&qp->rq.lock)) goto err_sq_spl; qp->db = mlx5_alloc_dbrec(ctx); if (!qp->db) { mlx5_dbg(fp, MLX5_DBG_QP, "\n"); goto err_rq_spl; } qp->db[MLX5_RCV_DBR] = 0; qp->db[MLX5_SND_DBR] = 0; cmd.buf_addr = (uintptr_t) qp->buf.buf; cmd.sq_buf_addr = (attr->qp_type == IBV_QPT_RAW_PACKET) ? (uintptr_t) qp->sq_buf.buf : 0; cmd.db_addr = (uintptr_t) qp->db; cmd.sq_wqe_count = qp->sq.wqe_cnt; cmd.rq_wqe_count = qp->rq.wqe_cnt; cmd.rq_wqe_shift = qp->rq.wqe_shift; if (ctx->atomic_cap == IBV_ATOMIC_HCA) qp->atomics_enabled = 1; if (!ctx->cqe_version) { cmd.uidx = 0xffffff; pthread_mutex_lock(&ctx->qp_table_mutex); } else if (!is_xrc_tgt(attr->qp_type)) { usr_idx = mlx5_store_uidx(ctx, qp); if (usr_idx < 0) { mlx5_dbg(fp, MLX5_DBG_QP, "Couldn't find free user index\n"); goto err_rq_db; } cmd.uidx = usr_idx; } if (attr->comp_mask & MLX5_CREATE_QP_EX2_COMP_MASK) ret = mlx5_cmd_create_qp_ex(context, attr, &cmd, qp, &resp_ex); else ret = ibv_cmd_create_qp_ex(context, &qp->verbs_qp, sizeof(qp->verbs_qp), attr, &cmd.ibv_cmd, sizeof(cmd), &resp.ibv_resp, sizeof(resp)); if (ret) { mlx5_dbg(fp, MLX5_DBG_QP, "ret %d\n", ret); goto err_free_uidx; } uuar_index = (attr->comp_mask & MLX5_CREATE_QP_EX2_COMP_MASK) ? resp_ex.uuar_index : resp.uuar_index; if (!ctx->cqe_version) { if (qp->sq.wqe_cnt || qp->rq.wqe_cnt) { ret = mlx5_store_qp(ctx, ibqp->qp_num, qp); if (ret) { mlx5_dbg(fp, MLX5_DBG_QP, "ret %d\n", ret); goto err_destroy; } } pthread_mutex_unlock(&ctx->qp_table_mutex); } map_uuar(context, qp, uuar_index); qp->rq.max_post = qp->rq.wqe_cnt; if (attr->sq_sig_all) qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE; else qp->sq_signal_bits = 0; attr->cap.max_send_wr = qp->sq.max_post; attr->cap.max_recv_wr = qp->rq.max_post; attr->cap.max_recv_sge = qp->rq.max_gs; qp->rsc.type = MLX5_RSC_TYPE_QP; qp->rsc.rsn = (ctx->cqe_version && !is_xrc_tgt(attr->qp_type)) ? 
usr_idx : ibqp->qp_num; return ibqp; err_destroy: ibv_cmd_destroy_qp(ibqp); err_free_uidx: if (!ctx->cqe_version) pthread_mutex_unlock(&to_mctx(context)->qp_table_mutex); else if (!is_xrc_tgt(attr->qp_type)) mlx5_clear_uidx(ctx, usr_idx); err_rq_db: mlx5_free_db(to_mctx(context), qp->db); err_rq_spl: mlx5_spinlock_destroy(&qp->rq.lock); err_sq_spl: mlx5_spinlock_destroy(&qp->sq.lock); err_free_qp_buf: mlx5_free_qp_buf(qp); err: free(qp); return NULL; } struct ibv_qp *mlx5_create_qp(struct ibv_pd *pd, struct ibv_qp_init_attr *attr) { struct ibv_qp *qp; struct ibv_qp_init_attr_ex attrx; memset(&attrx, 0, sizeof(attrx)); memcpy(&attrx, attr, sizeof(*attr)); attrx.comp_mask = IBV_QP_INIT_ATTR_PD; attrx.pd = pd; qp = create_qp(pd->context, &attrx); if (qp) memcpy(attr, &attrx, sizeof(*attr)); return qp; } static void mlx5_lock_cqs(struct ibv_qp *qp) { struct mlx5_cq *send_cq = to_mcq(qp->send_cq); struct mlx5_cq *recv_cq = to_mcq(qp->recv_cq); if (send_cq && recv_cq) { if (send_cq == recv_cq) { mlx5_spin_lock(&send_cq->lock); } else if (send_cq->cqn < recv_cq->cqn) { mlx5_spin_lock(&send_cq->lock); mlx5_spin_lock(&recv_cq->lock); } else { mlx5_spin_lock(&recv_cq->lock); mlx5_spin_lock(&send_cq->lock); } } else if (send_cq) { mlx5_spin_lock(&send_cq->lock); } else if (recv_cq) { mlx5_spin_lock(&recv_cq->lock); } } static void mlx5_unlock_cqs(struct ibv_qp *qp) { struct mlx5_cq *send_cq = to_mcq(qp->send_cq); struct mlx5_cq *recv_cq = to_mcq(qp->recv_cq); if (send_cq && recv_cq) { if (send_cq == recv_cq) { mlx5_spin_unlock(&send_cq->lock); } else if (send_cq->cqn < recv_cq->cqn) { mlx5_spin_unlock(&recv_cq->lock); mlx5_spin_unlock(&send_cq->lock); } else { mlx5_spin_unlock(&send_cq->lock); mlx5_spin_unlock(&recv_cq->lock); } } else if (send_cq) { mlx5_spin_unlock(&send_cq->lock); } else if (recv_cq) { mlx5_spin_unlock(&recv_cq->lock); } } int mlx5_destroy_qp(struct ibv_qp *ibqp) { struct mlx5_qp *qp = to_mqp(ibqp); struct mlx5_context *ctx = to_mctx(ibqp->context); int ret; if (qp->rss_qp) { ret = ibv_cmd_destroy_qp(ibqp); if (ret) return ret; goto free; } if (!ctx->cqe_version) pthread_mutex_lock(&ctx->qp_table_mutex); ret = ibv_cmd_destroy_qp(ibqp); if (ret) { if (!ctx->cqe_version) pthread_mutex_unlock(&ctx->qp_table_mutex); return ret; } mlx5_lock_cqs(ibqp); __mlx5_cq_clean(to_mcq(ibqp->recv_cq), qp->rsc.rsn, ibqp->srq ? 
to_msrq(ibqp->srq) : NULL); if (ibqp->send_cq != ibqp->recv_cq) __mlx5_cq_clean(to_mcq(ibqp->send_cq), qp->rsc.rsn, NULL); if (!ctx->cqe_version) { if (qp->sq.wqe_cnt || qp->rq.wqe_cnt) mlx5_clear_qp(ctx, ibqp->qp_num); } mlx5_unlock_cqs(ibqp); if (!ctx->cqe_version) pthread_mutex_unlock(&ctx->qp_table_mutex); else if (!is_xrc_tgt(ibqp->qp_type)) mlx5_clear_uidx(ctx, qp->rsc.rsn); mlx5_free_db(ctx, qp->db); mlx5_spinlock_destroy(&qp->rq.lock); mlx5_spinlock_destroy(&qp->sq.lock); mlx5_free_qp_buf(qp); free: free(qp); return 0; } int mlx5_query_qp(struct ibv_qp *ibqp, struct ibv_qp_attr *attr, int attr_mask, struct ibv_qp_init_attr *init_attr) { struct ibv_query_qp cmd; struct mlx5_qp *qp = to_mqp(ibqp); int ret; if (qp->rss_qp) return ENOSYS; ret = ibv_cmd_query_qp(ibqp, attr, attr_mask, init_attr, &cmd, sizeof(cmd)); if (ret) return ret; init_attr->cap.max_send_wr = qp->sq.max_post; init_attr->cap.max_send_sge = qp->sq.max_gs; init_attr->cap.max_inline_data = qp->max_inline_data; attr->cap = init_attr->cap; return 0; } enum { MLX5_MODIFY_QP_EX_ATTR_MASK = IBV_QP_RATE_LIMIT, }; int mlx5_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask) { struct ibv_modify_qp cmd = {}; struct ibv_modify_qp_ex cmd_ex = {}; struct ibv_modify_qp_resp_ex resp = {}; struct mlx5_qp *mqp = to_mqp(qp); struct mlx5_context *context = to_mctx(qp->context); int ret; uint32_t *db; if (mqp->rss_qp) return ENOSYS; if (attr_mask & IBV_QP_PORT) { switch (qp->qp_type) { case IBV_QPT_RAW_PACKET: if (context->cached_link_layer[attr->port_num - 1] == IBV_LINK_LAYER_ETHERNET) { if (context->cached_device_cap_flags & IBV_DEVICE_RAW_IP_CSUM) mqp->qp_cap_cache |= MLX5_CSUM_SUPPORT_RAW_OVER_ETH | MLX5_RX_CSUM_VALID; if (ibv_is_qpt_supported( context->cached_tso_caps.supported_qpts, IBV_QPT_RAW_PACKET)) mqp->max_tso = context->cached_tso_caps.max_tso; } break; default: break; } } if (attr_mask & MLX5_MODIFY_QP_EX_ATTR_MASK) ret = ibv_cmd_modify_qp_ex(qp, attr, attr_mask, &cmd_ex, sizeof(cmd_ex), sizeof(cmd_ex), &resp, sizeof(resp), sizeof(resp)); else ret = ibv_cmd_modify_qp(qp, attr, attr_mask, &cmd, sizeof(cmd)); if (!ret && (attr_mask & IBV_QP_STATE) && attr->qp_state == IBV_QPS_RESET) { if (qp->recv_cq) { mlx5_cq_clean(to_mcq(qp->recv_cq), mqp->rsc.rsn, qp->srq ? to_msrq(qp->srq) : NULL); } if (qp->send_cq != qp->recv_cq && qp->send_cq) mlx5_cq_clean(to_mcq(qp->send_cq), to_mqp(qp)->rsc.rsn, NULL); mlx5_init_qp_indices(mqp); db = mqp->db; db[MLX5_RCV_DBR] = 0; db[MLX5_SND_DBR] = 0; } /* * When the Raw Packet QP is in INIT state, its RQ * underneath is already in RDY, which means it can * receive packets. According to the IB spec, a QP can't * receive packets until moved to RTR state. To achieve this, * for Raw Packet QPs, we update the doorbell record * once the QP is moved to RTR. 
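 * Until that transition the post-receive path leaves the receive doorbell
 * record of a Raw Packet QP untouched, so the write below is what finally
 * publishes rq.head to the device.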
*/ if (!ret && (attr_mask & IBV_QP_STATE) && attr->qp_state == IBV_QPS_RTR && qp->qp_type == IBV_QPT_RAW_PACKET) { mlx5_spin_lock(&mqp->rq.lock); mqp->db[MLX5_RCV_DBR] = htobe32(mqp->rq.head & 0xffff); mlx5_spin_unlock(&mqp->rq.lock); } return ret; } #define RROCE_UDP_SPORT_MIN 0xC000 #define RROCE_UDP_SPORT_MAX 0xFFFF struct ibv_ah *mlx5_create_ah(struct ibv_pd *pd, struct ibv_ah_attr *attr) { struct mlx5_context *ctx = to_mctx(pd->context); struct ibv_port_attr port_attr; struct mlx5_ah *ah; uint32_t gid_type; uint32_t tmp; uint8_t grh; int is_eth; if (attr->port_num < 1 || attr->port_num > ctx->num_ports) return NULL; if (ctx->cached_link_layer[attr->port_num - 1]) { is_eth = ctx->cached_link_layer[attr->port_num - 1] == IBV_LINK_LAYER_ETHERNET; } else { if (ibv_query_port(pd->context, attr->port_num, &port_attr)) return NULL; is_eth = (port_attr.link_layer == IBV_LINK_LAYER_ETHERNET); } if (unlikely((!attr->is_global) && is_eth)) { errno = EINVAL; return NULL; } ah = calloc(1, sizeof *ah); if (!ah) return NULL; if (is_eth) { if (ibv_query_gid_type(pd->context, attr->port_num, attr->grh.sgid_index, &gid_type)) goto err; if (gid_type == IBV_GID_TYPE_ROCE_V2) ah->av.rlid = htobe16(rand() % (RROCE_UDP_SPORT_MAX + 1 - RROCE_UDP_SPORT_MIN) + RROCE_UDP_SPORT_MIN); /* Since RoCE packets must contain GRH, this bit is reserved * for RoCE and shouldn't be set. */ grh = 0; } else { ah->av.fl_mlid = attr->src_path_bits & 0x7f; ah->av.rlid = htobe16(attr->dlid); grh = 1; } ah->av.stat_rate_sl = (attr->static_rate << 4) | attr->sl; if (attr->is_global) { ah->av.tclass = attr->grh.traffic_class; ah->av.hop_limit = attr->grh.hop_limit; tmp = htobe32((grh << 30) | ((attr->grh.sgid_index & 0xff) << 20) | (attr->grh.flow_label & 0xfffff)); ah->av.grh_gid_fl = tmp; memcpy(ah->av.rgid, attr->grh.dgid.raw, 16); } if (is_eth) { if (ctx->cmds_supp_uhw & MLX5_USER_CMDS_SUPP_UHW_CREATE_AH) { struct mlx5_create_ah_resp resp = {}; if (ibv_cmd_create_ah(pd, &ah->ibv_ah, attr, &resp.ibv_resp, sizeof(resp))) goto err; ah->kern_ah = true; memcpy(ah->av.rmac, resp.dmac, ETHERNET_LL_SIZE); } else { uint16_t vid; if (ibv_resolve_eth_l2_from_gid(pd->context, attr, ah->av.rmac, &vid)) goto err; } } return &ah->ibv_ah; err: free(ah); return NULL; } int mlx5_destroy_ah(struct ibv_ah *ah) { struct mlx5_ah *mah = to_mah(ah); int err; if (mah->kern_ah) { err = ibv_cmd_destroy_ah(ah); if (err) return err; } free(mah); return 0; } int mlx5_attach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid) { return ibv_cmd_attach_mcast(qp, gid, lid); } int mlx5_detach_mcast(struct ibv_qp *qp, const union ibv_gid *gid, uint16_t lid) { return ibv_cmd_detach_mcast(qp, gid, lid); } struct ibv_qp *mlx5_create_qp_ex(struct ibv_context *context, struct ibv_qp_init_attr_ex *attr) { return create_qp(context, attr); } int mlx5_get_srq_num(struct ibv_srq *srq, uint32_t *srq_num) { struct mlx5_srq *msrq = to_msrq(srq); *srq_num = msrq->srqn; return 0; } struct ibv_xrcd * mlx5_open_xrcd(struct ibv_context *context, struct ibv_xrcd_init_attr *xrcd_init_attr) { int err; struct verbs_xrcd *xrcd; struct ibv_open_xrcd cmd = {}; struct ibv_open_xrcd_resp resp = {}; xrcd = calloc(1, sizeof(*xrcd)); if (!xrcd) return NULL; err = ibv_cmd_open_xrcd(context, xrcd, sizeof(*xrcd), xrcd_init_attr, &cmd, sizeof(cmd), &resp, sizeof(resp)); if (err) { free(xrcd); return NULL; } return &xrcd->xrcd; } int mlx5_close_xrcd(struct ibv_xrcd *ib_xrcd) { struct verbs_xrcd *xrcd = container_of(ib_xrcd, struct verbs_xrcd, xrcd); int ret; ret = 
ibv_cmd_close_xrcd(xrcd); if (!ret) free(xrcd); return ret; } static struct ibv_srq * mlx5_create_xrc_srq(struct ibv_context *context, struct ibv_srq_init_attr_ex *attr) { int err; struct mlx5_create_srq_ex cmd; struct mlx5_create_srq_resp resp; struct mlx5_srq *msrq; struct mlx5_context *ctx = to_mctx(context); int max_sge; struct ibv_srq *ibsrq; int uidx; FILE *fp = ctx->dbg_fp; msrq = calloc(1, sizeof(*msrq)); if (!msrq) return NULL; ibsrq = (struct ibv_srq *)&msrq->vsrq; memset(&cmd, 0, sizeof(cmd)); memset(&resp, 0, sizeof(resp)); if (mlx5_spinlock_init(&msrq->lock)) { fprintf(stderr, "%s-%d:\n", __func__, __LINE__); goto err; } if (attr->attr.max_wr > ctx->max_srq_recv_wr) { fprintf(stderr, "%s-%d:max_wr %d, max_srq_recv_wr %d\n", __func__, __LINE__, attr->attr.max_wr, ctx->max_srq_recv_wr); errno = EINVAL; goto err_spl; } /* * this calculation does not consider required control segments. The * final calculation is done again later. This is done so to avoid * overflows of variables */ max_sge = ctx->max_recv_wr / sizeof(struct mlx5_wqe_data_seg); if (attr->attr.max_sge > max_sge) { fprintf(stderr, "%s-%d:max_wr %d, max_srq_recv_wr %d\n", __func__, __LINE__, attr->attr.max_wr, ctx->max_srq_recv_wr); errno = EINVAL; goto err_spl; } msrq->max = align_queue_size(attr->attr.max_wr + 1); msrq->max_gs = attr->attr.max_sge; msrq->counter = 0; if (mlx5_alloc_srq_buf(context, msrq)) { fprintf(stderr, "%s-%d:\n", __func__, __LINE__); goto err_spl; } msrq->db = mlx5_alloc_dbrec(ctx); if (!msrq->db) { fprintf(stderr, "%s-%d:\n", __func__, __LINE__); goto err_free; } *msrq->db = 0; cmd.buf_addr = (uintptr_t)msrq->buf.buf; cmd.db_addr = (uintptr_t)msrq->db; msrq->wq_sig = srq_sig_enabled(); if (msrq->wq_sig) cmd.flags = MLX5_SRQ_FLAG_SIGNATURE; attr->attr.max_sge = msrq->max_gs; if (ctx->cqe_version) { uidx = mlx5_store_uidx(ctx, msrq); if (uidx < 0) { mlx5_dbg(fp, MLX5_DBG_QP, "Couldn't find free user index\n"); goto err_free_db; } cmd.uidx = uidx; } else { cmd.uidx = 0xffffff; pthread_mutex_lock(&ctx->srq_table_mutex); } err = ibv_cmd_create_srq_ex(context, &msrq->vsrq, sizeof(msrq->vsrq), attr, &cmd.ibv_cmd, sizeof(cmd), &resp.ibv_resp, sizeof(resp)); if (err) goto err_free_uidx; if (!ctx->cqe_version) { err = mlx5_store_srq(to_mctx(context), resp.srqn, msrq); if (err) goto err_destroy; pthread_mutex_unlock(&ctx->srq_table_mutex); } msrq->srqn = resp.srqn; msrq->rsc.type = MLX5_RSC_TYPE_XSRQ; msrq->rsc.rsn = ctx->cqe_version ? 
cmd.uidx : resp.srqn; return ibsrq; err_destroy: ibv_cmd_destroy_srq(ibsrq); err_free_uidx: if (ctx->cqe_version) mlx5_clear_uidx(ctx, cmd.uidx); else pthread_mutex_unlock(&ctx->srq_table_mutex); err_free_db: mlx5_free_db(ctx, msrq->db); err_free: free(msrq->wrid); mlx5_free_buf(&msrq->buf); err_spl: mlx5_spinlock_destroy(&msrq->lock); err: free(msrq); return NULL; } struct ibv_srq *mlx5_create_srq_ex(struct ibv_context *context, struct ibv_srq_init_attr_ex *attr) { if (!(attr->comp_mask & IBV_SRQ_INIT_ATTR_TYPE) || (attr->srq_type == IBV_SRQT_BASIC)) return mlx5_create_srq(attr->pd, (struct ibv_srq_init_attr *)attr); else if (attr->srq_type == IBV_SRQT_XRC) return mlx5_create_xrc_srq(context, attr); return NULL; } int mlx5_query_device_ex(struct ibv_context *context, const struct ibv_query_device_ex_input *input, struct ibv_device_attr_ex *attr, size_t attr_size) { struct mlx5_context *mctx = to_mctx(context); struct mlx5_query_device_ex_resp resp; struct mlx5_query_device_ex cmd; struct ibv_device_attr *a; uint64_t raw_fw_ver; unsigned sub_minor; unsigned major; unsigned minor; int err; int cmd_supp_uhw = mctx->cmds_supp_uhw & MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE; memset(&cmd, 0, sizeof(cmd)); memset(&resp, 0, sizeof(resp)); err = ibv_cmd_query_device_ex(context, input, attr, attr_size, &raw_fw_ver, &cmd.ibv_cmd, sizeof(cmd.ibv_cmd), sizeof(cmd), &resp.ibv_resp, sizeof(resp.ibv_resp), cmd_supp_uhw ? sizeof(resp) : sizeof(resp.ibv_resp)); if (err) return err; attr->tso_caps = resp.tso_caps; attr->rss_caps.rx_hash_fields_mask = resp.rss_caps.rx_hash_fields_mask; attr->rss_caps.rx_hash_function = resp.rss_caps.rx_hash_function; attr->packet_pacing_caps = resp.packet_pacing_caps.caps; if (resp.support_multi_pkt_send_wqe) mctx->vendor_cap_flags |= MLX5_VENDOR_CAP_FLAGS_MPW; mctx->cqe_comp_caps = resp.cqe_comp_caps; major = (raw_fw_ver >> 32) & 0xffff; minor = (raw_fw_ver >> 16) & 0xffff; sub_minor = raw_fw_ver & 0xffff; a = &attr->orig_attr; snprintf(a->fw_ver, sizeof(a->fw_ver), "%d.%d.%04d", major, minor, sub_minor); return 0; } static int rwq_sig_enabled(struct ibv_context *context) { char *env; env = getenv("MLX5_RWQ_SIGNATURE"); if (env) return 1; return 0; } static void mlx5_free_rwq_buf(struct mlx5_rwq *rwq, struct ibv_context *context) { struct mlx5_context *ctx = to_mctx(context); mlx5_free_actual_buf(ctx, &rwq->buf); free(rwq->rq.wrid); } static int mlx5_alloc_rwq_buf(struct ibv_context *context, struct mlx5_rwq *rwq, int size) { int err; enum mlx5_alloc_type default_alloc_type = MLX5_ALLOC_TYPE_PREFER_CONTIG; rwq->rq.wrid = malloc(rwq->rq.wqe_cnt * sizeof(uint64_t)); if (!rwq->rq.wrid) { errno = ENOMEM; return -1; } err = mlx5_alloc_prefered_buf(to_mctx(context), &rwq->buf, align(rwq->buf_size, to_mdev (context->device)->page_size), to_mdev(context->device)->page_size, default_alloc_type, MLX5_RWQ_PREFIX); if (err) { free(rwq->rq.wrid); errno = ENOMEM; return -1; } return 0; } struct ibv_wq *mlx5_create_wq(struct ibv_context *context, struct ibv_wq_init_attr *attr) { struct mlx5_create_wq cmd; struct mlx5_create_wq_resp resp; int err; struct mlx5_rwq *rwq; struct mlx5_context *ctx = to_mctx(context); int ret; int32_t usr_idx = 0; FILE *fp = ctx->dbg_fp; if (attr->wq_type != IBV_WQT_RQ) return NULL; memset(&cmd, 0, sizeof(cmd)); memset(&resp, 0, sizeof(resp)); rwq = calloc(1, sizeof(*rwq)); if (!rwq) return NULL; ret = ibv_init_wq(&rwq->wq); if (ret < 0) goto err; rwq->wq_sig = rwq_sig_enabled(context); if (rwq->wq_sig) cmd.drv.flags = MLX5_RWQ_FLAG_SIGNATURE; ret = 
mlx5_calc_rwq_size(ctx, rwq, attr); if (ret < 0) { errno = -ret; goto err_cleanup_wq; } rwq->buf_size = ret; if (mlx5_alloc_rwq_buf(context, rwq, ret)) goto err_cleanup_wq; mlx5_init_rwq_indices(rwq); if (mlx5_spinlock_init(&rwq->rq.lock)) goto err_free_rwq_buf; rwq->db = mlx5_alloc_dbrec(ctx); if (!rwq->db) goto err_spl; rwq->db[MLX5_RCV_DBR] = 0; rwq->db[MLX5_SND_DBR] = 0; rwq->pbuff = rwq->buf.buf + rwq->rq.offset; rwq->recv_db = &rwq->db[MLX5_RCV_DBR]; cmd.drv.buf_addr = (uintptr_t)rwq->buf.buf; cmd.drv.db_addr = (uintptr_t)rwq->db; cmd.drv.rq_wqe_count = rwq->rq.wqe_cnt; cmd.drv.rq_wqe_shift = rwq->rq.wqe_shift; usr_idx = mlx5_store_uidx(ctx, rwq); if (usr_idx < 0) { mlx5_dbg(fp, MLX5_DBG_QP, "Couldn't find free user index\n"); goto err_free_db_rec; } cmd.drv.user_index = usr_idx; err = ibv_cmd_create_wq(context, attr, &rwq->wq, &cmd.ibv_cmd, sizeof(cmd.ibv_cmd), sizeof(cmd), &resp.ibv_resp, sizeof(resp.ibv_resp), sizeof(resp)); if (err) goto err_create; rwq->rsc.type = MLX5_RSC_TYPE_RWQ; rwq->rsc.rsn = cmd.drv.user_index; rwq->wq.post_recv = mlx5_post_wq_recv; return &rwq->wq; err_create: mlx5_clear_uidx(ctx, cmd.drv.user_index); err_free_db_rec: mlx5_free_db(to_mctx(context), rwq->db); err_spl: mlx5_spinlock_destroy(&rwq->rq.lock); err_free_rwq_buf: mlx5_free_rwq_buf(rwq, context); err_cleanup_wq: ibv_cleanup_wq(&rwq->wq); err: free(rwq); return NULL; } int mlx5_modify_wq(struct ibv_wq *wq, struct ibv_wq_attr *attr) { struct mlx5_modify_wq cmd = {}; struct mlx5_rwq *rwq = to_mrwq(wq); if ((attr->attr_mask & IBV_WQ_ATTR_STATE) && attr->wq_state == IBV_WQS_RDY) { if ((attr->attr_mask & IBV_WQ_ATTR_CURR_STATE) && attr->curr_wq_state != wq->state) return -EINVAL; if (wq->state == IBV_WQS_RESET) { mlx5_spin_lock(&to_mcq(wq->cq)->lock); __mlx5_cq_clean(to_mcq(wq->cq), rwq->rsc.rsn, NULL); mlx5_spin_unlock(&to_mcq(wq->cq)->lock); mlx5_init_rwq_indices(rwq); rwq->db[MLX5_RCV_DBR] = 0; rwq->db[MLX5_SND_DBR] = 0; } } return ibv_cmd_modify_wq(wq, attr, &cmd.ibv_cmd, sizeof(cmd.ibv_cmd), sizeof(cmd)); } int mlx5_destroy_wq(struct ibv_wq *wq) { struct mlx5_rwq *rwq = to_mrwq(wq); int ret; ret = ibv_cmd_destroy_wq(wq); if (ret) return ret; mlx5_spin_lock(&to_mcq(wq->cq)->lock); __mlx5_cq_clean(to_mcq(wq->cq), rwq->rsc.rsn, NULL); mlx5_spin_unlock(&to_mcq(wq->cq)->lock); mlx5_clear_uidx(to_mctx(wq->context), rwq->rsc.rsn); mlx5_free_db(to_mctx(wq->context), rwq->db); mlx5_spinlock_destroy(&rwq->rq.lock); mlx5_free_rwq_buf(rwq, wq->context); ibv_cleanup_wq(&rwq->wq); free(rwq); return 0; } struct ibv_rwq_ind_table *mlx5_create_rwq_ind_table(struct ibv_context *context, struct ibv_rwq_ind_table_init_attr *init_attr) { struct ibv_create_rwq_ind_table *cmd; struct mlx5_create_rwq_ind_table_resp resp; struct ibv_rwq_ind_table *ind_table; uint32_t required_tbl_size; int num_tbl_entries; int cmd_size; int err; num_tbl_entries = 1 << init_attr->log_ind_tbl_size; /* Data must be u64 aligned */ required_tbl_size = (num_tbl_entries * sizeof(uint32_t)) < sizeof(uint64_t) ? 
sizeof(uint64_t) : (num_tbl_entries * sizeof(uint32_t)); cmd_size = required_tbl_size + sizeof(*cmd); cmd = calloc(1, cmd_size); if (!cmd) return NULL; memset(&resp, 0, sizeof(resp)); ind_table = calloc(1, sizeof(*ind_table)); if (!ind_table) goto free_cmd; err = ibv_cmd_create_rwq_ind_table(context, init_attr, ind_table, cmd, cmd_size, cmd_size, &resp.ibv_resp, sizeof(resp.ibv_resp), sizeof(resp)); if (err) goto err; free(cmd); return ind_table; err: free(ind_table); free_cmd: free(cmd); return NULL; } int mlx5_destroy_rwq_ind_table(struct ibv_rwq_ind_table *rwq_ind_table) { int ret; ret = ibv_cmd_destroy_rwq_ind_table(rwq_ind_table); if (ret) return ret; free(rwq_ind_table); return 0; }