/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. */ /* * sol_uverbs.c * * Solaris OFED User Verbs kernel agent module * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static void *statep; static ibt_clnt_hdl_t sol_uverbs_ib_clntp = NULL; char *sol_uverbs_dbg_str = "sol_uverbs"; static int sol_uverbs_attach(dev_info_t *dip, ddi_attach_cmd_t cmd); static int sol_uverbs_detach(dev_info_t *dip, ddi_detach_cmd_t cmd); static int sol_uverbs_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp); static int sol_uverbs_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int flags, char *name, caddr_t valuep, int *lengthp); static int sol_uverbs_open(dev_t *devp, int flag, int otyp, cred_t *cred); static int sol_uverbs_close(dev_t dev, int flag, int otyp, cred_t *cred); static int sol_uverbs_poll(dev_t, short, int, short *, struct pollhead **); static int sol_uverbs_read(dev_t dev, struct uio *uiop, cred_t *credp); static int sol_uverbs_mmap(dev_t dev, off_t sol_uverbs_mmap, int prot); static int 
sol_uverbs_write(dev_t dev, struct uio *uiop, cred_t *credp);

/*
 * Character device entry points.  open, close, read, write, mmap, chpoll
 * and prop_op are routed to the driver routines declared above; the
 * remaining entry points are set to nodev.
 */
static struct cb_ops sol_uverbs_cb_ops = {
	.cb_open	= sol_uverbs_open,
	.cb_close	= sol_uverbs_close,
	.cb_strategy	= nodev,
	.cb_print	= nodev,
	.cb_dump	= nodev,
	.cb_read	= sol_uverbs_read,
	.cb_write	= sol_uverbs_write,
	.cb_ioctl	= nodev,
	.cb_devmap	= nodev,
	.cb_mmap	= sol_uverbs_mmap,
	.cb_segmap	= nodev,
	.cb_chpoll	= sol_uverbs_poll,
	.cb_prop_op	= sol_uverbs_prop_op,
	.cb_str		= NULL,
	.cb_flag	= D_NEW | D_MP,
	.cb_rev		= CB_REV,
	.cb_aread	= nodev,
	.cb_awrite	= nodev
};

/*
 * Device operations.  getinfo, attach and detach are implemented by this
 * driver; identify and probe are nulldev, reset and power are nodev.
 */
static struct dev_ops sol_uverbs_dev_ops = {
	.devo_rev	= DEVO_REV,
	.devo_refcnt	= 0,
	.devo_getinfo	= sol_uverbs_getinfo,
	.devo_identify	= nulldev,
	.devo_probe	= nulldev,
	.devo_attach	= sol_uverbs_attach,
	.devo_detach	= sol_uverbs_detach,
	.devo_reset	= nodev,
	.devo_cb_ops	= &sol_uverbs_cb_ops,
	.devo_bus_ops	= NULL,
	.devo_power	= nodev,
	.devo_quiesce	= ddi_quiesce_not_needed
};

/*
 * Loadable driver linkage: ties the dev_ops above to the module.
 */
static struct modldrv modldrv = {
	.drv_modops	= &mod_driverops,
	.drv_linkinfo	= "Solaris User Verbs driver",
	.drv_dev_ops	= &sol_uverbs_dev_ops
};

/*
 * Module linkage handed to mod_install(), mod_info() and mod_remove().
 */
static struct modlinkage modlinkage = {
	.ml_rev		= MODREV_1,
	.ml_linkage	= {
		[0] = &modldrv,
		[1] = NULL,
	}
};

/*
 * User Object Tables for management of user resources. The tables are driver
 * wide, but each user context maintains a list of the objects it has created
 * that is used in cleanup.
 */
sol_ofs_uobj_table_t	uverbs_uctxt_uo_tbl;
sol_ofs_uobj_table_t	uverbs_upd_uo_tbl;
sol_ofs_uobj_table_t	uverbs_uah_uo_tbl;
sol_ofs_uobj_table_t	uverbs_umr_uo_tbl;
sol_ofs_uobj_table_t	uverbs_ucq_uo_tbl;
sol_ofs_uobj_table_t	uverbs_usrq_uo_tbl;
sol_ofs_uobj_table_t	uverbs_uqp_uo_tbl;
sol_ofs_uobj_table_t	uverbs_ufile_uo_tbl;

/* Setup and teardown of the user object tables; called from _init/_fini. */
static void sol_uverbs_user_objects_init(void);
static void sol_uverbs_user_objects_fini(void);

/*
 * Open Fabric User Verbs API, command table.  See ib_user_verbs.h for
 * definitions.
*/ static int (*uverbs_cmd_table[])(uverbs_uctxt_uobj_t *uctxt, char *buf, int in_len, int out_len) = { [IB_USER_VERBS_CMD_GET_CONTEXT] = sol_uverbs_get_context, [IB_USER_VERBS_CMD_QUERY_DEVICE] = sol_uverbs_query_device, [IB_USER_VERBS_CMD_QUERY_PORT] = sol_uverbs_query_port, [IB_USER_VERBS_CMD_ALLOC_PD] = sol_uverbs_alloc_pd, [IB_USER_VERBS_CMD_DEALLOC_PD] = sol_uverbs_dealloc_pd, [IB_USER_VERBS_CMD_REG_MR] = sol_uverbs_reg_mr, [IB_USER_VERBS_CMD_DEREG_MR] = sol_uverbs_dereg_mr, [IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] = sol_uverbs_create_comp_channel, [IB_USER_VERBS_CMD_CREATE_CQ] = sol_uverbs_create_cq, [IB_USER_VERBS_CMD_RESIZE_CQ] = sol_uverbs_resize_cq, [IB_USER_VERBS_CMD_POLL_CQ] = sol_uverbs_poll_cq, [IB_USER_VERBS_CMD_REQ_NOTIFY_CQ] = sol_uverbs_req_notify_cq, [IB_USER_VERBS_CMD_DESTROY_CQ] = sol_uverbs_destroy_cq, [IB_USER_VERBS_CMD_CREATE_QP] = sol_uverbs_create_qp, [IB_USER_VERBS_CMD_QUERY_QP] = sol_uverbs_query_qp, [IB_USER_VERBS_CMD_MODIFY_QP] = sol_uverbs_modify_qp, [IB_USER_VERBS_CMD_DESTROY_QP] = sol_uverbs_destroy_qp, [IB_USER_VERBS_CMD_POST_SEND] = sol_uverbs_dummy_command, [IB_USER_VERBS_CMD_POST_RECV] = sol_uverbs_dummy_command, [IB_USER_VERBS_CMD_POST_SRQ_RECV] = sol_uverbs_dummy_command, [IB_USER_VERBS_CMD_CREATE_AH] = sol_uverbs_create_ah, [IB_USER_VERBS_CMD_DESTROY_AH] = sol_uverbs_destroy_ah, [IB_USER_VERBS_CMD_ATTACH_MCAST] = sol_uverbs_attach_mcast, [IB_USER_VERBS_CMD_DETACH_MCAST] = sol_uverbs_detach_mcast, [IB_USER_VERBS_CMD_CREATE_SRQ] = sol_uverbs_create_srq, [IB_USER_VERBS_CMD_MODIFY_SRQ] = sol_uverbs_modify_srq, [IB_USER_VERBS_CMD_QUERY_SRQ] = sol_uverbs_query_srq, [IB_USER_VERBS_CMD_DESTROY_SRQ] = sol_uverbs_destroy_srq, /* TODO - XRC */ [IB_USER_VERBS_CMD_CREATE_XRC_SRQ] = sol_uverbs_dummy_command, [IB_USER_VERBS_CMD_OPEN_XRC_DOMAIN] = sol_uverbs_dummy_command, [IB_USER_VERBS_CMD_CLOSE_XRC_DOMAIN] = sol_uverbs_dummy_command, [IB_USER_VERBS_CMD_CREATE_XRC_RCV_QP] = sol_uverbs_dummy_command, [IB_USER_VERBS_CMD_MODIFY_XRC_RCV_QP] = 
sol_uverbs_dummy_command, [IB_USER_VERBS_CMD_QUERY_XRC_RCV_QP] = sol_uverbs_dummy_command, [IB_USER_VERBS_CMD_REG_XRC_RCV_QP] = sol_uverbs_dummy_command, [IB_USER_VERBS_CMD_UNREG_XRC_RCV_QP] = sol_uverbs_dummy_command, [IB_USER_VERBS_CMD_QUERY_GID] = sol_uverbs_query_gid, [IB_USER_VERBS_CMD_QUERY_PKEY] = sol_uverbs_query_pkey, }; /* * Function: * sol_uverbs_hca_open * Input: * mod_ctxt - Pointer to the user verbs module context. * Output: * None * Returns: * Zero on success, else error code. * Description: * Register as a client with the IBT framework and open all of the * HCA's present. */ static int sol_uverbs_hca_open(uverbs_module_context_t *mod_ctxt) { int status; int hca_ndx; #ifdef DEBUG llist_head_t *entry; sol_uverbs_hca_t *temp; #endif mod_ctxt->hca_count = 0; mod_ctxt->hca_guid_list = NULL; mod_ctxt->hcas = NULL; mod_ctxt->clnt_hdl = NULL; mod_ctxt->clnt_modinfo.mi_ibt_version = IBTI_V_CURR; mod_ctxt->clnt_modinfo.mi_clnt_class = IBT_USER; mod_ctxt->clnt_modinfo.mi_async_handler = uverbs_async_event_handler; mod_ctxt->clnt_modinfo.mi_reserved = NULL; mod_ctxt->clnt_modinfo.mi_clnt_name = "sol_uverbs"; status = ibt_attach(&mod_ctxt->clnt_modinfo, mod_ctxt->dip, mod_ctxt, &mod_ctxt->clnt_hdl); if (status != IBT_SUCCESS) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "hca_open:ibt_attach fail %d", status); status = ENODEV; goto out_err; } if (sol_uverbs_ib_clntp == NULL) sol_uverbs_ib_clntp = mod_ctxt->clnt_hdl; mod_ctxt->hca_count = ibt_get_hca_list(&mod_ctxt->hca_guid_list); if (mod_ctxt->hca_count == 0) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "hca_open: Zero HCAs on this system!"); status = ENODEV; goto out_err; } if (mod_ctxt->hca_count > SOL_UVERBS_DRIVER_MAX_HCA_MINOR) { SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "hca_open: HCA count %d exceeds max %d", mod_ctxt->hca_count, SOL_UVERBS_DRIVER_MAX_HCA_MINOR); status = ENODEV; goto out_err; } SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "hca_open: No. 
of HCAs present %d", mod_ctxt->hca_count); mod_ctxt->hcas = kmem_zalloc(mod_ctxt->hca_count * sizeof (sol_uverbs_hca_t), KM_SLEEP); ASSERT(mod_ctxt->hcas != NULL); /* * Note: we open these in the reverse order of the guid list, although * this is technically not required it is done this way so that the * mapping will be in same order as the interfaces. Also note, that we * provide a guid property, and the guid should be used to map a verbs * device to an interface (i.e. don't depend on the order). */ for (hca_ndx = 0; hca_ndx < mod_ctxt->hca_count; hca_ndx++) { status = ibt_open_hca(mod_ctxt->clnt_hdl, mod_ctxt->hca_guid_list[mod_ctxt->hca_count - hca_ndx -1], &mod_ctxt->hcas[hca_ndx].hdl); if (status != IBT_SUCCESS) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "hca_open: ibt_open_hca() returned %d", status); goto out_err; } mod_ctxt->hcas[hca_ndx].clnt_hdl = mod_ctxt->clnt_hdl; llist_head_init(&mod_ctxt->hcas[hca_ndx].list, &mod_ctxt->hcas[hca_ndx]); mutex_init(&mod_ctxt->hcas[hca_ndx].event_handler_lock, NULL, MUTEX_DRIVER, NULL); llist_head_init(&mod_ctxt->hcas[hca_ndx].event_handler_list, NULL); mutex_init(&mod_ctxt->hcas[hca_ndx].client_data_lock, NULL, MUTEX_DRIVER, NULL); llist_head_init(&mod_ctxt->hcas[hca_ndx].client_data_list, NULL); mutex_enter(&sol_uverbs_hca_lock); llist_add_tail(&mod_ctxt->hcas[hca_ndx].list, &sol_uverbs_hca_list); mutex_exit(&sol_uverbs_hca_lock); mod_ctxt->hcas[hca_ndx].guid = mod_ctxt->hca_guid_list[mod_ctxt->hca_count - hca_ndx -1]; /* * Get a cached copy of the HCA's attributes for easy access. 
*/ status = ibt_query_hca(mod_ctxt->hcas[hca_ndx].hdl, &mod_ctxt->hcas[hca_ndx].attr); if (status != IBT_SUCCESS) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "hca_open: ibt_query_hca() failed " "(status=%d)", status); goto out_err; } /* Note : GUID is in host order here */ SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "hca_open: HCA index %d, HCA GUID: 0x%016llX", hca_ndx, (u_longlong_t)mod_ctxt->hcas[hca_ndx].guid); } #ifdef DEBUG mutex_enter(&sol_uverbs_hca_lock); list_for_each(entry, &sol_uverbs_hca_list) { temp = (sol_uverbs_hca_t *)entry->ptr; SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "HCA list: entry: %p, handle: %p, " "GUID: 0x%016llX", (void *)entry, (void *)temp->hdl, (u_longlong_t)temp->guid); } mutex_exit(&sol_uverbs_hca_lock); #endif return (0); out_err: /* * Note, cleanup of hca list and associated resources is done via * uverbs_hca_close called outside this routine in the case of bad * status. */ return (status); } /* * Function: * sol_uverbs_hca_close * Input: * mod_ctxt - Pointer to the module context. * Output: * None * Returns: * None * Description: * Close all of the IBT HCAs opened by the driver and detach from * the IBT framework. */ static void sol_uverbs_hca_close(uverbs_module_context_t *mod_ctxt) { int hca_ndx; if (mod_ctxt->hcas != NULL) { mutex_enter(&sol_uverbs_hca_lock); llist_head_init(&sol_uverbs_hca_list, NULL); mutex_exit(&sol_uverbs_hca_lock); for (hca_ndx = 0; hca_ndx < mod_ctxt->hca_count; hca_ndx++) { if (mod_ctxt->hcas[hca_ndx].hdl != NULL) { mutex_destroy(&mod_ctxt->hcas[hca_ndx]. event_handler_lock); mutex_destroy(&mod_ctxt->hcas[hca_ndx]. 
client_data_lock); (void) ibt_close_hca(mod_ctxt-> hcas[hca_ndx].hdl); } } kmem_free(mod_ctxt->hcas, mod_ctxt->hca_count * sizeof (sol_uverbs_hca_t)); mod_ctxt->hcas = NULL; } if ((mod_ctxt->hca_guid_list != NULL) && (mod_ctxt->hca_count > 0)) { ibt_free_hca_list(mod_ctxt->hca_guid_list, mod_ctxt->hca_count); mod_ctxt->hca_count = 0; mod_ctxt->hca_guid_list = NULL; } if (mod_ctxt->clnt_hdl != NULL) { (void) ibt_detach(mod_ctxt->clnt_hdl); mod_ctxt->clnt_hdl = NULL; } } /* * Function: * _init * Input: * None * Output: * None * Returns: * DDI_SUCCESS on success, else error code. * Description: * Perform Solaris OFED user verbs kernel agent driver initialization. */ int _init(void) { int error; error = ddi_soft_state_init(&statep, sizeof (uverbs_module_context_t), 0); if (error != 0) { return (error); } sol_uverbs_user_objects_init(); if (sol_uverbs_common_hca_init()) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "uverbs_hca_init() failed"); ddi_soft_state_fini(&statep); return (ENODEV); } error = mod_install(&modlinkage); if (error != 0) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "uverbs: mod_install failed!!"); sol_uverbs_common_hca_fini(); sol_uverbs_user_objects_fini(); ddi_soft_state_fini(&statep); } return (error); } /* * Function: * _info * Input: * modinfop - Pointer to an opqque modinfo structure. * Output: * modinfop - Updated structure. * Returns: * The mod_info() return code. * Description: * Return information about the loadable module via the mod_info() * kernel function call. */ int _info(struct modinfo *modinfop) { return (mod_info(&modlinkage, modinfop)); } /* * Function: * _fini * Input: * None * Output: * None * Returns: * DDI_SUCCESS on success, else error code returned by * mod_remove kernel function. * Description: * Perform Solaris OFED user verbs kernel agent driver cleanup. 
*/
int
_fini(void)
{
	int	rc;

	rc = mod_remove(&modlinkage);
	if (!rc) {
		sol_uverbs_common_hca_fini();
		sol_uverbs_user_objects_fini();
		/* Pairs with the ddi_soft_state_init() done in _init(). */
		ddi_soft_state_fini(&statep);
	}
	return (rc);
}

/*
 * Function:
 *	sol_uverbs_attach
 * Input:
 *	dip	- A pointer to the devices dev_info_t structure.
 *	cmd	- Type of attach (DDI_ATTACH or DDI_RESUME).
 * Output:
 *	None
 * Returns:
 *	DDI_SUCCESS on success, else DDI_FAILURE.
 * Description:
 *	Attaches the single (instance 0) sol_uverbs device: allocates the
 *	soft state, opens the HCAs through sol_uverbs_hca_open(), exports
 *	the "abi-version" property, creates one "uverbs<N>" minor node per
 *	HCA (with "guid", "vendor-id" and "device-id" properties) and the
 *	"ucma" and "event" minor nodes.  Failure to add a property is
 *	logged but is not fatal; failure to create a minor node is.
 */
static int
sol_uverbs_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	uverbs_module_context_t	*mod_ctxt;
	int			rc, instance, hca_ndx;

	switch (cmd) {
	case DDI_ATTACH:
		break;
	case DDI_RESUME:
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}

	/*
	 * Allocate a soft data structure based on this dev info
	 */
	instance = ddi_get_instance(dip);
	if (instance != 0) {
		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
		    "attach: bad instance number %d", instance);
		return (DDI_FAILURE);
	}

	if (ddi_soft_state_zalloc(statep, instance) != DDI_SUCCESS) {
		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
		    "attach: bad state zalloc");
		return (DDI_FAILURE);
	}

	mod_ctxt = ddi_get_soft_state(statep, instance);
	if (mod_ctxt == NULL) {
		ddi_soft_state_free(statep, instance);
		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
		    "attach: cannot get soft state");
		return (DDI_FAILURE);
	}

	/*
	 * Save off our private context in the dev_info
	 */
	mod_ctxt->dip = dip;
	ddi_set_driver_private(dip, mod_ctxt);

	/*
	 * Opening of the hca will perform the ibt_attach and build a list of
	 * devices.
	 */
	rc = sol_uverbs_hca_open(mod_ctxt);
	if (rc) {
		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
		    "attach: sol_uverbs_hca_open() (rc=%d)", rc);
		goto error;
	}

	/*
	 * Export our ABI revision as a property.
	 */
	rc = ddi_prop_update_int(makedevice(ddi_driver_major(dip), 0),
	    dip, "abi-version", IB_USER_VERBS_ABI_VERSION);
	if (rc != DDI_SUCCESS) {
		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
		    "attach: could not add abi-version property");
	}

	/*
	 * Create the filesystem device node for each HCA.
	 */
	for (hca_ndx = 0; hca_ndx < mod_ctxt->hca_count; hca_ndx++) {
		char	name[20];

		(void) snprintf(name, sizeof (name), "uverbs%d", hca_ndx);
		rc = ddi_create_minor_node(dip, name, S_IFCHR, hca_ndx,
		    DDI_PSEUDO, 0);
		if (rc != DDI_SUCCESS) {
			SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
			    "attach: could not add character node");
			goto error;
		}

		rc = ddi_prop_update_int64(makedevice(ddi_driver_major(dip),
		    hca_ndx), dip, "guid", mod_ctxt->hcas[hca_ndx].guid);
		if (rc != DDI_SUCCESS) {
			SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
			    "attach: could not add GUID property");
		}

		rc = ddi_prop_update_int(makedevice(ddi_driver_major(dip),
		    hca_ndx), dip, "vendor-id",
		    mod_ctxt->hcas[hca_ndx].attr.hca_vendor_id);
		if (rc != DDI_SUCCESS) {
			SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
			    "attach: could not add vendor-id property");
		}

		rc = ddi_prop_update_int(makedevice(ddi_driver_major(dip),
		    hca_ndx), dip, "device-id", mod_ctxt->
		    hcas[hca_ndx].attr.hca_device_id);
		if (rc != DDI_SUCCESS) {
			SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
			    "attach: could not add device-id property");
		}
	}

	rc = ddi_create_minor_node(dip, "ucma", S_IFCHR,
	    SOL_UVERBS_DRIVER_MAX_HCA_MINOR, DDI_PSEUDO, 0);
	if (rc != DDI_SUCCESS) {
		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
		    "attach: could not add minor for ucma");
		goto error;
	}

	rc = ddi_create_minor_node(dip, "event", S_IFCHR,
	    SOL_UVERBS_DRIVER_EVENT_MINOR, DDI_PSEUDO, 0);
	if (rc != DDI_SUCCESS) {
		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
		    "attach: could not add minor for events");
		goto error;
	}

	ddi_report_dev(dip);
	return (DDI_SUCCESS);

error:
	/*
	 * Cleanup any resources and detach.  Minor nodes created before the
	 * failure must not outlive the soft state they refer to, and the
	 * driver-private pointer must not be left pointing at freed memory.
	 * rc may hold an errno or IBT status at this point, so the DDI
	 * failure code is returned explicitly.
	 */
	ddi_remove_minor_node(dip, NULL);
	sol_uverbs_hca_close(mod_ctxt);
	ddi_set_driver_private(dip, NULL);
	ddi_soft_state_free(statep, instance);

	return (DDI_FAILURE);
}

/*
 * Function:
 *	sol_uverbs_detach
 * Input:
 *	dip	- A pointer to the devices dev_info_t structure.
 *	cmd	- Type of detach (DDI_DETACH or DDI_SUSPEND).
 * Output:
 *	None
 * Returns:
 *	DDI_SUCCESS on success, else error code.
* Description:
 *	Detaches the driver module and will cause the driver to close
 *	the underlying IBT HCA and detach from the IBT driver.  Note
 *	that this call will fail if user verb consumers or ucma have a
 *	sol_uverbs device open.
 */
static int
sol_uverbs_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	uverbs_module_context_t	*ctx;
	int			inst;

	SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "detach()");

	/* Only a full detach is supported. */
	if (cmd != DDI_DETACH)
		return (DDI_FAILURE);

	/* The driver has exactly one instance, numbered zero. */
	inst = ddi_get_instance(dip);
	if (inst != 0) {
		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
		    "detach: bad instance number 0x%x", inst);
		return (DDI_FAILURE);
	}

	/* Refuse while any user context object is still in the table. */
	rw_enter(&uverbs_uctxt_uo_tbl.uobj_tbl_lock, RW_WRITER);
	if (uverbs_uctxt_uo_tbl.uobj_tbl_uo_cnt > 0) {
		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
		    "detach(): device in use");
		rw_exit(&uverbs_uctxt_uo_tbl.uobj_tbl_lock);
		return (DDI_FAILURE);
	}
	rw_exit(&uverbs_uctxt_uo_tbl.uobj_tbl_lock);

	/*
	 * Refuse as well while the kernel client list is non-empty: those
	 * agents are still using sol_uverbs IBT handles.
	 */
	mutex_enter(&sol_uverbs_hca_lock);
	if (!llist_empty(&sol_uverbs_client_list)) {
		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
		    "detach: agents still registered");
		mutex_exit(&sol_uverbs_hca_lock);
		return (DDI_FAILURE);
	}
	mutex_exit(&sol_uverbs_hca_lock);

	/*
	 * Closing the HCAs also detaches from IBTF; after that the soft
	 * state and every minor node of this dip are released.
	 */
	ctx = ddi_get_soft_state(statep, inst);
	sol_uverbs_hca_close(ctx);
	ddi_soft_state_free(statep, inst);
	ddi_remove_minor_node(dip, NULL);

	return (DDI_SUCCESS);
}

/*
 * Function:
 *	sol_uverbs_getinfo
 * Input:
 *	dip	- Deprecated, do not use.
 *	cmd	- Command argument (DDI_INFO_DEVT2DEVINFO or
 *		  DDI_INFO_DEVT2INSTANCE).
 *	arg	- Command specific argument.
 *	resultp	- Pointer to place results.
 * Output:
 *	resultp	- Location is updated with command results.
 * Returns:
 *	DDI_SUCCESS on success, else error code.
 * Description:
 *	Depending on the request (cmd) return either the dev_info_t pointer
 *	associated with the dev_info_t specified, or the instance.
Note * that we have only a single instance. */ /* ARGSUSED */ static int sol_uverbs_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp) { uverbs_module_context_t *mod_ctxt; switch (cmd) { case DDI_INFO_DEVT2DEVINFO: mod_ctxt = ddi_get_soft_state(statep, 0); if (!mod_ctxt) { return (DDI_FAILURE); } *resultp = (void *)mod_ctxt->dip; return (DDI_SUCCESS); case DDI_INFO_DEVT2INSTANCE: *resultp = 0; return (DDI_SUCCESS); default: return (DDI_FAILURE); } } /* * Function: * sol_uverbs_prop_op * Input: * dev - The device number associated with this device. * dip - A pointer to the device information structure for this device. * prop_op - Property operator (PROP_LEN, PROP_LEN_AND_VAL_BUF, or * PROP_LEN_AND_VAL_ALLOC). * flags - Only possible flag value is DDI_PROP_DONTPASS. * name - Pointer to the property to be interrogated. * valuep - Address of pointer if ALLOC, otherwise a pointer to the * users buffer. * lengthp - Pointer to update with property length. * Output: * valuep - Updated with the property value. * lenghtp - Updated with the property length. * Returns: * DDI_SUCCESS on success, else error code. * Description: * Driver entry point to report the values of certain properties of the * driver or device. */ static int sol_uverbs_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int flags, char *name, caddr_t valuep, int *lengthp) { return (ddi_prop_op(dev, dip, prop_op, flags, name, valuep, lengthp)); } static uverbs_uctxt_uobj_t *sol_uverbs_alloc_uctxt(dev_t *, uverbs_module_context_t *, minor_t); /* * Function: * sol_uverbs_open * Input: * devp - A pointer to the device number. * flag - Flags specified by caller (FEXCL, FNDELAY, FREAD, FWRITE). * otyp - Open type (OTYP_BLK, OTYP_CHR, OTYP_LYR). * cred - Pointer to the callers credentials. * Output: * devp - On success devp has been cloned to point to a unique minor * device. * Returns: * DDI_SUCCESS on success, else error code. 
* Description:
 *	Handles a user process open of a specific user verbs minor device by
 *	allocating a user context user object and creating a unique device
 *	to identify the user.  Note: The first SOL_UVERBS_DRIVER_MAX_MINOR
 *	minor numbers are reserved for :
 *		0 to SOL_UVERBS_DRIVER_MAX_HCA_MINOR - 1 : actual HCA devices
 *		SOL_UVERBS_DRIVER_MAX_HCA_MINOR		 : UCMA node
 *		SOL_UVERBS_DRIVER_EVENT_MINOR		 :
 *			Event file for opening an event file for completion
 *			or async notifications.
 */
/* ARGSUSED */
static int
sol_uverbs_open(dev_t *devp, int flag, int otyp, cred_t *cred)
{
	uverbs_module_context_t	*mctx;
	uverbs_uctxt_uobj_t	*new_uctxt;
	int			minor_num;
	int			is_event_node;
	int			is_hca_node;

	/* Only character opens are accepted. */
	if (otyp != OTYP_CHR) {
		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "open: not CHR");
		return (EINVAL);
	}

	mctx = ddi_get_soft_state(statep, 0);
	if (mctx == NULL) {
		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
		    "open: get soft state failed");
		return (ENXIO);
	}

	minor_num = getminor(*devp);

	/*
	 * The ucma node may only be opened with kernel credentials, and no
	 * user context is allocated for it.
	 */
	if (minor_num == SOL_UVERBS_DRIVER_MAX_HCA_MINOR) {
		extern cred_t	*kcred;

		SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "open: ucma_open");
		if (cred == kcred)
			return (DDI_SUCCESS);

		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
		    "open: ucma_open non-kernel context");
		return (ENOTSUP);
	}

	/*
	 * Anything else must be either the event node or the minor of an
	 * HCA that is actually present (below both the reserved HCA minor
	 * range and the HCA count).
	 */
	is_event_node = (minor_num == SOL_UVERBS_DRIVER_EVENT_MINOR);
	is_hca_node = (minor_num < SOL_UVERBS_DRIVER_MAX_HCA_MINOR &&
	    minor_num < mctx->hca_count);
	if (!is_event_node && !is_hca_node) {
		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
		    "open: bad minor %d", minor_num);
		return (ENODEV);
	}
	SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "open() - minor %d", minor_num);

	/*
	 * Allocate the user context for this open.  Its unique ID becomes
	 * the minor number of the clone device stored through devp.  The
	 * allocation places one reference on the object, held until the
	 * device is closed.
	 *
	 * On success sol_uverbs_alloc_uctxt() returns with the uobj uo_lock
	 * held as WRITER.
	 */
	new_uctxt = sol_uverbs_alloc_uctxt(devp, mctx, minor_num);
	if (new_uctxt == NULL) {
		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
		    "open: user context alloc failed");
		return (ENODEV);
	}

	/*
	 * Mark the object live, then drop the write lock that was taken on
	 * it at allocation.
	 */
	new_uctxt->uobj.uo_live = 1;
	rw_exit(&new_uctxt->uobj.uo_lock);

	return (DDI_SUCCESS);
}

/*
 * Function:
 *	sol_uverbs_close
 * Input:
 *	dev	- Device number.
 *	flag	- File status flag.
 *	otyp	- Open type.
 *	cred	- A pointer to the callers credentials.
 * Output:
 *	None
 * Returns:
 *	DDI_SUCCESS on success, else error code.
 * Description:
 *	Handles a user process close of a specific user verbs minor device by
 *	freeing any user objects this process may still have allocated and
 *	deleting the associated user context object.
 */
/* ARGSUSED */
static int
sol_uverbs_close(dev_t dev, int flag, int otyp, cred_t *cred)
{
	minor_t			id = getminor(dev);
	genlist_entry_t		*entry, *new_entry;
	uverbs_uctxt_uobj_t	*uctxt;
	int			rc;
	genlist_t		tmp_genlist;

	/*
	 * HCA specific device nodes created during attach are being
	 * closed. Return SUCCESS.
	 */
	if (id < SOL_UVERBS_DRIVER_MAX_MINOR) {
		SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str,
		    "uverbs_close: dev_t %x, minor %x < %x",
		    dev, id, SOL_UVERBS_DRIVER_MAX_MINOR);
		return (0);
	}

	/*
	 * Must be a user or kernel open, i.e. not a minor node
	 * that represents a user verbs device.  If it is the UCMA
	 * nothing needs to be done.
*/
	/*
	 * NOTE(review): the open() header comment lists
	 * SOL_UVERBS_DRIVER_MAX_HCA_MINOR among the minors below
	 * SOL_UVERBS_DRIVER_MAX_MINOR; if so, such ids have already returned
	 * from the range check earlier in this function and this test can
	 * never be true - confirm against the header definitions.
	 */
	if (id == SOL_UVERBS_DRIVER_MAX_HCA_MINOR) {
		SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str,
		    "uverbs_close: ucma close");
		return (DDI_SUCCESS);
	}

	/*
	 * Clone minors are the user context id offset by
	 * SOL_UVERBS_DRIVER_MAX_MINOR.  The lookup's name indicates it
	 * returns the context write-locked; every exit path below pairs it
	 * with sol_ofs_uobj_put().
	 */
	uctxt = uverbs_uobj_get_uctxt_write(id - SOL_UVERBS_DRIVER_MAX_MINOR);
	if (uctxt == NULL) {
		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
		    "uverbs_close: Unknown user context");
		return (ENXIO);
	}
	SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "uverbs_close- "
	    "uctxt %p", uctxt);

	/*
	 * Remove from the user context resource table, cleanup all
	 * user resources that may still be hanging around.
	 */
	if (!sol_ofs_uobj_remove(&uverbs_uctxt_uo_tbl, &uctxt->uobj)) {
		/*
		 * It was already removed, drop the lock held from
		 * get above and exit.
		 */
		sol_ofs_uobj_put(&uctxt->uobj);
		return (ENXIO);
	}

	/*
	 * Event-style contexts (async, completion, or a plain event file
	 * never bound to a verbs context) own no verbs resources, so they
	 * are finished off here without walking the resource lists.
	 */
	if (uctxt->uctxt_type == SOL_UVERBS_UCTXT_ASYNC ||
	    uctxt->uctxt_type == SOL_UVERBS_UCTXT_COMPL) {
		uverbs_uctxt_uobj_t	*verbs_uctxt;

		SOL_OFS_DPRINTF_L4(sol_uverbs_dbg_str,
		    "uverbs_close: Async or Compl user context");

		/*
		 * Verbs uctxt has already been freed, just return.
		 */
		if (!uctxt->uctxt_verbs_id) {
			sol_ofs_uobj_put(&uctxt->uobj);
			sol_ofs_uobj_deref(&uctxt->uobj, sol_ofs_uobj_free);
			return (0);
		}

		/*
		 * Verbs uctxt has not been freed. Close the ufile. This
		 * also frees the ufile if reference count is 0.
		 */
		verbs_uctxt = uverbs_uobj_get_uctxt_write(
		    uctxt->uctxt_verbs_id - SOL_UVERBS_DRIVER_MAX_MINOR);

		/*
		 * For an async context the event file hangs off the verbs
		 * context; for a completion context only the local
		 * comp_evfile pointer is cleared.
		 */
		if (verbs_uctxt &&
		    uctxt->uctxt_type == SOL_UVERBS_UCTXT_ASYNC) {
			sol_uverbs_event_file_close(verbs_uctxt->async_evfile);
			verbs_uctxt->async_evfile = NULL;
		} else if (uctxt->comp_evfile) {
			uctxt->comp_evfile = NULL;
		}
		if (verbs_uctxt)
			sol_ofs_uobj_put(&verbs_uctxt->uobj);

		sol_ofs_uobj_put(&uctxt->uobj);
		sol_ofs_uobj_deref(&uctxt->uobj, sol_ofs_uobj_free);
		return (0);
	} else if (uctxt->uctxt_type == SOL_UVERBS_UCTXT_EVENT) {
		sol_ofs_uobj_put(&uctxt->uobj);
		sol_ofs_uobj_deref(&uctxt->uobj, sol_ofs_uobj_free);
		return (0);
	}

	/* From here on this is a verbs context bound to an HCA. */
	ASSERT(uctxt->hca != NULL);

	/*
	 * Release resources that may still be held by this user context.
	 * Remove the resources from the associated resource management
	 * table and free it.
	 */
	mutex_enter(&uctxt->lock);

	/* Address handles: removed from their table and freed directly. */
	entry = remove_genlist_head(&uctxt->ah_list);
	while (entry) {
		uverbs_uah_uobj_t	*uah = (uverbs_uah_uobj_t *)entry->data;

		rw_enter(&(uah->uobj.uo_lock), RW_WRITER);
		(void) sol_ofs_uobj_remove(&uverbs_uah_uo_tbl, &uah->uobj);
		rw_exit(&(uah->uobj.uo_lock));
		(void) ibt_free_ah(uctxt->hca->hdl, uah->ah);
		sol_ofs_uobj_free(&uah->uobj);

		kmem_free((void *)entry, sizeof (genlist_entry_t));
		entry = remove_genlist_head(&uctxt->ah_list);
	}

	/*
	 * Queue pairs: QPs that cannot be freed yet are collected on
	 * tmp_genlist, which replaces uctxt->qp_list once the loop is done.
	 */
	init_genlist(&tmp_genlist);
	entry = remove_genlist_head(&uctxt->qp_list);
	while (entry) {
		uverbs_uqp_uobj_t	*uqp = (uverbs_uqp_uobj_t *)entry->data;

		/* Free unreaped asynchronous events.  */
		uverbs_release_uqp_uevents(uctxt->async_evfile, uqp);

		/*
		 * If ucma has disabled QP free for this QP, set the
		 * uqp_free_state to FREE_PENDING. Free QP if not.
		 */
		rw_enter(&(uqp->uobj.uo_lock), RW_WRITER);
		if (uqp->uqp_free_state != SOL_UVERBS2UCMA_ENABLE_QP_FREE) {
			new_entry = add_genlist(&tmp_genlist, entry->data,
			    entry->data_context);
			uqp->list_entry = new_entry;
			uqp->uqp_free_state = SOL_UVERBS2UCMA_FREE_PENDING;
			rw_exit(&(uqp->uobj.uo_lock));
		} else {
			/*
			 * uctxt->lock is dropped around the free call.  The
			 * uo_lock taken above is not released on this path;
			 * presumably uverbs_uqp_free() consumes it together
			 * with the extra reference - TODO confirm.
			 */
			uqp->list_entry = NULL;
			mutex_exit(&uctxt->lock);
			sol_ofs_uobj_ref(&uqp->uobj);
			rc = uverbs_uqp_free(uqp, uctxt);
			mutex_enter(&uctxt->lock);
			if (rc)
				SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
				    "uqp_free(%p) failed", uqp);
		}
		kmem_free(entry, sizeof (genlist_entry_t));
		entry = remove_genlist_head(&uctxt->qp_list);
	}
	(uctxt->qp_list).count = tmp_genlist.count;
	(uctxt->qp_list).head = tmp_genlist.head;
	(uctxt->qp_list).tail = tmp_genlist.tail;

	/*
	 * Completion queues: same deferral scheme, keyed on whether QPs
	 * still reference the CQ.
	 */
	init_genlist(&tmp_genlist);
	entry = remove_genlist_head(&uctxt->cq_list);
	while (entry) {
		uverbs_ucq_uobj_t	*ucq = (uverbs_ucq_uobj_t *)entry->data;

		rw_enter(&(ucq->uobj.uo_lock), RW_WRITER);

		/* Free events associated with the CQ.
*/ uverbs_release_ucq_channel(uctxt, ucq->comp_chan, ucq); if (ucq->active_qp_cnt) { new_entry = add_genlist(&tmp_genlist, entry->data, entry->data_context); ucq->list_entry = new_entry; ucq->free_pending = 1; rw_exit(&(ucq->uobj.uo_lock)); } else { ucq->list_entry = NULL; sol_ofs_uobj_ref(&ucq->uobj); mutex_exit(&uctxt->lock); rc = uverbs_ucq_free(ucq, uctxt); mutex_enter(&uctxt->lock); if (rc) SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "ucq_free(%p) failed", ucq); } kmem_free((void *)entry, sizeof (genlist_entry_t)); entry = remove_genlist_head(&uctxt->cq_list); } (uctxt->cq_list).count = tmp_genlist.count; (uctxt->cq_list).head = tmp_genlist.head; (uctxt->cq_list).tail = tmp_genlist.tail; init_genlist(&tmp_genlist); entry = remove_genlist_head(&uctxt->srq_list); while (entry) { uverbs_usrq_uobj_t *usrq = (uverbs_usrq_uobj_t *)entry->data; rw_enter(&(usrq->uobj.uo_lock), RW_WRITER); /* Free unreaped asynchronous events. */ uverbs_release_usrq_uevents(uctxt->async_evfile, usrq); if (usrq->active_qp_cnt) { new_entry = add_genlist(&tmp_genlist, entry->data, entry->data_context); usrq->list_entry = new_entry; usrq->free_pending = 1; rw_exit(&(usrq->uobj.uo_lock)); } else { usrq->list_entry = NULL; sol_ofs_uobj_ref(&usrq->uobj); mutex_exit(&uctxt->lock); rc = uverbs_usrq_free(usrq, uctxt); mutex_enter(&uctxt->lock); if (rc) SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "usrq_free(%p) failed", usrq); } kmem_free((void *)entry, sizeof (genlist_entry_t)); entry = remove_genlist_head(&uctxt->srq_list); } (uctxt->srq_list).count = tmp_genlist.count; (uctxt->srq_list).head = tmp_genlist.head; (uctxt->srq_list).tail = tmp_genlist.tail; entry = remove_genlist_head(&uctxt->mr_list); while (entry) { uverbs_umr_uobj_t *umr = (uverbs_umr_uobj_t *)entry->data; rw_enter(&(umr->uobj.uo_lock), RW_WRITER); (void) sol_ofs_uobj_remove(&uverbs_umr_uo_tbl, &umr->uobj); rw_exit(&(umr->uobj.uo_lock)); (void) ibt_deregister_mr(uctxt->hca->hdl, umr->mr); sol_ofs_uobj_free(&umr->uobj); kmem_free((void 
*)entry, sizeof (genlist_entry_t)); entry = remove_genlist_head(&uctxt->mr_list); } entry = remove_genlist_head(&uctxt->pd_list); while (entry) { uverbs_upd_uobj_t *upd = (uverbs_upd_uobj_t *)entry->data; rw_enter(&(upd->uobj.uo_lock), RW_WRITER); if (upd->active_qp_cnt) { new_entry = add_genlist(&tmp_genlist, entry->data, entry->data_context); upd->list_entry = new_entry; upd->free_pending = 1; rw_exit(&(upd->uobj.uo_lock)); } else { upd->list_entry = NULL; sol_ofs_uobj_ref(&upd->uobj); mutex_exit(&uctxt->lock); rc = uverbs_upd_free(upd, uctxt); mutex_enter(&uctxt->lock); if (rc) SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "upd_free(%p) failed", upd); } kmem_free((void *)entry, sizeof (genlist_entry_t)); entry = remove_genlist_head(&uctxt->pd_list); } (uctxt->pd_list).count = tmp_genlist.count; (uctxt->pd_list).head = tmp_genlist.head; (uctxt->pd_list).tail = tmp_genlist.tail; mutex_exit(&uctxt->lock); /* * Release the user file structure to the async file if it * has not be released yet. The uctxt for async file will * be closed when the async file is closed. */ if (uctxt->async_evfile) { uverbs_uctxt_uobj_t *async_uctxt; async_uctxt = uverbs_uobj_get_uctxt_write( uctxt->uctxt_async_id - SOL_UVERBS_DRIVER_MAX_MINOR); if (!async_uctxt) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "uverbs_close: Invalid async_id %x", uctxt->uctxt_async_id); sol_ofs_uobj_put(&uctxt->uobj); return (ENXIO); } async_uctxt->uctxt_verbs_id = 0; sol_uverbs_event_file_close(uctxt->async_evfile); uctxt->async_evfile = NULL; sol_ofs_uobj_put(&async_uctxt->uobj); } /* * Release the write lock and the reference from the get above, and * release the reference placed on the user context as process open * to release context. */ sol_ofs_uobj_put(&uctxt->uobj); /* * If some QPs have not been freed, donot free the uctxt. * Set uctxt_free_pending flag. This will be freed when * the QP will be freed. 
*/ if ((uctxt->qp_list).count) { SOL_OFS_DPRINTF_L3(sol_uverbs_dbg_str, "close: uctxt %p, has pending uqp", uctxt); uctxt->uctxt_free_pending = 1; return (0); } SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "close: deallocated user context: %p, ref = %d", (void *)uctxt, uctxt->uobj.uo_refcnt); sol_ofs_uobj_deref(&uctxt->uobj, sol_ofs_uobj_free); return (0); } /* * Function: * sol_uverbs_read * Input: * dev - Device number. * uiop - Pointer to the uio structgure where data is to be stored. * credp - A pointer to the credentials for the I/O transaction. * Output: * None * Returns: * DDI_SUCCESS on success, else error code. * Description: * User process read stub. */ static int sol_uverbs_read(dev_t dev, struct uio *uiop, cred_t *credp) { minor_t id = getminor(dev); uverbs_uctxt_uobj_t *uctxt, *verbs_uctxt; int rc; SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "uverbs_read(%x, %p, %p)", dev, uiop, credp); ASSERT(id >= SOL_UVERBS_DRIVER_MAX_MINOR); uctxt = uverbs_uobj_get_uctxt_read(id - SOL_UVERBS_DRIVER_MAX_MINOR); if (uctxt == NULL) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "uverbs_read: Failed get user context"); return (ENXIO); } if (uctxt->uctxt_verbs_id < SOL_UVERBS_DRIVER_MAX_MINOR) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "uverbs_read: Invalid Verbs user context id, %x", uctxt->uctxt_verbs_id); sol_ofs_uobj_put(&uctxt->uobj); return (ENXIO); } verbs_uctxt = uverbs_uobj_get_uctxt_read(uctxt->uctxt_verbs_id - SOL_UVERBS_DRIVER_MAX_MINOR); if (verbs_uctxt == NULL) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "uverbs_read: Failed get verbs user context"); sol_ofs_uobj_put(&uctxt->uobj); return (ENXIO); } if (uctxt->uctxt_type == SOL_UVERBS_UCTXT_ASYNC) { ASSERT(verbs_uctxt->async_evfile); rc = sol_uverbs_event_file_read(verbs_uctxt->async_evfile, uiop, credp); } else if (uctxt->uctxt_type == SOL_UVERBS_UCTXT_COMPL) { rc = sol_uverbs_event_file_read(uctxt->comp_evfile, uiop, credp); } else { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "uverbs_read: invalid user context type %x", 
uctxt->uctxt_type); rc = ENXIO; } sol_ofs_uobj_put(&verbs_uctxt->uobj); sol_ofs_uobj_put(&uctxt->uobj); return (rc); } /* * Function: * sol_uverbs_mmap * Input: * dev - Device whose memory is to be mapped. * sol_uverbs_mmap - Offset within the device memory at which mapping * begins. * prot - Bitmask specifying protection. * Output: * None * Returns: * DDI_SUCCESS on success, else error code. * Description: * User process mmap stub. Mmap operations are performed directly * by the underlying IB HCA driver, bypassing the user verbs. */ /* ARGSUSED */ static int sol_uverbs_mmap(dev_t dev, off_t mmap_offset, int prot) { SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "sol_uverbs_mmap(%d)- not yet used", mmap_offset); return (DDI_SUCCESS); } /* * Function: * sol_uverbs_get_context * Input: * uctxt - Pointer to the callers user context. * buf - Pointer to kernel buffer containing command. * in_len - Length in bytes of input command buffer. * out_len - Length in bytes of output response buffer. * Output: * The command output buffer is updated with command results. * Returns: * DDI_SUCCESS on success, else error code. * Description: * User verb entry point to return the unique user context to the process * that opened the associated user verb driver instance. Note that upon * entry a reference will have already been placed on the user * context user space object, so an additional reference is not * required here. 
*/ int sol_uverbs_get_context(uverbs_uctxt_uobj_t *uctxt, char *buf, int in_len, int out_len) { struct mthca_alloc_ucontext_resp uresp; struct ib_uverbs_get_context cmd; struct ib_uverbs_get_context_resp resp; struct ib_udata udata; int rc; minor_t async_id; uverbs_uctxt_uobj_t *async_uctxt; SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "uverbs_get_context() - buf %p, sizeof (cmd) %d", buf, sizeof (cmd)); ASSERT(uctxt->hca); (void) memcpy(&cmd, buf, sizeof (cmd)); udata.inbuf = (void *)(buf + sizeof (cmd)); #ifdef _LP64 udata.outbuf = (void *)(cmd.response.r_laddr + sizeof (resp)); #else udata.outbuf = (void *)(cmd.response.r_addr + sizeof (resp)); #endif udata.inlen = in_len - sizeof (cmd); udata.outlen = out_len - sizeof (resp); /* * libibverbs will have passed minor of the async file in * resp.fd. Use this to determine the uctxt created for * asyncs. */ #ifdef _LP64 rc = copyin((void*)cmd.response.r_laddr, (void*)&resp, sizeof (resp)); #else rc = copyin((void*)cmd.response.r_addr, (void*)&resp, sizeof (resp)); #endif if (rc != 0) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "get_context: copyin (rc=%d)", rc); rc = EFAULT; goto out; } async_id = resp.async_fd; if (async_id < SOL_UVERBS_DRIVER_MAX_MINOR) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "get_context: Invalid async user context " "id %x", async_id); return (ENXIO); } async_uctxt = uverbs_uobj_get_uctxt_read(async_id - SOL_UVERBS_DRIVER_MAX_MINOR); if (async_uctxt == NULL) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "get_context: Failed get async user context"); return (ENXIO); } if (async_uctxt->uctxt_type != SOL_UVERBS_UCTXT_EVENT || async_uctxt->uctxt_verbs_id != 0) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "get_context: Invalid user context - " "possibly reused"); return (ENXIO); } async_uctxt->uctxt_type = SOL_UVERBS_UCTXT_ASYNC; async_uctxt->uctxt_verbs_id = uctxt->uobj.uo_id + SOL_UVERBS_DRIVER_MAX_MINOR; uctxt->uctxt_async_id = async_id; SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "get_context: uctxt %p, async_uctxt %p, 
async_id %x", uctxt, async_uctxt, async_id); sol_ofs_uobj_put(&async_uctxt->uobj); uctxt->async_evfile = uverbs_alloc_event_file(uctxt, 1); if (!uctxt->async_evfile) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "get_context: async event file allocation failed"); goto out; } (void) memset(&resp, 0, sizeof (resp)); resp.num_comp_vectors = 1; #ifdef _LP64 rc = copyout((void*)&resp, (void*)cmd.response.r_laddr, sizeof (resp)); #else rc = copyout((void*)&resp, (void*)cmd.response.r_addr, sizeof (resp)); #endif if (rc != 0) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "get_context: copyout (rc=%d)", rc); rc = EFAULT; goto out; } /* * This unfortunately is Mellanox specific, we need to consider moving * this directly into the command response as opaque data, instead of * using this method. */ (void) memset(&uresp, 0, sizeof (uresp)); uresp.uarc_size = 0; uresp.qp_tab_size = uctxt->hca->attr.hca_max_chans; rc = copyout((void*)&uresp, (void*)udata.outbuf, sizeof (uresp)); if (rc != 0) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "get_context: copyout outbuf (rc=%d)", rc); rc = EFAULT; goto out; } rc = DDI_SUCCESS; out: return (rc); } /* * Function: * sol_uverbs_alloc_pd * Input: * uctxt - Pointer to the callers user context. * buf - Pointer to kernel buffer containing a alloc PD command. * in_len - Length in bytes of input command buffer. * out_len - Length in bytes of output response buffer. * Output: * The command output buffer is updated with command results. * Returns: * DDI_SUCCESS on success, else error code. * Description: * User verb entry point to allocate a device protection domain. 
*/ /* ARGSUSED */ int sol_uverbs_alloc_pd(uverbs_uctxt_uobj_t *uctxt, char *buf, int in_len, int out_len) { struct ib_uverbs_alloc_pd cmd; struct ib_uverbs_alloc_pd_resp resp; uverbs_upd_uobj_t *upd; int rc; SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "alloc_pd()"); (void) memcpy(&cmd, buf, sizeof (cmd)); (void) memset(&resp, 0, sizeof (resp)); upd = kmem_zalloc(sizeof (*upd), KM_NOSLEEP); if (upd == NULL) { rc = ENOMEM; goto out; } sol_ofs_uobj_init(&upd->uobj, 0, SOL_UVERBS_UPD_UOBJ_TYPE); rw_enter(&upd->uobj.uo_lock, RW_WRITER); rc = ibt_alloc_pd(uctxt->hca->hdl, IBT_PD_NO_FLAGS, &upd->pd); if (rc != IBT_SUCCESS) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "alloc_pd: ibt_alloc_pd() (rc=%d)", rc); rc = sol_uverbs_ibt_to_kernel_status(rc); upd->uobj.uo_uobj_sz = sizeof (uverbs_upd_uobj_t); goto alloc_err; } if (sol_ofs_uobj_add(&uverbs_upd_uo_tbl, &upd->uobj) != 0) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "alloc_pd: User object add failed"); rc = ENOMEM; goto err_add_uobj; } resp.pd_handle = upd->uobj.uo_id; /* * Query underlying hardware driver for data that may be required * when using the PD in an OS Bypass creation of UD address vectors. 
*/ rc = ibt_ci_data_out(uctxt->hca->hdl, IBT_CI_NO_FLAGS, IBT_HDL_PD, (void *)upd->pd, &resp.drv_out, sizeof (resp.drv_out)); if (rc != IBT_SUCCESS) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "alloc_pd: ibt_ci_data_out() (rc=%d)", rc); rc = EFAULT; goto err_response; } #ifdef _LP64 rc = copyout((void*)&resp, (void*)cmd.response.r_laddr, sizeof (resp)); #else rc = copyout((void*)&resp, (void*)cmd.response.r_addr, sizeof (resp)); #endif if (rc != 0) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "alloc_pd: copyout fail (rc=%d)", rc); rc = EFAULT; goto err_response; } mutex_enter(&uctxt->lock); upd->list_entry = add_genlist(&uctxt->pd_list, (uintptr_t)upd, uctxt); mutex_exit(&uctxt->lock); if (!upd->list_entry) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "alloc_pd: Error adding upd to pd_list\n"); rc = ENOMEM; goto err_response; } upd->uobj.uo_live = 1; rw_exit(&upd->uobj.uo_lock); return (DDI_SUCCESS); err_response: /* * Need to set uo_live, so sol_ofs_uobj_remove() will * remove the object from the object table. */ upd->uobj.uo_live = 1; (void) sol_ofs_uobj_remove(&uverbs_upd_uo_tbl, &upd->uobj); err_add_uobj: (void) ibt_free_pd(uctxt->hca->hdl, upd->pd); alloc_err: rw_exit(&upd->uobj.uo_lock); sol_ofs_uobj_deref(&upd->uobj, sol_ofs_uobj_free); out: SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "alloc_pd:error (rc=%d)", rc); return (rc); } int uverbs_upd_free(uverbs_upd_uobj_t *upd, uverbs_uctxt_uobj_t *uctxt) { int rc; rc = ibt_free_pd(uctxt->hca->hdl, upd->pd); if (rc != IBT_SUCCESS) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "uverbs_upd_free: ibt_free_pd() failed %d", rc); rc = sol_uverbs_ibt_to_kernel_status(rc); sol_ofs_uobj_put(&upd->uobj); return (rc); } /* * Remove from the list of this contexts PD resources, then remove from * the resource managment table and the reference placed on the user * object at PD allocation. 
*/ upd->pd = NULL; if (upd->list_entry) { mutex_enter(&uctxt->lock); delete_genlist(&uctxt->pd_list, upd->list_entry); mutex_exit(&uctxt->lock); } /* * list_entry is NULL when called from sol_uverbs_close. Remove * from upd_uo_tbl and free upd, when called from close also. */ sol_ofs_uobj_put(&upd->uobj); (void) sol_ofs_uobj_remove(&uverbs_upd_uo_tbl, &upd->uobj); sol_ofs_uobj_deref(&upd->uobj, sol_ofs_uobj_free); return (0); } /* * Function: * sol_uverbs_dealloc_pd * Input: * uctxt - Pointer to the callers user context. * buf - Pointer to kernel buffer containing dealloc PD command. * in_len - Length in bytes of input command buffer. * out_len - Length in bytes of output response buffer. * Output: * The command output buffer is updated with command results. * Returns: * DDI_SUCCESS on success, else error code. * Description: * User verb entry point to de-allocate a device protection domain. */ /* ARGSUSED */ int sol_uverbs_dealloc_pd(uverbs_uctxt_uobj_t *uctxt, char *buf, int in_len, int out_len) { struct ib_uverbs_dealloc_pd cmd; uverbs_upd_uobj_t *upd; int rc = 0; (void) memcpy(&cmd, buf, sizeof (cmd)); SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "dealloc_pd(%d)", cmd.pd_handle); upd = uverbs_uobj_get_upd_write(cmd.pd_handle); if (upd == NULL) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "dealloc_pd(%d) : invalid hdl", cmd.pd_handle); rc = EINVAL; goto err_out1; } if (upd->active_qp_cnt) { sol_ofs_uobj_put(&upd->uobj); rc = EBUSY; } else { rc = uverbs_upd_free(upd, uctxt); } cmd.pd_handle = 0; return (rc); err_out1: return (rc); } /* * Function: * sol_uverbs_query_device * Input: * uctxt - Pointer to the callers user context. * buf - Pointer to kernel buffer containing query device command. * in_len - Length in bytes of input command buffer. * out_len - Length in bytes of output response buffer. * Output: * The command output buffer is updated with command results. * Returns: * DDI_SUCCESS on success, else error code. 
* Description: * User verb entry point to query device attributes. */ /* ARGSUSED */ int sol_uverbs_query_device(uverbs_uctxt_uobj_t *uctxt, char *buf, int in_len, int out_len) { struct ib_uverbs_query_device cmd; struct ib_uverbs_query_device_resp resp; ibt_hca_attr_t hca_attr; int rc; SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "query_device()"); (void) memcpy(&cmd, buf, sizeof (cmd)); rc = ibt_query_hca(uctxt->hca->hdl, &hca_attr); if (rc != IBT_SUCCESS) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "query_device: ibt_query_hca() (rc=%d)", rc); rc = sol_uverbs_ibt_to_kernel_status(rc); goto out; } (void) memset(&resp, 0, sizeof (resp)); resp.fw_ver = ((uint64_t)hca_attr.hca_fw_major_version << 32) | ((uint64_t)hca_attr.hca_fw_minor_version << 16) | ((uint64_t)hca_attr.hca_fw_micro_version); /* * NOTE: node guid and system image guid must be returned in big * endian (network order). On solaris these are in host * order, so we swap it back here. */ resp.node_guid = htonll(hca_attr.hca_node_guid); resp.sys_image_guid = htonll(hca_attr.hca_si_guid); resp.max_mr_size = hca_attr.hca_max_memr_len; resp.page_size_cap = sol_uverbs_ibt_to_of_page_sz(hca_attr.hca_page_sz); resp.vendor_id = hca_attr.hca_vendor_id; resp.vendor_part_id = hca_attr.hca_device_id; resp.hw_ver = hca_attr.hca_version_id; resp.max_qp = hca_attr.hca_max_chans; resp.max_qp_wr = hca_attr.hca_max_chan_sz; resp.device_cap_flags = sol_uverbs_ibt_to_of_device_cap_flags(hca_attr.hca_flags, hca_attr.hca_flags2); resp.max_sge = hca_attr.hca_max_sgl; resp.max_sge_rd = hca_attr.hca_max_sgl; resp.max_cq = hca_attr.hca_max_cq; resp.max_cqe = hca_attr.hca_max_cq_sz; resp.max_mr = hca_attr.hca_max_memr; resp.max_pd = hca_attr.hca_max_pd; resp.max_qp_rd_atom = hca_attr.hca_max_rdma_in_chan; resp.max_ee_rd_atom = 0; resp.max_res_rd_atom = hca_attr.hca_max_rsc; resp.max_qp_init_rd_atom = hca_attr.hca_max_rdma_out_chan; resp.max_ee_init_rd_atom = 0; if (hca_attr.hca_flags & IBT_HCA_ATOMICS_GLOBAL) { resp.atomic_cap = 
IB_ATOMIC_GLOB; } else if (hca_attr.hca_flags & IBT_HCA_ATOMICS_HCA) { resp.atomic_cap = IB_ATOMIC_HCA; } else { resp.atomic_cap = IB_ATOMIC_NONE; } resp.max_ee = 0; resp.max_rdd = 0; resp.max_mw = hca_attr.hca_max_mem_win; resp.max_raw_ipv6_qp = hca_attr.hca_max_ipv6_chan; resp.max_raw_ethy_qp = hca_attr.hca_max_ether_chan; resp.max_mcast_grp = hca_attr.hca_max_mcg; resp.max_mcast_qp_attach = hca_attr.hca_max_chan_per_mcg; resp.max_total_mcast_qp_attach = hca_attr.hca_max_mcg_chans; resp.max_ah = hca_attr.hca_max_ud_dest; resp.max_fmr = hca_attr.hca_max_fmrs; resp.max_map_per_fmr = 0; resp.max_srq = hca_attr.hca_max_srqs; resp.max_srq_wr = hca_attr.hca_max_srqs_sz; resp.max_srq_sge = hca_attr.hca_max_srq_sgl; resp.max_pkeys = hca_attr.hca_max_port_pkey_tbl_sz; resp.local_ca_ack_delay = hca_attr.hca_local_ack_delay; resp.phys_port_cnt = hca_attr.hca_nports; #ifdef _LP64 rc = copyout((void*)&resp, (void*)cmd.response.r_laddr, sizeof (resp)); #else rc = copyout((void*)&resp, (void*)cmd.response.r_addr, sizeof (resp)); #endif if (rc != 0) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "query_device: Error writing resp data (rc=%d)", rc); rc = EFAULT; goto out; } rc = DDI_SUCCESS; out: return (rc); } /* * Function: * sol_uverbs_query_port * Input: * uctxt - Pointer to the callers user context. * buf - Pointer to kernel buffer containing query port command. * in_len - Length in bytes of input command buffer. * out_len - Length in bytes of output response buffer. * Output: * The command output buffer is updated with command results. * Returns: * DDI_SUCCESS on success, else error code. * Description: * User verb entry point to query a device port attributes. 
*/ /* ARGSUSED */ int sol_uverbs_query_port(uverbs_uctxt_uobj_t *uctxt, char *buf, int in_len, int out_len) { struct ib_uverbs_query_port cmd; struct ib_uverbs_query_port_resp resp; ibt_hca_portinfo_t *port_info; uint_t port_info_n; uint_t port_info_size; int rc; (void) memcpy(&cmd, buf, sizeof (cmd)); SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "sol_uverbs_query_port: %d", cmd.port_num); if (!cmd.port_num || cmd.port_num > uctxt->hca->attr.hca_nports) { SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "query_port: Invalid port specified"); rc = EINVAL; goto out; } rc = ibt_query_hca_ports(uctxt->hca->hdl, cmd.port_num, &port_info, &port_info_n, &port_info_size); if (rc != IBT_SUCCESS) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "query_port: ibt_query_hca_ports() (rc=%d)", rc); rc = sol_uverbs_ibt_to_kernel_status(rc); goto out; } SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "query_port: " "port_num %d, port_info %x, lid %x, sm_lid %x", cmd.port_num, port_info, port_info->p_opaque1, port_info->p_sm_lid); (void) memset(&resp, 0, sizeof (resp)); resp.state = port_info->p_linkstate; resp.max_mtu = port_info->p_mtu; resp.active_mtu = port_info->p_mtu; resp.gid_tbl_len = port_info->p_sgid_tbl_sz; resp.port_cap_flags = port_info->p_capabilities; resp.max_msg_sz = port_info->p_msg_sz; resp.bad_pkey_cntr = port_info->p_pkey_violations; resp.qkey_viol_cntr = port_info->p_qkey_violations; resp.pkey_tbl_len = port_info->p_pkey_tbl_sz; resp.lid = port_info->p_opaque1; resp.sm_lid = port_info->p_sm_lid; resp.lmc = port_info->p_lmc; resp.max_vl_num = port_info->p_max_vl; resp.sm_sl = port_info->p_sm_sl; resp.subnet_timeout = port_info->p_subnet_timeout; resp.init_type_reply = port_info->p_init_type_reply; resp.active_width = port_info->p_width_active; resp.active_speed = port_info->p_speed_active; resp.phys_state = port_info->p_phys_state; ibt_free_portinfo(port_info, port_info_size); #ifdef _LP64 rc = copyout((void*)&resp, (void*)cmd.response.r_laddr, sizeof (resp)); #else rc = copyout((void*)&resp, 
(void*)cmd.response.r_addr, sizeof (resp)); #endif if (rc != 0) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "query_port : copyout fail %x", rc); rc = EFAULT; goto out; } rc = DDI_SUCCESS; out: return (rc); } /* * Function: * sol_uverbs_query_gid * Input: * uctxt - Pointer to the callers user context. * buf - Pointer to kernel buffer containing query gid command. * in_len - Length in bytes of input command buffer. * out_len - Length in bytes of output response buffer. * Output: * The command output buffer is updated with command results. * Returns: * DDI_SUCCESS on success, else error code. * Description: * User verb entry point to query the device gid for the specified * port and gid index. */ /* ARGSUSED */ int sol_uverbs_query_gid(uverbs_uctxt_uobj_t *uctxt, char *buf, int in_len, int out_len) { struct ib_uverbs_query_gid cmd; struct ib_uverbs_query_gid_resp resp; ibt_hca_portinfo_t *port_info; uint_t port_info_n; uint_t port_info_size; int rc; uint64_t temp; (void) memcpy(&cmd, buf, sizeof (cmd)); SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "query_gid() : port_num %x, gid_index %x", cmd.port_num, cmd.gid_index); if (!cmd.port_num || cmd.port_num > uctxt->hca->attr.hca_nports) { SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "query_gid: Invalid port specified"); rc = EINVAL; goto out; } rc = ibt_query_hca_ports(uctxt->hca->hdl, cmd.port_num, &port_info, &port_info_n, &port_info_size); if (rc != IBT_SUCCESS) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "query_gid: ibt_query_hca_ports() (rc=%d)", rc); rc = sol_uverbs_ibt_to_kernel_status(rc); goto out; } SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "number of " "gid entries %d", cmd.port_num, cmd.gid_index, port_info->p_sgid_tbl_sz); if (cmd.gid_index >= port_info->p_sgid_tbl_sz) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "query_gid: cmd gid_index %x > port_info sz %x", cmd.gid_index, port_info->p_sgid_tbl_sz); rc = EINVAL; ibt_free_portinfo(port_info, port_info_size); goto out; } /* * The gid must be returned as a network ordered byte array, 
on solaris * it is a structure in host order so we swap the components as needed. */ temp = htonll(port_info->p_sgid_tbl[cmd.gid_index].gid.ucast_gid. ugid_prefix); (void) memcpy(&resp.gid[0], &temp, sizeof (temp)); temp = htonll(port_info->p_sgid_tbl[cmd.gid_index].gid.ucast_gid. ugid_guid); (void) memcpy(&resp.gid[8], &temp, sizeof (temp)); SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "query_gid: gid = " "0x%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:" "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:", resp.gid[0], resp.gid[1], resp.gid[2], resp.gid[3], resp.gid[4], resp.gid[5], resp.gid[6], resp.gid[7], resp.gid[8], resp.gid[9], resp.gid[10], resp.gid[11], resp.gid[12], resp.gid[13], resp.gid[14], resp.gid[15]); ibt_free_portinfo(port_info, port_info_size); #ifdef _LP64 rc = copyout((void*)&resp, (void*)cmd.response.r_laddr, sizeof (resp)); #else rc = copyout((void*)&resp, (void*)cmd.response.r_addr, sizeof (resp)); #endif if (rc != 0) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "query_gid: copyout %d", rc); rc = EFAULT; goto out; } rc = DDI_SUCCESS; out: return (rc); } /* * Function: * sol_uverbs_query_pkey * Input: * uctxt - Pointer to the callers user context. * buf - Pointer to kernel buffer containing a query pkey command. * in_len - Length in bytes of input command buffer. * out_len - Length in bytes of output response buffer. * Output: * The command output buffer is updated with command results. * Returns: * DDI_SUCCESS on success, else error code. * Description: * User verb entry point to query a device for the pkey at the specified * port and pkey index. 
*/ /* ARGSUSED */ int sol_uverbs_query_pkey(uverbs_uctxt_uobj_t *uctxt, char *buf, int in_len, int out_len) { struct ib_uverbs_query_pkey cmd; struct ib_uverbs_query_pkey_resp resp; ibt_hca_portinfo_t *port_info; uint_t port_info_n; uint_t port_info_size; int rc; (void) memcpy(&cmd, buf, sizeof (cmd)); SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "query_pkey: entry, port = %d, pkey index = %d", cmd.port_num, cmd.pkey_index); if (!cmd.port_num || cmd.port_num > uctxt->hca->attr.hca_nports) { SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "query_pkey: Invalid port specified"); rc = EINVAL; goto out; } rc = ibt_query_hca_ports(uctxt->hca->hdl, cmd.port_num, &port_info, &port_info_n, &port_info_size); if (rc != IBT_SUCCESS) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "query_pkey: ibt_query_hca_ports() %d", rc); rc = sol_uverbs_ibt_to_kernel_status(rc); goto out; } SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "query_pkey: port %d, requested index %d, number of pkey entries " "%d", cmd.port_num, cmd.pkey_index, port_info->p_pkey_tbl_sz); if (cmd.pkey_index >= port_info->p_pkey_tbl_sz) { SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "query_pkey: Invalid index %d, table size = %d", cmd.pkey_index, port_info->p_pkey_tbl_sz); ibt_free_portinfo(port_info, port_info_size); rc = EINVAL; goto out; } (void) memset(&resp, 0, sizeof (resp)); resp.pkey = port_info->p_pkey_tbl[cmd.pkey_index]; ibt_free_portinfo(port_info, port_info_size); #ifdef _LP64 rc = copyout((void*)&resp, (void*)cmd.response.r_laddr, sizeof (resp)); #else rc = copyout((void*)&resp, (void*)cmd.response.r_addr, sizeof (resp)); #endif if (rc != 0) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "query_pkey: copyout %d", rc); rc = EFAULT; goto out; } rc = DDI_SUCCESS; out: return (rc); } /* * Function: * sol_uverbs_reg_mr * Input: * uctxt - Pointer to the callers user context. * buf - Pointer to kernel buffer containing command. * in_len - Length in bytes of input command buffer. * out_len - Length in bytes of output response buffer. 
* Output: * The command output buffer is updated with command results. * Returns: * DDI_SUCCESS on success, else error code. * Description: * User verb entry point to register a memory region. */ /* ARGSUSED */ int sol_uverbs_reg_mr(uverbs_uctxt_uobj_t *uctxt, char *buf, int in_len, int out_len) { struct ib_uverbs_reg_mr cmd; struct ib_uverbs_reg_mr_resp resp; uverbs_upd_uobj_t *upd; uverbs_umr_uobj_t *umr; ibt_mr_attr_t new_mem_attr; ibt_mr_desc_t new_mr_desc; int rc; (void) memcpy(&cmd, buf, sizeof (cmd)); (void) memset(&resp, 0, sizeof (resp)); (void) memset(&new_mem_attr, 0, sizeof (new_mem_attr)); (void) memset(&new_mr_desc, 0, sizeof (new_mr_desc)); SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "reg_mr()"); new_mem_attr.mr_vaddr = cmd.start; new_mem_attr.mr_len = cmd.length; new_mem_attr.mr_as = curproc->p_as; new_mem_attr.mr_flags = IBT_MR_NOSLEEP; SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "reg_mr : " "mr_vaddr 0x%0lX, mr_len %d, mr_as %d, mr_flags %d", new_mem_attr.mr_vaddr, new_mem_attr.mr_len, new_mem_attr.mr_as, new_mem_attr.mr_flags); if ((cmd.access_flags & IB_ACCESS_LOCAL_WRITE) == IB_ACCESS_LOCAL_WRITE) { new_mem_attr.mr_flags |= IBT_MR_ENABLE_LOCAL_WRITE; } if ((cmd.access_flags & IB_ACCESS_REMOTE_WRITE) == IB_ACCESS_REMOTE_WRITE) { new_mem_attr.mr_flags |= IBT_MR_ENABLE_REMOTE_WRITE; } if ((cmd.access_flags & IB_ACCESS_REMOTE_READ) == IB_ACCESS_REMOTE_READ) { new_mem_attr.mr_flags |= IBT_MR_ENABLE_REMOTE_READ; } if ((cmd.access_flags & IB_ACCESS_REMOTE_ATOMIC) == IB_ACCESS_REMOTE_ATOMIC) { new_mem_attr.mr_flags |= IBT_MR_ENABLE_REMOTE_ATOMIC; } if ((cmd.access_flags & IB_ACCESS_MW_BIND) == IB_ACCESS_MW_BIND) { new_mem_attr.mr_flags |= IBT_MR_ENABLE_WINDOW_BIND; } if ((cmd.access_flags & IB_ACCESS_SO) == IB_ACCESS_SO) { new_mem_attr.mr_flags |= IBT_MR_DISABLE_RO; } umr = kmem_zalloc(sizeof (*umr), KM_NOSLEEP); if (umr == NULL) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "reg_mr: User object mem allocation error"); rc = ENOMEM; goto out; } 
sol_ofs_uobj_init(&umr->uobj, 0, SOL_UVERBS_UMR_UOBJ_TYPE); rw_enter(&umr->uobj.uo_lock, RW_WRITER); upd = uverbs_uobj_get_upd_read(cmd.pd_handle); if (upd == NULL) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "reg_mr: PD invalid"); rc = EINVAL; goto bad_pd; } rc = ibt_register_mr(uctxt->hca->hdl, upd->pd, &new_mem_attr, &umr->mr, &new_mr_desc); if (rc != IBT_SUCCESS) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "reg_mr: ibt_register_mr() (rc=%d)", rc); rc = sol_uverbs_ibt_to_kernel_status(rc); umr->uobj.uo_uobj_sz = sizeof (uverbs_umr_uobj_t); goto err_register; } if (sol_ofs_uobj_add(&uverbs_umr_uo_tbl, &umr->uobj) != 0) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "reg_mr: User object add failed"); rc = ENOMEM; goto err_add_uobj; } resp.mr_handle = umr->uobj.uo_id; resp.lkey = new_mr_desc.md_lkey; resp.rkey = new_mr_desc.md_rkey; #ifdef _LP64 rc = copyout((void*)&resp, (void*)cmd.response.r_laddr, sizeof (resp)); #else rc = copyout((void*)&resp, (void*)cmd.response.r_addr, sizeof (resp)); #endif if (rc != 0) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "reg_mr: Error writing resp data (rc=%d)", rc); rc = EFAULT; goto err_response; } mutex_enter(&uctxt->lock); umr->list_entry = add_genlist(&uctxt->mr_list, (uintptr_t)umr, uctxt); mutex_exit(&uctxt->lock); if (!umr->list_entry) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "reg_mr: Error adding umr to mr_list\n"); rc = ENOMEM; goto err_response; } umr->uobj.uo_live = 1; rw_exit(&umr->uobj.uo_lock); sol_ofs_uobj_put(&upd->uobj); return (DDI_SUCCESS); err_response: /* * Need to set uo_live, so sol_ofs_uobj_remove() will * remove the object from the object table. 
*/ umr->uobj.uo_live = 1; (void) sol_ofs_uobj_remove(&uverbs_umr_uo_tbl, &umr->uobj); err_add_uobj: (void) ibt_deregister_mr(uctxt->hca->hdl, umr->mr); err_register: sol_ofs_uobj_put(&upd->uobj); bad_pd: rw_exit(&umr->uobj.uo_lock); sol_ofs_uobj_deref(&umr->uobj, sol_ofs_uobj_free); out: return (rc); } /* * Function: * sol_uverbs_dereg_mr * Input: * uctxt - Pointer to the callers user context. * buf - Pointer to kernel buffer containing command. * in_len - Length in bytes of input command buffer. * out_len - Length in bytes of output response buffer. * Output: * The command output buffer is updated with command results. * Returns: * DDI_SUCCESS on success, else error code. * Description: * User verb entry point to de-register a memory region. */ /* ARGSUSED */ int sol_uverbs_dereg_mr(uverbs_uctxt_uobj_t *uctxt, char *buf, int in_len, int out_len) { struct ib_uverbs_dereg_mr cmd; uverbs_umr_uobj_t *umr; int rc; (void) memcpy(&cmd, buf, sizeof (cmd)); SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "dereg_mr(mr_handle=%d)", cmd.mr_handle); umr = uverbs_uobj_get_umr_write(cmd.mr_handle); if (umr == NULL) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "dereg_mr: Invalid handle"); rc = EINVAL; goto err_out; } rc = ibt_deregister_mr(uctxt->hca->hdl, umr->mr); if (rc != IBT_SUCCESS) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "dereg_mr: ibt_deregister_mr() (rc=%d)", rc); rc = sol_uverbs_ibt_to_kernel_status(rc); goto err_deregister; } /* * Remove from the list of this contexts MR resources, then remove from * the resource management table and the reference placed on the user * object at MR creation. */ mutex_enter(&uctxt->lock); delete_genlist(&uctxt->mr_list, umr->list_entry); mutex_exit(&uctxt->lock); (void) sol_ofs_uobj_remove(&uverbs_umr_uo_tbl, &umr->uobj); /* * Drop the lock and ref held by get_umr_write. 
*/ sol_ofs_uobj_put(&umr->uobj); sol_ofs_uobj_deref(&umr->uobj, sol_ofs_uobj_free); cmd.mr_handle = 0; return (DDI_SUCCESS); err_deregister: /* * Drop the lock and ref held by get_umr_write. */ sol_ofs_uobj_put(&umr->uobj); err_out: return (rc); } /* * Function: * sol_uverbs_create_ah * Input: * uctxt - Pointer to the callers user context. * buf - Pointer to kernel buffer containing command. * in_len - Length in bytes of input command buffer. * out_len - Length in bytes of output response buffer. * Output: * The command output buffer is updated with command results. * Returns: * DDI_SUCCESS on success, else error code. * Description: * User verb entry point to for devices that require kernel AH creation. */ /* ARGSUSED */ int sol_uverbs_create_ah(uverbs_uctxt_uobj_t *uctxt, char *buf, int in_len, int out_len) { SOL_OFS_DPRINTF_L3(sol_uverbs_dbg_str, "create_ah: kernel user verb not implemented"); return (ENOTSUP); } /* * Function: * sol_uverbs_destroy_ah * Input: * uctxt - Pointer to the callers user context. * buf - Pointer to kernel buffer containing command. * in_len - Length in bytes of input command buffer. * out_len - Length in bytes of output response buffer. * Output: * The command output buffer is updated with command results. * Returns: * DDI_SUCCESS on success, else error code. * Description: * User verb entry point to for devices that require kernel AH deletion. */ /* ARGSUSED */ int sol_uverbs_destroy_ah(uverbs_uctxt_uobj_t *uctxt, char *buf, int in_len, int out_len) { SOL_OFS_DPRINTF_L3(sol_uverbs_dbg_str, "destroy_ah: kernel user verb not implemented"); return (ENOTSUP); } /* * Function: * sol_uverbs_create_comp_chan * Input: * uctxt - Pointer to the callers user context. * buf - Pointer to kernel buffer containing command. * in_len - Length in bytes of input command buffer. * out_len - Length in bytes of output response buffer. * Output: * The command output buffer is updated with command results. 
* Returns: * DDI_SUCCESS on success, else error code. * Description: * User verb entry point to create a completion event channel. */ int sol_uverbs_create_comp_channel(uverbs_uctxt_uobj_t *uctxt, char *buf, int in_len, int out_len) { struct ib_uverbs_create_comp_channel cmd; struct ib_uverbs_create_comp_channel_resp resp; int rc; minor_t compl_id; uverbs_uctxt_uobj_t *compl_uctxt; (void) memcpy(&cmd, buf, sizeof (cmd)); (void) memset(&resp, 0, sizeof (resp)); SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "create_comp_chan: entry, in_len=%d, out_len=%d", in_len, out_len); /* * libibverbs will have passed minor of the compl file in * resp.fd. Use this to determine the uctxt created for * completions. */ #ifdef _LP64 rc = copyin((void*)cmd.response.r_laddr, (void*)&resp, sizeof (resp)); #else rc = copyin((void*)cmd.response.r_addr, (void*)&resp, sizeof (resp)); #endif if (rc != 0) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "create_comp: copyin (rc=%d)", rc); rc = EFAULT; return (rc); } compl_id = resp.fd; if (compl_id < SOL_UVERBS_DRIVER_MAX_MINOR) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "create_comp: Invalid compl user context id %x", compl_id); return (ENXIO); } compl_uctxt = uverbs_uobj_get_uctxt_read(compl_id - SOL_UVERBS_DRIVER_MAX_MINOR); if (compl_uctxt == NULL) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "create_comp: Failed get compl user context"); return (ENXIO); } if (compl_uctxt->uctxt_type != SOL_UVERBS_UCTXT_EVENT || compl_uctxt->uctxt_verbs_id != 0) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "create_comp_chan: Invalid user context - " "possibly reused"); return (ENXIO); } compl_uctxt->uctxt_type = SOL_UVERBS_UCTXT_COMPL; compl_uctxt->uctxt_verbs_id = uctxt->uobj.uo_id + SOL_UVERBS_DRIVER_MAX_MINOR; uctxt->uctxt_comp_id = compl_id; SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "create_comp_chan: " "uctxt %p, compl_uctxt %p, compl_id %x", uctxt, compl_uctxt, compl_id); /* * Allocate an event file to be used for completion * event notification. 
*/ compl_uctxt->comp_evfile = uverbs_alloc_event_file(uctxt, 0); if (compl_uctxt->comp_evfile == NULL) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "create_comp_chan: Event file alloc error"); rc = EINVAL; sol_ofs_uobj_put(&compl_uctxt->uobj); return (rc); } /* * Place an extra reference on the compl event file. These will * be used to handle the natural race of between the closing of * the compl event file and uverbs device file that can occur. */ sol_ofs_uobj_ref(&compl_uctxt->comp_evfile->uobj); sol_ofs_uobj_put(&compl_uctxt->uobj); #ifdef _LP64 rc = copyout((void*)&resp, (void*)cmd.response.r_laddr, sizeof (resp)); #else rc = copyout((void*)&resp, (void*)cmd.response.r_addr, sizeof (resp)); #endif if (rc != 0) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "create_comp_chan: copyout %d", rc); rc = EFAULT; return (rc); } return (0); } /* * Function: * sol_uverbs_dummy_command * Input: * uctxt - Pointer to the callers user context. * buf - Pointer to kernel buffer containing command. * in_len - Length in bytes of input command buffer. * out_len - Length in bytes of output response buffer. * Output: * The command output buffer is updated with command results. * Returns: * DDI_SUCCESS on success, else error code. * Description: * User verb generic place holder stub. */ /* ARGSUSED */ int sol_uverbs_dummy_command(uverbs_uctxt_uobj_t *uctxt, char *buf, int in_len, int out_len) { SOL_OFS_DPRINTF_L4(sol_uverbs_dbg_str, "sol_uverbs_dummy_command invoked"); return (0); } /* * Function: * sol_uverbs_write * Input: * dev - Device number. * uiop - Pointer to the uio structure that describes the data (i.e. * Solaris User Verbs command). * credp - A pointer to the user credentials for the I/O transaction. * Output: * uiop - * Returns: * DDI_SUCCESS on success, else error code. * Description: * User verb write entry point. A user deivce libraries use this * entry point to execute a kernel agent user verbs call. 
During * the course of the call the user process will hold a read reference * to the associated user context. */ #define SOL_UVERBS_MAX_CMD_PAYLOAD 512 /* ARGSUSED */ static int sol_uverbs_write(dev_t dev, struct uio *uiop, cred_t *credp) { uverbs_uctxt_uobj_t *uctxt; size_t len = uiop->uio_resid; int rc; struct ib_uverbs_cmd_hdr hdr; char payload[SOL_UVERBS_MAX_CMD_PAYLOAD]; minor_t id = getminor(dev); SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "uverbs_write: entry (len=%d)", len); ASSERT(id >= SOL_UVERBS_DRIVER_MAX_MINOR); uctxt = uverbs_uobj_get_uctxt_read(id - SOL_UVERBS_DRIVER_MAX_MINOR); if (uctxt == NULL) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "uverbs_write: Failed get user context"); return (ENXIO); } if (uctxt->uctxt_type != SOL_UVERBS_UCTXT_VERBS) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "uverbs_write: write() on invalid uctxt type %x", uctxt->uctxt_type); rc = ENXIO; goto out; } if (len < sizeof (hdr)) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "uverbs_write: Header too small"); rc = EINVAL; goto out; } hdr.command = -1; hdr.in_words = 0; hdr.out_words = 0; if (uiomove(&hdr, sizeof (hdr), UIO_WRITE, uiop) != 0) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "uverbs_write: Error reading header"); rc = EFAULT; goto out; } SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "uverbs_write: hdr.command = %d", hdr.command); SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "uverbs_write: hdr.command = %d", hdr.command); SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "uverbs_write: hdr.in_words = %d", hdr.in_words); SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "uverbs_write: hdr.out_words = %d", hdr.out_words); if (hdr.in_words * 4 != len) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "uverbs_write: Invalid header size"); rc = EINVAL; goto out; } if (hdr.command >= sizeof (uverbs_cmd_table)/sizeof (uverbs_cmd_table[0]) || !uverbs_cmd_table[hdr.command]) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "uverbs_write: Invalid command (%d)", hdr.command); rc = EINVAL; goto out; } ASSERT(len <= SOL_UVERBS_MAX_CMD_PAYLOAD); 
if (uiomove(&payload, len, UIO_WRITE, uiop) != 0) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "uverbs_write: Error reading payload"); rc = EFAULT; goto out; } #ifdef DEBUG unsigned int *payload_int = (unsigned int *)payload; SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "payload: %08x, %08x, %08x, %08x", payload_int[0], payload_int[1], payload_int[2], payload_int[3]); SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "payload: %08x, %08x, %08x, %08x", payload_int[4], payload_int[5], payload_int[6], payload_int[7]); SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "payload: %08x, %08x, %08x, %08x", payload_int[8], payload_int[9], payload_int[10], payload_int[11]); SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "payload: %08x, %08x, %08x", payload_int[12], payload_int[13], payload_int[14]); #endif rc = uverbs_cmd_table[hdr.command](uctxt, &payload[0], hdr.in_words * 4, hdr.out_words * 4); out: sol_ofs_uobj_put(&uctxt->uobj); SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "uverbs_write: rc = %d", rc); return (rc); } static int sol_uverbs_poll(dev_t dev, short events, int anyyet, short *reventsp, struct pollhead **phpp) { minor_t id = getminor(dev); uverbs_uctxt_uobj_t *uctxt, *verbs_uctxt; int rc; #ifdef DEBUG SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "uverbs_poll(%p, %x, %x, " "%p, %p)", dev, events, anyyet, reventsp, phpp); #endif ASSERT(id >= SOL_UVERBS_DRIVER_MAX_MINOR); uctxt = uverbs_uobj_get_uctxt_read(id - SOL_UVERBS_DRIVER_MAX_MINOR); if (uctxt == NULL) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "uverbs_poll: Failed get user context"); return (ENXIO); } if (uctxt->uctxt_verbs_id < SOL_UVERBS_DRIVER_MAX_MINOR) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "uverbs_poll: Invalid Verbs user context id, %x", uctxt->uctxt_verbs_id); sol_ofs_uobj_put(&uctxt->uobj); return (ENXIO); } verbs_uctxt = uverbs_uobj_get_uctxt_read(uctxt->uctxt_verbs_id - SOL_UVERBS_DRIVER_MAX_MINOR); if (verbs_uctxt == NULL) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "uverbs_poll: Failed get verbs user context"); sol_ofs_uobj_put(&uctxt->uobj); 
return (ENXIO); } if (uctxt->uctxt_type == SOL_UVERBS_UCTXT_ASYNC) { ASSERT(verbs_uctxt->async_evfile); rc = sol_uverbs_event_file_poll(verbs_uctxt->async_evfile, events, anyyet, reventsp, phpp); } else if (uctxt->uctxt_type == SOL_UVERBS_UCTXT_COMPL) { ASSERT(uctxt->comp_evfile); rc = sol_uverbs_event_file_poll(uctxt->comp_evfile, events, anyyet, reventsp, phpp); } else { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "uverbs_poll: poll user context type %d", uctxt->uctxt_type); rc = ENXIO; } sol_ofs_uobj_put(&verbs_uctxt->uobj); sol_ofs_uobj_put(&uctxt->uobj); return (rc); } /* * Function: * sol_uverbs_alloc_uctxt * Input: * devp - A pointer to the device number associated with the open. * mod_ctxt - A pointer to the drivers module context. * minor - The minor device number. * Output: * None. * Returns: * On success a new user context user resource object associated with * the device passed via devp. NULL on error. * Description: * Allocate a new user context user resource object and initialize it. * The users asynchronous event file is created as part of this. On * successful allocation, the user context is returned with the * associated write lock enabled. */ static uverbs_uctxt_uobj_t * sol_uverbs_alloc_uctxt(dev_t *devp, uverbs_module_context_t *mod_ctxt, minor_t minor) { uverbs_uctxt_uobj_t *uctxt = NULL; uctxt = kmem_zalloc(sizeof (uverbs_uctxt_uobj_t), KM_SLEEP); ASSERT(uctxt != NULL); sol_ofs_uobj_init(&uctxt->uobj, 0, SOL_UVERBS_UCTXT_UOBJ_TYPE); rw_enter(&uctxt->uobj.uo_lock, RW_WRITER); if (sol_ofs_uobj_add(&uverbs_uctxt_uo_tbl, &uctxt->uobj) != 0) { /* * The initialization routine set's the initial reference, * we dereference the object here to clean it up. */ SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "alloc_uctxt: Object add failed"); rw_exit(&uctxt->uobj.uo_lock); sol_ofs_uobj_free(&uctxt->uobj); return (NULL); } /* * Create the new clone for this user context using the * object id as the minor number. Note we offset beyond all * real minor device numbers. 
*/ *devp = makedevice(getmajor(*devp), uctxt->uobj.uo_id + SOL_UVERBS_DRIVER_MAX_MINOR); SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "uverbs_open : " "uctxt %p, minor %x- alloced", uctxt, uctxt->uobj.uo_id + SOL_UVERBS_DRIVER_MAX_MINOR); SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "alloc_uctxt: user context allocated: %p, ref = %d", (void *)uctxt, uctxt->uobj.uo_refcnt); mutex_init(&uctxt->lock, NULL, MUTEX_DRIVER, NULL); uctxt->mod_ctxt = mod_ctxt; if (minor == SOL_UVERBS_DRIVER_EVENT_MINOR) { uctxt->uctxt_type = SOL_UVERBS_UCTXT_EVENT; } else { uctxt->uctxt_type = SOL_UVERBS_UCTXT_VERBS; uctxt->hca = &mod_ctxt->hcas[minor]; } init_genlist(&uctxt->pd_list); init_genlist(&uctxt->mr_list); init_genlist(&uctxt->cq_list); init_genlist(&uctxt->srq_list); init_genlist(&uctxt->qp_list); init_genlist(&uctxt->ah_list); /* Return with uobj uo_lock held for WRITTER. */ return (uctxt); } /* * Function: * sol_uverbs_qpnum2uqpid * Input: * qp_num - used to find the user object that mapped to this qp_num * Output: * None * Returns: * DDI_FAILURE if not found else * the uo_id in the user object that matches the qp_num * Description: * Find the uo_id of the user object which mapped to the input qp_num */ uint32_t sol_uverbs_qpnum2uqpid(uint32_t qp_num) { sol_ofs_uobj_table_t *uo_tbl; sol_ofs_uobj_t *uobj; uverbs_uqp_uobj_t *uqp; int i, j; sol_ofs_uobj_blk_t *blk; uo_tbl = &uverbs_uqp_uo_tbl; rw_enter(&uo_tbl->uobj_tbl_lock, RW_READER); /* * Try to find an empty slot for the new user object. 
*/ for (i = 0; i < uo_tbl->uobj_tbl_used_blks; i++) { blk = uo_tbl->uobj_tbl_uo_root[i]; if (blk != NULL) { for (j = 0; j < SOL_OFS_UO_BLKSZ; j++) { if ((uobj = blk->ofs_uoblk_blks[j]) != NULL) { uqp = (uverbs_uqp_uobj_t *)uobj; if (uqp->qp_num == qp_num) { rw_exit(&uo_tbl->uobj_tbl_lock); SOL_OFS_DPRINTF_L5( sol_uverbs_dbg_str, "qpnum2uqpid(%x) ret %x", qp_num, uobj->uo_id); return (uobj->uo_id); } } } } } rw_exit(&uo_tbl->uobj_tbl_lock); SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "qpnum2uqpid(%x) ret %x", qp_num, DDI_FAILURE); return (DDI_FAILURE); } void sol_uverbs_get_clnt_hdl(void **ibclnt_hdl, void **iwclnt_hdl) { *ibclnt_hdl = sol_uverbs_ib_clntp; *iwclnt_hdl = NULL; } void * sol_uverbs_qpnum2qphdl(uint32_t qpnum) { int32_t uqpid; uqpid = sol_uverbs_qpnum2uqpid(qpnum); if (uqpid == DDI_FAILURE) return (NULL); return (sol_uverbs_uqpid_to_ibt_handle(uqpid)); } int sol_uverbs_disable_uqpn_modify(uint32_t qpnum) { int32_t uqpid; uqpid = sol_uverbs_qpnum2uqpid(qpnum); if (uqpid == DDI_FAILURE) return (-1); return (sol_uverbs_disable_user_qp_modify(uqpid)); } extern int uverbs_uqpn_cq_ctrl(uint32_t, sol_uverbs_cq_ctrl_t); int sol_uverbs_uqpn_cq_ctrl(uint32_t qpnum, sol_uverbs_cq_ctrl_t ctrl) { int32_t uqpid; uqpid = sol_uverbs_qpnum2uqpid(qpnum); if (uqpid == DDI_FAILURE) return (-1); return (uverbs_uqpn_cq_ctrl(uqpid, ctrl)); } void sol_uverbs_set_qp_free_state(sol_uverbs_qp_free_state_t qp_free_state, uint32_t qpnum, void *qphdl) { int32_t uqpid; uverbs_uqp_uobj_t *uqp; SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "sol_uverbs_set_qp_free_state(%x, %x, %p)", qp_free_state, qpnum, qphdl); if (qp_free_state == SOL_UVERBS2UCMA_DISABLE_QP_FREE) { uqpid = sol_uverbs_qpnum2uqpid(qpnum); if (uqpid == DDI_FAILURE) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "set_qp_free_state(%d)-invalid qpnum", qpnum); return; } uqp = uverbs_uobj_get_uqp_write(uqpid); if (uqp == NULL) { SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "set_qp_free_state(%d)-uqp lookup failure", qpnum); return; } 
SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "set_qp_free_state : uqp %p, setting Disable QP Free", uqp); uqp->uqp_free_state = SOL_UVERBS2UCMA_DISABLE_QP_FREE; sol_ofs_uobj_put(&uqp->uobj); return; } ASSERT(qphdl); uqp = (uverbs_uqp_uobj_t *)ibt_get_qp_private((ibt_qp_hdl_t)qphdl); ASSERT(uqp); if (uqp->uqp_free_state != SOL_UVERBS2UCMA_FREE_PENDING) { /* * Enable free flag, so that close or userland free_qp * call can free this in the future. */ SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "set_qp_free_state : uqp %p, setting Enable QP Free", uqp); rw_enter(&(uqp->uobj.uo_lock), RW_WRITER); uqp->uqp_free_state = SOL_UVERBS2UCMA_ENABLE_QP_FREE; rw_exit(&(uqp->uobj.uo_lock)); } else { /* * uqp_free_state is set to FREE_PENDING, QP has been freed * by userland. Call uverbs_uqp_free() to free this. */ SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "set_qp_free_state : uqp %p calling uverbs_uqp_free()", uqp); rw_enter(&(uqp->uobj.uo_lock), RW_WRITER); sol_ofs_uobj_ref(&uqp->uobj); if (uverbs_uqp_free(uqp, uqp->uctxt)) SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "set_qp_free_state : uverbs_uqp_free(%p) failed", uqp); } } /* * Function: * sol_uverbs_user_objects_init * Input: * None * Output: * None * Returns: * None * Description: * Initializes all of the user object resource managment tables. 
*/ static void sol_uverbs_user_objects_init() { sol_ofs_uobj_tbl_init(&uverbs_uctxt_uo_tbl, sizeof (uverbs_uctxt_uobj_t)); sol_ofs_uobj_tbl_init(&uverbs_upd_uo_tbl, sizeof (uverbs_upd_uobj_t)); sol_ofs_uobj_tbl_init(&uverbs_umr_uo_tbl, sizeof (uverbs_umr_uobj_t)); sol_ofs_uobj_tbl_init(&uverbs_ucq_uo_tbl, sizeof (uverbs_ucq_uobj_t)); sol_ofs_uobj_tbl_init(&uverbs_usrq_uo_tbl, sizeof (uverbs_usrq_uobj_t)); sol_ofs_uobj_tbl_init(&uverbs_uqp_uo_tbl, sizeof (uverbs_uqp_uobj_t)); sol_ofs_uobj_tbl_init(&uverbs_uah_uo_tbl, sizeof (uverbs_uah_uobj_t)); sol_ofs_uobj_tbl_init(&uverbs_ufile_uo_tbl, sizeof (uverbs_ufile_uobj_t)); } /* * Function: * sol_uverbs_user_objects_fini * Input: * None * Output: * None * Returns: * None * Description: * Releases all of the user object resource managment tables. */ static void sol_uverbs_user_objects_fini() { sol_ofs_uobj_tbl_fini(&uverbs_ufile_uo_tbl); sol_ofs_uobj_tbl_fini(&uverbs_uah_uo_tbl); sol_ofs_uobj_tbl_fini(&uverbs_uqp_uo_tbl); sol_ofs_uobj_tbl_fini(&uverbs_usrq_uo_tbl); sol_ofs_uobj_tbl_fini(&uverbs_ucq_uo_tbl); sol_ofs_uobj_tbl_fini(&uverbs_umr_uo_tbl); sol_ofs_uobj_tbl_fini(&uverbs_upd_uo_tbl); sol_ofs_uobj_tbl_fini(&uverbs_uctxt_uo_tbl); } /* * Function: * sol_uverbs_ibt_to_kernel_status * Input: * status - An IBT status code. * Output: * None * Returns: * The "errno" based kernel error code the IBT status maps to. * Description: * Map an IBT status to the "errno" code that should be returned. 
*/ int sol_uverbs_ibt_to_kernel_status(ibt_status_t status) { int err; switch (status) { case IBT_NOT_SUPPORTED: err = ENOTSUP; break; case IBT_ILLEGAL_OP: case IBT_INVALID_PARAM: err = EINVAL; break; case IBT_HCA_IN_USE: case IBT_HCA_BUSY_DETACHING: case IBT_HCA_BUSY_CLOSING: case IBT_CHAN_IN_USE: case IBT_CQ_BUSY: case IBT_MR_IN_USE: case IBT_PD_IN_USE: case IBT_SRQ_IN_USE: err = EBUSY; break; case IBT_INSUFF_RESOURCE: case IBT_INSUFF_KERNEL_RESOURCE: case IBT_HCA_WR_EXCEEDED: case IBT_HCA_SGL_EXCEEDED: err = ENOMEM; break; default: err = EINVAL; } return (err); } /* ARGSUSED */ uint32_t sol_uverbs_ibt_to_of_device_cap_flags(ibt_hca_flags_t flags, ibt_hca_flags2_t flags2) { uint32_t of_flags = 0; if (flags && IBT_HCA_RESIZE_CHAN) of_flags |= IB_DEVICE_RESIZE_MAX_WR; if (flags && IBT_HCA_PKEY_CNTR) of_flags |= IB_DEVICE_BAD_PKEY_CNTR; if (flags && IBT_HCA_QKEY_CNTR) of_flags |= IB_DEVICE_BAD_QKEY_CNTR; if (flags && IBT_HCA_RAW_MULTICAST) of_flags |= IB_DEVICE_RAW_MULTI; if (flags && IBT_HCA_AUTO_PATH_MIG) of_flags |= IB_DEVICE_AUTO_PATH_MIG; if (flags && IBT_HCA_SQD_SQD_PORT) of_flags |= IB_DEVICE_CHANGE_PHY_PORT; if (flags && IBT_HCA_AH_PORT_CHECK) of_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE; if (flags && IBT_HCA_CURRENT_QP_STATE) of_flags |= IB_DEVICE_CURR_QP_STATE_MOD; if (flags && IBT_HCA_SHUTDOWN_PORT) of_flags |= IB_DEVICE_SHUTDOWN_PORT; if (flags && IBT_HCA_INIT_TYPE) of_flags |= IB_DEVICE_INIT_TYPE; if (flags && IBT_HCA_PORT_UP) of_flags |= IB_DEVICE_PORT_ACTIVE_EVENT; if (flags && IBT_HCA_SI_GUID) of_flags |= IB_DEVICE_SYS_IMAGE_GUID; if (flags && IBT_HCA_RNR_NAK) of_flags |= IB_DEVICE_RC_RNR_NAK_GEN; if (flags && IBT_HCA_RESIZE_SRQ) of_flags |= IB_DEVICE_SRQ_RESIZE; if (flags && IBT_HCA_BASE_QUEUE_MGT) of_flags |= IB_DEVICE_N_NOTIFY_CQ; if (flags && IBT_HCA_ZERO_BASED_VA) of_flags |= IB_DEVICE_ZERO_STAG; if (flags && IBT_HCA_LOCAL_INVAL_FENCE) of_flags |= IB_DEVICE_SEND_W_INV; if (flags && IBT_HCA_MEM_WIN_TYPE_2B) of_flags |= IB_DEVICE_MEM_WINDOW; return 
(of_flags); } uint64_t sol_uverbs_ibt_to_of_page_sz(ibt_page_sizes_t page_szs) { uint64_t of_page_sz = 0; if (page_szs && IBT_PAGE_4K) of_page_sz |= 1LL << 12; if (page_szs && IBT_PAGE_8K) of_page_sz |= 1LL << 13; if (page_szs && IBT_PAGE_16K) of_page_sz |= 1LL << 14; if (page_szs && IBT_PAGE_32K) of_page_sz |= 1LL << 15; if (page_szs && IBT_PAGE_64K) of_page_sz |= 1LL << 16; if (page_szs && IBT_PAGE_128K) of_page_sz |= 1LL << 17; if (page_szs && IBT_PAGE_256K) of_page_sz |= 1LL << 18; if (page_szs && IBT_PAGE_512K) of_page_sz |= 1LL << 19; if (page_szs && IBT_PAGE_1M) of_page_sz |= 1LL << 20; if (page_szs && IBT_PAGE_2M) of_page_sz |= 1LL << 21; if (page_szs && IBT_PAGE_4M) of_page_sz |= 1LL << 22; if (page_szs && IBT_PAGE_8M) of_page_sz |= 1LL << 23; if (page_szs && IBT_PAGE_16M) of_page_sz |= 1LL << 24; if (page_szs && IBT_PAGE_32M) of_page_sz |= 1LL << 25; if (page_szs && IBT_PAGE_64M) of_page_sz |= 1LL << 26; if (page_szs && IBT_PAGE_128M) of_page_sz |= 1LL << 27; if (page_szs && IBT_PAGE_256M) of_page_sz |= 1LL << 28; if (page_szs && IBT_PAGE_512M) of_page_sz |= 1LL << 29; if (page_szs && IBT_PAGE_1G) of_page_sz |= 1LL << 30; if (page_szs && IBT_PAGE_2G) of_page_sz |= 1LL << 31; if (page_szs && IBT_PAGE_4G) of_page_sz |= 1LL << 32; if (page_szs && IBT_PAGE_8G) of_page_sz |= 1LL << 33; if (page_szs && IBT_PAGE_16G) of_page_sz |= 1LL << 34; return (of_page_sz); }