xref: /titanic_51/usr/src/uts/common/io/ib/clients/of/sol_uverbs/sol_uverbs.c (revision fffafeb2cc01732fd6a28ed530e4424094685ece)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*
27  * sol_uverbs.c
28  *
29  * Solaris OFED User Verbs kernel agent module
30  *
31  */
32 #include <sys/devops.h>
33 #include <sys/conf.h>
34 #include <sys/modctl.h>
35 #include <sys/types.h>
36 #include <sys/file.h>
37 #include <sys/errno.h>
38 #include <sys/open.h>
39 #include <sys/cred.h>
40 #include <sys/uio.h>
41 #include <sys/semaphore.h>
42 #include <sys/stat.h>
43 #include <sys/ddi.h>
44 #include <sys/sunddi.h>
45 #include <sys/ib/clients/of/ofa_solaris.h>
46 
47 #include <sys/ib/ibtl/ibvti.h>
48 #include <sys/ib/clients/of/sol_ofs/sol_ofs_common.h>
49 #include <sys/ib/clients/of/ofed_kernel.h>
50 #include <sys/ib/clients/of/sol_uverbs/sol_uverbs.h>
51 #include <sys/ib/clients/of/sol_uverbs/sol_uverbs_event.h>
52 #include <sys/ib/clients/of/sol_uverbs/sol_uverbs_comp.h>
53 #include <sys/ib/clients/of/sol_uverbs/sol_uverbs_qp.h>
54 
/* Anchor for the DDI soft state allocator; set up in _init(). */
static void *statep;
/*
 * IBT client handle recorded by sol_uverbs_hca_open() the first time
 * ibt_attach() succeeds.
 */
static ibt_clnt_hdl_t	sol_uverbs_ib_clntp = NULL;

/* Module tag passed as the first argument of the SOL_OFS_DPRINTF_* macros. */
char	*sol_uverbs_dbg_str = "sol_uverbs";
59 
60 static int sol_uverbs_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
61 static int sol_uverbs_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
62 static int sol_uverbs_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
63     void **resultp);
64 static int sol_uverbs_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op,
65     int flags, char *name, caddr_t valuep, int *lengthp);
66 static int sol_uverbs_open(dev_t *devp, int flag, int otyp, cred_t *cred);
67 static int sol_uverbs_close(dev_t dev, int flag, int otyp, cred_t *cred);
68 static int sol_uverbs_poll(dev_t, short, int, short *, struct pollhead **);
69 static int sol_uverbs_read(dev_t dev, struct uio *uiop, cred_t *credp);
70 static int sol_uverbs_mmap(dev_t dev, off_t sol_uverbs_mmap, int prot);
71 static int sol_uverbs_write(dev_t dev, struct uio *uiop, cred_t *credp);
72 
/*
 * Character/block entry point vector for the driver.  Only open, close,
 * read, write, mmap, chpoll and prop_op are implemented; every other
 * entry point is set to nodev.  The device is not a STREAMS device
 * (cb_str is NULL).
 */
static struct cb_ops sol_uverbs_cb_ops = {
	.cb_open	= sol_uverbs_open,
	.cb_close	= sol_uverbs_close,
	.cb_strategy	= nodev,
	.cb_print	= nodev,
	.cb_dump	= nodev,
	.cb_read	= sol_uverbs_read,
	.cb_write	= sol_uverbs_write,
	.cb_ioctl	= nodev,
	.cb_devmap	= nodev,
	.cb_mmap	= sol_uverbs_mmap,
	.cb_segmap	= nodev,
	.cb_chpoll	= sol_uverbs_poll,
	.cb_prop_op	= sol_uverbs_prop_op,
	.cb_str		= NULL,
	.cb_flag	= D_NEW | D_MP,
	.cb_rev		= CB_REV,
	.cb_aread	= nodev,
	.cb_awrite	= nodev
};
93 
/*
 * Device operations vector.  getinfo, attach and detach are implemented
 * by this file; identify and probe are nulldev, reset and power are
 * nodev, and no bus operations are provided.  Quiesce uses the
 * ddi_quiesce_not_needed routine.
 */
static struct dev_ops sol_uverbs_dev_ops = {
	.devo_rev	= DEVO_REV,
	.devo_refcnt	= 0,
	.devo_getinfo	= sol_uverbs_getinfo,
	.devo_identify	= nulldev,
	.devo_probe	= nulldev,
	.devo_attach	= sol_uverbs_attach,
	.devo_detach	= sol_uverbs_detach,
	.devo_reset	= nodev,
	.devo_cb_ops	= &sol_uverbs_cb_ops,
	.devo_bus_ops	= NULL,
	.devo_power	= nodev,
	.devo_quiesce	= ddi_quiesce_not_needed
};
108 
/*
 * Loadable driver linkage: ties the module to the driver module
 * operations and to sol_uverbs_dev_ops.
 */
static struct modldrv modldrv = {
	.drv_modops	= &mod_driverops,
	.drv_linkinfo	= "Solaris User Verbs driver",
	.drv_dev_ops	= &sol_uverbs_dev_ops
};
114 
/*
 * Module linkage handed to mod_install(), mod_info() and mod_remove().
 * The linkage array holds the single driver linkage and is NULL
 * terminated.
 */
static struct modlinkage modlinkage = {
	.ml_rev			= MODREV_1,
	.ml_linkage = {
		[0]		= &modldrv,
		[1]		= NULL,
	}
};
122 
/*
 * User Object Tables for management of user resources. The tables are driver
 * wide, but each user context maintains a list of the objects it has created
 * that is used in cleanup.
 *
 * There is one table per kind of user object.  Within this file,
 * uverbs_uctxt_uo_tbl holds the user contexts (its object count is checked
 * by detach) and uverbs_uah_uo_tbl holds the address handle objects removed
 * at close.  The remaining names suggest PD, MR, CQ, SRQ, QP and event file
 * objects respectively - NOTE(review): confirm against their users.
 */
sol_ofs_uobj_table_t uverbs_uctxt_uo_tbl;
sol_ofs_uobj_table_t uverbs_upd_uo_tbl;
sol_ofs_uobj_table_t uverbs_uah_uo_tbl;
sol_ofs_uobj_table_t uverbs_umr_uo_tbl;
sol_ofs_uobj_table_t uverbs_ucq_uo_tbl;
sol_ofs_uobj_table_t uverbs_usrq_uo_tbl;
sol_ofs_uobj_table_t uverbs_uqp_uo_tbl;
sol_ofs_uobj_table_t uverbs_ufile_uo_tbl;

/* Called from _init() and _fini() (and the _init() failure path). */
static void sol_uverbs_user_objects_init(void);
static void sol_uverbs_user_objects_fini(void);
139 
/*
 * Open Fabric User Verbs API, command table. See ib_user_verbs.h for
 * definitions.
 *
 * The table is indexed by the IB_USER_VERBS_CMD_* command value.  Each
 * handler receives the caller's user context, the command buffer and the
 * input and output lengths.  Any index without an explicit initializer
 * below is implicitly NULL.  The post send/receive commands and all XRC
 * commands are routed to sol_uverbs_dummy_command.
 */
static int (*uverbs_cmd_table[])(uverbs_uctxt_uobj_t *uctxt, char *buf,
	int in_len, int out_len) = {

	[IB_USER_VERBS_CMD_GET_CONTEXT]   	= sol_uverbs_get_context,
	[IB_USER_VERBS_CMD_QUERY_DEVICE]  	= sol_uverbs_query_device,
	[IB_USER_VERBS_CMD_QUERY_PORT]    	= sol_uverbs_query_port,
	[IB_USER_VERBS_CMD_ALLOC_PD]		= sol_uverbs_alloc_pd,
	[IB_USER_VERBS_CMD_DEALLOC_PD]		= sol_uverbs_dealloc_pd,
	[IB_USER_VERBS_CMD_REG_MR]		= sol_uverbs_reg_mr,
	[IB_USER_VERBS_CMD_DEREG_MR]		= sol_uverbs_dereg_mr,
	[IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL] =
					sol_uverbs_create_comp_channel,
	[IB_USER_VERBS_CMD_CREATE_CQ]		= sol_uverbs_create_cq,
	[IB_USER_VERBS_CMD_RESIZE_CQ]		= sol_uverbs_resize_cq,
	[IB_USER_VERBS_CMD_POLL_CQ]		= sol_uverbs_poll_cq,
	[IB_USER_VERBS_CMD_REQ_NOTIFY_CQ]	= sol_uverbs_req_notify_cq,
	[IB_USER_VERBS_CMD_DESTROY_CQ]    	= sol_uverbs_destroy_cq,
	[IB_USER_VERBS_CMD_CREATE_QP]		= sol_uverbs_create_qp,
	[IB_USER_VERBS_CMD_QUERY_QP]		= sol_uverbs_query_qp,
	[IB_USER_VERBS_CMD_MODIFY_QP]		= sol_uverbs_modify_qp,
	[IB_USER_VERBS_CMD_DESTROY_QP]    	= sol_uverbs_destroy_qp,
	[IB_USER_VERBS_CMD_POST_SEND]    	= sol_uverbs_dummy_command,
	[IB_USER_VERBS_CMD_POST_RECV]    	= sol_uverbs_dummy_command,
	[IB_USER_VERBS_CMD_POST_SRQ_RECV]    	= sol_uverbs_dummy_command,
	[IB_USER_VERBS_CMD_CREATE_AH]    	= sol_uverbs_create_ah,
	[IB_USER_VERBS_CMD_DESTROY_AH]    	= sol_uverbs_destroy_ah,
	[IB_USER_VERBS_CMD_ATTACH_MCAST]  	= sol_uverbs_attach_mcast,
	[IB_USER_VERBS_CMD_DETACH_MCAST]  	= sol_uverbs_detach_mcast,
	[IB_USER_VERBS_CMD_CREATE_SRQ]    	= sol_uverbs_create_srq,
	[IB_USER_VERBS_CMD_MODIFY_SRQ]		= sol_uverbs_modify_srq,
	[IB_USER_VERBS_CMD_QUERY_SRQ]		= sol_uverbs_query_srq,
	[IB_USER_VERBS_CMD_DESTROY_SRQ]   	= sol_uverbs_destroy_srq,

		/* TODO - XRC */

	[IB_USER_VERBS_CMD_CREATE_XRC_SRQ]   	= sol_uverbs_dummy_command,
	[IB_USER_VERBS_CMD_OPEN_XRC_DOMAIN]   	= sol_uverbs_dummy_command,
	[IB_USER_VERBS_CMD_CLOSE_XRC_DOMAIN]   	= sol_uverbs_dummy_command,
	[IB_USER_VERBS_CMD_CREATE_XRC_RCV_QP]  	= sol_uverbs_dummy_command,
	[IB_USER_VERBS_CMD_MODIFY_XRC_RCV_QP]  	= sol_uverbs_dummy_command,
	[IB_USER_VERBS_CMD_QUERY_XRC_RCV_QP]   	= sol_uverbs_dummy_command,
	[IB_USER_VERBS_CMD_REG_XRC_RCV_QP]   	= sol_uverbs_dummy_command,
	[IB_USER_VERBS_CMD_UNREG_XRC_RCV_QP]   	= sol_uverbs_dummy_command,
	[IB_USER_VERBS_CMD_QUERY_GID]		= sol_uverbs_query_gid,
	[IB_USER_VERBS_CMD_QUERY_PKEY]		= sol_uverbs_query_pkey,
};
190 
191 /*
192  * Function:
193  *	sol_uverbs_hca_open
194  * Input:
195  *	mod_ctxt	- Pointer to the user verbs module context.
196  * Output:
197  *	None
198  * Returns:
199  *	Zero on success, else error code.
200  * Description:
201  *	Register as a client with the IBT framework and open all of the
202  *	HCA's present.
203  */
204 static int
205 sol_uverbs_hca_open(uverbs_module_context_t *mod_ctxt)
206 {
207 	int			status;
208 	int			hca_ndx;
209 #ifdef DEBUG
210 	llist_head_t		*entry;
211 	sol_uverbs_hca_t	*temp;
212 #endif
213 
214 	mod_ctxt->hca_count	= 0;
215 	mod_ctxt->hca_guid_list	= NULL;
216 	mod_ctxt->hcas		= NULL;
217 	mod_ctxt->clnt_hdl	= NULL;
218 
219 	mod_ctxt->clnt_modinfo.mi_ibt_version   = IBTI_V_CURR;
220 	mod_ctxt->clnt_modinfo.mi_clnt_class    = IBT_USER;
221 	mod_ctxt->clnt_modinfo.mi_async_handler = uverbs_async_event_handler;
222 	mod_ctxt->clnt_modinfo.mi_reserved	= NULL;
223 	mod_ctxt->clnt_modinfo.mi_clnt_name	= "sol_uverbs";
224 
225 	status = ibt_attach(&mod_ctxt->clnt_modinfo, mod_ctxt->dip,
226 	    mod_ctxt, &mod_ctxt->clnt_hdl);
227 
228 	if (status != IBT_SUCCESS) {
229 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
230 		    "hca_open:ibt_attach fail %d", status);
231 		status = ENODEV;
232 		goto out_err;
233 	}
234 	if (sol_uverbs_ib_clntp == NULL)
235 		sol_uverbs_ib_clntp  = mod_ctxt->clnt_hdl;
236 
237 	mod_ctxt->hca_count = ibt_get_hca_list(&mod_ctxt->hca_guid_list);
238 
239 	if (mod_ctxt->hca_count == 0) {
240 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
241 		    "hca_open: Zero HCAs on this system!");
242 		status = ENODEV;
243 		goto out_err;
244 	}
245 
246 	if (mod_ctxt->hca_count > SOL_UVERBS_DRIVER_MAX_HCA_MINOR) {
247 		SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str,
248 		    "hca_open: HCA count %d exceeds max %d",
249 		    mod_ctxt->hca_count, SOL_UVERBS_DRIVER_MAX_HCA_MINOR);
250 		status =  ENODEV;
251 		goto out_err;
252 	}
253 
254 	SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str,
255 	    "hca_open: No. of HCAs present %d", mod_ctxt->hca_count);
256 
257 	mod_ctxt->hcas = kmem_zalloc(mod_ctxt->hca_count *
258 	    sizeof (sol_uverbs_hca_t), KM_SLEEP);
259 	ASSERT(mod_ctxt->hcas != NULL);
260 
261 	/*
262 	 * Note: we open these in the reverse order of the guid list, although
263 	 * this is technically not required it is done this way so that the
264 	 * mapping will be in same order as the interfaces. Also note, that we
265 	 * provide a guid property, and the guid should be used to map a verbs
266 	 * device to an interface (i.e. don't depend on the order).
267 	 */
268 	for (hca_ndx = 0; hca_ndx < mod_ctxt->hca_count; hca_ndx++) {
269 		status = ibt_open_hca(mod_ctxt->clnt_hdl,
270 		    mod_ctxt->hca_guid_list[mod_ctxt->hca_count - hca_ndx -1],
271 		    &mod_ctxt->hcas[hca_ndx].hdl);
272 		if (status != IBT_SUCCESS) {
273 			SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
274 			    "hca_open: ibt_open_hca() returned %d",
275 			    status);
276 			goto out_err;
277 		}
278 
279 		mod_ctxt->hcas[hca_ndx].clnt_hdl = mod_ctxt->clnt_hdl;
280 		llist_head_init(&mod_ctxt->hcas[hca_ndx].list,
281 		    &mod_ctxt->hcas[hca_ndx]);
282 		mutex_init(&mod_ctxt->hcas[hca_ndx].event_handler_lock, NULL,
283 		    MUTEX_DRIVER, NULL);
284 		llist_head_init(&mod_ctxt->hcas[hca_ndx].event_handler_list,
285 		    NULL);
286 		mutex_init(&mod_ctxt->hcas[hca_ndx].client_data_lock, NULL,
287 		    MUTEX_DRIVER, NULL);
288 		llist_head_init(&mod_ctxt->hcas[hca_ndx].client_data_list,
289 		    NULL);
290 
291 		mutex_enter(&sol_uverbs_hca_lock);
292 		llist_add_tail(&mod_ctxt->hcas[hca_ndx].list,
293 		    &sol_uverbs_hca_list);
294 		mutex_exit(&sol_uverbs_hca_lock);
295 
296 		mod_ctxt->hcas[hca_ndx].guid =
297 		    mod_ctxt->hca_guid_list[mod_ctxt->hca_count - hca_ndx -1];
298 
299 		/*
300 		 * Get a cached copy of the HCA's attributes for easy access.
301 		 */
302 		status = ibt_query_hca(mod_ctxt->hcas[hca_ndx].hdl,
303 		    &mod_ctxt->hcas[hca_ndx].attr);
304 		if (status != IBT_SUCCESS) {
305 			SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
306 			    "hca_open: ibt_query_hca() failed "
307 			    "(status=%d)", status);
308 			goto out_err;
309 		}
310 
311 		/* Note : GUID is in host order here */
312 		SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str,
313 		    "hca_open: HCA index %d, HCA GUID: 0x%016llX",
314 		    hca_ndx, (u_longlong_t)mod_ctxt->hcas[hca_ndx].guid);
315 	}
316 
317 #ifdef DEBUG
318 	mutex_enter(&sol_uverbs_hca_lock);
319 	list_for_each(entry, &sol_uverbs_hca_list) {
320 		temp = (sol_uverbs_hca_t *)entry->ptr;
321 		SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str,
322 		    "HCA list: entry: %p, handle: %p, "
323 		    "GUID: 0x%016llX", (void *)entry, (void *)temp->hdl,
324 		    (u_longlong_t)temp->guid);
325 	}
326 	mutex_exit(&sol_uverbs_hca_lock);
327 #endif
328 	return (0);
329 
330 out_err:
331 	/*
332 	 * Note, cleanup of hca list and associated resources is done via
333 	 * uverbs_hca_close called outside this routine in the case of bad
334 	 * status.
335 	 */
336 	return (status);
337 }
338 
339 /*
340  * Function:
341  *	sol_uverbs_hca_close
342  * Input:
343  *	mod_ctxt	- Pointer to the module context.
344  * Output:
345  *	None
346  * Returns:
347  *	None
348  * Description:
349  * 	Close all of the IBT HCAs opened by the driver and detach from
350  *	the IBT framework.
351  */
352 static void
353 sol_uverbs_hca_close(uverbs_module_context_t *mod_ctxt)
354 {
355 	int    hca_ndx;
356 
357 	if (mod_ctxt->hcas != NULL) {
358 		mutex_enter(&sol_uverbs_hca_lock);
359 		llist_head_init(&sol_uverbs_hca_list, NULL);
360 		mutex_exit(&sol_uverbs_hca_lock);
361 		for (hca_ndx = 0; hca_ndx < mod_ctxt->hca_count; hca_ndx++) {
362 			if (mod_ctxt->hcas[hca_ndx].hdl != NULL) {
363 				mutex_destroy(&mod_ctxt->hcas[hca_ndx].
364 				    event_handler_lock);
365 				mutex_destroy(&mod_ctxt->hcas[hca_ndx].
366 				    client_data_lock);
367 				(void) ibt_close_hca(mod_ctxt->
368 				    hcas[hca_ndx].hdl);
369 			}
370 		}
371 		kmem_free(mod_ctxt->hcas,
372 		    mod_ctxt->hca_count * sizeof (sol_uverbs_hca_t));
373 		mod_ctxt->hcas = NULL;
374 	}
375 
376 	if ((mod_ctxt->hca_guid_list != NULL) && (mod_ctxt->hca_count > 0)) {
377 		ibt_free_hca_list(mod_ctxt->hca_guid_list, mod_ctxt->hca_count);
378 		mod_ctxt->hca_count	= 0;
379 		mod_ctxt->hca_guid_list = NULL;
380 	}
381 
382 	if (mod_ctxt->clnt_hdl != NULL) {
383 		(void) ibt_detach(mod_ctxt->clnt_hdl);
384 		mod_ctxt->clnt_hdl = NULL;
385 	}
386 }
387 
388 /*
389  * Function:
390  *	_init
391  * Input:
392  *	None
393  * Output:
394  *	None
395  * Returns:
396  *	DDI_SUCCESS  on success, else error code.
397  * Description:
398  * 	Perform Solaris OFED user verbs kernel agent driver initialization.
399  */
400 int
401 _init(void)
402 {
403 	int	error;
404 
405 	error = ddi_soft_state_init(&statep,
406 	    sizeof (uverbs_module_context_t), 0);
407 
408 	if (error != 0) {
409 		return (error);
410 	}
411 
412 	sol_uverbs_user_objects_init();
413 	if (sol_uverbs_common_hca_init()) {
414 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
415 		    "uverbs_hca_init() failed");
416 		ddi_soft_state_fini(&statep);
417 		return (ENODEV);
418 	}
419 
420 	error = mod_install(&modlinkage);
421 	if (error != 0) {
422 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
423 		    "uverbs: mod_install failed!!");
424 		sol_uverbs_common_hca_fini();
425 		sol_uverbs_user_objects_fini();
426 		ddi_soft_state_fini(&statep);
427 	}
428 	return (error);
429 }
430 
431 /*
432  * Function:
433  *	_info
434  * Input:
435  *	modinfop	- Pointer to an opqque modinfo structure.
436  * Output:
437  *	modinfop	- Updated structure.
438  * Returns:
439  *	The mod_info() return code.
440  * Description:
441  * 	Return information about the loadable module via the mod_info()
442  *	kernel function call.
443  */
444 int
445 _info(struct modinfo *modinfop)
446 {
447 	return (mod_info(&modlinkage, modinfop));
448 }
449 
450 /*
451  * Function:
452  *	_fini
453  * Input:
454  *	None
455  * Output:
456  *	None
457  * Returns:
458  *	DDI_SUCCESS  on success, else error code returned by
459  *	mod_remove kernel function.
460  * Description:
461  * 	Perform Solaris OFED user verbs kernel agent driver cleanup.
462  */
463 int
464 _fini(void)
465 {
466 	int    rc;
467 
468 	rc = mod_remove(&modlinkage);
469 	if (!rc) {
470 		sol_uverbs_common_hca_fini();
471 		sol_uverbs_user_objects_fini();
472 	}
473 	return (rc);
474 }
475 
476 int
477 sol_uverbs_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
478 {
479 	uverbs_module_context_t	*mod_ctxt;
480 	int			rc, instance, hca_ndx;
481 
482 	switch (cmd) {
483 		case DDI_ATTACH:
484 			break;
485 		case DDI_RESUME:
486 			return (DDI_SUCCESS);
487 		default:
488 			return (DDI_FAILURE);
489 	}
490 
491 	/*
492 	 * Allocate a soft data structure based on this dev info
493 	 */
494 	instance = ddi_get_instance(dip);
495 	if (instance != 0) {
496 		SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str,
497 		    "attach: bad instance number %d", instance);
498 		return (DDI_FAILURE);
499 	}
500 
501 	if (ddi_soft_state_zalloc(statep, instance) != DDI_SUCCESS) {
502 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
503 		    "attach: bad state zalloc");
504 		return (DDI_FAILURE);
505 	}
506 
507 	mod_ctxt = ddi_get_soft_state(statep, instance);
508 	if (mod_ctxt == NULL) {
509 		ddi_soft_state_free(statep, instance);
510 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
511 		    "attach: cannot get soft state");
512 		return (DDI_FAILURE);
513 	}
514 
515 	/*
516 	 * Save off our private context in the dev_info
517 	 */
518 	mod_ctxt->dip = dip;
519 	ddi_set_driver_private(dip, mod_ctxt);
520 
521 	/*
522 	 * Opening of the hca will perform the ibt_attach and build a list of
523 	 * devices.
524 	 */
525 	rc = sol_uverbs_hca_open(mod_ctxt);
526 	if (rc) {
527 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
528 		    "attach: sol_uverbs_hca_open() (rc=%d)", rc);
529 		goto error;
530 	}
531 
532 	/*
533 	 * Export our ABI revision as a property.
534 	 */
535 	rc = ddi_prop_update_int(makedevice(ddi_driver_major(dip), 0),
536 	    dip, "abi-version", IB_USER_VERBS_ABI_VERSION);
537 	if (rc != DDI_SUCCESS) {
538 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
539 		    "attach: could not add abi-version property");
540 	}
541 
542 	/*
543 	 * Create the filesystem device node for each HCA.
544 	 */
545 	for (hca_ndx = 0; hca_ndx < mod_ctxt->hca_count; hca_ndx++) {
546 		char name[20];
547 
548 		(void) snprintf(name, 20, "uverbs%d", hca_ndx);
549 		rc = ddi_create_minor_node(dip, name, S_IFCHR, hca_ndx,
550 		    DDI_PSEUDO, 0);
551 		if (rc != DDI_SUCCESS) {
552 			SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
553 			    "attach: could not add character node");
554 			goto error;
555 		}
556 
557 		rc = ddi_prop_update_int64(makedevice(ddi_driver_major(dip),
558 		    hca_ndx), dip, "guid", mod_ctxt->hcas[hca_ndx].guid);
559 		if (rc != DDI_SUCCESS) {
560 			SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
561 			    "attach: could not add GUID property");
562 		}
563 
564 		rc = ddi_prop_update_int(makedevice(ddi_driver_major(dip),
565 		    hca_ndx), dip, "vendor-id",
566 		    mod_ctxt->hcas[hca_ndx].attr.hca_vendor_id);
567 		if (rc != DDI_SUCCESS) {
568 			SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
569 			    "attach: could not add vendor-id property");
570 		}
571 
572 		rc = ddi_prop_update_int(makedevice(ddi_driver_major(dip),
573 		    hca_ndx), dip, "device-id", mod_ctxt->
574 		    hcas[hca_ndx].attr.hca_device_id);
575 		if (rc != DDI_SUCCESS) {
576 			SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
577 			    "attach: could not add device-id property");
578 		}
579 	}
580 
581 	rc = ddi_create_minor_node(dip, "ucma",  S_IFCHR,
582 	    SOL_UVERBS_DRIVER_MAX_HCA_MINOR, DDI_PSEUDO, 0);
583 
584 	if (rc != DDI_SUCCESS) {
585 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
586 		    "attach: could not add minor for ucma");
587 		goto error;
588 	}
589 
590 	rc = ddi_create_minor_node(dip, "event",  S_IFCHR,
591 	    SOL_UVERBS_DRIVER_EVENT_MINOR, DDI_PSEUDO, 0);
592 
593 	if (rc != DDI_SUCCESS) {
594 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
595 		    "attach: could not add minor for events");
596 		goto error;
597 	}
598 
599 	ddi_report_dev(dip);
600 
601 	return (DDI_SUCCESS);
602 
603 error:
604 	/*
605 	 * Cleanup any resources and dettach.
606 	 */
607 	sol_uverbs_hca_close(mod_ctxt);
608 	ddi_soft_state_free(statep, instance);
609 
610 	return (rc);
611 }
612 
613 /*
614  * Function:
615  *	sol_uverbs_detach
616  * Input:
617  *	dip	- A pointer to the devices dev_info_t structure.
618  *	cmd	- Type of detach (DDI_DETACH or DDI_SUSPEND).
619  * Output:
620  *	None
621  * Returns:
622  *	DDI_SUCCESS on success, else error code.
623  * Description:
624  * 	Detaches thea driver module and will cause the driver to close
625  *	the underlying IBT HCA and detach from the IBT driver.  Note
626  *	that this call will fail if user verb consumers or ucma have a
627  *	sol_uverbs device open.
628  */
629 static int
630 sol_uverbs_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
631 {
632 	int			instance;
633 	uverbs_module_context_t	*mod_ctxt;
634 
635 	SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "detach()");
636 
637 	if (cmd != DDI_DETACH) {
638 		return (DDI_FAILURE);
639 	}
640 
641 	instance = ddi_get_instance(dip);
642 	if (instance != 0) {
643 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
644 		    "detach: bad instance number 0x%x", instance);
645 		return (DDI_FAILURE);
646 	}
647 
648 	rw_enter(&uverbs_uctxt_uo_tbl.uobj_tbl_lock, RW_WRITER);
649 	if (uverbs_uctxt_uo_tbl.uobj_tbl_uo_cnt > 0) {
650 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
651 		    "detach(): device in use");
652 		rw_exit(&uverbs_uctxt_uo_tbl.uobj_tbl_lock);
653 		return (DDI_FAILURE);
654 	}
655 	rw_exit(&uverbs_uctxt_uo_tbl.uobj_tbl_lock);
656 
657 	mod_ctxt = ddi_get_soft_state(statep, instance);
658 
659 	/*
660 	 * Sanity check, do not detach if other kernel agents
661 	 * are still using sol_uverbs IBT handles.
662 	 */
663 	mutex_enter(&sol_uverbs_hca_lock);
664 	if (!llist_empty(&sol_uverbs_client_list)) {
665 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
666 		    "detach: agents still registered");
667 		mutex_exit(&sol_uverbs_hca_lock);
668 		return (DDI_FAILURE);
669 	}
670 	mutex_exit(&sol_uverbs_hca_lock);
671 
672 	/*
673 	 * Hca close will perform the detach from IBTF.
674 	 */
675 	sol_uverbs_hca_close(mod_ctxt);
676 
677 	ddi_soft_state_free(statep, instance);
678 	ddi_remove_minor_node(dip, NULL);
679 	return (DDI_SUCCESS);
680 }
681 
682 /*
683  * Function:
684  *	sol_uverbs_getinfo
685  * Input:
686  *	dip     - Deprecated, do not use.
687  *	cmd     - Command argument (DDI_INFO_DEVT2DEVINFO or
688  *	          DDI_INFO_DEVT2INSTANCE).
689  *	arg     - Command specific argument.
690  *	resultp - Pointer to place results.
691  * Output:
692  *	resultp	- Location is updated with command results.
693  * Returns:
694  *	DDI_SUCCESS on success, else error code.
695  * Description:
696  *	Depending on the request (cmd) return either the dev_info_t pointer
697  *	associated with the dev_info_t specified, or the instance.  Note
698  *	that we have only a single instance.
699  */
700 /* ARGSUSED */
701 static int
702 sol_uverbs_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
703     void **resultp)
704 {
705 	uverbs_module_context_t	*mod_ctxt;
706 
707 	switch (cmd) {
708 		case DDI_INFO_DEVT2DEVINFO:
709 			mod_ctxt = ddi_get_soft_state(statep, 0);
710 			if (!mod_ctxt) {
711 				return (DDI_FAILURE);
712 			}
713 			*resultp = (void *)mod_ctxt->dip;
714 			return (DDI_SUCCESS);
715 
716 		case DDI_INFO_DEVT2INSTANCE:
717 			*resultp = 0;
718 			return (DDI_SUCCESS);
719 
720 		default:
721 			return (DDI_FAILURE);
722 	}
723 }
724 
725 /*
726  * Function:
727  *	sol_uverbs_prop_op
728  * Input:
729  *	dev	- The device number associated with this device.
730  *	dip	- A pointer to the device information structure for this device.
731  *	prop_op - Property operator (PROP_LEN, PROP_LEN_AND_VAL_BUF, or
732  *	          PROP_LEN_AND_VAL_ALLOC).
733  *	flags	- Only possible flag value is DDI_PROP_DONTPASS.
734  *	name    - Pointer to the property to be interrogated.
735  *	valuep	- Address of pointer if ALLOC, otherwise a pointer to the
736  *	          users buffer.
737  *	lengthp	- Pointer to update with property length.
738  * Output:
739  *	valuep	- Updated with the property value.
740  *	lenghtp	- Updated with the property length.
741  * Returns:
742  *	DDI_SUCCESS on success, else error code.
743  * Description:
744  *	Driver entry point to report the values of certain properties of the
745  *	driver or  device.
746  */
747 static int
748 sol_uverbs_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int flags,
749     char *name, caddr_t valuep, int *lengthp)
750 {
751 	return (ddi_prop_op(dev, dip, prop_op, flags, name, valuep, lengthp));
752 
753 }
754 
/*
 * Forward declaration; sol_uverbs_open() calls this with the device number
 * pointer, the module context and the minor number being opened, and treats
 * a NULL return as failure.
 */
static uverbs_uctxt_uobj_t *sol_uverbs_alloc_uctxt(dev_t *,
    uverbs_module_context_t *, minor_t);
757 
758 /*
759  * Function:
760  *	sol_uverbs_open
761  * Input:
762  *	devp	- A pointer to the device number.
763  *	flag	- Flags specified by caller (FEXCL, FNDELAY, FREAD, FWRITE).
764  *	otyp	- Open type (OTYP_BLK, OTYP_CHR, OTYP_LYR).
765  *	cred	- Pointer to the callers credentials.
766  * Output:
767  *	devp	- On success devp has been cloned to point to a unique minor
768  *		  device.
769  * Returns:
770  *	DDI_SUCCESS on success, else error code.
771  * Description:
772  * 	Handles a user process open of a specific user verbs minor device by
773  *	allocating a user context user object and creating a unique device
774  *	to identify the user.  Note: The first SOL_UVERBS_DRIVER_MAX_MINOR
775  *	minor numbers are reserved for :
776  *		0 to SOL_UVERBS_DRIVER_MAX_HCA_MINOR - 1 : actual HCA devices
777  *		SOL_UVERBS_DRIVER_MAX_HCA_MINOR		 : UCMA node
778  *		SOL_UVERBS_DRIVER_EVENT_MINOR		 :
779  *			Event file for opening an event file for completion
780  *			or async notifications.
781  */
782 /* ARGSUSED */
783 static int
784 sol_uverbs_open(dev_t *devp, int flag, int otyp, cred_t *cred)
785 {
786 	uverbs_module_context_t	*mod_ctxt;
787 	uverbs_uctxt_uobj_t	*uctxt;
788 	int			minor;
789 
790 	/* Char only */
791 	if (otyp != OTYP_CHR) {
792 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
793 		    "open: not CHR");
794 		return (EINVAL);
795 	}
796 
797 	mod_ctxt = ddi_get_soft_state(statep, 0);
798 	if (mod_ctxt == NULL) {
799 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
800 		    "open: get soft state failed");
801 		return (ENXIO);
802 	}
803 
804 	minor = getminor(*devp);
805 
806 	/*
807 	 * Special case of ucma module.
808 	 */
809 	if (minor == SOL_UVERBS_DRIVER_MAX_HCA_MINOR) {
810 		extern cred_t	*kcred;
811 
812 		SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str,
813 		    "open: ucma_open");
814 		if (cred != kcred) {
815 			SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
816 			    "open: ucma_open non-kernel context");
817 			return (ENOTSUP);
818 		}
819 
820 		return (DDI_SUCCESS);
821 	}
822 
823 	/*
824 	 * If this is not an open for sol_uverbs event file,
825 	 * A device minor number must be less than the user verb max
826 	 * minor device number and the HCA count.
827 	 */
828 	if (minor != SOL_UVERBS_DRIVER_EVENT_MINOR &&
829 	    (minor >= SOL_UVERBS_DRIVER_MAX_HCA_MINOR ||
830 	    minor >= mod_ctxt->hca_count)) {
831 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
832 		    "open: bad minor %d", minor);
833 		return (ENODEV);
834 	}
835 	SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "open() - minor %d", minor);
836 
837 	/*
838 	 * Allocate a user context and return a unique ID that can be used
839 	 * in identify the new user context object.  Create a clone device
840 	 * that uses this unique ID as the minor number.  Allocation of the
841 	 * user context object places one reference against it; which will
842 	 * be held until the device is closed.
843 	 *
844 	 * sol_uverbs_alloc_uctxt() returns a sucessful allocation of uctx
845 	 * with the uobj uo_lock held for WRITTER.
846 	 */
847 	uctxt = sol_uverbs_alloc_uctxt(devp, mod_ctxt, minor);
848 	if (!uctxt)  {
849 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
850 		    "open: user context alloc failed");
851 		return (ENODEV);
852 	}
853 
854 	/*
855 	 * Indicate the object is alive and release the user object write lock
856 	 * which was placed on the user context at allocation.
857 	 */
858 	uctxt->uobj.uo_live = 1;
859 	rw_exit(&uctxt->uobj.uo_lock);
860 
861 	return (DDI_SUCCESS);
862 }
863 
864 /*
865  * Function:
866  *	sol_uverbs_close
867  * Input:
868  *	dev	- Device number.
869  *	flag	- File status flag.
870  *	otyp	- Open type.
871  *	cred	- A pointer to the callers credentials.
872  * Output:
873  *	None
874  * Returns:
875  *	DDI_SUCCESS on success, else error code.
876  * Description:
877  * 	Handles a user process close of a specific user verbs minor device by
878  *	freeing any user objects this process may still have allocated and
879  * 	deleting the associated user context object.
880  */
881 /* ARGSUSED */
882 static int
883 sol_uverbs_close(dev_t dev, int flag, int otyp, cred_t *cred)
884 {
885 	minor_t			id = getminor(dev);
886 	genlist_entry_t		*entry, *new_entry;
887 	uverbs_uctxt_uobj_t	*uctxt;
888 	int			rc;
889 	genlist_t		tmp_genlist;
890 
891 	/*
892 	 * HCA specific device nodes created during attach are been
893 	 * closed. Return SUCCESS.
894 	 */
895 	if (id < SOL_UVERBS_DRIVER_MAX_MINOR) {
896 		SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str,
897 		    "uverbs_close: dev_t %x, minor %x < %x",
898 		    dev, id, SOL_UVERBS_DRIVER_MAX_MINOR);
899 		return (0);
900 	}
901 
902 	/*
903 	 * Must be a user or kernel open, i.e. not a minor node that
904 	 * that represents a user verbs device.  If it is the UCMA
905 	 * nothing needs to be done.
906 	 */
907 	if (id == SOL_UVERBS_DRIVER_MAX_HCA_MINOR) {
908 		SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str,
909 		    "uverbs_close: ucma close");
910 		return (DDI_SUCCESS);
911 	}
912 
913 	uctxt = uverbs_uobj_get_uctxt_write(id - SOL_UVERBS_DRIVER_MAX_MINOR);
914 	if (uctxt == NULL) {
915 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
916 		    "uverbs_close: Unknown user context");
917 		return (ENXIO);
918 	}
919 	SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "uverbs_close- "
920 	    "uctxt %p", uctxt);
921 
922 	/*
923 	 * Remove from the user context resource table, cleanup all
924 	 * user resources that may still be hanging around.
925 	 */
926 	if (!sol_ofs_uobj_remove(&uverbs_uctxt_uo_tbl, &uctxt->uobj)) {
927 		/*
928 		 * It was already removed, drop the lock held from
929 		 * get above and exit.
930 		 */
931 		sol_ofs_uobj_put(&uctxt->uobj);
932 		return (ENXIO);
933 	}
934 
935 	if (uctxt->uctxt_type == SOL_UVERBS_UCTXT_ASYNC ||
936 	    uctxt->uctxt_type == SOL_UVERBS_UCTXT_COMPL) {
937 		uverbs_uctxt_uobj_t	*verbs_uctxt;
938 
939 		SOL_OFS_DPRINTF_L4(sol_uverbs_dbg_str,
940 		    "uverbs_close: Async or Compl user context");
941 
942 		/*
943 		 * Verbs uctxt has already been freed, just return.
944 		 */
945 		if (!uctxt->uctxt_verbs_id) {
946 			sol_ofs_uobj_put(&uctxt->uobj);
947 			sol_ofs_uobj_deref(&uctxt->uobj, sol_ofs_uobj_free);
948 			return (0);
949 		}
950 
951 		/*
952 		 * Verbs uctxt has not been freed. Close the ufile. This
953 		 * also frees the ufile if reference count is 0.
954 		 */
955 		verbs_uctxt = uverbs_uobj_get_uctxt_write(
956 		    uctxt->uctxt_verbs_id - SOL_UVERBS_DRIVER_MAX_MINOR);
957 
958 		if (verbs_uctxt &&
959 		    uctxt->uctxt_type == SOL_UVERBS_UCTXT_ASYNC) {
960 			sol_uverbs_event_file_close(verbs_uctxt->async_evfile);
961 			verbs_uctxt->async_evfile = NULL;
962 		} else if (uctxt->comp_evfile) {
963 			uctxt->comp_evfile = NULL;
964 		}
965 		if (verbs_uctxt)
966 			sol_ofs_uobj_put(&verbs_uctxt->uobj);
967 
968 		sol_ofs_uobj_put(&uctxt->uobj);
969 		sol_ofs_uobj_deref(&uctxt->uobj, sol_ofs_uobj_free);
970 		return (0);
971 	} else if (uctxt->uctxt_type == SOL_UVERBS_UCTXT_EVENT) {
972 		sol_ofs_uobj_put(&uctxt->uobj);
973 		sol_ofs_uobj_deref(&uctxt->uobj, sol_ofs_uobj_free);
974 		return (0);
975 	}
976 
977 	ASSERT(uctxt->hca != NULL);
978 
979 	/*
980 	 * Release resources that may still be held by this user context.
981 	 * Remove the resources from the associated resource managment
982 	 * table and free it.
983 	 */
984 	mutex_enter(&uctxt->lock);
985 
986 	entry = remove_genlist_head(&uctxt->ah_list);
987 	while (entry) {
988 		uverbs_uah_uobj_t *uah = (uverbs_uah_uobj_t *)entry->data;
989 
990 		rw_enter(&(uah->uobj.uo_lock), RW_WRITER);
991 		(void) sol_ofs_uobj_remove(&uverbs_uah_uo_tbl, &uah->uobj);
992 		rw_exit(&(uah->uobj.uo_lock));
993 		(void) ibt_free_ah(uctxt->hca->hdl, uah->ah);
994 		sol_ofs_uobj_free(&uah->uobj);
995 
996 		kmem_free((void *)entry, sizeof (genlist_entry_t));
997 		entry = remove_genlist_head(&uctxt->ah_list);
998 	}
999 
1000 	init_genlist(&tmp_genlist);
1001 	entry = remove_genlist_head(&uctxt->qp_list);
1002 	while (entry) {
1003 		uverbs_uqp_uobj_t *uqp = (uverbs_uqp_uobj_t *)entry->data;
1004 
1005 		/* Free unreaped asynchronous events.  */
1006 		uverbs_release_uqp_uevents(uctxt->async_evfile, uqp);
1007 
1008 		/*
1009 		 * If ucma has disabled QP free for this QP, set the
1010 		 * uqp_free_state to FREE_PENDING. Free QP if not.
1011 		 */
1012 		rw_enter(&(uqp->uobj.uo_lock), RW_WRITER);
1013 		if (uqp->uqp_free_state != SOL_UVERBS2UCMA_ENABLE_QP_FREE) {
1014 			new_entry = add_genlist(&tmp_genlist, entry->data,
1015 			    entry->data_context);
1016 			uqp->list_entry = new_entry;
1017 			uqp->uqp_free_state = SOL_UVERBS2UCMA_FREE_PENDING;
1018 			rw_exit(&(uqp->uobj.uo_lock));
1019 		} else {
1020 			uqp->list_entry = NULL;
1021 			mutex_exit(&uctxt->lock);
1022 			sol_ofs_uobj_ref(&uqp->uobj);
1023 			rc = uverbs_uqp_free(uqp, uctxt);
1024 			mutex_enter(&uctxt->lock);
1025 			if (rc)
1026 				SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
1027 				    "uqp_free(%p) failed", uqp);
1028 		}
1029 		kmem_free(entry, sizeof (genlist_entry_t));
1030 		entry = remove_genlist_head(&uctxt->qp_list);
1031 	}
1032 	(uctxt->qp_list).count = tmp_genlist.count;
1033 	(uctxt->qp_list).head = tmp_genlist.head;
1034 	(uctxt->qp_list).tail = tmp_genlist.tail;
1035 
1036 	init_genlist(&tmp_genlist);
1037 	entry = remove_genlist_head(&uctxt->cq_list);
1038 	while (entry) {
1039 		uverbs_ucq_uobj_t *ucq = (uverbs_ucq_uobj_t *)entry->data;
1040 
1041 		rw_enter(&(ucq->uobj.uo_lock), RW_WRITER);
1042 
1043 		/* Free events associated with the CQ.  */
1044 		uverbs_release_ucq_channel(uctxt, ucq->comp_chan, ucq);
1045 
1046 		if (ucq->active_qp_cnt) {
1047 			new_entry = add_genlist(&tmp_genlist, entry->data,
1048 			    entry->data_context);
1049 			ucq->list_entry = new_entry;
1050 			ucq->free_pending = 1;
1051 			rw_exit(&(ucq->uobj.uo_lock));
1052 		} else {
1053 			ucq->list_entry = NULL;
1054 			sol_ofs_uobj_ref(&ucq->uobj);
1055 			mutex_exit(&uctxt->lock);
1056 			rc = uverbs_ucq_free(ucq, uctxt);
1057 			mutex_enter(&uctxt->lock);
1058 			if (rc)
1059 				SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
1060 				    "ucq_free(%p) failed", ucq);
1061 		}
1062 
1063 		kmem_free((void *)entry, sizeof (genlist_entry_t));
1064 		entry = remove_genlist_head(&uctxt->cq_list);
1065 	}
1066 	(uctxt->cq_list).count = tmp_genlist.count;
1067 	(uctxt->cq_list).head = tmp_genlist.head;
1068 	(uctxt->cq_list).tail = tmp_genlist.tail;
1069 
1070 	init_genlist(&tmp_genlist);
1071 	entry = remove_genlist_head(&uctxt->srq_list);
1072 	while (entry) {
1073 		uverbs_usrq_uobj_t *usrq = (uverbs_usrq_uobj_t *)entry->data;
1074 
1075 		rw_enter(&(usrq->uobj.uo_lock), RW_WRITER);
1076 
1077 		/* Free unreaped asynchronous events.  */
1078 		uverbs_release_usrq_uevents(uctxt->async_evfile, usrq);
1079 
1080 		if (usrq->active_qp_cnt) {
1081 			new_entry = add_genlist(&tmp_genlist, entry->data,
1082 			    entry->data_context);
1083 			usrq->list_entry = new_entry;
1084 			usrq->free_pending = 1;
1085 			rw_exit(&(usrq->uobj.uo_lock));
1086 		} else {
1087 			usrq->list_entry = NULL;
1088 			sol_ofs_uobj_ref(&usrq->uobj);
1089 			mutex_exit(&uctxt->lock);
1090 			rc = uverbs_usrq_free(usrq, uctxt);
1091 			mutex_enter(&uctxt->lock);
1092 			if (rc)
1093 				SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
1094 				    "usrq_free(%p) failed", usrq);
1095 		}
1096 
1097 		kmem_free((void *)entry, sizeof (genlist_entry_t));
1098 		entry = remove_genlist_head(&uctxt->srq_list);
1099 	}
1100 	(uctxt->srq_list).count = tmp_genlist.count;
1101 	(uctxt->srq_list).head = tmp_genlist.head;
1102 	(uctxt->srq_list).tail = tmp_genlist.tail;
1103 
1104 	entry = remove_genlist_head(&uctxt->mr_list);
1105 	while (entry) {
1106 		uverbs_umr_uobj_t *umr = (uverbs_umr_uobj_t *)entry->data;
1107 
1108 		rw_enter(&(umr->uobj.uo_lock), RW_WRITER);
1109 		(void) sol_ofs_uobj_remove(&uverbs_umr_uo_tbl, &umr->uobj);
1110 		rw_exit(&(umr->uobj.uo_lock));
1111 
1112 		(void) ibt_deregister_mr(uctxt->hca->hdl, umr->mr);
1113 		sol_ofs_uobj_free(&umr->uobj);
1114 
1115 		kmem_free((void *)entry, sizeof (genlist_entry_t));
1116 		entry = remove_genlist_head(&uctxt->mr_list);
1117 	}
1118 
1119 	entry = remove_genlist_head(&uctxt->pd_list);
1120 	while (entry) {
1121 		uverbs_upd_uobj_t *upd = (uverbs_upd_uobj_t *)entry->data;
1122 
1123 		rw_enter(&(upd->uobj.uo_lock), RW_WRITER);
1124 		if (upd->active_qp_cnt) {
1125 			new_entry = add_genlist(&tmp_genlist, entry->data,
1126 			    entry->data_context);
1127 			upd->list_entry = new_entry;
1128 			upd->free_pending = 1;
1129 			rw_exit(&(upd->uobj.uo_lock));
1130 		} else {
1131 			upd->list_entry = NULL;
1132 			sol_ofs_uobj_ref(&upd->uobj);
1133 			mutex_exit(&uctxt->lock);
1134 			rc = uverbs_upd_free(upd, uctxt);
1135 			mutex_enter(&uctxt->lock);
1136 			if (rc)
1137 				SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
1138 				    "upd_free(%p) failed", upd);
1139 		}
1140 
1141 		kmem_free((void *)entry, sizeof (genlist_entry_t));
1142 		entry = remove_genlist_head(&uctxt->pd_list);
1143 	}
1144 	(uctxt->pd_list).count = tmp_genlist.count;
1145 	(uctxt->pd_list).head = tmp_genlist.head;
1146 	(uctxt->pd_list).tail = tmp_genlist.tail;
1147 
1148 	mutex_exit(&uctxt->lock);
1149 
1150 	/*
1151 	 * Release the user file structure to the async file if it
1152 	 * has not be released yet. The uctxt for async file will
1153 	 * be closed when the async file is closed.
1154 	 */
1155 	if (uctxt->async_evfile) {
1156 		uverbs_uctxt_uobj_t	*async_uctxt;
1157 
1158 		async_uctxt = uverbs_uobj_get_uctxt_write(
1159 		    uctxt->uctxt_async_id -
1160 		    SOL_UVERBS_DRIVER_MAX_MINOR);
1161 		if (!async_uctxt) {
1162 			SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
1163 			    "uverbs_close: Invalid async_id %x",
1164 			    uctxt->uctxt_async_id);
1165 			sol_ofs_uobj_put(&uctxt->uobj);
1166 			return (ENXIO);
1167 		}
1168 
1169 		async_uctxt->uctxt_verbs_id = 0;
1170 		sol_uverbs_event_file_close(uctxt->async_evfile);
1171 		uctxt->async_evfile = NULL;
1172 		sol_ofs_uobj_put(&async_uctxt->uobj);
1173 	}
1174 
1175 	/*
1176 	 * Release the write lock and the reference from the get above, and
1177 	 * release the reference placed on the user context as process open
1178 	 * to release context.
1179 	 */
1180 	sol_ofs_uobj_put(&uctxt->uobj);
1181 
1182 	/*
1183 	 * If some QPs have not been freed, donot free the uctxt.
1184 	 * Set uctxt_free_pending flag. This will be freed when
1185 	 * the QP will be freed.
1186 	 */
1187 	if ((uctxt->qp_list).count) {
1188 		SOL_OFS_DPRINTF_L3(sol_uverbs_dbg_str,
1189 		    "close: uctxt %p, has pending uqp", uctxt);
1190 		uctxt->uctxt_free_pending = 1;
1191 		return (0);
1192 	}
1193 
1194 	SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str,
1195 	    "close: deallocated user context: %p, ref = %d",
1196 	    (void *)uctxt, uctxt->uobj.uo_refcnt);
1197 
1198 	sol_ofs_uobj_deref(&uctxt->uobj, sol_ofs_uobj_free);
1199 
1200 	return (0);
1201 }
1202 
1203 /*
1204  * Function:
1205  *	sol_uverbs_read
1206  * Input:
1207  *	dev	- Device number.
1208  *	uiop	- Pointer to the uio structgure where data is to be stored.
1209  *	credp	- A pointer to the credentials for the I/O transaction.
1210  * Output:
1211  *	None
1212  * Returns:
1213  *	DDI_SUCCESS on success, else error code.
1214  * Description:
1215  * 	User process read stub.
1216  */
1217 static int
1218 sol_uverbs_read(dev_t dev, struct uio *uiop, cred_t *credp)
1219 {
1220 	minor_t			id = getminor(dev);
1221 	uverbs_uctxt_uobj_t	*uctxt, *verbs_uctxt;
1222 	int			rc;
1223 
1224 	SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "uverbs_read(%x, %p, %p)",
1225 	    dev, uiop, credp);
1226 
1227 	ASSERT(id >= SOL_UVERBS_DRIVER_MAX_MINOR);
1228 	uctxt = uverbs_uobj_get_uctxt_read(id - SOL_UVERBS_DRIVER_MAX_MINOR);
1229 	if (uctxt == NULL) {
1230 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
1231 		    "uverbs_read: Failed get user context");
1232 		return (ENXIO);
1233 	}
1234 
1235 	if (uctxt->uctxt_verbs_id < SOL_UVERBS_DRIVER_MAX_MINOR) {
1236 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
1237 		    "uverbs_read: Invalid Verbs user context id, %x",
1238 		    uctxt->uctxt_verbs_id);
1239 		sol_ofs_uobj_put(&uctxt->uobj);
1240 		return (ENXIO);
1241 	}
1242 	verbs_uctxt = uverbs_uobj_get_uctxt_read(uctxt->uctxt_verbs_id
1243 	    - SOL_UVERBS_DRIVER_MAX_MINOR);
1244 	if (verbs_uctxt == NULL) {
1245 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
1246 		    "uverbs_read: Failed get verbs user context");
1247 		sol_ofs_uobj_put(&uctxt->uobj);
1248 		return (ENXIO);
1249 	}
1250 	if (uctxt->uctxt_type == SOL_UVERBS_UCTXT_ASYNC) {
1251 		ASSERT(verbs_uctxt->async_evfile);
1252 		rc = sol_uverbs_event_file_read(verbs_uctxt->async_evfile,
1253 		    uiop, credp);
1254 	} else if (uctxt->uctxt_type == SOL_UVERBS_UCTXT_COMPL) {
1255 		rc = sol_uverbs_event_file_read(uctxt->comp_evfile,
1256 		    uiop, credp);
1257 	} else {
1258 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
1259 		    "uverbs_read: invalid user context type %x",
1260 		    uctxt->uctxt_type);
1261 		rc = ENXIO;
1262 	}
1263 
1264 	sol_ofs_uobj_put(&verbs_uctxt->uobj);
1265 	sol_ofs_uobj_put(&uctxt->uobj);
1266 	return (rc);
1267 }
1268 
1269 /*
1270  * Function:
1271  *	sol_uverbs_mmap
1272  * Input:
1273  *	dev		- Device whose memory is to be mapped.
1274  *	sol_uverbs_mmap	- Offset within the device memory at which mapping
1275  *			  begins.
1276  *	prot		- Bitmask specifying protection.
1277  * Output:
1278  *	None
1279  * Returns:
1280  *	DDI_SUCCESS on success, else error code.
1281  * Description:
1282  * 	User process mmap stub.  Mmap operations are performed directly
1283  *	by the underlying IB HCA driver, bypassing the user verbs.
1284  */
1285 /* ARGSUSED */
1286 static int
1287 sol_uverbs_mmap(dev_t dev, off_t mmap_offset, int prot)
1288 {
1289 	SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str,
1290 	    "sol_uverbs_mmap(%d)-  not yet used", mmap_offset);
1291 	return (DDI_SUCCESS);
1292 }
1293 
1294 /*
1295  * Function:
1296  *	sol_uverbs_get_context
1297  * Input:
1298  *	uctxt   - Pointer to the callers user context.
1299  *	buf     - Pointer to kernel buffer containing command.
1300  *	in_len  - Length in bytes of input command buffer.
1301  *	out_len - Length in bytes of output response buffer.
1302  * Output:
1303  *	The command output buffer is updated with command results.
1304  * Returns:
1305  *	DDI_SUCCESS on success, else error code.
1306  * Description:
1307  * 	User verb entry point to return the unique user context to the process
1308  *	that opened the associated user verb driver instance.  Note that upon
1309  *	entry a reference will have already been placed on the user
1310  *	context user space object, so an additional reference is not
1311  *	required here.
1312  */
1313 int
1314 sol_uverbs_get_context(uverbs_uctxt_uobj_t *uctxt, char *buf, int in_len,
1315     int out_len)
1316 {
1317 	struct mthca_alloc_ucontext_resp	uresp;
1318 	struct ib_uverbs_get_context		cmd;
1319 	struct ib_uverbs_get_context_resp	resp;
1320 	struct ib_udata				udata;
1321 	int					rc;
1322 	minor_t					async_id;
1323 	uverbs_uctxt_uobj_t			*async_uctxt;
1324 
1325 	SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str,
1326 	    "uverbs_get_context() - buf %p, sizeof (cmd) %d",
1327 	    buf, sizeof (cmd));
1328 
1329 	ASSERT(uctxt->hca);
1330 
1331 	(void) memcpy(&cmd, buf, sizeof (cmd));
1332 
1333 	udata.inbuf  = (void *)(buf + sizeof (cmd));
1334 #ifdef	_LP64
1335 	udata.outbuf = (void *)(cmd.response.r_laddr + sizeof (resp));
1336 #else
1337 	udata.outbuf = (void *)(cmd.response.r_addr + sizeof (resp));
1338 #endif
1339 	udata.inlen  = in_len - sizeof (cmd);
1340 	udata.outlen = out_len - sizeof (resp);
1341 
1342 	/*
1343 	 * libibverbs will have passed minor of the async file in
1344 	 * resp.fd. Use this to determine the uctxt created for
1345 	 * asyncs.
1346 	 */
1347 #ifdef	_LP64
1348 	rc = copyin((void*)cmd.response.r_laddr, (void*)&resp, sizeof (resp));
1349 #else
1350 	rc = copyin((void*)cmd.response.r_addr, (void*)&resp, sizeof (resp));
1351 #endif
1352 	if (rc != 0) {
1353 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
1354 		    "get_context: copyin (rc=%d)", rc);
1355 		rc = EFAULT;
1356 		goto out;
1357 	}
1358 	async_id = resp.async_fd;
1359 	if (async_id < SOL_UVERBS_DRIVER_MAX_MINOR) {
1360 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
1361 		    "get_context: Invalid async user context "
1362 		    "id %x", async_id);
1363 		return (ENXIO);
1364 	}
1365 
1366 	async_uctxt = uverbs_uobj_get_uctxt_read(async_id -
1367 	    SOL_UVERBS_DRIVER_MAX_MINOR);
1368 	if (async_uctxt == NULL) {
1369 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
1370 		    "get_context: Failed get async user context");
1371 		return (ENXIO);
1372 	}
1373 	if (async_uctxt->uctxt_type != SOL_UVERBS_UCTXT_EVENT ||
1374 	    async_uctxt->uctxt_verbs_id != 0) {
1375 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
1376 		    "get_context: Invalid user context - "
1377 		    "possibly reused");
1378 		return (ENXIO);
1379 	}
1380 	async_uctxt->uctxt_type = SOL_UVERBS_UCTXT_ASYNC;
1381 	async_uctxt->uctxt_verbs_id = uctxt->uobj.uo_id +
1382 	    SOL_UVERBS_DRIVER_MAX_MINOR;
1383 	uctxt->uctxt_async_id = async_id;
1384 	SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str,
1385 	    "get_context: uctxt %p, async_uctxt %p, async_id %x",
1386 	    uctxt, async_uctxt, async_id);
1387 	sol_ofs_uobj_put(&async_uctxt->uobj);
1388 
1389 	uctxt->async_evfile = uverbs_alloc_event_file(uctxt, 1);
1390 	if (!uctxt->async_evfile) {
1391 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
1392 		    "get_context: async event file allocation failed");
1393 		goto out;
1394 	}
1395 
1396 	(void) memset(&resp, 0, sizeof (resp));
1397 	resp.num_comp_vectors 	= 1;
1398 
1399 #ifdef	_LP64
1400 	rc = copyout((void*)&resp, (void*)cmd.response.r_laddr, sizeof (resp));
1401 #else
1402 	rc = copyout((void*)&resp, (void*)cmd.response.r_addr, sizeof (resp));
1403 #endif
1404 	if (rc != 0) {
1405 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
1406 		    "get_context: copyout (rc=%d)", rc);
1407 		rc = EFAULT;
1408 		goto out;
1409 	}
1410 
1411 	/*
1412 	 * This unfortunately is Mellanox specific, we need to consider moving
1413 	 * this directly into the command response as opaque data, instead of
1414 	 * using this method.
1415 	 */
1416 	(void) memset(&uresp, 0, sizeof (uresp));
1417 	uresp.uarc_size   = 0;
1418 	uresp.qp_tab_size = uctxt->hca->attr.hca_max_chans;
1419 
1420 	rc = copyout((void*)&uresp, (void*)udata.outbuf, sizeof (uresp));
1421 	if (rc != 0) {
1422 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
1423 		    "get_context: copyout outbuf (rc=%d)", rc);
1424 		rc = EFAULT;
1425 		goto out;
1426 	}
1427 	rc = DDI_SUCCESS;
1428 
1429 out:
1430 	return (rc);
1431 }
1432 
1433 /*
1434  * Function:
1435  *	sol_uverbs_alloc_pd
1436  * Input:
1437  *	uctxt   - Pointer to the callers user context.
1438  *	buf     - Pointer to kernel buffer containing a alloc PD command.
1439  *	in_len  - Length in bytes of input command buffer.
1440  *	out_len - Length in bytes of output response buffer.
1441  * Output:
1442  *	The command output buffer is updated with command results.
1443  * Returns:
1444  *	DDI_SUCCESS on success, else error code.
1445  * Description:
1446  * 	User verb entry point to allocate a device protection domain.
1447  */
1448 /* ARGSUSED */
1449 int
1450 sol_uverbs_alloc_pd(uverbs_uctxt_uobj_t *uctxt, char *buf, int in_len,
1451     int out_len)
1452 {
1453 	struct ib_uverbs_alloc_pd	cmd;
1454 	struct ib_uverbs_alloc_pd_resp	resp;
1455 	uverbs_upd_uobj_t		*upd;
1456 	int				rc;
1457 
1458 	SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "alloc_pd()");
1459 
1460 	(void) memcpy(&cmd, buf, sizeof (cmd));
1461 	(void) memset(&resp, 0, sizeof (resp));
1462 
1463 	upd = kmem_zalloc(sizeof (*upd), KM_NOSLEEP);
1464 	if (upd == NULL) {
1465 		rc = ENOMEM;
1466 		goto out;
1467 	}
1468 	sol_ofs_uobj_init(&upd->uobj, 0, SOL_UVERBS_UPD_UOBJ_TYPE);
1469 	rw_enter(&upd->uobj.uo_lock, RW_WRITER);
1470 
1471 	rc = ibt_alloc_pd(uctxt->hca->hdl, IBT_PD_NO_FLAGS, &upd->pd);
1472 	if (rc != IBT_SUCCESS) {
1473 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
1474 		    "alloc_pd: ibt_alloc_pd() (rc=%d)", rc);
1475 		rc = sol_uverbs_ibt_to_kernel_status(rc);
1476 		upd->uobj.uo_uobj_sz = sizeof (uverbs_upd_uobj_t);
1477 		goto alloc_err;
1478 	}
1479 
1480 	if (sol_ofs_uobj_add(&uverbs_upd_uo_tbl, &upd->uobj) != 0) {
1481 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
1482 		    "alloc_pd: User object add failed");
1483 		rc = ENOMEM;
1484 		goto err_add_uobj;
1485 	}
1486 	resp.pd_handle = upd->uobj.uo_id;
1487 
1488 	/*
1489 	 * Query underlying hardware driver for data that may be required
1490 	 * when using the PD in an OS Bypass creation of UD address vectors.
1491 	 */
1492 	rc = ibt_ci_data_out(uctxt->hca->hdl, IBT_CI_NO_FLAGS, IBT_HDL_PD,
1493 	    (void *)upd->pd, &resp.drv_out, sizeof (resp.drv_out));
1494 	if (rc != IBT_SUCCESS) {
1495 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
1496 		    "alloc_pd: ibt_ci_data_out() (rc=%d)", rc);
1497 		rc = EFAULT;
1498 		goto err_response;
1499 	}
1500 
1501 #ifdef	_LP64
1502 	rc = copyout((void*)&resp, (void*)cmd.response.r_laddr, sizeof (resp));
1503 #else
1504 	rc = copyout((void*)&resp, (void*)cmd.response.r_addr, sizeof (resp));
1505 #endif
1506 	if (rc != 0) {
1507 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
1508 		    "alloc_pd: copyout fail (rc=%d)", rc);
1509 		rc = EFAULT;
1510 		goto err_response;
1511 	}
1512 
1513 	mutex_enter(&uctxt->lock);
1514 	upd->list_entry = add_genlist(&uctxt->pd_list, (uintptr_t)upd, uctxt);
1515 	mutex_exit(&uctxt->lock);
1516 
1517 	if (!upd->list_entry) {
1518 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
1519 		    "alloc_pd: Error adding upd to pd_list\n");
1520 		rc = ENOMEM;
1521 		goto err_response;
1522 	}
1523 
1524 	upd->uobj.uo_live = 1;
1525 	rw_exit(&upd->uobj.uo_lock);
1526 	return (DDI_SUCCESS);
1527 
1528 err_response:
1529 	/*
1530 	 * Need to set uo_live, so sol_ofs_uobj_remove() will
1531 	 * remove the object from the object table.
1532 	 */
1533 	upd->uobj.uo_live = 1;
1534 	(void) sol_ofs_uobj_remove(&uverbs_upd_uo_tbl, &upd->uobj);
1535 
1536 err_add_uobj:
1537 	(void) ibt_free_pd(uctxt->hca->hdl, upd->pd);
1538 
1539 alloc_err:
1540 	rw_exit(&upd->uobj.uo_lock);
1541 	sol_ofs_uobj_deref(&upd->uobj, sol_ofs_uobj_free);
1542 out:
1543 	SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str,
1544 	    "alloc_pd:error (rc=%d)", rc);
1545 	return (rc);
1546 }
1547 
1548 int
1549 uverbs_upd_free(uverbs_upd_uobj_t *upd, uverbs_uctxt_uobj_t *uctxt)
1550 {
1551 	int	rc;
1552 
1553 	rc = ibt_free_pd(uctxt->hca->hdl, upd->pd);
1554 	if (rc != IBT_SUCCESS) {
1555 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
1556 		    "uverbs_upd_free: ibt_free_pd() failed %d", rc);
1557 		rc = sol_uverbs_ibt_to_kernel_status(rc);
1558 		sol_ofs_uobj_put(&upd->uobj);
1559 		return (rc);
1560 	}
1561 
1562 	/*
1563 	 * Remove from the list of this contexts PD resources, then remove from
1564 	 * the resource managment table and the reference placed on the user
1565 	 * object at PD allocation.
1566 	 */
1567 	upd->pd = NULL;
1568 	if (upd->list_entry) {
1569 		mutex_enter(&uctxt->lock);
1570 		delete_genlist(&uctxt->pd_list, upd->list_entry);
1571 		mutex_exit(&uctxt->lock);
1572 	}
1573 
1574 	/*
1575 	 * list_entry is NULL when called from sol_uverbs_close. Remove
1576 	 * from upd_uo_tbl and free upd, when called from close also.
1577 	 */
1578 	sol_ofs_uobj_put(&upd->uobj);
1579 	(void) sol_ofs_uobj_remove(&uverbs_upd_uo_tbl, &upd->uobj);
1580 	sol_ofs_uobj_deref(&upd->uobj, sol_ofs_uobj_free);
1581 	return (0);
1582 }
1583 
1584 /*
1585  * Function:
1586  *	sol_uverbs_dealloc_pd
1587  * Input:
1588  *	uctxt   - Pointer to the callers user context.
1589  *	buf     - Pointer to kernel buffer containing dealloc PD command.
1590  *	in_len  - Length in bytes of input command buffer.
1591  *	out_len - Length in bytes of output response buffer.
1592  * Output:
1593  *	The command output buffer is updated with command results.
1594  * Returns:
1595  *	DDI_SUCCESS on success, else error code.
1596  * Description:
1597  * 	User verb entry point to de-allocate a device protection domain.
1598  */
1599 /* ARGSUSED */
1600 int
1601 sol_uverbs_dealloc_pd(uverbs_uctxt_uobj_t *uctxt, char *buf, int in_len,
1602     int out_len)
1603 {
1604 	struct ib_uverbs_dealloc_pd	cmd;
1605 	uverbs_upd_uobj_t		*upd;
1606 	int				rc = 0;
1607 
1608 	(void) memcpy(&cmd, buf, sizeof (cmd));
1609 
1610 	SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str,
1611 	    "dealloc_pd(%d)", cmd.pd_handle);
1612 
1613 	upd = uverbs_uobj_get_upd_write(cmd.pd_handle);
1614 	if (upd == NULL) {
1615 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
1616 		    "dealloc_pd(%d) : invalid hdl", cmd.pd_handle);
1617 		rc = EINVAL;
1618 		goto err_out1;
1619 	}
1620 
1621 	if (upd->active_qp_cnt) {
1622 		sol_ofs_uobj_put(&upd->uobj);
1623 		rc = EBUSY;
1624 	} else {
1625 		rc = uverbs_upd_free(upd, uctxt);
1626 	}
1627 	cmd.pd_handle = 0;
1628 	return (rc);
1629 
1630 err_out1:
1631 	return (rc);
1632 }
1633 
1634 /*
1635  * Function:
1636  *	sol_uverbs_query_device
1637  * Input:
1638  *	uctxt   - Pointer to the callers user context.
1639  *	buf     - Pointer to kernel buffer containing query device command.
1640  *	in_len  - Length in bytes of input command buffer.
1641  *	out_len - Length in bytes of output response buffer.
1642  * Output:
1643  *	The command output buffer is updated with command results.
1644  * Returns:
1645  *	DDI_SUCCESS on success, else error code.
1646  * Description:
1647  * 	User verb entry point to query device attributes.
1648  */
1649 /* ARGSUSED */
1650 int
1651 sol_uverbs_query_device(uverbs_uctxt_uobj_t *uctxt, char *buf, int in_len,
1652 	int out_len)
1653 {
1654 	struct ib_uverbs_query_device		cmd;
1655 	struct ib_uverbs_query_device_resp	resp;
1656 	ibt_hca_attr_t				hca_attr;
1657 	int					rc;
1658 
1659 	SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "query_device()");
1660 
1661 	(void) memcpy(&cmd, buf, sizeof (cmd));
1662 	rc = ibt_query_hca(uctxt->hca->hdl, &hca_attr);
1663 	if (rc != IBT_SUCCESS) {
1664 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
1665 		    "query_device: ibt_query_hca() (rc=%d)", rc);
1666 		rc = sol_uverbs_ibt_to_kernel_status(rc);
1667 		goto out;
1668 	}
1669 
1670 	(void) memset(&resp, 0, sizeof (resp));
1671 
1672 	resp.fw_ver = ((uint64_t)hca_attr.hca_fw_major_version << 32) |
1673 	    ((uint64_t)hca_attr.hca_fw_minor_version << 16) |
1674 	    ((uint64_t)hca_attr.hca_fw_micro_version);
1675 
1676 	/*
1677 	 * NOTE: node guid and system image guid must be returned in big
1678 	 * endian (network order).  On solaris these are in host
1679 	 * order, so we swap it back here.
1680 	 */
1681 	resp.node_guid			= htonll(hca_attr.hca_node_guid);
1682 	resp.sys_image_guid		= htonll(hca_attr.hca_si_guid);
1683 
1684 	resp.max_mr_size		= hca_attr.hca_max_memr_len;
1685 
1686 	resp.page_size_cap =
1687 	    sol_uverbs_ibt_to_of_page_sz(hca_attr.hca_page_sz);
1688 
1689 	resp.vendor_id			= hca_attr.hca_vendor_id;
1690 	resp.vendor_part_id		= hca_attr.hca_device_id;
1691 	resp.hw_ver			= hca_attr.hca_version_id;
1692 	resp.max_qp			= hca_attr.hca_max_chans;
1693 	resp.max_qp_wr			= hca_attr.hca_max_chan_sz;
1694 
1695 	resp.device_cap_flags		=
1696 	    sol_uverbs_ibt_to_of_device_cap_flags(hca_attr.hca_flags,
1697 	    hca_attr.hca_flags2);
1698 
1699 	resp.max_sge			= hca_attr.hca_max_sgl;
1700 	resp.max_sge_rd			= hca_attr.hca_max_sgl;
1701 	resp.max_cq			= hca_attr.hca_max_cq;
1702 	resp.max_cqe			= hca_attr.hca_max_cq_sz;
1703 	resp.max_mr			= hca_attr.hca_max_memr;
1704 	resp.max_pd			= hca_attr.hca_max_pd;
1705 	resp.max_qp_rd_atom		= hca_attr.hca_max_rdma_in_chan;
1706 	resp.max_ee_rd_atom		= 0;
1707 	resp.max_res_rd_atom		= hca_attr.hca_max_rsc;
1708 	resp.max_qp_init_rd_atom	= hca_attr.hca_max_rdma_out_chan;
1709 	resp.max_ee_init_rd_atom	= 0;
1710 	if (hca_attr.hca_flags & IBT_HCA_ATOMICS_GLOBAL) {
1711 		resp.atomic_cap = IB_ATOMIC_GLOB;
1712 	} else if (hca_attr.hca_flags & IBT_HCA_ATOMICS_HCA) {
1713 		resp.atomic_cap = IB_ATOMIC_HCA;
1714 	} else {
1715 		resp.atomic_cap = IB_ATOMIC_NONE;
1716 	}
1717 	resp.max_ee			= 0;
1718 	resp.max_rdd			= 0;
1719 	resp.max_mw			= hca_attr.hca_max_mem_win;
1720 	resp.max_raw_ipv6_qp		= hca_attr.hca_max_ipv6_chan;
1721 	resp.max_raw_ethy_qp		= hca_attr.hca_max_ether_chan;
1722 	resp.max_mcast_grp		= hca_attr.hca_max_mcg;
1723 	resp.max_mcast_qp_attach	= hca_attr.hca_max_chan_per_mcg;
1724 	resp.max_total_mcast_qp_attach	= hca_attr.hca_max_mcg_chans;
1725 	resp.max_ah			= hca_attr.hca_max_ud_dest;
1726 	resp.max_fmr			= hca_attr.hca_max_fmrs;
1727 	resp.max_map_per_fmr		= 0;
1728 	resp.max_srq			= hca_attr.hca_max_srqs;
1729 	resp.max_srq_wr			= hca_attr.hca_max_srqs_sz;
1730 	resp.max_srq_sge		= hca_attr.hca_max_srq_sgl;
1731 	resp.max_pkeys			= hca_attr.hca_max_port_pkey_tbl_sz;
1732 	resp.local_ca_ack_delay		= hca_attr.hca_local_ack_delay;
1733 	resp.phys_port_cnt		= hca_attr.hca_nports;
1734 
1735 #ifdef	_LP64
1736 	rc = copyout((void*)&resp, (void*)cmd.response.r_laddr, sizeof (resp));
1737 #else
1738 	rc = copyout((void*)&resp, (void*)cmd.response.r_addr, sizeof (resp));
1739 #endif
1740 	if (rc != 0) {
1741 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
1742 		    "query_device: Error writing resp data (rc=%d)", rc);
1743 		rc = EFAULT;
1744 		goto out;
1745 	}
1746 
1747 	rc = DDI_SUCCESS;
1748 
1749 out:
1750 	return (rc);
1751 }
1752 
1753 /*
1754  * Function:
1755  *	sol_uverbs_query_port
1756  * Input:
1757  *	uctxt   - Pointer to the callers user context.
1758  *	buf     - Pointer to kernel buffer containing query port command.
1759  *	in_len  - Length in bytes of input command buffer.
1760  *	out_len - Length in bytes of output response buffer.
1761  * Output:
1762  *	The command output buffer is updated with command results.
1763  * Returns:
1764  *	DDI_SUCCESS on success, else error code.
1765  * Description:
1766  * 	User verb entry point to query a device port attributes.
1767  */
1768 /* ARGSUSED */
1769 int
1770 sol_uverbs_query_port(uverbs_uctxt_uobj_t *uctxt, char *buf, int in_len,
1771     int out_len)
1772 {
1773 	struct ib_uverbs_query_port		cmd;
1774 	struct ib_uverbs_query_port_resp	resp;
1775 	ibt_hca_portinfo_t			*port_info;
1776 	uint_t					port_info_n;
1777 	uint_t					port_info_size;
1778 	int					rc;
1779 
1780 	(void) memcpy(&cmd, buf, sizeof (cmd));
1781 
1782 	SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "sol_uverbs_query_port: %d",
1783 	    cmd.port_num);
1784 
1785 	if (!cmd.port_num || cmd.port_num > uctxt->hca->attr.hca_nports) {
1786 		SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str,
1787 		    "query_port: Invalid port specified");
1788 
1789 		rc = EINVAL;
1790 		goto out;
1791 	}
1792 
1793 	rc = ibt_query_hca_ports(uctxt->hca->hdl, cmd.port_num, &port_info,
1794 	    &port_info_n, &port_info_size);
1795 
1796 	if (rc != IBT_SUCCESS) {
1797 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
1798 		    "query_port: ibt_query_hca_ports() (rc=%d)", rc);
1799 		rc = sol_uverbs_ibt_to_kernel_status(rc);
1800 		goto out;
1801 	}
1802 	SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "query_port: "
1803 	    "port_num %d, port_info %x, lid %x, sm_lid %x",
1804 	    cmd.port_num, port_info, port_info->p_opaque1,
1805 	    port_info->p_sm_lid);
1806 
1807 	(void) memset(&resp, 0, sizeof (resp));
1808 
1809 	resp.state			= port_info->p_linkstate;
1810 	resp.max_mtu			= port_info->p_mtu;
1811 	resp.active_mtu			= port_info->p_mtu;
1812 	resp.gid_tbl_len		= port_info->p_sgid_tbl_sz;
1813 	resp.port_cap_flags  		= port_info->p_capabilities;
1814 	resp.max_msg_sz			= port_info->p_msg_sz;
1815 	resp.bad_pkey_cntr   		= port_info->p_pkey_violations;
1816 	resp.qkey_viol_cntr  		= port_info->p_qkey_violations;
1817 	resp.pkey_tbl_len    		= port_info->p_pkey_tbl_sz;
1818 	resp.lid			= port_info->p_opaque1;
1819 	resp.sm_lid			= port_info->p_sm_lid;
1820 	resp.lmc			= port_info->p_lmc;
1821 	resp.max_vl_num			= port_info->p_max_vl;
1822 	resp.sm_sl			= port_info->p_sm_sl;
1823 	resp.subnet_timeout  		= port_info->p_subnet_timeout;
1824 	resp.init_type_reply 		= port_info->p_init_type_reply;
1825 	resp.active_width    		= port_info->p_width_active;
1826 	resp.active_speed    		= port_info->p_speed_active;
1827 	resp.phys_state			= port_info->p_phys_state;
1828 
1829 	ibt_free_portinfo(port_info, port_info_size);
1830 
1831 #ifdef	_LP64
1832 	rc = copyout((void*)&resp, (void*)cmd.response.r_laddr, sizeof (resp));
1833 #else
1834 	rc = copyout((void*)&resp, (void*)cmd.response.r_addr, sizeof (resp));
1835 #endif
1836 	if (rc != 0) {
1837 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
1838 		    "query_port : copyout fail %x", rc);
1839 		rc = EFAULT;
1840 		goto out;
1841 	}
1842 
1843 	rc = DDI_SUCCESS;
1844 
1845 out:
1846 	return (rc);
1847 }
1848 
1849 /*
1850  * Function:
1851  *	sol_uverbs_query_gid
1852  * Input:
1853  *	uctxt   - Pointer to the callers user context.
1854  *	buf     - Pointer to kernel buffer containing query gid command.
1855  *	in_len  - Length in bytes of input command buffer.
1856  *	out_len - Length in bytes of output response buffer.
1857  * Output:
1858  *	The command output buffer is updated with command results.
1859  * Returns:
1860  *	DDI_SUCCESS on success, else error code.
1861  * Description:
1862  * 	User verb entry point to query the device gid for the specified
1863  *	port and gid index.
1864  */
1865 /* ARGSUSED */
1866 int
1867 sol_uverbs_query_gid(uverbs_uctxt_uobj_t *uctxt, char *buf, int in_len,
1868     int out_len)
1869 {
1870 	struct ib_uverbs_query_gid	cmd;
1871 	struct ib_uverbs_query_gid_resp	resp;
1872 	ibt_hca_portinfo_t		*port_info;
1873 	uint_t				port_info_n;
1874 	uint_t				port_info_size;
1875 	int				rc;
1876 	uint64_t			temp;
1877 
1878 	(void) memcpy(&cmd, buf, sizeof (cmd));
1879 
1880 	SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str,
1881 	    "query_gid() : port_num %x, gid_index %x",
1882 	    cmd.port_num, cmd.gid_index);
1883 
1884 	if (!cmd.port_num || cmd.port_num > uctxt->hca->attr.hca_nports) {
1885 		SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str,
1886 		    "query_gid: Invalid port specified");
1887 
1888 		rc = EINVAL;
1889 		goto out;
1890 	}
1891 
1892 	rc = ibt_query_hca_ports(uctxt->hca->hdl, cmd.port_num, &port_info,
1893 	    &port_info_n, &port_info_size);
1894 	if (rc != IBT_SUCCESS) {
1895 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
1896 		    "query_gid: ibt_query_hca_ports() (rc=%d)", rc);
1897 		rc = sol_uverbs_ibt_to_kernel_status(rc);
1898 		goto out;
1899 	}
1900 
1901 	SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "number of "
1902 	    "gid entries %d", cmd.port_num, cmd.gid_index,
1903 	    port_info->p_sgid_tbl_sz);
1904 
1905 
1906 	if (cmd.gid_index >= port_info->p_sgid_tbl_sz) {
1907 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
1908 		    "query_gid: cmd gid_index %x > port_info sz %x",
1909 		    cmd.gid_index, port_info->p_sgid_tbl_sz);
1910 		rc = EINVAL;
1911 		ibt_free_portinfo(port_info, port_info_size);
1912 		goto out;
1913 	}
1914 
1915 	/*
1916 	 * The gid must be returned as a network ordered byte array, on solaris
1917 	 * it is a structure in host order so we swap the components as needed.
1918 	 */
1919 	temp = htonll(port_info->p_sgid_tbl[cmd.gid_index].gid.ucast_gid.
1920 	    ugid_prefix);
1921 	(void) memcpy(&resp.gid[0], &temp, sizeof (temp));
1922 	temp = htonll(port_info->p_sgid_tbl[cmd.gid_index].gid.ucast_gid.
1923 	    ugid_guid);
1924 	(void) memcpy(&resp.gid[8], &temp, sizeof (temp));
1925 
1926 	SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "query_gid: gid = "
1927 	    "0x%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:"
1928 	    "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x:",
1929 	    resp.gid[0], resp.gid[1], resp.gid[2], resp.gid[3],
1930 	    resp.gid[4], resp.gid[5], resp.gid[6], resp.gid[7],
1931 	    resp.gid[8], resp.gid[9], resp.gid[10], resp.gid[11],
1932 	    resp.gid[12], resp.gid[13], resp.gid[14], resp.gid[15]);
1933 
1934 	ibt_free_portinfo(port_info, port_info_size);
1935 
1936 #ifdef	_LP64
1937 	rc = copyout((void*)&resp, (void*)cmd.response.r_laddr, sizeof (resp));
1938 #else
1939 	rc = copyout((void*)&resp, (void*)cmd.response.r_addr, sizeof (resp));
1940 #endif
1941 	if (rc != 0) {
1942 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
1943 		    "query_gid: copyout %d", rc);
1944 		rc = EFAULT;
1945 		goto out;
1946 	}
1947 
1948 	rc = DDI_SUCCESS;
1949 out:
1950 	return (rc);
1951 }
1952 
1953 /*
1954  * Function:
1955  *	sol_uverbs_query_pkey
1956  * Input:
1957  *	uctxt   - Pointer to the callers user context.
1958  *	buf     - Pointer to kernel buffer containing a query pkey command.
1959  *	in_len  - Length in bytes of input command buffer.
1960  *	out_len - Length in bytes of output response buffer.
1961  * Output:
1962  *	The command output buffer is updated with command results.
1963  * Returns:
1964  *	DDI_SUCCESS on success, else error code.
1965  * Description:
1966  * 	User verb entry point to query a device for the pkey at the specified
1967  *	port and pkey index.
1968  */
1969 /* ARGSUSED */
1970 int
1971 sol_uverbs_query_pkey(uverbs_uctxt_uobj_t *uctxt, char *buf, int in_len,
1972     int out_len)
1973 {
1974 	struct ib_uverbs_query_pkey		cmd;
1975 	struct ib_uverbs_query_pkey_resp	resp;
1976 	ibt_hca_portinfo_t			*port_info;
1977 	uint_t					port_info_n;
1978 	uint_t					port_info_size;
1979 	int					rc;
1980 
1981 	(void) memcpy(&cmd, buf, sizeof (cmd));
1982 
1983 	SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str,
1984 	    "query_pkey: entry, port = %d, pkey index = %d",
1985 	    cmd.port_num, cmd.pkey_index);
1986 
1987 	if (!cmd.port_num || cmd.port_num > uctxt->hca->attr.hca_nports) {
1988 		SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str,
1989 		    "query_pkey: Invalid port specified");
1990 
1991 		rc = EINVAL;
1992 		goto out;
1993 	}
1994 
1995 	rc = ibt_query_hca_ports(uctxt->hca->hdl, cmd.port_num, &port_info,
1996 	    &port_info_n, &port_info_size);
1997 	if (rc != IBT_SUCCESS) {
1998 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
1999 		    "query_pkey: ibt_query_hca_ports() %d", rc);
2000 		rc = sol_uverbs_ibt_to_kernel_status(rc);
2001 		goto out;
2002 	}
2003 
2004 	SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str,
2005 	    "query_pkey: port %d, requested index %d, number of pkey entries "
2006 	    "%d", cmd.port_num, cmd.pkey_index, port_info->p_pkey_tbl_sz);
2007 
2008 
2009 	if (cmd.pkey_index >= port_info->p_pkey_tbl_sz) {
2010 		SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str,
2011 		    "query_pkey: Invalid index %d, table size = %d",
2012 		    cmd.pkey_index, port_info->p_pkey_tbl_sz);
2013 
2014 		ibt_free_portinfo(port_info, port_info_size);
2015 		rc = EINVAL;
2016 		goto out;
2017 	}
2018 
2019 	(void) memset(&resp, 0, sizeof (resp));
2020 	resp.pkey = port_info->p_pkey_tbl[cmd.pkey_index];
2021 
2022 	ibt_free_portinfo(port_info, port_info_size);
2023 
2024 #ifdef	_LP64
2025 	rc = copyout((void*)&resp, (void*)cmd.response.r_laddr, sizeof (resp));
2026 #else
2027 	rc = copyout((void*)&resp, (void*)cmd.response.r_addr, sizeof (resp));
2028 #endif
2029 	if (rc != 0) {
2030 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
2031 		    "query_pkey: copyout %d", rc);
2032 		rc = EFAULT;
2033 		goto out;
2034 	}
2035 
2036 	rc = DDI_SUCCESS;
2037 out:
2038 	return (rc);
2039 }
2040 
2041 /*
2042  * Function:
2043  *	sol_uverbs_reg_mr
2044  * Input:
2045  *	uctxt   - Pointer to the callers user context.
2046  *	buf     - Pointer to kernel buffer containing command.
2047  *	in_len  - Length in bytes of input command buffer.
2048  *	out_len - Length in bytes of output response buffer.
2049  * Output:
2050  *	The command output buffer is updated with command results.
2051  * Returns:
2052  *	DDI_SUCCESS on success, else error code.
2053  * Description:
2054  * 	User verb entry point to register a memory region.
2055  */
2056 /* ARGSUSED */
2057 int
2058 sol_uverbs_reg_mr(uverbs_uctxt_uobj_t *uctxt, char *buf, int in_len,
2059     int out_len)
2060 {
2061 	struct ib_uverbs_reg_mr		cmd;
2062 	struct ib_uverbs_reg_mr_resp	resp;
2063 	uverbs_upd_uobj_t		*upd;
2064 	uverbs_umr_uobj_t		*umr;
2065 	ibt_mr_attr_t			new_mem_attr;
2066 	ibt_mr_desc_t			new_mr_desc;
2067 	int				rc;
2068 
2069 	(void) memcpy(&cmd, buf, sizeof (cmd));
2070 	(void) memset(&resp, 0, sizeof (resp));
2071 	(void) memset(&new_mem_attr, 0, sizeof (new_mem_attr));
2072 	(void) memset(&new_mr_desc, 0, sizeof (new_mr_desc));
2073 
2074 	SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "reg_mr()");
2075 
2076 	new_mem_attr.mr_vaddr	= cmd.start;
2077 	new_mem_attr.mr_len	= cmd.length;
2078 	new_mem_attr.mr_as	= curproc->p_as;
2079 	new_mem_attr.mr_flags	= IBT_MR_NOSLEEP;
2080 
2081 	SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "reg_mr : "
2082 	    "mr_vaddr 0x%0lX, mr_len %d, mr_as %d, mr_flags %d",
2083 	    new_mem_attr.mr_vaddr, new_mem_attr.mr_len,
2084 	    new_mem_attr.mr_as, new_mem_attr.mr_flags);
2085 
2086 	if ((cmd.access_flags & IB_ACCESS_LOCAL_WRITE) ==
2087 	    IB_ACCESS_LOCAL_WRITE) {
2088 		new_mem_attr.mr_flags |= IBT_MR_ENABLE_LOCAL_WRITE;
2089 	}
2090 	if ((cmd.access_flags & IB_ACCESS_REMOTE_WRITE) ==
2091 	    IB_ACCESS_REMOTE_WRITE) {
2092 		new_mem_attr.mr_flags |= IBT_MR_ENABLE_REMOTE_WRITE;
2093 	}
2094 	if ((cmd.access_flags & IB_ACCESS_REMOTE_READ) ==
2095 	    IB_ACCESS_REMOTE_READ) {
2096 		new_mem_attr.mr_flags |= IBT_MR_ENABLE_REMOTE_READ;
2097 	}
2098 	if ((cmd.access_flags & IB_ACCESS_REMOTE_ATOMIC) ==
2099 	    IB_ACCESS_REMOTE_ATOMIC) {
2100 		new_mem_attr.mr_flags |= IBT_MR_ENABLE_REMOTE_ATOMIC;
2101 	}
2102 	if ((cmd.access_flags & IB_ACCESS_MW_BIND) == IB_ACCESS_MW_BIND) {
2103 		new_mem_attr.mr_flags |= IBT_MR_ENABLE_WINDOW_BIND;
2104 	}
2105 	if ((cmd.access_flags & IB_ACCESS_SO) == IB_ACCESS_SO) {
2106 		new_mem_attr.mr_flags |= IBT_MR_DISABLE_RO;
2107 	}
2108 
2109 	umr = kmem_zalloc(sizeof (*umr), KM_NOSLEEP);
2110 	if (umr == NULL) {
2111 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
2112 		    "reg_mr: User object mem allocation error");
2113 		rc = ENOMEM;
2114 		goto out;
2115 	}
2116 	sol_ofs_uobj_init(&umr->uobj, 0, SOL_UVERBS_UMR_UOBJ_TYPE);
2117 	rw_enter(&umr->uobj.uo_lock, RW_WRITER);
2118 
2119 	upd = uverbs_uobj_get_upd_read(cmd.pd_handle);
2120 	if (upd == NULL) {
2121 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
2122 		    "reg_mr: PD invalid");
2123 		rc = EINVAL;
2124 		goto bad_pd;
2125 	}
2126 
2127 	rc = ibt_register_mr(uctxt->hca->hdl, upd->pd, &new_mem_attr, &umr->mr,
2128 	    &new_mr_desc);
2129 
2130 	if (rc != IBT_SUCCESS) {
2131 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
2132 		    "reg_mr: ibt_register_mr() (rc=%d)", rc);
2133 		rc = sol_uverbs_ibt_to_kernel_status(rc);
2134 		umr->uobj.uo_uobj_sz = sizeof (uverbs_umr_uobj_t);
2135 		goto err_register;
2136 	}
2137 
2138 	if (sol_ofs_uobj_add(&uverbs_umr_uo_tbl, &umr->uobj) != 0) {
2139 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
2140 		    "reg_mr: User object add failed");
2141 		rc = ENOMEM;
2142 		goto err_add_uobj;
2143 	}
2144 
2145 	resp.mr_handle  = umr->uobj.uo_id;
2146 	resp.lkey	= new_mr_desc.md_lkey;
2147 	resp.rkey	= new_mr_desc.md_rkey;
2148 
2149 #ifdef	_LP64
2150 	rc = copyout((void*)&resp, (void*)cmd.response.r_laddr, sizeof (resp));
2151 #else
2152 	rc = copyout((void*)&resp, (void*)cmd.response.r_addr, sizeof (resp));
2153 #endif
2154 	if (rc != 0) {
2155 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
2156 		    "reg_mr: Error writing resp data (rc=%d)", rc);
2157 		rc = EFAULT;
2158 		goto err_response;
2159 	}
2160 
2161 	mutex_enter(&uctxt->lock);
2162 	umr->list_entry  = add_genlist(&uctxt->mr_list, (uintptr_t)umr, uctxt);
2163 	mutex_exit(&uctxt->lock);
2164 
2165 	if (!umr->list_entry) {
2166 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
2167 		    "reg_mr: Error adding umr to mr_list\n");
2168 		rc = ENOMEM;
2169 		goto err_response;
2170 	}
2171 
2172 	umr->uobj.uo_live = 1;
2173 	rw_exit(&umr->uobj.uo_lock);
2174 
2175 	sol_ofs_uobj_put(&upd->uobj);
2176 
2177 	return (DDI_SUCCESS);
2178 
2179 err_response:
2180 	/*
2181 	 * Need to set uo_live, so sol_ofs_uobj_remove() will
2182 	 * remove the object from the object table.
2183 	 */
2184 	umr->uobj.uo_live = 1;
2185 	(void) sol_ofs_uobj_remove(&uverbs_umr_uo_tbl, &umr->uobj);
2186 
2187 err_add_uobj:
2188 	(void) ibt_deregister_mr(uctxt->hca->hdl, umr->mr);
2189 
2190 err_register:
2191 	sol_ofs_uobj_put(&upd->uobj);
2192 
2193 bad_pd:
2194 	rw_exit(&umr->uobj.uo_lock);
2195 	sol_ofs_uobj_deref(&umr->uobj, sol_ofs_uobj_free);
2196 
2197 out:
2198 	return (rc);
2199 }
2200 
2201 /*
2202  * Function:
2203  *	sol_uverbs_dereg_mr
2204  * Input:
2205  *	uctxt   - Pointer to the callers user context.
2206  *	buf     - Pointer to kernel buffer containing command.
2207  *	in_len  - Length in bytes of input command buffer.
2208  *	out_len - Length in bytes of output response buffer.
2209  * Output:
2210  *	The command output buffer is updated with command results.
2211  * Returns:
2212  *	DDI_SUCCESS on success, else error code.
2213  * Description:
2214  * 	User verb entry point to de-register a memory region.
2215  */
2216 /* ARGSUSED */
2217 int
2218 sol_uverbs_dereg_mr(uverbs_uctxt_uobj_t *uctxt, char *buf, int in_len,
2219     int out_len)
2220 {
2221 	struct ib_uverbs_dereg_mr	cmd;
2222 	uverbs_umr_uobj_t		*umr;
2223 	int				rc;
2224 
2225 	(void) memcpy(&cmd, buf, sizeof (cmd));
2226 
2227 	SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str,
2228 	    "dereg_mr(mr_handle=%d)", cmd.mr_handle);
2229 
2230 	umr = uverbs_uobj_get_umr_write(cmd.mr_handle);
2231 	if (umr == NULL) {
2232 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
2233 		    "dereg_mr: Invalid handle");
2234 		rc = EINVAL;
2235 		goto err_out;
2236 	}
2237 
2238 	rc = ibt_deregister_mr(uctxt->hca->hdl, umr->mr);
2239 
2240 	if (rc != IBT_SUCCESS) {
2241 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
2242 		    "dereg_mr: ibt_deregister_mr() (rc=%d)", rc);
2243 		rc = sol_uverbs_ibt_to_kernel_status(rc);
2244 		goto err_deregister;
2245 	}
2246 
2247 	/*
2248 	 * Remove from the list of this contexts MR resources, then remove from
2249 	 * the resource management table and the reference placed on the user
2250 	 * object at MR creation.
2251 	 */
2252 	mutex_enter(&uctxt->lock);
2253 	delete_genlist(&uctxt->mr_list, umr->list_entry);
2254 	mutex_exit(&uctxt->lock);
2255 
2256 	(void) sol_ofs_uobj_remove(&uverbs_umr_uo_tbl, &umr->uobj);
2257 
2258 	/*
2259 	 * Drop the lock and ref held by get_umr_write.
2260 	 */
2261 	sol_ofs_uobj_put(&umr->uobj);
2262 
2263 	sol_ofs_uobj_deref(&umr->uobj, sol_ofs_uobj_free);
2264 
2265 	cmd.mr_handle = 0;
2266 	return (DDI_SUCCESS);
2267 
2268 err_deregister:
2269 	/*
2270 	 * Drop the lock and ref held by get_umr_write.
2271 	 */
2272 	sol_ofs_uobj_put(&umr->uobj);
2273 
2274 err_out:
2275 	return (rc);
2276 }
2277 
2278 /*
2279  * Function:
2280  *	sol_uverbs_create_ah
2281  * Input:
2282  *	uctxt   - Pointer to the callers user context.
2283  *	buf     - Pointer to kernel buffer containing command.
2284  *	in_len  - Length in bytes of input command buffer.
2285  *	out_len - Length in bytes of output response buffer.
2286  * Output:
2287  *	The command output buffer is updated with command results.
2288  * Returns:
2289  *	DDI_SUCCESS on success, else error code.
2290  * Description:
2291  * 	User verb entry point to for devices that require kernel AH creation.
2292  */
2293 /* ARGSUSED */
2294 int
2295 sol_uverbs_create_ah(uverbs_uctxt_uobj_t *uctxt, char *buf, int in_len,
2296     int out_len)
2297 {
2298 	SOL_OFS_DPRINTF_L3(sol_uverbs_dbg_str,
2299 	    "create_ah: kernel user verb not implemented");
2300 	return (ENOTSUP);
2301 }
2302 
2303 /*
2304  * Function:
2305  *	sol_uverbs_destroy_ah
2306  * Input:
2307  *	uctxt   - Pointer to the callers user context.
2308  *	buf     - Pointer to kernel buffer containing command.
2309  *	in_len  - Length in bytes of input command buffer.
2310  *	out_len - Length in bytes of output response buffer.
2311  * Output:
2312  *	The command output buffer is updated with command results.
2313  * Returns:
2314  *	DDI_SUCCESS on success, else error code.
2315  * Description:
2316  * 	User verb entry point to for devices that require kernel AH deletion.
2317  */
2318 /* ARGSUSED */
2319 int
2320 sol_uverbs_destroy_ah(uverbs_uctxt_uobj_t *uctxt, char *buf, int in_len,
2321     int out_len)
2322 {
2323 	SOL_OFS_DPRINTF_L3(sol_uverbs_dbg_str,
2324 	    "destroy_ah: kernel user verb not implemented");
2325 	return (ENOTSUP);
2326 }
2327 
2328 /*
2329  * Function:
2330  *	sol_uverbs_create_comp_chan
2331  * Input:
2332  *	uctxt   - Pointer to the callers user context.
2333  *	buf     - Pointer to kernel buffer containing command.
2334  *	in_len  - Length in bytes of input command buffer.
2335  *	out_len - Length in bytes of output response buffer.
2336  * Output:
2337  *	The command output buffer is updated with command results.
2338  * Returns:
2339  *	DDI_SUCCESS on success, else error code.
2340  * Description:
2341  * 	User verb entry point to create a completion event channel.
2342  */
2343 int
2344 sol_uverbs_create_comp_channel(uverbs_uctxt_uobj_t *uctxt, char *buf,
2345     int in_len, int out_len)
2346 {
2347 	struct ib_uverbs_create_comp_channel		cmd;
2348 	struct ib_uverbs_create_comp_channel_resp	resp;
2349 	int						rc;
2350 	minor_t						compl_id;
2351 	uverbs_uctxt_uobj_t				*compl_uctxt;
2352 
2353 	(void) memcpy(&cmd, buf, sizeof (cmd));
2354 	(void) memset(&resp, 0, sizeof (resp));
2355 
2356 	SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str,
2357 	    "create_comp_chan: entry, in_len=%d, out_len=%d",
2358 	    in_len, out_len);
2359 
2360 	/*
2361 	 * libibverbs will have passed minor of the compl file in
2362 	 * resp.fd. Use this to determine the uctxt created for
2363 	 * completions.
2364 	 */
2365 #ifdef	_LP64
2366 	rc = copyin((void*)cmd.response.r_laddr, (void*)&resp, sizeof (resp));
2367 #else
2368 	rc = copyin((void*)cmd.response.r_addr, (void*)&resp, sizeof (resp));
2369 #endif
2370 	if (rc != 0) {
2371 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
2372 		    "create_comp: copyin (rc=%d)", rc);
2373 		rc = EFAULT;
2374 		return (rc);
2375 	}
2376 	compl_id = resp.fd;
2377 	if (compl_id < SOL_UVERBS_DRIVER_MAX_MINOR) {
2378 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
2379 		    "create_comp: Invalid compl user context id %x",
2380 		    compl_id);
2381 		return (ENXIO);
2382 	}
2383 
2384 	compl_uctxt = uverbs_uobj_get_uctxt_read(compl_id -
2385 	    SOL_UVERBS_DRIVER_MAX_MINOR);
2386 	if (compl_uctxt == NULL) {
2387 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
2388 		    "create_comp: Failed get compl user context");
2389 		return (ENXIO);
2390 	}
2391 	if (compl_uctxt->uctxt_type != SOL_UVERBS_UCTXT_EVENT ||
2392 	    compl_uctxt->uctxt_verbs_id != 0) {
2393 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
2394 		    "create_comp_chan: Invalid user context - "
2395 		    "possibly reused");
2396 		return (ENXIO);
2397 	}
2398 	compl_uctxt->uctxt_type = SOL_UVERBS_UCTXT_COMPL;
2399 	compl_uctxt->uctxt_verbs_id = uctxt->uobj.uo_id +
2400 	    SOL_UVERBS_DRIVER_MAX_MINOR;
2401 	uctxt->uctxt_comp_id = compl_id;
2402 	SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "create_comp_chan: "
2403 	    "uctxt %p, compl_uctxt %p, compl_id %x", uctxt,
2404 	    compl_uctxt, compl_id);
2405 
2406 	/*
2407 	 * Allocate an event file to be used for completion
2408 	 * event notification.
2409 	 */
2410 	compl_uctxt->comp_evfile = uverbs_alloc_event_file(uctxt, 0);
2411 	if (compl_uctxt->comp_evfile == NULL) {
2412 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
2413 		    "create_comp_chan: Event file alloc error");
2414 		rc = EINVAL;
2415 		sol_ofs_uobj_put(&compl_uctxt->uobj);
2416 		return (rc);
2417 	}
2418 
2419 	/*
2420 	 * Place an extra reference on the compl event file.  These will
2421 	 * be used to handle the natural race of between the closing of
2422 	 * the compl event file and uverbs device file that can occur.
2423 	 */
2424 	sol_ofs_uobj_ref(&compl_uctxt->comp_evfile->uobj);
2425 
2426 	sol_ofs_uobj_put(&compl_uctxt->uobj);
2427 
2428 #ifdef	_LP64
2429 	rc = copyout((void*)&resp, (void*)cmd.response.r_laddr, sizeof (resp));
2430 #else
2431 	rc = copyout((void*)&resp, (void*)cmd.response.r_addr, sizeof (resp));
2432 #endif
2433 	if (rc != 0) {
2434 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
2435 		    "create_comp_chan: copyout %d", rc);
2436 		rc = EFAULT;
2437 		return (rc);
2438 	}
2439 
2440 	return (0);
2441 }
2442 
2443 /*
2444  * Function:
2445  *	sol_uverbs_dummy_command
2446  * Input:
2447  *	uctxt   - Pointer to the callers user context.
2448  *	buf     - Pointer to kernel buffer containing command.
2449  *	in_len  - Length in bytes of input command buffer.
2450  *	out_len - Length in bytes of output response buffer.
2451  * Output:
2452  *	The command output buffer is updated with command results.
2453  * Returns:
2454  *	DDI_SUCCESS on success, else error code.
2455  * Description:
2456  * 	User verb generic place holder stub.
2457  */
2458 /* ARGSUSED */
2459 int
2460 sol_uverbs_dummy_command(uverbs_uctxt_uobj_t *uctxt, char *buf, int in_len,
2461     int out_len)
2462 {
2463 	SOL_OFS_DPRINTF_L4(sol_uverbs_dbg_str,
2464 	    "sol_uverbs_dummy_command invoked");
2465 
2466 	return (0);
2467 }
2468 
2469 /*
2470  * Function:
2471  *	sol_uverbs_write
2472  * Input:
2473  *	dev	- Device number.
2474  *	uiop	- Pointer to the uio structure that describes the data (i.e.
2475  *                Solaris User Verbs command).
2476  *	credp	- A pointer to the user credentials for the I/O transaction.
2477  * Output:
2478  *	uiop	-
2479  * Returns:
2480  *	DDI_SUCCESS on success, else error code.
2481  * Description:
2482  * 	User verb write entry point.  A user deivce libraries use this
2483  *	entry point to execute a kernel agent user verbs call.  During
2484  *	the course of the call the user process will hold a read reference
2485  *	to the associated user context.
2486  */
2487 #define	SOL_UVERBS_MAX_CMD_PAYLOAD    512
2488 /* ARGSUSED */
2489 static int
2490 sol_uverbs_write(dev_t dev, struct uio *uiop, cred_t *credp)
2491 {
2492 	uverbs_uctxt_uobj_t		*uctxt;
2493 	size_t				len = uiop->uio_resid;
2494 	int				rc;
2495 	struct ib_uverbs_cmd_hdr	hdr;
2496 	char				payload[SOL_UVERBS_MAX_CMD_PAYLOAD];
2497 	minor_t				id = getminor(dev);
2498 
2499 	SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str,
2500 	    "uverbs_write: entry (len=%d)", len);
2501 
2502 	ASSERT(id >= SOL_UVERBS_DRIVER_MAX_MINOR);
2503 
2504 	uctxt = uverbs_uobj_get_uctxt_read(id - SOL_UVERBS_DRIVER_MAX_MINOR);
2505 	if (uctxt == NULL) {
2506 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
2507 		    "uverbs_write: Failed get user context");
2508 		return (ENXIO);
2509 	}
2510 
2511 	if (uctxt->uctxt_type != SOL_UVERBS_UCTXT_VERBS) {
2512 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
2513 		    "uverbs_write: write() on invalid uctxt type %x",
2514 		    uctxt->uctxt_type);
2515 		rc = ENXIO;
2516 		goto out;
2517 	}
2518 
2519 	if (len < sizeof (hdr)) {
2520 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
2521 		    "uverbs_write: Header too small");
2522 		rc =  EINVAL;
2523 		goto out;
2524 	}
2525 
2526 	hdr.command	= -1;
2527 	hdr.in_words	= 0;
2528 	hdr.out_words	= 0;
2529 
2530 	if (uiomove(&hdr, sizeof (hdr), UIO_WRITE, uiop) != 0) {
2531 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
2532 		    "uverbs_write: Error reading header");
2533 		rc = EFAULT;
2534 		goto out;
2535 	}
2536 
2537 	SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str,
2538 	    "uverbs_write:  hdr.command   = %d", hdr.command);
2539 	SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str,
2540 	    "uverbs_write:  hdr.command   = %d", hdr.command);
2541 	SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str,
2542 	    "uverbs_write:  hdr.in_words  = %d", hdr.in_words);
2543 	SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str,
2544 	    "uverbs_write:  hdr.out_words = %d", hdr.out_words);
2545 
2546 	if (hdr.in_words * 4 != len) {
2547 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
2548 		    "uverbs_write: Invalid header size");
2549 		rc = EINVAL;
2550 		goto out;
2551 	}
2552 
2553 	if (hdr.command >=
2554 	    sizeof (uverbs_cmd_table)/sizeof (uverbs_cmd_table[0]) ||
2555 	    !uverbs_cmd_table[hdr.command]) {
2556 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
2557 		    "uverbs_write: Invalid command (%d)", hdr.command);
2558 		rc = EINVAL;
2559 		goto out;
2560 	}
2561 
2562 	ASSERT(len <= SOL_UVERBS_MAX_CMD_PAYLOAD);
2563 
2564 	if (uiomove(&payload, len, UIO_WRITE, uiop) != 0) {
2565 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
2566 		    "uverbs_write: Error reading payload");
2567 		rc = EFAULT;
2568 		goto out;
2569 	}
2570 
2571 #ifdef DEBUG
2572 	unsigned int	*payload_int = (unsigned int *)payload;
2573 
2574 	SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str,
2575 	    "payload:   %08x,    %08x,    %08x,    %08x",
2576 	    payload_int[0], payload_int[1],
2577 	    payload_int[2], payload_int[3]);
2578 	SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str,
2579 	    "payload:   %08x,    %08x,    %08x,    %08x",
2580 	    payload_int[4], payload_int[5],
2581 	    payload_int[6], payload_int[7]);
2582 	SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str,
2583 	    "payload:   %08x,    %08x,    %08x,    %08x",
2584 	    payload_int[8], payload_int[9],
2585 	    payload_int[10], payload_int[11]);
2586 	SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str,
2587 	    "payload:   %08x,    %08x,    %08x",
2588 	    payload_int[12], payload_int[13], payload_int[14]);
2589 #endif
2590 
2591 	rc = uverbs_cmd_table[hdr.command](uctxt, &payload[0], hdr.in_words * 4,
2592 	    hdr.out_words * 4);
2593 
2594 out:
2595 	sol_ofs_uobj_put(&uctxt->uobj);
2596 
2597 	SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str,
2598 	    "uverbs_write: rc = %d", rc);
2599 
2600 	return (rc);
2601 }
2602 
2603 static int
2604 sol_uverbs_poll(dev_t dev, short events, int anyyet,
2605     short *reventsp, struct pollhead **phpp)
2606 {
2607 	minor_t			id = getminor(dev);
2608 	uverbs_uctxt_uobj_t	*uctxt, *verbs_uctxt;
2609 	int			rc;
2610 
2611 #ifdef DEBUG
2612 	SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "uverbs_poll(%p, %x, %x, "
2613 	    "%p, %p)", dev, events, anyyet, reventsp, phpp);
2614 #endif
2615 
2616 	ASSERT(id >= SOL_UVERBS_DRIVER_MAX_MINOR);
2617 
2618 	uctxt = uverbs_uobj_get_uctxt_read(id - SOL_UVERBS_DRIVER_MAX_MINOR);
2619 	if (uctxt == NULL) {
2620 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
2621 		    "uverbs_poll: Failed get user context");
2622 		return (ENXIO);
2623 	}
2624 
2625 	if (uctxt->uctxt_verbs_id < SOL_UVERBS_DRIVER_MAX_MINOR) {
2626 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
2627 		    "uverbs_poll: Invalid Verbs user context id, %x",
2628 		    uctxt->uctxt_verbs_id);
2629 		sol_ofs_uobj_put(&uctxt->uobj);
2630 		return (ENXIO);
2631 	}
2632 	verbs_uctxt = uverbs_uobj_get_uctxt_read(uctxt->uctxt_verbs_id
2633 	    - SOL_UVERBS_DRIVER_MAX_MINOR);
2634 	if (verbs_uctxt == NULL) {
2635 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
2636 		    "uverbs_poll: Failed get verbs user context");
2637 		sol_ofs_uobj_put(&uctxt->uobj);
2638 		return (ENXIO);
2639 	}
2640 	if (uctxt->uctxt_type == SOL_UVERBS_UCTXT_ASYNC) {
2641 		ASSERT(verbs_uctxt->async_evfile);
2642 		rc = sol_uverbs_event_file_poll(verbs_uctxt->async_evfile,
2643 		    events, anyyet, reventsp, phpp);
2644 	} else if (uctxt->uctxt_type == SOL_UVERBS_UCTXT_COMPL) {
2645 		ASSERT(uctxt->comp_evfile);
2646 		rc = sol_uverbs_event_file_poll(uctxt->comp_evfile,
2647 		    events, anyyet, reventsp, phpp);
2648 	} else {
2649 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
2650 		    "uverbs_poll: poll user context type %d",
2651 		    uctxt->uctxt_type);
2652 		rc = ENXIO;
2653 	}
2654 
2655 	sol_ofs_uobj_put(&verbs_uctxt->uobj);
2656 	sol_ofs_uobj_put(&uctxt->uobj);
2657 	return (rc);
2658 }
2659 
2660 /*
2661  * Function:
2662  *	sol_uverbs_alloc_uctxt
2663  * Input:
2664  *	devp	 - A pointer to the device number associated with the open.
2665  *	mod_ctxt - A pointer to the drivers module context.
2666  *	minor    - The minor device number.
2667  * Output:
2668  *	None.
2669  * Returns:
2670  *	On success a new user context user resource object associated with
2671  *	the device passed via devp. NULL on error.
2672  * Description:
2673  * 	Allocate a new user context user resource object and initialize it.
2674  *	The users asynchronous event file is created as part of this. On
2675  *	successful allocation, the user context is returned with the
2676  *	associated write lock enabled.
2677  */
2678 static uverbs_uctxt_uobj_t *
2679 sol_uverbs_alloc_uctxt(dev_t *devp, uverbs_module_context_t *mod_ctxt,
2680     minor_t minor)
2681 {
2682 	uverbs_uctxt_uobj_t *uctxt = NULL;
2683 
2684 	uctxt = kmem_zalloc(sizeof (uverbs_uctxt_uobj_t), KM_SLEEP);
2685 	ASSERT(uctxt != NULL);
2686 	sol_ofs_uobj_init(&uctxt->uobj, 0, SOL_UVERBS_UCTXT_UOBJ_TYPE);
2687 	rw_enter(&uctxt->uobj.uo_lock, RW_WRITER);
2688 	if (sol_ofs_uobj_add(&uverbs_uctxt_uo_tbl, &uctxt->uobj) != 0) {
2689 		/*
2690 		 * The initialization routine set's the initial reference,
2691 		 * we dereference the object here to clean it up.
2692 		 */
2693 		SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
2694 		    "alloc_uctxt: Object add failed");
2695 		rw_exit(&uctxt->uobj.uo_lock);
2696 		sol_ofs_uobj_free(&uctxt->uobj);
2697 		return (NULL);
2698 	}
2699 
2700 	/*
2701 	 * Create the new clone for this user context using the
2702 	 * object id as the minor number.   Note we offset beyond all
2703 	 * real minor device numbers.
2704 	 */
2705 	*devp = makedevice(getmajor(*devp),
2706 	    uctxt->uobj.uo_id + SOL_UVERBS_DRIVER_MAX_MINOR);
2707 
2708 	SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str, "uverbs_open : "
2709 	    "uctxt %p, minor %x- alloced", uctxt,
2710 	    uctxt->uobj.uo_id + SOL_UVERBS_DRIVER_MAX_MINOR);
2711 
2712 	SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str,
2713 	    "alloc_uctxt: user context allocated: %p, ref = %d",
2714 	    (void *)uctxt, uctxt->uobj.uo_refcnt);
2715 
2716 	mutex_init(&uctxt->lock, NULL, MUTEX_DRIVER, NULL);
2717 	uctxt->mod_ctxt	= mod_ctxt;
2718 	if (minor == SOL_UVERBS_DRIVER_EVENT_MINOR) {
2719 		uctxt->uctxt_type = SOL_UVERBS_UCTXT_EVENT;
2720 	} else {
2721 		uctxt->uctxt_type = SOL_UVERBS_UCTXT_VERBS;
2722 		uctxt->hca = &mod_ctxt->hcas[minor];
2723 	}
2724 
2725 	init_genlist(&uctxt->pd_list);
2726 	init_genlist(&uctxt->mr_list);
2727 	init_genlist(&uctxt->cq_list);
2728 	init_genlist(&uctxt->srq_list);
2729 	init_genlist(&uctxt->qp_list);
2730 	init_genlist(&uctxt->ah_list);
2731 
2732 	/* Return with uobj uo_lock held for WRITTER. */
2733 	return (uctxt);
2734 }
2735 
2736 /*
2737  * Function:
2738  *	sol_uverbs_qpnum2uqpid
2739  * Input:
2740  *	qp_num	- used to find the user object that mapped to this qp_num
2741  * Output:
2742  *	None
2743  * Returns:
2744  *	DDI_FAILURE if not found else
2745  *	the uo_id in the user object that matches the qp_num
2746  * Description:
2747  * 	Find the uo_id of the user object which mapped to the input qp_num
2748  */
2749 uint32_t
2750 sol_uverbs_qpnum2uqpid(uint32_t qp_num)
2751 {
2752 	sol_ofs_uobj_table_t	*uo_tbl;
2753 	sol_ofs_uobj_t		*uobj;
2754 	uverbs_uqp_uobj_t	*uqp;
2755 	int			i, j;
2756 	sol_ofs_uobj_blk_t	*blk;
2757 
2758 	uo_tbl = &uverbs_uqp_uo_tbl;
2759 	rw_enter(&uo_tbl->uobj_tbl_lock, RW_READER);
2760 
2761 	/*
2762 	 * Try to find an empty slot for the new user object.
2763 	 */
2764 	for (i = 0; i < uo_tbl->uobj_tbl_used_blks; i++) {
2765 		blk = uo_tbl->uobj_tbl_uo_root[i];
2766 		if (blk != NULL) {
2767 			for (j = 0; j < SOL_OFS_UO_BLKSZ; j++) {
2768 				if ((uobj = blk->ofs_uoblk_blks[j]) != NULL) {
2769 					uqp = (uverbs_uqp_uobj_t *)uobj;
2770 					if (uqp->qp_num == qp_num) {
2771 						rw_exit(&uo_tbl->uobj_tbl_lock);
2772 						SOL_OFS_DPRINTF_L5(
2773 						    sol_uverbs_dbg_str,
2774 						    "qpnum2uqpid(%x) ret %x",
2775 						    qp_num, uobj->uo_id);
2776 						return (uobj->uo_id);
2777 					}
2778 				}
2779 			}
2780 		}
2781 	}
2782 
2783 	rw_exit(&uo_tbl->uobj_tbl_lock);
2784 	SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str, "qpnum2uqpid(%x) ret %x",
2785 	    qp_num, DDI_FAILURE);
2786 	return (DDI_FAILURE);
2787 }
2788 
2789 void
2790 sol_uverbs_get_clnt_hdl(void **ibclnt_hdl, void **iwclnt_hdl)
2791 {
2792 	*ibclnt_hdl = sol_uverbs_ib_clntp;
2793 	*iwclnt_hdl = NULL;
2794 }
2795 
2796 void *
2797 sol_uverbs_qpnum2qphdl(uint32_t qpnum)
2798 {
2799 	int32_t	uqpid;
2800 
2801 	uqpid = sol_uverbs_qpnum2uqpid(qpnum);
2802 	if (uqpid == DDI_FAILURE)
2803 		return (NULL);
2804 	return (sol_uverbs_uqpid_to_ibt_handle(uqpid));
2805 }
2806 
2807 int
2808 sol_uverbs_disable_uqpn_modify(uint32_t qpnum)
2809 {
2810 	int32_t	uqpid;
2811 
2812 	uqpid = sol_uverbs_qpnum2uqpid(qpnum);
2813 	if (uqpid == DDI_FAILURE)
2814 		return (-1);
2815 
2816 	return (sol_uverbs_disable_user_qp_modify(uqpid));
2817 }
2818 
2819 extern int uverbs_uqpn_cq_ctrl(uint32_t, sol_uverbs_cq_ctrl_t);
2820 
2821 int
2822 sol_uverbs_uqpn_cq_ctrl(uint32_t qpnum, sol_uverbs_cq_ctrl_t ctrl)
2823 {
2824 	int32_t	uqpid;
2825 
2826 	uqpid = sol_uverbs_qpnum2uqpid(qpnum);
2827 	if (uqpid == DDI_FAILURE)
2828 		return (-1);
2829 
2830 	return (uverbs_uqpn_cq_ctrl(uqpid, ctrl));
2831 }
2832 
2833 void
2834 sol_uverbs_set_qp_free_state(sol_uverbs_qp_free_state_t qp_free_state,
2835     uint32_t qpnum, void *qphdl)
2836 {
2837 	int32_t			uqpid;
2838 	uverbs_uqp_uobj_t	*uqp;
2839 
2840 	SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str,
2841 	    "sol_uverbs_set_qp_free_state(%x, %x, %p)",
2842 	    qp_free_state, qpnum, qphdl);
2843 	if (qp_free_state == SOL_UVERBS2UCMA_DISABLE_QP_FREE) {
2844 		uqpid = sol_uverbs_qpnum2uqpid(qpnum);
2845 		if (uqpid == DDI_FAILURE) {
2846 			SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
2847 			    "set_qp_free_state(%d)-invalid qpnum",
2848 			    qpnum);
2849 			return;
2850 		}
2851 
2852 		uqp = uverbs_uobj_get_uqp_write(uqpid);
2853 		if (uqp == NULL) {
2854 			SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
2855 			    "set_qp_free_state(%d)-uqp lookup failure", qpnum);
2856 			return;
2857 		}
2858 		SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str,
2859 		    "set_qp_free_state : uqp %p, setting Disable QP Free", uqp);
2860 		uqp->uqp_free_state  = SOL_UVERBS2UCMA_DISABLE_QP_FREE;
2861 		sol_ofs_uobj_put(&uqp->uobj);
2862 		return;
2863 	}
2864 
2865 	ASSERT(qphdl);
2866 	uqp = (uverbs_uqp_uobj_t *)ibt_get_qp_private((ibt_qp_hdl_t)qphdl);
2867 	ASSERT(uqp);
2868 	if (uqp->uqp_free_state != SOL_UVERBS2UCMA_FREE_PENDING) {
2869 		/*
2870 		 * Enable free flag, so that close or userland free_qp
2871 		 * call can free this in the future.
2872 		 */
2873 		SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str,
2874 		    "set_qp_free_state : uqp %p, setting Enable QP Free",
2875 		    uqp);
2876 		rw_enter(&(uqp->uobj.uo_lock), RW_WRITER);
2877 		uqp->uqp_free_state = SOL_UVERBS2UCMA_ENABLE_QP_FREE;
2878 		rw_exit(&(uqp->uobj.uo_lock));
2879 	} else {
2880 		/*
2881 		 * uqp_free_state is set to FREE_PENDING, QP has been freed
2882 		 * by userland. Call uverbs_uqp_free() to free this.
2883 		 */
2884 		SOL_OFS_DPRINTF_L5(sol_uverbs_dbg_str,
2885 		    "set_qp_free_state : uqp %p calling uverbs_uqp_free()",
2886 		    uqp);
2887 		rw_enter(&(uqp->uobj.uo_lock), RW_WRITER);
2888 		sol_ofs_uobj_ref(&uqp->uobj);
2889 		if (uverbs_uqp_free(uqp, uqp->uctxt))
2890 			SOL_OFS_DPRINTF_L2(sol_uverbs_dbg_str,
2891 			    "set_qp_free_state : uverbs_uqp_free(%p) failed",
2892 			    uqp);
2893 	}
2894 }
2895 
2896 /*
2897  * Function:
2898  *	sol_uverbs_user_objects_init
2899  * Input:
2900  *	None
2901  * Output:
2902  *	None
2903  * Returns:
2904  *	None
2905  * Description:
2906  * 	Initializes all of the user object resource managment tables.
2907  */
2908 static void sol_uverbs_user_objects_init()
2909 {
2910 	sol_ofs_uobj_tbl_init(&uverbs_uctxt_uo_tbl,
2911 	    sizeof (uverbs_uctxt_uobj_t));
2912 	sol_ofs_uobj_tbl_init(&uverbs_upd_uo_tbl,
2913 	    sizeof (uverbs_upd_uobj_t));
2914 	sol_ofs_uobj_tbl_init(&uverbs_umr_uo_tbl,
2915 	    sizeof (uverbs_umr_uobj_t));
2916 	sol_ofs_uobj_tbl_init(&uverbs_ucq_uo_tbl,
2917 	    sizeof (uverbs_ucq_uobj_t));
2918 	sol_ofs_uobj_tbl_init(&uverbs_usrq_uo_tbl,
2919 	    sizeof (uverbs_usrq_uobj_t));
2920 	sol_ofs_uobj_tbl_init(&uverbs_uqp_uo_tbl,
2921 	    sizeof (uverbs_uqp_uobj_t));
2922 	sol_ofs_uobj_tbl_init(&uverbs_uah_uo_tbl,
2923 	    sizeof (uverbs_uah_uobj_t));
2924 	sol_ofs_uobj_tbl_init(&uverbs_ufile_uo_tbl,
2925 	    sizeof (uverbs_ufile_uobj_t));
2926 }
2927 
2928 /*
2929  * Function:
2930  *	sol_uverbs_user_objects_fini
2931  * Input:
2932  *	None
2933  * Output:
2934  *	None
2935  * Returns:
2936  *	None
2937  * Description:
2938  * 	Releases all of the user object resource managment tables.
2939  */
2940 static void sol_uverbs_user_objects_fini()
2941 {
2942 	sol_ofs_uobj_tbl_fini(&uverbs_ufile_uo_tbl);
2943 	sol_ofs_uobj_tbl_fini(&uverbs_uah_uo_tbl);
2944 	sol_ofs_uobj_tbl_fini(&uverbs_uqp_uo_tbl);
2945 	sol_ofs_uobj_tbl_fini(&uverbs_usrq_uo_tbl);
2946 	sol_ofs_uobj_tbl_fini(&uverbs_ucq_uo_tbl);
2947 	sol_ofs_uobj_tbl_fini(&uverbs_umr_uo_tbl);
2948 	sol_ofs_uobj_tbl_fini(&uverbs_upd_uo_tbl);
2949 	sol_ofs_uobj_tbl_fini(&uverbs_uctxt_uo_tbl);
2950 }
2951 
2952 /*
2953  * Function:
2954  *	sol_uverbs_ibt_to_kernel_status
2955  * Input:
2956  *	status	- An IBT status code.
2957  * Output:
2958  *	None
2959  * Returns:
2960  *	The "errno" based kernel error code the IBT status maps to.
2961  * Description:
2962  * 	Map an IBT status to the "errno" code that should be returned.
2963  */
2964 int
2965 sol_uverbs_ibt_to_kernel_status(ibt_status_t status)
2966 {
2967 	int err;
2968 
2969 	switch (status) {
2970 		case IBT_NOT_SUPPORTED:
2971 			err = ENOTSUP;
2972 			break;
2973 
2974 		case IBT_ILLEGAL_OP:
2975 		case IBT_INVALID_PARAM:
2976 			err = EINVAL;
2977 			break;
2978 
2979 		case IBT_HCA_IN_USE:
2980 		case IBT_HCA_BUSY_DETACHING:
2981 		case IBT_HCA_BUSY_CLOSING:
2982 		case IBT_CHAN_IN_USE:
2983 		case IBT_CQ_BUSY:
2984 		case IBT_MR_IN_USE:
2985 		case IBT_PD_IN_USE:
2986 		case IBT_SRQ_IN_USE:
2987 			err = EBUSY;
2988 			break;
2989 		case	IBT_INSUFF_RESOURCE:
2990 		case	IBT_INSUFF_KERNEL_RESOURCE:
2991 		case	IBT_HCA_WR_EXCEEDED:
2992 		case	IBT_HCA_SGL_EXCEEDED:
2993 			err = ENOMEM;
2994 			break;
2995 
2996 		default:
2997 			err = EINVAL;
2998 	}
2999 	return (err);
3000 }
3001 
3002 /* ARGSUSED */
3003 uint32_t
3004 sol_uverbs_ibt_to_of_device_cap_flags(ibt_hca_flags_t flags,
3005     ibt_hca_flags2_t flags2) {
3006 
3007 	uint32_t of_flags = 0;
3008 
3009 	if (flags && IBT_HCA_RESIZE_CHAN)
3010 		of_flags |= IB_DEVICE_RESIZE_MAX_WR;
3011 
3012 	if (flags && IBT_HCA_PKEY_CNTR)
3013 		of_flags |= IB_DEVICE_BAD_PKEY_CNTR;
3014 
3015 	if (flags && IBT_HCA_QKEY_CNTR)
3016 		of_flags |= IB_DEVICE_BAD_QKEY_CNTR;
3017 
3018 	if (flags && IBT_HCA_RAW_MULTICAST)
3019 		of_flags |= IB_DEVICE_RAW_MULTI;
3020 
3021 	if (flags && IBT_HCA_AUTO_PATH_MIG)
3022 		of_flags |= IB_DEVICE_AUTO_PATH_MIG;
3023 
3024 	if (flags && IBT_HCA_SQD_SQD_PORT)
3025 		of_flags |= IB_DEVICE_CHANGE_PHY_PORT;
3026 
3027 	if (flags && IBT_HCA_AH_PORT_CHECK)
3028 		of_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
3029 
3030 	if (flags && IBT_HCA_CURRENT_QP_STATE)
3031 		of_flags |= IB_DEVICE_CURR_QP_STATE_MOD;
3032 
3033 	if (flags && IBT_HCA_SHUTDOWN_PORT)
3034 		of_flags |= IB_DEVICE_SHUTDOWN_PORT;
3035 
3036 	if (flags && IBT_HCA_INIT_TYPE)
3037 		of_flags |= IB_DEVICE_INIT_TYPE;
3038 
3039 	if (flags && IBT_HCA_PORT_UP)
3040 		of_flags |= IB_DEVICE_PORT_ACTIVE_EVENT;
3041 
3042 	if (flags && IBT_HCA_SI_GUID)
3043 		of_flags |= IB_DEVICE_SYS_IMAGE_GUID;
3044 
3045 	if (flags && IBT_HCA_RNR_NAK)
3046 		of_flags |= IB_DEVICE_RC_RNR_NAK_GEN;
3047 
3048 	if (flags && IBT_HCA_RESIZE_SRQ)
3049 		of_flags |= IB_DEVICE_SRQ_RESIZE;
3050 
3051 	if (flags && IBT_HCA_BASE_QUEUE_MGT)
3052 		of_flags |= IB_DEVICE_N_NOTIFY_CQ;
3053 
3054 	if (flags && IBT_HCA_ZERO_BASED_VA)
3055 		of_flags |= IB_DEVICE_ZERO_STAG;
3056 
3057 	if (flags && IBT_HCA_LOCAL_INVAL_FENCE)
3058 		of_flags |= IB_DEVICE_SEND_W_INV;
3059 
3060 	if (flags && IBT_HCA_MEM_WIN_TYPE_2B)
3061 		of_flags |= IB_DEVICE_MEM_WINDOW;
3062 
3063 	return (of_flags);
3064 }
3065 
3066 uint64_t
3067 sol_uverbs_ibt_to_of_page_sz(ibt_page_sizes_t page_szs)
3068 {
3069 
3070 	uint64_t of_page_sz = 0;
3071 
3072 	if (page_szs && IBT_PAGE_4K)
3073 		of_page_sz |= 1LL << 12;
3074 
3075 	if (page_szs && IBT_PAGE_8K)
3076 		of_page_sz |= 1LL << 13;
3077 
3078 	if (page_szs && IBT_PAGE_16K)
3079 		of_page_sz |= 1LL << 14;
3080 
3081 	if (page_szs && IBT_PAGE_32K)
3082 		of_page_sz |= 1LL << 15;
3083 
3084 	if (page_szs && IBT_PAGE_64K)
3085 		of_page_sz |= 1LL << 16;
3086 
3087 	if (page_szs && IBT_PAGE_128K)
3088 		of_page_sz |= 1LL << 17;
3089 
3090 	if (page_szs && IBT_PAGE_256K)
3091 		of_page_sz |= 1LL << 18;
3092 
3093 	if (page_szs && IBT_PAGE_512K)
3094 		of_page_sz |= 1LL << 19;
3095 
3096 	if (page_szs && IBT_PAGE_1M)
3097 		of_page_sz |= 1LL << 20;
3098 
3099 	if (page_szs && IBT_PAGE_2M)
3100 		of_page_sz |= 1LL << 21;
3101 
3102 	if (page_szs && IBT_PAGE_4M)
3103 		of_page_sz |= 1LL << 22;
3104 
3105 	if (page_szs && IBT_PAGE_8M)
3106 		of_page_sz |= 1LL << 23;
3107 
3108 	if (page_szs && IBT_PAGE_16M)
3109 		of_page_sz |= 1LL << 24;
3110 
3111 	if (page_szs && IBT_PAGE_32M)
3112 		of_page_sz |= 1LL << 25;
3113 
3114 	if (page_szs && IBT_PAGE_64M)
3115 		of_page_sz |= 1LL << 26;
3116 
3117 	if (page_szs && IBT_PAGE_128M)
3118 		of_page_sz |= 1LL << 27;
3119 
3120 	if (page_szs && IBT_PAGE_256M)
3121 		of_page_sz |= 1LL << 28;
3122 
3123 	if (page_szs && IBT_PAGE_512M)
3124 		of_page_sz |= 1LL << 29;
3125 
3126 	if (page_szs && IBT_PAGE_1G)
3127 		of_page_sz |= 1LL << 30;
3128 
3129 	if (page_szs && IBT_PAGE_2G)
3130 		of_page_sz |= 1LL << 31;
3131 
3132 	if (page_szs && IBT_PAGE_4G)
3133 		of_page_sz |= 1LL << 32;
3134 
3135 	if (page_szs && IBT_PAGE_8G)
3136 		of_page_sz |= 1LL << 33;
3137 
3138 	if (page_szs && IBT_PAGE_16G)
3139 		of_page_sz |= 1LL << 34;
3140 
3141 	return (of_page_sz);
3142 }
3143