xref: /titanic_44/usr/src/uts/common/io/ib/clients/daplt/daplt.c (revision 1a5e258f5471356ca102c7176637cdce45bac147)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  *
26  * UDAPL kernel agent
27  */
28 
29 #include <sys/types.h>
30 #include <sys/errno.h>
31 #include <sys/debug.h>
32 #include <sys/stropts.h>
33 #include <sys/stream.h>
34 #include <sys/strlog.h>
35 #include <sys/cmn_err.h>
36 #include <sys/kmem.h>
37 #include <sys/conf.h>
38 #include <sys/stat.h>
39 #include <sys/modctl.h>
40 #include <sys/kstat.h>
41 #include <sys/ddi.h>
42 #include <sys/sunddi.h>
43 #include <sys/strsun.h>
44 #include <sys/taskq.h>
45 #include <sys/open.h>
46 #include <sys/uio.h>
47 #include <sys/cpuvar.h>
48 #include <sys/atomic.h>
49 #include <sys/sysmacros.h>
50 #include <sys/esunddi.h>
51 #include <sys/avl.h>
52 #include <sys/cred.h>
53 #include <sys/note.h>
54 #include <sys/ib/ibtl/ibti.h>
55 #include <sys/socket.h>
56 #include <netinet/in.h>
57 #include <daplt_if.h>
58 #include <daplt.h>
59 
60 /*
61  * The following variables support the debug log buffer scheme.
62  */
63 #ifdef	DEBUG
64 static char daplka_dbgbuf[0x80000];
65 #else /* DEBUG */
66 static char daplka_dbgbuf[0x4000];
67 #endif /* DEBUG */
68 static int daplka_dbgsize = sizeof (daplka_dbgbuf);
69 static size_t daplka_dbgnext;
70 static int daplka_dbginit = 0;
71 static kmutex_t daplka_dbglock;
72 _NOTE(MUTEX_PROTECTS_DATA(daplka_dbglock,
73     daplka_dbgbuf
74     daplka_dbgnext))
75 
76 static int daplka_dbg = 0x0103;
77 static void daplka_console(const char *, ...);
78 static void daplka_debug(const char *, ...);
79 static int daplka_apm = 0x1;			/* default enable */
80 static int daplka_failback = 0x1;		/* default enable */
81 static int daplka_query_aft_setaltpath = 10;
82 
83 #define	DERR				\
84 	if (daplka_dbg & 0x100) 	\
85 	    daplka_debug
86 
87 #ifdef DEBUG
88 
89 #define	DINFO				\
90 	daplka_console
91 
92 #define	D1				\
93 	if (daplka_dbg & 0x01)		\
94 	    daplka_debug
95 #define	D2				\
96 	if (daplka_dbg & 0x02) 		\
97 	    daplka_debug
98 #define	D3				\
99 	if (daplka_dbg & 0x04) 		\
100 	    daplka_debug
101 #define	D4				\
102 	if (daplka_dbg & 0x08) 		\
103 	    daplka_debug
104 
105 #else /* DEBUG */
106 
107 #define	DINFO	if (0) printf
108 #define	D1	if (0) printf
109 #define	D2	if (0) printf
110 #define	D3	if (0) printf
111 #define	D4	if (0) printf
112 
113 #endif /* DEBUG */
114 
115 /*
116  * driver entry points
117  */
118 static int daplka_open(dev_t *, int, int, struct cred *);
119 static int daplka_close(dev_t, int, int, struct cred *);
120 static int daplka_attach(dev_info_t *, ddi_attach_cmd_t);
121 static int daplka_detach(dev_info_t *, ddi_detach_cmd_t);
122 static int daplka_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
123 static int daplka_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
124 
125 /*
126  * types of ioctls
127  */
128 static int daplka_common_ioctl(int, minor_t, intptr_t, int, cred_t *, int *);
129 static int daplka_misc_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
130     cred_t *, int *);
131 static int daplka_ep_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
132     cred_t *, int *);
133 static int daplka_evd_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
134     cred_t *, int *);
135 static int daplka_mr_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
136     cred_t *, int *);
137 static int daplka_cno_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
138     cred_t *, int *);
139 static int daplka_pd_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
140     cred_t *, int *);
141 static int daplka_sp_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
142     cred_t *, int *);
143 static int daplka_srq_ioctl(int, daplka_ia_resource_t *, intptr_t, int,
144     cred_t *, int *);
145 
146 /*
147  * common ioctls and supporting functions
148  */
149 static int daplka_ia_create(minor_t, intptr_t, int, cred_t *, int *);
150 static int daplka_ia_destroy(daplka_resource_t *);
151 
152 /*
153  * EP ioctls and supporting functions
154  */
155 static int daplka_ep_create(daplka_ia_resource_t *, intptr_t, int,
156     cred_t *, int *);
157 static int daplka_ep_modify(daplka_ia_resource_t *, intptr_t, int,
158     cred_t *, int *);
159 static int daplka_ep_free(daplka_ia_resource_t *, intptr_t, int,
160     cred_t *, int *);
161 static int daplka_ep_connect(daplka_ia_resource_t *, intptr_t, int,
162     cred_t *, int *);
163 static int daplka_ep_disconnect(daplka_ia_resource_t *, intptr_t, int,
164     cred_t *, int *);
165 static int daplka_ep_reinit(daplka_ia_resource_t *, intptr_t, int,
166     cred_t *, int *);
167 static int daplka_ep_destroy(daplka_resource_t *);
168 static void daplka_hash_ep_free(void *);
169 static int daplka_ep_failback(void *objp, void *arg);
170 static int daplka_ep_altpath(daplka_ep_resource_t *, ib_gid_t *);
171 
172 static uint32_t daplka_ep_get_state(daplka_ep_resource_t *);
173 static void daplka_ep_set_state(daplka_ep_resource_t *, uint32_t, uint32_t);
174 static boolean_t daplka_ep_transition_is_valid(uint32_t, uint32_t);
175 static daplka_timer_info_t *daplka_timer_info_alloc(daplka_ep_resource_t *);
176 static void daplka_timer_info_free(daplka_timer_info_t *);
177 static void daplka_timer_handler(void *);
178 static void daplka_timer_dispatch(void *);
179 static void daplka_timer_thread(void *);
180 static int daplka_cancel_timer(daplka_ep_resource_t *);
181 static void daplka_hash_timer_free(void *);
182 
183 /*
184  * EVD ioctls and supporting functions
185  */
186 static int daplka_evd_create(daplka_ia_resource_t *, intptr_t, int,
187     cred_t *, int *);
188 static int daplka_cq_resize(daplka_ia_resource_t *, intptr_t, int,
189     cred_t *, int *);
190 static int daplka_evd_free(daplka_ia_resource_t *, intptr_t, int,
191     cred_t *, int *);
192 static int daplka_event_poll(daplka_ia_resource_t *, intptr_t, int,
193     cred_t *, int *);
194 static int daplka_evd_destroy(daplka_resource_t *);
195 static void daplka_cq_handler(ibt_cq_hdl_t, void *);
196 static void daplka_evd_wakeup(daplka_evd_resource_t *,
197     daplka_evd_event_list_t *, daplka_evd_event_t *);
198 static void daplka_evd_event_enqueue(daplka_evd_event_list_t *,
199     daplka_evd_event_t *);
200 static daplka_evd_event_t *daplka_evd_event_dequeue(daplka_evd_event_list_t *);
201 static void daplka_hash_evd_free(void *);
202 
203 
204 /*
205  * SRQ ioctls and supporting functions
206  */
207 static int daplka_srq_create(daplka_ia_resource_t *, intptr_t, int,
208     cred_t *, int *);
209 static int daplka_srq_resize(daplka_ia_resource_t *, intptr_t, int,
210     cred_t *, int *);
211 static int daplka_srq_free(daplka_ia_resource_t *, intptr_t, int,
212     cred_t *, int *);
213 static int daplka_srq_destroy(daplka_resource_t *);
214 static void daplka_hash_srq_free(void *);
215 
216 /*
217  * Miscellaneous ioctls
218  */
219 static int daplka_cr_accept(daplka_ia_resource_t *, intptr_t, int,
220     cred_t *, int *);
221 static int daplka_cr_reject(daplka_ia_resource_t *, intptr_t, int,
222     cred_t *, int *);
223 static int daplka_cr_handoff(daplka_ia_resource_t *, intptr_t, int,
224     cred_t *, int *);
225 static int daplka_ia_query(daplka_ia_resource_t *, intptr_t, int,
226     cred_t *, int *);
227 
228 /*
229  * PD ioctls and supporting functions
230  */
231 static int daplka_pd_alloc(daplka_ia_resource_t *, intptr_t, int,
232     cred_t *, int *);
233 static int daplka_pd_free(daplka_ia_resource_t *, intptr_t, int,
234     cred_t *, int *);
235 static int daplka_pd_destroy(daplka_resource_t *);
236 static void daplka_hash_pd_free(void *);
237 
238 /*
239  * SP ioctls and supporting functions
240  */
241 static int daplka_service_register(daplka_ia_resource_t *, intptr_t, int,
242     cred_t *, int *);
243 static int daplka_service_deregister(daplka_ia_resource_t *, intptr_t, int,
244     cred_t *, int *);
245 static int daplka_sp_destroy(daplka_resource_t *);
246 static void daplka_hash_sp_free(void *);
247 static void daplka_hash_sp_unref(void *);
248 
249 /*
250  * MR ioctls and supporting functions
251  */
252 static int daplka_mr_register(daplka_ia_resource_t *, intptr_t, int,
253     cred_t *, int *);
254 static int daplka_mr_register_lmr(daplka_ia_resource_t *, intptr_t, int,
255     cred_t *, int *);
256 static int daplka_mr_register_shared(daplka_ia_resource_t *, intptr_t, int,
257     cred_t *, int *);
258 static int daplka_mr_deregister(daplka_ia_resource_t *, intptr_t, int,
259     cred_t *, int *);
260 static int daplka_mr_sync(daplka_ia_resource_t *, intptr_t, int,
261     cred_t *, int *);
262 static int daplka_mr_destroy(daplka_resource_t *);
263 static void daplka_hash_mr_free(void *);
264 static void daplka_shared_mr_free(daplka_mr_resource_t *);
265 
266 /*
267  * MW ioctls and supporting functions
268  */
269 static int daplka_mw_alloc(daplka_ia_resource_t *, intptr_t, int,
270     cred_t *, int *);
271 static int daplka_mw_free(daplka_ia_resource_t *, intptr_t, int,
272     cred_t *, int *);
273 static int daplka_mw_destroy(daplka_resource_t *);
274 static void daplka_hash_mw_free(void *);
275 
276 /*
277  * CNO ioctls and supporting functions
278  */
279 static int daplka_cno_alloc(daplka_ia_resource_t *, intptr_t, int,
280     cred_t *, int *);
281 static int daplka_cno_free(daplka_ia_resource_t *, intptr_t, int,
282     cred_t *, int *);
283 static int daplka_cno_wait(daplka_ia_resource_t *, intptr_t, int,
284     cred_t *, int *);
285 static int daplka_cno_destroy(daplka_resource_t *);
286 static void daplka_hash_cno_free(void *);
287 
288 /*
289  * CM handlers
290  */
291 static  ibt_cm_status_t daplka_cm_rc_handler(void *, ibt_cm_event_t *,
292     ibt_cm_return_args_t *, void *, ibt_priv_data_len_t);
293 
294 static  ibt_cm_status_t daplka_cm_service_handler(void *, ibt_cm_event_t *,
295     ibt_cm_return_args_t *, void *, ibt_priv_data_len_t);
296 
297 static ibt_cm_status_t daplka_cm_service_req(daplka_sp_resource_t *,
298     ibt_cm_event_t *, ibt_cm_return_args_t *, void *, ibt_priv_data_len_t);
299 
300 /*
301  * resource management routines
302  */
303 static int daplka_resource_reserve(minor_t *);
304 static int daplka_resource_insert(minor_t, daplka_resource_t *);
305 static daplka_resource_t *daplka_resource_remove(minor_t rnum);
306 static daplka_resource_t *daplka_resource_lookup(minor_t);
307 static void daplka_resource_init(void);
308 static void daplka_resource_fini(void);
309 static struct daplka_resource_table daplka_resource;
310 
311 /*
312  * hash table routines
313  */
314 static int daplka_hash_insert(daplka_hash_table_t *, uint64_t *, void *);
315 static int daplka_hash_remove(daplka_hash_table_t *, uint64_t, void **);
316 static void daplka_hash_walk(daplka_hash_table_t *, int (*)(void *, void *),
317     void *, krw_t);
318 static void *daplka_hash_lookup(daplka_hash_table_t *, uint64_t);
319 static int daplka_hash_create(daplka_hash_table_t *, uint_t,
320     void (*)(void *), void (*)(void *));
321 static void daplka_hash_destroy(daplka_hash_table_t *);
322 static uint32_t daplka_hash_getsize(daplka_hash_table_t *);
323 static void daplka_hash_generic_lookup(void *);
324 
325 static uint32_t daplka_timer_hkey_gen();
326 
327 /*
328  * async event handlers
329  */
330 static void daplka_async_event_create(ibt_async_code_t, ibt_async_event_t *,
331     uint64_t, daplka_ia_resource_t *);
332 static void daplka_rc_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
333     ibt_async_event_t *);
334 static void daplka_cq_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
335     ibt_async_event_t *);
336 static void daplka_un_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
337     ibt_async_event_t *);
338 static void daplka_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
339     ibt_async_event_t *);
340 static void daplka_sm_notice_handler(void *, ib_gid_t, ibt_subnet_event_code_t,
341     ibt_subnet_event_t *event);
342 static void daplka_sm_gid_avail(ib_gid_t *, ib_gid_t *);
343 
344 /*
345  * IBTF wrappers and default limits used for resource accounting
346  */
347 static boolean_t	daplka_accounting_enabled = B_TRUE;
348 static uint32_t		daplka_max_qp_percent = 100;
349 static uint32_t		daplka_max_cq_percent = 100;
350 static uint32_t		daplka_max_pd_percent = 100;
351 static uint32_t		daplka_max_mw_percent = 100;
352 static uint32_t		daplka_max_mr_percent = 100;
353 static uint32_t		daplka_max_srq_percent = 100;
354 
355 static ibt_status_t
356 daplka_ibt_alloc_rc_channel(daplka_ep_resource_t *, ibt_hca_hdl_t,
357     ibt_chan_alloc_flags_t, ibt_rc_chan_alloc_args_t *,
358     ibt_channel_hdl_t *, ibt_chan_sizes_t *);
359 
360 static ibt_status_t
361 daplka_ibt_free_channel(daplka_ep_resource_t *, ibt_channel_hdl_t);
362 
363 static ibt_status_t
364 daplka_ibt_alloc_cq(daplka_evd_resource_t *, ibt_hca_hdl_t,
365     ibt_cq_attr_t *, ibt_cq_hdl_t *, uint_t *);
366 
367 static ibt_status_t
368 daplka_ibt_free_cq(daplka_evd_resource_t *, ibt_cq_hdl_t);
369 
370 static ibt_status_t
371 daplka_ibt_alloc_pd(daplka_pd_resource_t *, ibt_hca_hdl_t,
372     ibt_pd_flags_t, ibt_pd_hdl_t *);
373 
374 static ibt_status_t
375 daplka_ibt_free_pd(daplka_pd_resource_t *, ibt_hca_hdl_t, ibt_pd_hdl_t);
376 
377 static ibt_status_t
378 daplka_ibt_alloc_mw(daplka_mw_resource_t *, ibt_hca_hdl_t, ibt_pd_hdl_t,
379     ibt_mw_flags_t, ibt_mw_hdl_t *, ibt_rkey_t *);
380 
381 static ibt_status_t
382 daplka_ibt_free_mw(daplka_mw_resource_t *, ibt_hca_hdl_t, ibt_mw_hdl_t);
383 
384 static ibt_status_t
385 daplka_ibt_register_mr(daplka_mr_resource_t *, ibt_hca_hdl_t, ibt_pd_hdl_t,
386     ibt_mr_attr_t *, ibt_mr_hdl_t *, ibt_mr_desc_t *);
387 
388 static ibt_status_t
389 daplka_ibt_register_shared_mr(daplka_mr_resource_t *, ibt_hca_hdl_t,
390     ibt_mr_hdl_t, ibt_pd_hdl_t, ibt_smr_attr_t *, ibt_mr_hdl_t *,
391     ibt_mr_desc_t *);
392 
393 static ibt_status_t
394 daplka_ibt_deregister_mr(daplka_mr_resource_t *, ibt_hca_hdl_t, ibt_mr_hdl_t);
395 
396 static ibt_status_t
397 daplka_ibt_alloc_srq(daplka_srq_resource_t *, ibt_hca_hdl_t, ibt_srq_flags_t,
398     ibt_pd_hdl_t, ibt_srq_sizes_t *, ibt_srq_hdl_t *, ibt_srq_sizes_t *);
399 
400 static ibt_status_t
401 daplka_ibt_free_srq(daplka_srq_resource_t *, ibt_srq_hdl_t);
402 
403 /*
404  * macros for manipulating resource objects.
405  * these macros can be used on objects that begin with a
406  * daplka_resource_t header.
407  */
408 #define	DAPLKA_RS_REFCNT(rp) ((rp)->header.rs_refcnt)
409 
410 #define	DAPLKA_RS_REF(rp) {			\
411 	mutex_enter(&(rp)->header.rs_reflock);	\
412 	(rp)->header.rs_refcnt++;		\
413 	ASSERT((rp)->header.rs_refcnt != 0);	\
414 	mutex_exit(&(rp)->header.rs_reflock);	\
415 }
416 
417 #define	DAPLKA_RS_UNREF(rp) {					\
418 	mutex_enter(&(rp)->header.rs_reflock);			\
419 	ASSERT((rp)->header.rs_refcnt != 0);			\
420 	if (--(rp)->header.rs_refcnt == 0) {			\
421 		ASSERT((rp)->header.rs_free != NULL);		\
422 		mutex_exit(&(rp)->header.rs_reflock);		\
423 		(rp)->header.rs_free((daplka_resource_t *)rp);	\
424 	} else {						\
425 		mutex_exit(&(rp)->header.rs_reflock);		\
426 	}							\
427 }
428 
429 #define	DAPLKA_RS_INIT(rp, type, rnum, free_func) {	\
430 	(rp)->header.rs_refcnt = 1;			\
431 	(rp)->header.rs_type = (type);			\
432 	(rp)->header.rs_rnum = (rnum); 			\
433 	(rp)->header.rs_charged = 0;			\
434 	(rp)->header.rs_free = (free_func);		\
435 	mutex_init(&(rp)->header.rs_reflock, NULL,	\
436 	    MUTEX_DRIVER, NULL);			\
437 }
438 
439 #define	DAPLKA_RS_FINI(rp) {				\
440 	mutex_destroy(&(rp)->header.rs_reflock);	\
441 }
442 
443 #define	DAPLKA_RS_ACCT_INC(rp, cnt) {				\
444 	atomic_add_32(&(rp)->header.rs_charged, (cnt));		\
445 }
446 #define	DAPLKA_RS_ACCT_DEC(rp, cnt) {				\
447 	atomic_add_32(&(rp)->header.rs_charged, -(cnt));	\
448 }
449 #define	DAPLKA_RS_ACCT_CHARGED(rp) ((rp)->header.rs_charged)
450 
451 #define	DAPLKA_RS_RNUM(rp) ((rp)->header.rs_rnum)
452 #define	DAPLKA_RS_TYPE(rp) ((rp)->header.rs_type)
453 #define	DAPLKA_RS_RESERVED(rp) ((intptr_t)(rp) == DAPLKA_RC_RESERVED)
454 
455 /*
456  * depending on the timeout value does a cv_wait_sig or cv_timedwait_sig
457  */
458 #define	DAPLKA_EVD_WAIT(cvp, mp, timeout)			\
459 	((timeout) == LONG_MAX) ? cv_wait_sig((cvp), (mp)) :	\
460 	cv_timedwait_sig((cvp), (mp), (timeout))
461 
462 #define	DAPLKA_HOLD_HCA_WITHOUT_LOCK(hca)	((hca)->hca_ref_cnt++)
463 #define	DAPLKA_RELE_HCA_WITHOUT_LOCK(hca)	((hca)->hca_ref_cnt--)
464 
465 #define	DAPLKA_HOLD_HCA(dp, hca) {			\
466 	mutex_enter(&(dp)->daplka_mutex);		\
467 	DAPLKA_HOLD_HCA_WITHOUT_LOCK(hca);		\
468 	mutex_exit(&(dp)->daplka_mutex);		\
469 }
470 
471 #define	DAPLKA_RELE_HCA(dp, hca) {			\
472 	mutex_enter(&(dp)->daplka_mutex);		\
473 	DAPLKA_RELE_HCA_WITHOUT_LOCK(hca);		\
474 	mutex_exit(&(dp)->daplka_mutex);		\
475 }
476 
477 #define	DAPLKA_HCA_BUSY(hca)				\
478 	((hca)->hca_ref_cnt != 0 ||			\
479 	(hca)->hca_qp_count != 0 ||			\
480 	(hca)->hca_cq_count != 0 ||			\
481 	(hca)->hca_pd_count != 0 ||			\
482 	(hca)->hca_mw_count != 0 ||			\
483 	(hca)->hca_mr_count != 0)
484 
485 
486 static struct cb_ops daplka_cb_ops = {
487 	daplka_open,		/* cb_open */
488 	daplka_close,		/* cb_close */
489 	nodev,			/* cb_strategy */
490 	nodev,			/* cb_print */
491 	nodev,			/* cb_dump */
492 	nodev,			/* cb_read */
493 	nodev,			/* cb_write */
494 	daplka_ioctl,		/* cb_ioctl */
495 	nodev,			/* cb_devmap */
496 	nodev,			/* cb_mmap */
497 	nodev,			/* cb_segmap */
498 	nochpoll,		/* cb_chpoll */
499 	ddi_prop_op,		/* cb_prop_op */
500 	NULL,			/* cb_stream */
501 	D_NEW | D_MP,		/* cb_flag */
502 	CB_REV,			/* rev */
503 	nodev,			/* int (*cb_aread)() */
504 	nodev			/* int (*cb_awrite)() */
505 };
506 
507 static struct dev_ops daplka_ops = {
508 	DEVO_REV,		/* devo_rev */
509 	0,			/* devo_refcnt */
510 	daplka_info,		/* devo_getinfo */
511 	nulldev,		/* devo_identify */
512 	nulldev,		/* devo_probe */
513 	daplka_attach,		/* devo_attach */
514 	daplka_detach,		/* devo_detach */
515 	nodev,			/* devo_reset */
516 	&daplka_cb_ops,		/* devo_cb_ops */
517 	(struct bus_ops *)NULL,	/* devo_bus_ops */
518 	nulldev,		/* power */
519 	ddi_quiesce_not_needed,	/* devo_quiesce */
520 };
521 
522 /*
523  * Module linkage information for the kernel.
524  */
525 static struct modldrv modldrv = {
526 	&mod_driverops,
527 	"uDAPL Service Driver",
528 	&daplka_ops,
529 };
530 
531 static struct modlinkage modlinkage = {
532 #ifdef _LP64
533 	MODREV_1, { (void *) &modldrv, NULL, NULL, NULL, NULL, NULL, NULL }
534 #else
535 	MODREV_1, { (void *) &modldrv, NULL, NULL, NULL }
536 #endif
537 };
538 
539 /*
540  * daplka_dev holds global driver state and a list of HCAs
541  */
542 static daplka_t *daplka_dev = NULL;
543 static void *daplka_state = NULL;
544 
545 /*
546  * global SP hash table
547  */
548 static daplka_hash_table_t daplka_global_sp_htbl;
549 
550 /*
551  * timer_info hash table
552  */
553 static daplka_hash_table_t daplka_timer_info_htbl;
554 static uint32_t daplka_timer_hkey = 0;
555 
556 /*
557  * shared MR avl tree
558  */
559 static avl_tree_t daplka_shared_mr_tree;
560 static kmutex_t daplka_shared_mr_lock;
561 static int daplka_shared_mr_cmp(const void *, const void *);
562 _NOTE(MUTEX_PROTECTS_DATA(daplka_shared_mr_lock,
563     daplka_shared_mr_tree))
564 
565 /*
566  * default kmem flags used by this driver
567  */
568 static int daplka_km_flags = KM_SLEEP;
569 
570 /*
571  * taskq used for handling background tasks
572  */
573 static taskq_t *daplka_taskq = NULL;
574 
575 /*
576  * daplka_cm_delay is the length of time the active
577  * side needs to wait before timing out on the REP message.
578  */
579 static clock_t daplka_cm_delay = 60000000;
580 
581 /*
582  * modunload will fail if pending_close is non-zero
583  */
584 static uint32_t daplka_pending_close = 0;
585 
586 static struct ibt_clnt_modinfo_s daplka_clnt_modinfo = {
587 	IBTI_V_CURR,
588 	IBT_USER,
589 	daplka_async_handler,
590 	NULL,
591 	DAPLKA_DRV_NAME
592 };
593 
594 /*
595  * Module Installation
596  */
597 int
_init(void)598 _init(void)
599 {
600 	int status;
601 
602 	status = ddi_soft_state_init(&daplka_state, sizeof (daplka_t), 1);
603 	if (status != 0) {
604 		return (status);
605 	}
606 
607 	mutex_init(&daplka_dbglock, NULL, MUTEX_DRIVER, NULL);
608 	bzero(daplka_dbgbuf, sizeof (daplka_dbgbuf));
609 	daplka_dbgnext = 0;
610 	daplka_dbginit = 1;
611 
612 	daplka_resource_init();
613 
614 	status = mod_install(&modlinkage);
615 	if (status != DDI_SUCCESS) {
616 		/* undo inits done before mod_install */
617 		daplka_resource_fini();
618 		mutex_destroy(&daplka_dbglock);
619 		ddi_soft_state_fini(&daplka_state);
620 	}
621 	return (status);
622 }
623 
624 /*
625  * Module Removal
626  */
627 int
_fini(void)628 _fini(void)
629 {
630 	int	status;
631 
632 	/*
633 	 * mod_remove causes detach to be called
634 	 */
635 	if ((status = mod_remove(&modlinkage)) != 0) {
636 		DERR("fini: mod_remove failed: 0x%x\n", status);
637 		return (status);
638 	}
639 
640 	daplka_resource_fini();
641 	mutex_destroy(&daplka_dbglock);
642 	ddi_soft_state_fini(&daplka_state);
643 
644 	return (status);
645 }
646 
647 /*
648  * Return Module Info.
649  */
650 int
_info(struct modinfo * modinfop)651 _info(struct modinfo *modinfop)
652 {
653 	return (mod_info(&modlinkage, modinfop));
654 }
655 
656 static void
daplka_enqueue_hca(daplka_t * dp,daplka_hca_t * hca)657 daplka_enqueue_hca(daplka_t *dp, daplka_hca_t *hca)
658 {
659 	daplka_hca_t *h;
660 
661 	ASSERT(mutex_owned(&dp->daplka_mutex));
662 
663 	if (dp->daplka_hca_list_head == NULL) {
664 		dp->daplka_hca_list_head = hca;
665 	} else {
666 		h = dp->daplka_hca_list_head;
667 		while (h->hca_next != NULL)
668 			h = h->hca_next;
669 
670 		h->hca_next = hca;
671 	}
672 }
673 
674 static void
daplka_dequeue_hca(daplka_t * dp,daplka_hca_t * hca)675 daplka_dequeue_hca(daplka_t *dp, daplka_hca_t *hca)
676 {
677 	daplka_hca_t *h;
678 
679 	ASSERT(mutex_owned(&dp->daplka_mutex));
680 
681 	if (dp->daplka_hca_list_head == hca)
682 		dp->daplka_hca_list_head = hca->hca_next;
683 	else {
684 		h = dp->daplka_hca_list_head;
685 		while (h->hca_next != hca)
686 			h = h->hca_next;
687 		h->hca_next = hca->hca_next;
688 	}
689 }
690 
691 static int
daplka_init_hca(daplka_t * dp,ib_guid_t hca_guid)692 daplka_init_hca(daplka_t *dp, ib_guid_t hca_guid)
693 {
694 	daplka_hca_t		*hca;
695 	ibt_hca_portinfo_t	*pinfop;
696 	uint_t			size;
697 	int			j;
698 	ibt_status_t		status;
699 
700 	hca = kmem_zalloc(sizeof (daplka_hca_t), KM_SLEEP);
701 
702 	hca->hca_guid = hca_guid;
703 
704 	/*
705 	 * open the HCA for use
706 	 */
707 	status = ibt_open_hca(dp->daplka_clnt_hdl, hca_guid, &hca->hca_hdl);
708 	if (status != IBT_SUCCESS) {
709 		if (status == IBT_HCA_IN_USE) {
710 			DERR("ibt_open_hca() returned IBT_HCA_IN_USE\n");
711 		} else {
712 			DERR("ibt_open_hca() returned %d\n", status);
713 		}
714 		kmem_free(hca, sizeof (daplka_hca_t));
715 		return (status);
716 	}
717 
718 	/*
719 	 * query HCA to get its info
720 	 */
721 	status = ibt_query_hca(hca->hca_hdl, &hca->hca_attr);
722 	if (status != IBT_SUCCESS) {
723 		DERR("ibt_query_hca returned %d (hca_guid 0x%llx)\n",
724 		    status, (longlong_t)hca_guid);
725 		goto out;
726 	}
727 
728 	/*
729 	 * query HCA to get info of all ports
730 	 */
731 	status = ibt_query_hca_ports(hca->hca_hdl,
732 	    0, &pinfop, &hca->hca_nports, &size);
733 	if (status != IBT_SUCCESS) {
734 		DERR("ibt_query_all_ports returned %d "
735 		    "(hca_guid 0x%llx)\n", status,
736 		    (longlong_t)hca_guid);
737 		goto out;
738 	}
739 	hca->hca_ports = pinfop;
740 	hca->hca_pinfosz = size;
741 
742 	DERR("hca guid 0x%llx, nports %d\n",
743 	    (longlong_t)hca_guid, hca->hca_nports);
744 	for (j = 0; j < hca->hca_nports; j++) {
745 		DERR("port %d: state %d prefix 0x%016llx "
746 		    "guid %016llx\n",
747 		    pinfop[j].p_port_num, pinfop[j].p_linkstate,
748 		    (longlong_t)pinfop[j].p_sgid_tbl[0].gid_prefix,
749 		    (longlong_t)pinfop[j].p_sgid_tbl[0].gid_guid);
750 	}
751 
752 	mutex_enter(&dp->daplka_mutex);
753 	daplka_enqueue_hca(dp, hca);
754 	mutex_exit(&dp->daplka_mutex);
755 
756 	return (IBT_SUCCESS);
757 
758 out:
759 	(void) ibt_close_hca(hca->hca_hdl);
760 	kmem_free(hca, sizeof (daplka_hca_t));
761 	return (status);
762 }
763 
764 /*
765  * this function obtains the list of HCAs from IBTF.
766  * the HCAs are then opened and the returned handles
767  * and attributes are stored into the global daplka_dev
768  * structure.
769  */
770 static int
daplka_init_hcas(daplka_t * dp)771 daplka_init_hcas(daplka_t *dp)
772 {
773 	int		i;
774 	ib_guid_t	*hca_guids;
775 	uint32_t	hca_count;
776 
777 	/*
778 	 * get the num & list of HCAs present
779 	 */
780 	hca_count = ibt_get_hca_list(&hca_guids);
781 	DERR("No. of HCAs present %d\n", hca_count);
782 
783 	if (hca_count != 0) {
784 		/*
785 		 * get the info for each available HCA
786 		 */
787 		for (i = 0; i < hca_count; i++)
788 			(void) daplka_init_hca(dp, hca_guids[i]);
789 
790 		ibt_free_hca_list(hca_guids, hca_count);
791 	}
792 
793 	if (dp->daplka_hca_list_head != NULL)
794 		return (IBT_SUCCESS);
795 	else
796 		return (IBT_FAILURE);
797 }
798 
799 static int
daplka_fini_hca(daplka_t * dp,daplka_hca_t * hca)800 daplka_fini_hca(daplka_t *dp, daplka_hca_t *hca)
801 {
802 	ibt_status_t	status;
803 
804 	if (hca->hca_hdl != NULL) {
805 		status = ibt_close_hca(hca->hca_hdl);
806 		if (status != IBT_SUCCESS) {
807 			DERR("ibt_close_hca returned %d"
808 			    " (hca_guid 0x%llx)\n", status,
809 			    (longlong_t)hca->hca_guid);
810 
811 			mutex_enter(&dp->daplka_mutex);
812 			daplka_enqueue_hca(dp, hca);
813 			mutex_exit(&dp->daplka_mutex);
814 
815 			return (status);
816 		}
817 	}
818 
819 	if (hca->hca_ports != NULL)
820 		ibt_free_portinfo(hca->hca_ports, hca->hca_pinfosz);
821 
822 	kmem_free(hca, sizeof (daplka_hca_t));
823 	return (IBT_SUCCESS);
824 }
825 
826 /*
827  * closes all HCAs and frees up the HCA list
828  */
829 static int
daplka_fini_hcas(daplka_t * dp)830 daplka_fini_hcas(daplka_t *dp)
831 {
832 	ibt_status_t	status;
833 	daplka_hca_t	*hca;
834 
835 	mutex_enter(&daplka_dev->daplka_mutex);
836 	while ((hca = dp->daplka_hca_list_head) != NULL) {
837 		if (DAPLKA_HCA_BUSY(hca)) {
838 			mutex_exit(&daplka_dev->daplka_mutex);
839 			return (IBT_HCA_RESOURCES_NOT_FREED);
840 		}
841 		daplka_dequeue_hca(daplka_dev, hca);
842 		mutex_exit(&daplka_dev->daplka_mutex);
843 
844 		if ((status = daplka_fini_hca(dp, hca)) != IBT_SUCCESS)
845 			return (status);
846 
847 		mutex_enter(&daplka_dev->daplka_mutex);
848 	}
849 	mutex_exit(&daplka_dev->daplka_mutex);
850 
851 	DERR("dapl kernel agent unloaded\n");
852 	return (IBT_SUCCESS);
853 }
854 
855 
856 /*
857  * Attach the device, create and fill in daplka_dev
858  */
859 static int
daplka_attach(dev_info_t * dip,ddi_attach_cmd_t cmd)860 daplka_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
861 {
862 	daplka_t	*dp;
863 	int		instance, retval, err;
864 	boolean_t	sp_htbl_allocated = B_FALSE;
865 	boolean_t	timer_htbl_allocated = B_FALSE;
866 	boolean_t	shared_mr_tree_allocated = B_FALSE;
867 
868 	switch (cmd) {
869 	case DDI_ATTACH:
870 		break;
871 	case DDI_RESUME:
872 		return (DDI_SUCCESS);
873 	default:
874 		return (DDI_FAILURE);
875 	}
876 
877 	/*
878 	 * Allocate soft data structure
879 	 */
880 	instance = ddi_get_instance(dip);
881 	if (ddi_soft_state_zalloc(daplka_state, instance) != DDI_SUCCESS) {
882 		DERR("attach: bad state zalloc\n");
883 		return (DDI_FAILURE);
884 	}
885 
886 	dp = ddi_get_soft_state(daplka_state, instance);
887 	if (dp == NULL) {
888 		ddi_soft_state_free(daplka_state, instance);
889 		DERR("attach: cannot get soft state\n");
890 		return (DDI_FAILURE);
891 	}
892 	/*
893 	 * Stuff private info into dip.
894 	 */
895 	dp->daplka_dip = dip;
896 	ddi_set_driver_private(dip, dp);
897 	daplka_dev = dp;
898 	mutex_init(&dp->daplka_mutex, NULL, MUTEX_DRIVER, NULL);
899 
900 	/*
901 	 * Register driver with IBTF
902 	 */
903 	retval = ibt_attach(&daplka_clnt_modinfo, dip, dp,
904 	    &dp->daplka_clnt_hdl);
905 	if (retval != IBT_SUCCESS) {
906 		DERR("attach: ibt_attach failed: error = %d\n", retval);
907 		retval = DDI_FAILURE;
908 		goto error;
909 	}
910 	/* Register to receive SM events */
911 	ibt_register_subnet_notices(dp->daplka_clnt_hdl,
912 	    daplka_sm_notice_handler, NULL);
913 
914 	retval = daplka_init_hcas(dp);
915 	if (retval != IBT_SUCCESS) {
916 		DERR("attach: hca_init failed: error = %d\n", retval);
917 		retval = DDI_FAILURE;
918 		goto error;
919 	}
920 	/*
921 	 * this table is used by cr_handoff
922 	 */
923 	retval = daplka_hash_create(&daplka_global_sp_htbl,
924 	    DAPLKA_G_SP_HTBL_SZ, daplka_hash_sp_unref,
925 	    daplka_hash_generic_lookup);
926 	if (retval != 0) {
927 		DERR("attach: cannot create sp hash table\n");
928 		retval = DDI_FAILURE;
929 		goto error;
930 	}
931 	sp_htbl_allocated = B_TRUE;
932 
933 	/*
934 	 * this table stores per EP timer information.
935 	 * timer_info_t objects are inserted into this table whenever
936 	 * a EP timer is set. timers get removed when they expire
937 	 * or when they get cancelled.
938 	 */
939 	retval = daplka_hash_create(&daplka_timer_info_htbl,
940 	    DAPLKA_TIMER_HTBL_SZ, daplka_hash_timer_free, NULL);
941 	if (retval != 0) {
942 		DERR("attach: cannot create timer hash table\n");
943 		retval = DDI_FAILURE;
944 		goto error;
945 	}
946 	timer_htbl_allocated = B_TRUE;
947 
948 	/*
949 	 * this taskq is currently only used for processing timers.
950 	 * other processing may also use this taskq in the future.
951 	 */
952 	daplka_taskq = taskq_create(DAPLKA_DRV_NAME, DAPLKA_TQ_NTHREADS,
953 	    maxclsyspri, 1, DAPLKA_TQ_NTHREADS, TASKQ_DYNAMIC);
954 	if (daplka_taskq == NULL) {
955 		DERR("attach: cannot create daplka_taskq\n");
956 		retval = DDI_FAILURE;
957 		goto error;
958 	}
959 
960 	/*
961 	 * daplka_shared_mr_tree holds daplka_shared_mr_t objects that
962 	 * gets retrieved or created when daplka_mr_register_shared is
963 	 * called.
964 	 */
965 	mutex_init(&daplka_shared_mr_lock, NULL, MUTEX_DRIVER, NULL);
966 
967 	avl_create(&daplka_shared_mr_tree, daplka_shared_mr_cmp,
968 	    sizeof (daplka_shared_mr_t),
969 	    offsetof(daplka_shared_mr_t, smr_node));
970 	shared_mr_tree_allocated = B_TRUE;
971 
972 	/*
973 	 * Create the filesystem device node.
974 	 */
975 	if (ddi_create_minor_node(dip, DAPLKA_MINOR_NAME, S_IFCHR,
976 	    0, DDI_PSEUDO, NULL) != DDI_SUCCESS) {
977 		DERR("attach: bad create_minor_node\n");
978 		retval = DDI_FAILURE;
979 		goto error;
980 	}
981 	dp->daplka_status = DAPLKA_STATE_ATTACHED;
982 	ddi_report_dev(dip);
983 	return (DDI_SUCCESS);
984 
985 error:
986 	if (shared_mr_tree_allocated) {
987 		avl_destroy(&daplka_shared_mr_tree);
988 		mutex_destroy(&daplka_shared_mr_lock);
989 	}
990 
991 	if (daplka_taskq) {
992 		taskq_destroy(daplka_taskq);
993 		daplka_taskq = NULL;
994 	}
995 
996 	if (timer_htbl_allocated) {
997 		daplka_hash_destroy(&daplka_timer_info_htbl);
998 	}
999 
1000 	if (sp_htbl_allocated) {
1001 		daplka_hash_destroy(&daplka_global_sp_htbl);
1002 	}
1003 
1004 	err = daplka_fini_hcas(dp);
1005 	if (err != IBT_SUCCESS) {
1006 		DERR("attach: hca_fini returned %d\n", err);
1007 	}
1008 
1009 	if (dp->daplka_clnt_hdl != NULL) {
1010 		/* unregister SM event notification */
1011 		ibt_register_subnet_notices(dp->daplka_clnt_hdl,
1012 		    (ibt_sm_notice_handler_t)NULL, NULL);
1013 		err = ibt_detach(dp->daplka_clnt_hdl);
1014 
1015 		if (err != IBT_SUCCESS) {
1016 			DERR("attach: ibt_detach returned %d\n", err);
1017 		}
1018 	}
1019 	mutex_destroy(&dp->daplka_mutex);
1020 
1021 	if (dp->daplka_status == DAPLKA_STATE_ATTACHED) {
1022 		ddi_remove_minor_node(dip, NULL);
1023 	}
1024 	ddi_soft_state_free(daplka_state, instance);
1025 	return (retval);
1026 }
1027 
1028 /*
1029  * Detach - Free resources allocated in attach
1030  */
1031 /* ARGSUSED */
1032 static int
daplka_detach(dev_info_t * dip,ddi_detach_cmd_t cmd)1033 daplka_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
1034 {
1035 	int		instance, err;
1036 	void		*cookie = NULL;
1037 	daplka_t	*dp;
1038 
1039 	if (cmd != DDI_DETACH) {
1040 		return (DDI_FAILURE);
1041 	}
1042 	if (daplka_resource.daplka_rc_cnt > 0 ||
1043 	    daplka_pending_close > 0) {
1044 		DERR("detach: driver in use\n");
1045 		return (DDI_FAILURE);
1046 	}
1047 
1048 	instance = ddi_get_instance(dip);
1049 	dp = ddi_get_soft_state(daplka_state, instance);
1050 	if (dp == NULL) {
1051 		DERR("detach: cannot get soft state\n");
1052 		return (DDI_FAILURE);
1053 	}
1054 	err = daplka_fini_hcas(dp);
1055 	if (err != IBT_SUCCESS) {
1056 		DERR("detach: hca_fini returned %d\n", err);
1057 		return (DDI_FAILURE);
1058 	}
1059 	if (dp->daplka_clnt_hdl != NULL) {
1060 		/* unregister SM event notification */
1061 		ibt_register_subnet_notices(dp->daplka_clnt_hdl,
1062 		    (ibt_sm_notice_handler_t)NULL, NULL);
1063 		err = ibt_detach(dp->daplka_clnt_hdl);
1064 		if (err != IBT_SUCCESS) {
1065 			DERR("detach: ibt_detach returned %d\n", err);
1066 			return (DDI_FAILURE);
1067 		}
1068 		dp->daplka_clnt_hdl = NULL;
1069 	}
1070 	mutex_destroy(&dp->daplka_mutex);
1071 	if (dp->daplka_status == DAPLKA_STATE_ATTACHED) {
1072 		ddi_remove_minor_node(dip, NULL);
1073 	}
1074 	dp->daplka_status = DAPLKA_STATE_DETACHED;
1075 	ddi_soft_state_free(daplka_state, instance);
1076 	daplka_dev = NULL;
1077 
1078 	/*
1079 	 * by the time we get here, all clients of dapl should
1080 	 * have exited and completed their cleanup properly.
1081 	 * we can assert that all global data structures are now
1082 	 * empty.
1083 	 */
1084 	ASSERT(avl_destroy_nodes(&daplka_shared_mr_tree, &cookie) == NULL);
1085 	avl_destroy(&daplka_shared_mr_tree);
1086 	mutex_destroy(&daplka_shared_mr_lock);
1087 
1088 	ASSERT(daplka_hash_getsize(&daplka_timer_info_htbl) == 0);
1089 	daplka_hash_destroy(&daplka_timer_info_htbl);
1090 
1091 	ASSERT(daplka_hash_getsize(&daplka_global_sp_htbl) == 0);
1092 	daplka_hash_destroy(&daplka_global_sp_htbl);
1093 
1094 	taskq_destroy(daplka_taskq);
1095 
1096 	return (DDI_SUCCESS);
1097 }
1098 
1099 /* ARGSUSED */
1100 static int
daplka_info(dev_info_t * dip,ddi_info_cmd_t infocmd,void * arg,void ** result)1101 daplka_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
1102 {
1103 	switch (infocmd) {
1104 	case DDI_INFO_DEVT2DEVINFO:
1105 		if (daplka_dev !=  NULL) {
1106 			*result = daplka_dev->daplka_dip;
1107 			return (DDI_SUCCESS);
1108 		} else {
1109 			return (DDI_FAILURE);
1110 		}
1111 
1112 	case DDI_INFO_DEVT2INSTANCE:
1113 		*result = 0;
1114 		return (DDI_SUCCESS);
1115 
1116 	default:
1117 		return (DDI_FAILURE);
1118 	}
1119 }
1120 
1121 /*
1122  * creates a EP resource.
1123  * A EP resource contains a RC channel. A EP resource holds a
1124  * reference to a send_evd (for the send CQ), recv_evd (for the
1125  * recv CQ), a connection evd and a PD. These references ensure
1126  * that the referenced resources are not freed until the EP itself
1127  * gets freed.
1128  */
1129 /* ARGSUSED */
1130 static int
daplka_ep_create(daplka_ia_resource_t * ia_rp,intptr_t arg,int mode,cred_t * cred,int * rvalp)1131 daplka_ep_create(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
1132 	cred_t *cred, int *rvalp)
1133 {
1134 	daplka_ep_resource_t		*ep_rp;
1135 	daplka_pd_resource_t		*pd_rp;
1136 	dapl_ep_create_t		args;
1137 	ibt_rc_chan_alloc_args_t	chan_args;
1138 	ibt_chan_alloc_flags_t		achan_flags;
1139 	ibt_chan_sizes_t		chan_real_sizes;
1140 	ibt_hca_attr_t			*hca_attrp;
1141 	uint64_t			ep_hkey = 0;
1142 	boolean_t			inserted = B_FALSE;
1143 	uint32_t			old_state, new_state;
1144 	int				retval;
1145 	ibt_status_t			status;
1146 
1147 	D3("ep_create: enter\n");
1148 	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_create_t),
1149 	    mode);
1150 	if (retval != 0) {
1151 		DERR("ep_create: copyin error %d\n", retval);
1152 		return (EFAULT);
1153 	}
1154 	ep_rp = kmem_zalloc(sizeof (daplka_ep_resource_t), daplka_km_flags);
1155 	if (ep_rp == NULL) {
1156 		DERR("ep_create: cannot allocate ep_rp\n");
1157 		return (ENOMEM);
1158 	}
1159 	DAPLKA_RS_INIT(ep_rp, DAPL_TYPE_EP,
1160 	    DAPLKA_RS_RNUM(ia_rp), daplka_ep_destroy);
1161 
1162 	mutex_init(&ep_rp->ep_lock, NULL, MUTEX_DRIVER, NULL);
1163 	cv_init(&ep_rp->ep_cv, NULL, CV_DRIVER, NULL);
1164 	ep_rp->ep_hca = ia_rp->ia_hca;
1165 	ep_rp->ep_cookie = args.ep_cookie;
1166 	ep_rp->ep_timer_hkey = 0;
1167 
1168 	/*
1169 	 * we don't have to use ep_get_state here because ep_rp is not in
1170 	 * ep_htbl yet. refer to the description of daplka_ep_set_state
1171 	 * for details about the EP state machine.
1172 	 */
1173 	ep_rp->ep_state = DAPLKA_EP_STATE_TRANSITIONING;
1174 	new_state = old_state = DAPLKA_EP_STATE_CLOSED;
1175 
1176 	/* get reference to send evd and get cq handle */
1177 	ep_rp->ep_snd_evd = (daplka_evd_resource_t *)
1178 	    daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.ep_snd_evd_hkey);
1179 	if (ep_rp->ep_snd_evd == NULL) {
1180 		DERR("ep_create: ep_snd_evd %llx not found\n",
1181 		    args.ep_snd_evd_hkey);
1182 		retval = EINVAL;
1183 		goto cleanup;
1184 	}
1185 	chan_args.rc_scq = ep_rp->ep_snd_evd->evd_cq_hdl;
1186 	if (chan_args.rc_scq == NULL) {
1187 		DERR("ep_create: ep_snd_evd cq invalid\n");
1188 		retval = EINVAL;
1189 		goto cleanup;
1190 	}
1191 
1192 	/* get reference to recv evd and get cq handle */
1193 	ep_rp->ep_rcv_evd = (daplka_evd_resource_t *)
1194 	    daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.ep_rcv_evd_hkey);
1195 	if (ep_rp->ep_rcv_evd == NULL) {
1196 		DERR("ep_create: ep_rcv_evd %llx not found\n",
1197 		    args.ep_rcv_evd_hkey);
1198 		retval = EINVAL;
1199 		goto cleanup;
1200 	}
1201 	chan_args.rc_rcq = ep_rp->ep_rcv_evd->evd_cq_hdl;
1202 	if (chan_args.rc_rcq == NULL) {
1203 		DERR("ep_create: ep_rcv_evd cq invalid\n");
1204 		retval = EINVAL;
1205 		goto cleanup;
1206 	}
1207 
1208 	/* get reference to conn evd */
1209 	ep_rp->ep_conn_evd = (daplka_evd_resource_t *)
1210 	    daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.ep_conn_evd_hkey);
1211 	if (ep_rp->ep_conn_evd == NULL) {
1212 		DERR("ep_create: ep_conn_evd %llx not found\n",
1213 		    args.ep_conn_evd_hkey);
1214 		retval = EINVAL;
1215 		goto cleanup;
1216 	}
1217 
1218 	/* get reference to SRQ if needed */
1219 	if (args.ep_srq_attached) {
1220 		ep_rp->ep_srq_res = (daplka_srq_resource_t *)daplka_hash_lookup(
1221 		    &ia_rp->ia_srq_htbl, args.ep_srq_hkey);
1222 		if (ep_rp->ep_srq_res == NULL) {
1223 			DERR("ep_create: ep_srq %llx not found\n",
1224 			    (longlong_t)args.ep_srq_hkey);
1225 			retval = EINVAL;
1226 			goto cleanup;
1227 		}
1228 		ASSERT(DAPLKA_RS_TYPE(ep_rp->ep_srq_res) == DAPL_TYPE_SRQ);
1229 		D3("ep_create: ep_srq %p %llx\n", ep_rp->ep_srq_res,
1230 		    (longlong_t)args.ep_srq_hkey);
1231 	} else {
1232 		ep_rp->ep_srq_res = NULL;
1233 	}
1234 
1235 	/* get pd handle */
1236 	pd_rp = (daplka_pd_resource_t *)
1237 	    daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.ep_pd_hkey);
1238 	if (pd_rp == NULL) {
1239 		DERR("ep_create: cannot find pd resource\n");
1240 		retval = EINVAL;
1241 		goto cleanup;
1242 	}
1243 	ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
1244 	ep_rp->ep_pd_res = pd_rp;
1245 	chan_args.rc_pd = pd_rp->pd_hdl;
1246 
1247 
1248 	/*
1249 	 * these checks ensure that the requested channel sizes
1250 	 * are within the limits supported by the chosen HCA.
1251 	 */
1252 	hca_attrp = &ia_rp->ia_hca->hca_attr;
1253 	if (args.ep_ch_sizes.dcs_sq_sgl > hca_attrp->hca_max_sgl) {
1254 		DERR("ep_create: invalid cs_sq_sgl %d\n",
1255 		    args.ep_ch_sizes.dcs_sq_sgl);
1256 		retval = EINVAL;
1257 		goto cleanup;
1258 	}
1259 	if (args.ep_ch_sizes.dcs_rq_sgl > hca_attrp->hca_max_sgl) {
1260 		DERR("ep_create: invalid cs_rq_sgl %d\n",
1261 		    args.ep_ch_sizes.dcs_rq_sgl);
1262 		retval = EINVAL;
1263 		goto cleanup;
1264 	}
1265 	if (args.ep_ch_sizes.dcs_sq > hca_attrp->hca_max_chan_sz) {
1266 		DERR("ep_create: invalid cs_sq %d\n",
1267 		    args.ep_ch_sizes.dcs_sq);
1268 		retval = EINVAL;
1269 		goto cleanup;
1270 	}
1271 	if (args.ep_ch_sizes.dcs_rq > hca_attrp->hca_max_chan_sz) {
1272 		DERR("ep_create: invalid cs_rq %d\n",
1273 		    args.ep_ch_sizes.dcs_rq);
1274 		retval = EINVAL;
1275 		goto cleanup;
1276 	}
1277 
1278 	chan_args.rc_sizes.cs_sq_sgl = args.ep_ch_sizes.dcs_sq_sgl;
1279 	chan_args.rc_sizes.cs_rq_sgl = args.ep_ch_sizes.dcs_rq_sgl;
1280 	chan_args.rc_sizes.cs_sq = args.ep_ch_sizes.dcs_sq;
1281 	chan_args.rc_sizes.cs_rq = args.ep_ch_sizes.dcs_rq;
1282 	chan_args.rc_flags = IBT_WR_SIGNALED;
1283 	chan_args.rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR;
1284 	chan_args.rc_hca_port_num = ia_rp->ia_port_num;
1285 	chan_args.rc_clone_chan = NULL;
1286 	if (args.ep_srq_attached) {
1287 		chan_args.rc_srq = ep_rp->ep_srq_res->srq_hdl;
1288 	} else {
1289 		chan_args.rc_srq = NULL;
1290 	}
1291 
1292 	D3("ep_create: sq_sgl %d, rq_sgl %d, sq %d, rq %d, "
1293 	    "sig_type 0x%x, control 0x%x, portnum %d, clone_chan 0x%p\n",
1294 	    args.ep_ch_sizes.dcs_sq_sgl, args.ep_ch_sizes.dcs_rq_sgl,
1295 	    args.ep_ch_sizes.dcs_sq, args.ep_ch_sizes.dcs_rq,
1296 	    chan_args.rc_flags, chan_args.rc_control,
1297 	    chan_args.rc_hca_port_num, chan_args.rc_clone_chan);
1298 
1299 	if (args.ep_srq_attached) {
1300 		achan_flags = IBT_ACHAN_USER_MAP | IBT_ACHAN_USES_SRQ;
1301 	} else {
1302 		achan_flags = IBT_ACHAN_USER_MAP;
1303 	}
1304 	/* create rc channel */
1305 	status = daplka_ibt_alloc_rc_channel(ep_rp, ia_rp->ia_hca_hdl,
1306 	    achan_flags, &chan_args, &ep_rp->ep_chan_hdl,
1307 	    &chan_real_sizes);
1308 	if (status != IBT_SUCCESS) {
1309 		DERR("ep_create: alloc_rc_channel returned %d\n", status);
1310 		*rvalp = (int)status;
1311 		retval = 0;
1312 		goto cleanup;
1313 	}
1314 
1315 	args.ep_ch_real_sizes.dcs_sq = chan_real_sizes.cs_sq;
1316 	args.ep_ch_real_sizes.dcs_rq = chan_real_sizes.cs_rq;
1317 	args.ep_ch_real_sizes.dcs_sq_sgl = chan_real_sizes.cs_sq_sgl;
1318 	args.ep_ch_real_sizes.dcs_rq_sgl = chan_real_sizes.cs_rq_sgl;
1319 
1320 	/*
1321 	 * store ep ptr with chan_hdl.
1322 	 * this ep_ptr is used by the CM handlers (both active and
1323 	 * passive)
1324 	 * mutex is only needed for race of "destroy" and "async"
1325 	 */
1326 	mutex_enter(&daplka_dev->daplka_mutex);
1327 	ibt_set_chan_private(ep_rp->ep_chan_hdl, (void *)ep_rp);
1328 	mutex_exit(&daplka_dev->daplka_mutex);
1329 
1330 	/* Get HCA-specific data_out info */
1331 	status = ibt_ci_data_out(ia_rp->ia_hca_hdl,
1332 	    IBT_CI_NO_FLAGS, IBT_HDL_CHANNEL, (void *)ep_rp->ep_chan_hdl,
1333 	    &args.ep_qp_data_out, sizeof (args.ep_qp_data_out));
1334 
1335 	if (status != IBT_SUCCESS) {
1336 		DERR("ep_create: ibt_ci_data_out error(%d)\n",
1337 		    status);
1338 		*rvalp = (int)status;
1339 		retval = 0;
1340 		goto cleanup;
1341 	}
1342 
1343 	/* insert into ep hash table */
1344 	retval = daplka_hash_insert(&ia_rp->ia_ep_htbl,
1345 	    &ep_hkey, (void *)ep_rp);
1346 	if (retval != 0) {
1347 		DERR("ep_create: cannot insert ep resource into ep_htbl\n");
1348 		goto cleanup;
1349 	}
1350 	inserted = B_TRUE;
1351 
1352 	/*
1353 	 * at this point, the ep_rp can be looked up by other threads
1354 	 * if they manage to guess the correct hkey. but they are not
1355 	 * permitted to operate on ep_rp until we transition to the
1356 	 * CLOSED state.
1357 	 */
1358 
1359 	/* return hkey to library */
1360 	args.ep_hkey = ep_hkey;
1361 
1362 	retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_ep_create_t),
1363 	    mode);
1364 	if (retval != 0) {
1365 		DERR("ep_create: copyout error %d\n", retval);
1366 		retval = EFAULT;
1367 		goto cleanup;
1368 	}
1369 
1370 	daplka_ep_set_state(ep_rp, old_state, new_state);
1371 	D3("ep_create: exit\n");
1372 	return (0);
1373 
1374 cleanup:
1375 	if (inserted) {
1376 		daplka_ep_resource_t *free_rp = NULL;
1377 
1378 		(void) daplka_hash_remove(&ia_rp->ia_ep_htbl, ep_hkey,
1379 		    (void **)&free_rp);
1380 		if (free_rp != ep_rp) {
1381 			/*
1382 			 * this case is impossible because ep_free will
1383 			 * wait until our state transition is complete.
1384 			 */
1385 			DERR("ep_create: cannot remove ep from hash table\n");
1386 			ASSERT(B_FALSE);
1387 			return (retval);
1388 		}
1389 	}
1390 	new_state = DAPLKA_EP_STATE_FREED;
1391 	daplka_ep_set_state(ep_rp, old_state, new_state);
1392 	DAPLKA_RS_UNREF(ep_rp);
1393 	return (retval);
1394 }
1395 
1396 /*
1397  * daplka_ep_get_state retrieves the current state of the EP and
1398  * sets the state to TRANSITIONING. if the current state is already
1399  * TRANSITIONING, this function will wait until the state becomes one
1400  * of the other EP states. Most of the EP related ioctls follow the
1401  * call sequence:
1402  *
1403  *	new_state = old_state = daplka_ep_get_state(ep_rp);
1404  *	...
1405  *	...some code that affects the EP
1406  *	...
1407  *	new_state = <NEW_STATE>;
1408  *	daplka_ep_set_state(ep_rp, old_state, new_state);
1409  *
1410  * this call sequence ensures that only one thread may access the EP
1411  * during the time ep_state is in TRANSITIONING. daplka_ep_set_state
1412  * transitions ep_state to new_state and wakes up any waiters blocking
1413  * on ep_cv.
1414  *
1415  */
1416 static uint32_t
daplka_ep_get_state(daplka_ep_resource_t * ep_rp)1417 daplka_ep_get_state(daplka_ep_resource_t *ep_rp)
1418 {
1419 	uint32_t	old_state = 0;
1420 
1421 	mutex_enter(&ep_rp->ep_lock);
1422 	while (ep_rp->ep_state == DAPLKA_EP_STATE_TRANSITIONING) {
1423 		D2("get_state: wait for state transition to complete\n");
1424 		cv_wait(&ep_rp->ep_cv, &ep_rp->ep_lock);
1425 		D2("get_state: done, curr state = %d\n", ep_rp->ep_state);
1426 	}
1427 	ASSERT(ep_rp->ep_state != DAPLKA_EP_STATE_TRANSITIONING);
1428 	old_state = ep_rp->ep_state;
1429 
1430 	/*
1431 	 * an ep that is in the FREED state cannot transition
1432 	 * back to any of the regular states
1433 	 */
1434 	if (old_state != DAPLKA_EP_STATE_FREED) {
1435 		ep_rp->ep_state = DAPLKA_EP_STATE_TRANSITIONING;
1436 	}
1437 	mutex_exit(&ep_rp->ep_lock);
1438 	return (old_state);
1439 }
1440 
1441 /*
1442  * EP state transition diagram
1443  *
1444  *              CLOSED<-------------------
1445  *                |                      |
1446  *                |                      |
1447  *     ------------------------          |
1448  *     |                      |          |
1449  *     |                      |          |
1450  *     v                      v          |
1451  *   CONNECTING       ACCEPTING          |
1452  *     |  |   |       |       |          |
1453  *     |  |   |       |       |          |
1454  *     |  |   |       |       |          |
1455  *     |  |   |_______|_______|          |
1456  *     |  |           |   |   |          |
1457  *     |  |___________|   |   |          |
1458  *     |        |         |   |          |
1459  *     |        v         |   |---->DISCONNECTED
1460  *     |     CONNECTED    |              ^
1461  *     v        |         |              |
1462  *    ABORTING  |---------|--------------|
1463  *     |        |         |              |
1464  *     |        |         v              |
1465  *     |        |-------->DISCONNECTING--|
1466  *     |                                 |
1467  *     |---------------------------------|
1468  *
1469  *	*not shown in this diagram:
1470  *	    -loopback transitions
1471  *	    -transitions to the FREED state
1472  */
1473 static boolean_t
daplka_ep_transition_is_valid(uint32_t old_state,uint32_t new_state)1474 daplka_ep_transition_is_valid(uint32_t old_state, uint32_t new_state)
1475 {
1476 	boolean_t valid = B_FALSE;
1477 
1478 	/*
1479 	 * reseting to the same state is a no-op and is always
1480 	 * permitted. transitioning to the FREED state indicates
1481 	 * that the ep is about to be freed and no further operation
1482 	 * is allowed on it. to support abrupt close, the ep is
1483 	 * permitted to transition to the FREED state from any state.
1484 	 */
1485 	if (old_state == new_state ||
1486 	    new_state == DAPLKA_EP_STATE_FREED) {
1487 		return (B_TRUE);
1488 	}
1489 
1490 	switch (old_state) {
1491 	case DAPLKA_EP_STATE_CLOSED:
1492 		/*
1493 		 * this is the initial ep_state.
1494 		 * a transition to CONNECTING or ACCEPTING may occur
1495 		 * upon calling daplka_ep_connect or daplka_cr_accept,
1496 		 * respectively.
1497 		 */
1498 		if (new_state == DAPLKA_EP_STATE_CONNECTING ||
1499 		    new_state == DAPLKA_EP_STATE_ACCEPTING) {
1500 			valid = B_TRUE;
1501 		}
1502 		break;
1503 	case DAPLKA_EP_STATE_CONNECTING:
1504 		/*
1505 		 * we transition to this state if daplka_ep_connect
1506 		 * is successful. from this state, we can transition
1507 		 * to CONNECTED if daplka_cm_rc_conn_est gets called;
1508 		 * or to DISCONNECTED if daplka_cm_rc_conn_closed or
1509 		 * daplka_cm_rc_event_failure gets called. If the
1510 		 * client calls daplka_ep_disconnect, we transition
1511 		 * to DISCONNECTING. If a timer was set at ep_connect
1512 		 * time and if the timer expires prior to any of the
1513 		 * CM callbacks, we transition to ABORTING and then
1514 		 * to DISCONNECTED.
1515 		 */
1516 		if (new_state == DAPLKA_EP_STATE_CONNECTED ||
1517 		    new_state == DAPLKA_EP_STATE_DISCONNECTING ||
1518 		    new_state == DAPLKA_EP_STATE_DISCONNECTED ||
1519 		    new_state == DAPLKA_EP_STATE_ABORTING) {
1520 			valid = B_TRUE;
1521 		}
1522 		break;
1523 	case DAPLKA_EP_STATE_ACCEPTING:
1524 		/*
1525 		 * we transition to this state if daplka_cr_accept
1526 		 * is successful. from this state, we can transition
1527 		 * to CONNECTED if daplka_cm_service_conn_est gets called;
1528 		 * or to DISCONNECTED if daplka_cm_service_conn_closed or
1529 		 * daplka_cm_service_event_failure gets called. If the
1530 		 * client calls daplka_ep_disconnect, we transition to
1531 		 * DISCONNECTING.
1532 		 */
1533 		if (new_state == DAPLKA_EP_STATE_CONNECTED ||
1534 		    new_state == DAPLKA_EP_STATE_DISCONNECTING ||
1535 		    new_state == DAPLKA_EP_STATE_DISCONNECTED) {
1536 			valid = B_TRUE;
1537 		}
1538 		break;
1539 	case DAPLKA_EP_STATE_CONNECTED:
1540 		/*
1541 		 * we transition to this state if a active or passive
1542 		 * connection gets established. if the client calls
1543 		 * daplka_ep_disconnect, we transition to the
1544 		 * DISCONNECTING state. subsequent CM callbacks will
1545 		 * cause ep_state to be set to DISCONNECTED. If the
1546 		 * remote peer terminates the connection before we do,
1547 		 * it is possible for us to transition directly from
1548 		 * CONNECTED to DISCONNECTED.
1549 		 */
1550 		if (new_state == DAPLKA_EP_STATE_DISCONNECTING ||
1551 		    new_state == DAPLKA_EP_STATE_DISCONNECTED) {
1552 			valid = B_TRUE;
1553 		}
1554 		break;
1555 	case DAPLKA_EP_STATE_DISCONNECTING:
1556 		/*
1557 		 * we transition to this state if the client calls
1558 		 * daplka_ep_disconnect.
1559 		 */
1560 		if (new_state == DAPLKA_EP_STATE_DISCONNECTED) {
1561 			valid = B_TRUE;
1562 		}
1563 		break;
1564 	case DAPLKA_EP_STATE_ABORTING:
1565 		/*
1566 		 * we transition to this state if the active side
1567 		 * EP timer has expired. this is only a transient
1568 		 * state that is set during timer processing. when
1569 		 * timer processing completes, ep_state will become
1570 		 * DISCONNECTED.
1571 		 */
1572 		if (new_state == DAPLKA_EP_STATE_DISCONNECTED) {
1573 			valid = B_TRUE;
1574 		}
1575 		break;
1576 	case DAPLKA_EP_STATE_DISCONNECTED:
1577 		/*
1578 		 * we transition to this state if we get a closed
1579 		 * or event_failure CM callback. an expired timer
1580 		 * can also cause us to be in this state. this
1581 		 * is the only state in which we permit the
1582 		 * ep_reinit operation.
1583 		 */
1584 		if (new_state == DAPLKA_EP_STATE_CLOSED) {
1585 			valid = B_TRUE;
1586 		}
1587 		break;
1588 	default:
1589 		break;
1590 	}
1591 
1592 	if (!valid) {
1593 		DERR("ep_transition: invalid state change %d -> %d\n",
1594 		    old_state, new_state);
1595 	}
1596 	return (valid);
1597 }
1598 
1599 /*
1600  * first check if the transition is valid. then set ep_state
1601  * to new_state and wake up all waiters.
1602  */
1603 static void
daplka_ep_set_state(daplka_ep_resource_t * ep_rp,uint32_t old_state,uint32_t new_state)1604 daplka_ep_set_state(daplka_ep_resource_t *ep_rp, uint32_t old_state,
1605 	uint32_t new_state)
1606 {
1607 	boolean_t	valid;
1608 
1609 	ASSERT(new_state != DAPLKA_EP_STATE_TRANSITIONING);
1610 
1611 	valid = daplka_ep_transition_is_valid(old_state, new_state);
1612 	mutex_enter(&ep_rp->ep_lock);
1613 	if (ep_rp->ep_state != DAPLKA_EP_STATE_FREED) {
1614 		if (valid) {
1615 			ep_rp->ep_state = new_state;
1616 		} else {
1617 			/*
1618 			 * this case is impossible.
1619 			 * we have a serious problem if we get here.
1620 			 * instead of panicing, we reset the state to
1621 			 * old_state. doing this would at least prevent
1622 			 * threads from hanging due to ep_state being
1623 			 * stuck in TRANSITIONING.
1624 			 */
1625 			ep_rp->ep_state = old_state;
1626 			ASSERT(B_FALSE);
1627 		}
1628 	}
1629 	cv_broadcast(&ep_rp->ep_cv);
1630 	mutex_exit(&ep_rp->ep_lock);
1631 }
1632 
1633 /*
1634  * modifies RC channel attributes.
1635  * currently, only the rdma_in and rdma_out attributes may
1636  * be modified. the channel must be in quiescent state when
1637  * this function is called.
1638  */
1639 /* ARGSUSED */
1640 static int
daplka_ep_modify(daplka_ia_resource_t * ia_rp,intptr_t arg,int mode,cred_t * cred,int * rvalp)1641 daplka_ep_modify(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
1642 	cred_t *cred, int *rvalp)
1643 {
1644 	daplka_ep_resource_t		*ep_rp = NULL;
1645 	ibt_cep_modify_flags_t		good_flags;
1646 	ibt_rc_chan_modify_attr_t	rcm_attr;
1647 	ibt_hca_attr_t			*hca_attrp;
1648 	dapl_ep_modify_t		args;
1649 	ibt_status_t			status;
1650 	uint32_t			old_state, new_state;
1651 	int				retval = 0;
1652 
1653 	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_modify_t),
1654 	    mode);
1655 	if (retval != 0) {
1656 		DERR("ep_modify: copyin error %d\n", retval);
1657 		return (EFAULT);
1658 	}
1659 	ep_rp = (daplka_ep_resource_t *)
1660 	    daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epm_hkey);
1661 	if (ep_rp == NULL) {
1662 		DERR("ep_modify: cannot find ep resource\n");
1663 		return (EINVAL);
1664 	}
1665 	ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
1666 	new_state = old_state = daplka_ep_get_state(ep_rp);
1667 
1668 	if (old_state != DAPLKA_EP_STATE_CLOSED &&
1669 	    old_state != DAPLKA_EP_STATE_DISCONNECTED) {
1670 		DERR("ep_modify: invalid state %d\n", old_state);
1671 		retval = EINVAL;
1672 		goto cleanup;
1673 	}
1674 
1675 	good_flags = IBT_CEP_SET_RDMARA_OUT | IBT_CEP_SET_RDMARA_IN;
1676 	if ((args.epm_flags & ~good_flags) != 0) {
1677 		DERR("ep_modify: invalid flags 0x%x\n", args.epm_flags);
1678 		retval = EINVAL;
1679 		goto cleanup;
1680 	}
1681 
1682 	hca_attrp = &ia_rp->ia_hca->hca_attr;
1683 
1684 	bzero(&rcm_attr, sizeof (ibt_rc_chan_modify_attr_t));
1685 	if ((args.epm_flags & IBT_CEP_SET_RDMARA_OUT) != 0) {
1686 		if (args.epm_rdma_ra_out > hca_attrp->hca_max_rdma_out_chan) {
1687 			DERR("ep_modify: invalid epm_rdma_ra_out %d\n",
1688 			    args.epm_rdma_ra_out);
1689 			retval = EINVAL;
1690 			goto cleanup;
1691 		}
1692 		rcm_attr.rc_rdma_ra_out = args.epm_rdma_ra_out;
1693 	}
1694 	if ((args.epm_flags & IBT_CEP_SET_RDMARA_IN) != 0) {
1695 		if (args.epm_rdma_ra_in > hca_attrp->hca_max_rdma_in_chan) {
1696 			DERR("ep_modify: epm_rdma_ra_in %d\n",
1697 			    args.epm_rdma_ra_in);
1698 			retval = EINVAL;
1699 			goto cleanup;
1700 		}
1701 		rcm_attr.rc_rdma_ra_in = args.epm_rdma_ra_in;
1702 	}
1703 	status = ibt_modify_rc_channel(ep_rp->ep_chan_hdl, args.epm_flags,
1704 	    &rcm_attr, NULL);
1705 	if (status != IBT_SUCCESS) {
1706 		DERR("ep_modify: modify_rc_channel returned %d\n", status);
1707 		*rvalp = (int)status;
1708 		retval = 0;
1709 		goto cleanup;
1710 	}
1711 
1712 	/*
1713 	 * ep_modify does not change ep_state
1714 	 */
1715 cleanup:;
1716 	daplka_ep_set_state(ep_rp, old_state, new_state);
1717 	DAPLKA_RS_UNREF(ep_rp);
1718 	return (retval);
1719 }
1720 
1721 /*
1722  * Frees a EP resource.
1723  * a EP may only be freed when it is in the CLOSED or
1724  * DISCONNECTED state.
1725  */
1726 /* ARGSUSED */
1727 static int
daplka_ep_free(daplka_ia_resource_t * ia_rp,intptr_t arg,int mode,cred_t * cred,int * rvalp)1728 daplka_ep_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
1729 	cred_t *cred, int *rvalp)
1730 {
1731 	daplka_ep_resource_t	*ep_rp = NULL;
1732 	dapl_ep_free_t		args;
1733 	uint32_t		old_state, new_state;
1734 	int			retval;
1735 
1736 	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_free_t), mode);
1737 	if (retval != 0) {
1738 		DERR("ep_free: copyin error %d\n", retval);
1739 		return (EFAULT);
1740 	}
1741 	ep_rp = (daplka_ep_resource_t *)
1742 	    daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epf_hkey);
1743 	if (ep_rp == NULL) {
1744 		DERR("ep_free: cannot find ep resource\n");
1745 		return (EINVAL);
1746 	}
1747 	ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
1748 	new_state = old_state = daplka_ep_get_state(ep_rp);
1749 
1750 	/*
1751 	 * ep cannot be freed if it is in an invalid state.
1752 	 */
1753 	if (old_state != DAPLKA_EP_STATE_CLOSED &&
1754 	    old_state != DAPLKA_EP_STATE_DISCONNECTED) {
1755 		DERR("ep_free: invalid state %d\n", old_state);
1756 		retval = EINVAL;
1757 		goto cleanup;
1758 	}
1759 	ep_rp = NULL;
1760 	retval = daplka_hash_remove(&ia_rp->ia_ep_htbl,
1761 	    args.epf_hkey, (void **)&ep_rp);
1762 	if (retval != 0 || ep_rp == NULL) {
1763 		/*
1764 		 * this is only possible if we have two threads
1765 		 * calling ep_free in parallel.
1766 		 */
1767 		DERR("ep_free: cannot find ep resource\n");
1768 		goto cleanup;
1769 	}
1770 	/* there should not be any outstanding timers */
1771 	ASSERT(ep_rp->ep_timer_hkey == 0);
1772 
1773 	new_state = DAPLKA_EP_STATE_FREED;
1774 	daplka_ep_set_state(ep_rp, old_state, new_state);
1775 
1776 	/* remove reference obtained by lookup */
1777 	DAPLKA_RS_UNREF(ep_rp);
1778 
1779 	/* UNREF calls the actual free function when refcnt is zero */
1780 	DAPLKA_RS_UNREF(ep_rp);
1781 	return (0);
1782 
1783 cleanup:;
1784 	daplka_ep_set_state(ep_rp, old_state, new_state);
1785 
1786 	/* remove reference obtained by lookup */
1787 	DAPLKA_RS_UNREF(ep_rp);
1788 	return (retval);
1789 }
1790 
1791 /*
1792  * The following routines supports the timeout feature of ep_connect.
1793  * Refer to the description of ep_connect for details.
1794  */
1795 
1796 /*
1797  * this is the timer processing thread.
1798  */
1799 static void
daplka_timer_thread(void * arg)1800 daplka_timer_thread(void *arg)
1801 {
1802 	daplka_timer_info_t	*timerp = (daplka_timer_info_t *)arg;
1803 	daplka_ep_resource_t	*ep_rp;
1804 	daplka_evd_event_t	*disc_ev = NULL;
1805 	ibt_status_t		status;
1806 	int			old_state, new_state;
1807 
1808 	ep_rp = timerp->ti_ep_res;
1809 	ASSERT(ep_rp != NULL);
1810 	ASSERT(timerp->ti_tmo_id != 0);
1811 	timerp->ti_tmo_id = 0;
1812 
1813 	new_state = old_state = daplka_ep_get_state(ep_rp);
1814 	if (old_state != DAPLKA_EP_STATE_CONNECTING) {
1815 		/* unblock hash_ep_free */
1816 		mutex_enter(&ep_rp->ep_lock);
1817 		ASSERT(ep_rp->ep_timer_hkey != 0);
1818 		ep_rp->ep_timer_hkey = 0;
1819 		cv_broadcast(&ep_rp->ep_cv);
1820 		mutex_exit(&ep_rp->ep_lock);
1821 
1822 		/* reset state to original state */
1823 		daplka_ep_set_state(ep_rp, old_state, new_state);
1824 
1825 		/* this function will also unref ep_rp */
1826 		daplka_timer_info_free(timerp);
1827 		return;
1828 	}
1829 
1830 	ASSERT(ep_rp->ep_timer_hkey != 0);
1831 	ep_rp->ep_timer_hkey = 0;
1832 
1833 	/*
1834 	 * we cannot keep ep_state in TRANSITIONING if we call
1835 	 * ibt_close_rc_channel in blocking mode. this would cause
1836 	 * a deadlock because the cm callbacks will be blocked and
1837 	 * will not be able to wake us up.
1838 	 */
1839 	new_state = DAPLKA_EP_STATE_ABORTING;
1840 	daplka_ep_set_state(ep_rp, old_state, new_state);
1841 
1842 	/*
1843 	 * when we return from close_rc_channel, all callbacks should have
1844 	 * completed. we can also be certain that these callbacks did not
1845 	 * enqueue any events to conn_evd.
1846 	 */
1847 	status = ibt_close_rc_channel(ep_rp->ep_chan_hdl, IBT_BLOCKING,
1848 	    NULL, 0, NULL, NULL, NULL);
1849 	if (status != IBT_SUCCESS) {
1850 		DERR("timer_thread: ibt_close_rc_channel returned %d\n",
1851 		    status);
1852 	}
1853 	old_state = daplka_ep_get_state(ep_rp);
1854 
1855 	/*
1856 	 * this is the only thread that can transition ep_state out
1857 	 * of ABORTING. all other ep operations would fail when
1858 	 * ep_state is in ABORTING.
1859 	 */
1860 	ASSERT(old_state == DAPLKA_EP_STATE_ABORTING);
1861 
1862 	disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_SLEEP);
1863 	ASSERT(disc_ev != NULL);
1864 
1865 	disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_TIMED_OUT;
1866 	disc_ev->ee_cmev.ec_cm_cookie = ep_rp->ep_cookie;
1867 	disc_ev->ee_cmev.ec_cm_is_passive = B_FALSE;
1868 	disc_ev->ee_cmev.ec_cm_psep_cookie = 0;
1869 	disc_ev->ee_cmev.ec_cm_ev_priv_data = NULL;
1870 	disc_ev->ee_cmev.ec_cm_ev_priv_data_len = 0;
1871 
1872 	D2("timer_thread: enqueue event(%p) evdp(%p)\n",
1873 	    disc_ev, ep_rp->ep_conn_evd);
1874 
1875 	new_state = DAPLKA_EP_STATE_DISCONNECTED;
1876 	daplka_ep_set_state(ep_rp, old_state, new_state);
1877 
1878 	daplka_evd_wakeup(ep_rp->ep_conn_evd,
1879 	    &ep_rp->ep_conn_evd->evd_conn_events, disc_ev);
1880 
1881 	/* this function will also unref ep_rp */
1882 	daplka_timer_info_free(timerp);
1883 }
1884 
1885 /*
1886  * dispatches a thread to continue with timer processing.
1887  */
1888 static void
daplka_timer_dispatch(void * arg)1889 daplka_timer_dispatch(void *arg)
1890 {
1891 	/*
1892 	 * keep rescheduling this function until
1893 	 * taskq_dispatch succeeds.
1894 	 */
1895 	if (taskq_dispatch(daplka_taskq,
1896 	    daplka_timer_thread, arg, TQ_NOSLEEP) == 0) {
1897 		DERR("timer_dispatch: taskq_dispatch failed, retrying...\n");
1898 		(void) timeout(daplka_timer_dispatch, arg, 10);
1899 	}
1900 }
1901 
1902 /*
1903  * this function is called by the kernel's callout thread.
1904  * we first attempt to remove the timer object from the
1905  * global timer table. if it is found, we dispatch a thread
1906  * to continue processing the timer object. if it is not
1907  * found, that means the timer has been cancelled by someone
1908  * else.
1909  */
1910 static void
daplka_timer_handler(void * arg)1911 daplka_timer_handler(void *arg)
1912 {
1913 	uint64_t		timer_hkey = (uintptr_t)arg;
1914 	daplka_timer_info_t	*timerp = NULL;
1915 
1916 	D2("timer_handler: timer_hkey 0x%llx\n", (longlong_t)timer_hkey);
1917 
1918 	(void) daplka_hash_remove(&daplka_timer_info_htbl,
1919 	    timer_hkey, (void **)&timerp);
1920 	if (timerp == NULL) {
1921 		D2("timer_handler: timer already cancelled\n");
1922 		return;
1923 	}
1924 	daplka_timer_dispatch((void *)timerp);
1925 }
1926 
1927 /*
1928  * allocates a timer_info object.
1929  * a reference to a EP is held by this object. this ensures
1930  * that the EP stays valid when a timer is outstanding.
1931  */
1932 static daplka_timer_info_t *
daplka_timer_info_alloc(daplka_ep_resource_t * ep_rp)1933 daplka_timer_info_alloc(daplka_ep_resource_t *ep_rp)
1934 {
1935 	daplka_timer_info_t	*timerp;
1936 
1937 	timerp = kmem_zalloc(sizeof (*timerp), daplka_km_flags);
1938 	if (timerp == NULL) {
1939 		DERR("timer_info_alloc: cannot allocate timer info\n");
1940 		return (NULL);
1941 	}
1942 	timerp->ti_ep_res = ep_rp;
1943 	timerp->ti_tmo_id = 0;
1944 
1945 	return (timerp);
1946 }
1947 
1948 /*
1949  * Frees the timer_info object.
1950  * we release the EP reference before freeing the object.
1951  */
1952 static void
daplka_timer_info_free(daplka_timer_info_t * timerp)1953 daplka_timer_info_free(daplka_timer_info_t *timerp)
1954 {
1955 	ASSERT(timerp->ti_ep_res != NULL);
1956 	DAPLKA_RS_UNREF(timerp->ti_ep_res);
1957 	timerp->ti_ep_res = NULL;
1958 	ASSERT(timerp->ti_tmo_id == 0);
1959 	kmem_free(timerp, sizeof (*timerp));
1960 }
1961 
1962 /*
1963  * cancels the timer set by ep_connect.
1964  * returns -1 if timer handling is in progress
1965  * and 0 otherwise.
1966  */
1967 static int
daplka_cancel_timer(daplka_ep_resource_t * ep_rp)1968 daplka_cancel_timer(daplka_ep_resource_t *ep_rp)
1969 {
1970 	/*
1971 	 * this function can only be called when ep_state
1972 	 * is frozen.
1973 	 */
1974 	ASSERT(ep_rp->ep_state == DAPLKA_EP_STATE_TRANSITIONING);
1975 	if (ep_rp->ep_timer_hkey != 0) {
1976 		daplka_timer_info_t	*timerp = NULL;
1977 
1978 		(void) daplka_hash_remove(&daplka_timer_info_htbl,
1979 		    ep_rp->ep_timer_hkey, (void **)&timerp);
1980 		if (timerp == NULL) {
1981 			/*
1982 			 * this is possible if the timer_handler has
1983 			 * removed the timerp but the taskq thread has
1984 			 * not transitioned the ep_state to DISCONNECTED.
1985 			 * we need to reset the ep_state to allow the
1986 			 * taskq thread to continue with its work. the
1987 			 * taskq thread will set the ep_timer_hkey to 0
1988 			 * so we don't have to do it here.
1989 			 */
1990 			DERR("cancel_timer: timer is being processed\n");
1991 			return (-1);
1992 		}
1993 		/*
1994 		 * we got the timer object. if the handler fires at
1995 		 * this point, it will not be able to find the object
1996 		 * and will return immediately. normally, ti_tmo_id gets
1997 		 * cleared when the handler fires.
1998 		 */
1999 		ASSERT(timerp->ti_tmo_id != 0);
2000 
2001 		/*
2002 		 * note that untimeout can possibly call the handler.
2003 		 * we are safe because the handler will be a no-op.
2004 		 */
2005 		(void) untimeout(timerp->ti_tmo_id);
2006 		timerp->ti_tmo_id = 0;
2007 		daplka_timer_info_free(timerp);
2008 		ep_rp->ep_timer_hkey = 0;
2009 	}
2010 	return (0);
2011 }
2012 
2013 /*
2014  * this function is called by daplka_hash_destroy for
2015  * freeing timer_info objects
2016  */
2017 static void
daplka_hash_timer_free(void * obj)2018 daplka_hash_timer_free(void *obj)
2019 {
2020 	daplka_timer_info_free((daplka_timer_info_t *)obj);
2021 }
2022 
2023 /* ARGSUSED */
2024 static uint16_t
daplka_hellomsg_cksum(DAPL_PRIVATE * dp)2025 daplka_hellomsg_cksum(DAPL_PRIVATE *dp)
2026 {
2027 	uint8_t *bp;
2028 	int i;
2029 	uint16_t cksum = 0;
2030 
2031 	bp = (uint8_t *)dp;
2032 	for (i = 0; i < sizeof (DAPL_PRIVATE); i++) {
2033 		cksum += bp[i];
2034 	}
2035 	return (cksum);
2036 }
2037 
2038 /*
2039  * ep_connect is called by the client to initiate a connection to a
2040  * remote service point. It is a non-blocking call. If a non-zero
2041  * timeout is specified by the client, a timer will be set just before
2042  * returning from ep_connect. Upon a successful return from ep_connect,
2043  * the client will call evd_wait to wait for the connection to complete.
2044  * If the connection is rejected or has failed due to an error, the
2045  * client will be notified with an event containing the appropriate error
2046  * code. If the connection is accepted, the client will be notified with
2047  * the CONN_ESTABLISHED event. If the timer expires before either of the
2048  * above events (error or established), a TIMED_OUT event will be delivered
2049  * to the client.
2050  *
2051  * the complicated part of the timer logic is the handling of race
2052  * conditions with CM callbacks. we need to ensure that either the CM or
2053  * the timer thread gets to deliver an event, but not both. when the
2054  * CM callback is about to deliver an event, it always tries to cancel
2055  * the outstanding timer. if cancel_timer indicates a that the timer is
2056  * already being processed, the CM callback will simply return without
2057  * delivering an event. when the timer thread executes, it tries to check
2058  * if the EP is still in CONNECTING state (timers only work on the active
2059  * side). if the EP is not in this state, the timer thread will return
2060  * without delivering an event.
2061  */
2062 /* ARGSUSED */
2063 static int
daplka_ep_connect(daplka_ia_resource_t * ia_rp,intptr_t arg,int mode,cred_t * cred,int * rvalp)2064 daplka_ep_connect(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
2065 	cred_t *cred, int *rvalp)
2066 {
2067 	daplka_ep_resource_t	*ep_rp = NULL;
2068 	dapl_ep_connect_t	args;
2069 	daplka_timer_info_t	*timerp = NULL;
2070 	uint32_t		old_state, new_state;
2071 	boolean_t		timer_inserted = B_FALSE;
2072 	uint64_t		timer_hkey = 0;
2073 	ibt_path_info_t		path_info;
2074 	ibt_path_attr_t		path_attr;
2075 	ibt_hca_attr_t		*hca_attrp;
2076 	ibt_chan_open_args_t	chan_args;
2077 	ibt_status_t		status = IBT_SUCCESS;
2078 	uint8_t			num_paths;
2079 	void			*priv_data;
2080 	DAPL_PRIVATE		*dp;
2081 	int			retval = 0;
2082 	ib_gid_t		*sgid;
2083 	ib_gid_t		*dgid;
2084 	uint64_t		dgid_ored;
2085 	ibt_ar_t		ar_query_s;
2086 	ibt_ar_t		ar_result_s;
2087 	ibt_path_flags_t	pathflags;
2088 
2089 	D3("ep_connect: enter\n");
2090 	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_connect_t),
2091 	    mode);
2092 	if (retval != 0) {
2093 		DERR("ep_connect: copyin error %d\n", retval);
2094 		return (EFAULT);
2095 	}
2096 	ep_rp = (daplka_ep_resource_t *)
2097 	    daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epc_hkey);
2098 	if (ep_rp == NULL) {
2099 		DERR("ep_connect: cannot find ep resource\n");
2100 		return (EINVAL);
2101 	}
2102 	ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
2103 
2104 	new_state = old_state = daplka_ep_get_state(ep_rp);
2105 	if (old_state != DAPLKA_EP_STATE_CLOSED) {
2106 		DERR("ep_connect: invalid state %d\n", old_state);
2107 		retval = EINVAL;
2108 		goto cleanup;
2109 	}
2110 	if (args.epc_priv_sz > DAPL_MAX_PRIVATE_DATA_SIZE) {
2111 		DERR("ep_connect: private data len (%d) exceeded "
2112 		    "max size %d\n", args.epc_priv_sz,
2113 		    DAPL_MAX_PRIVATE_DATA_SIZE);
2114 		retval = EINVAL;
2115 		goto cleanup;
2116 	}
2117 
2118 	/*
2119 	 * check for remote ipaddress to dgid resolution needs ATS
2120 	 */
2121 	dgid = &args.epc_dgid;
2122 	dgid_ored = dgid->gid_guid | dgid->gid_prefix;
2123 #if defined(DAPLKA_DEBUG_FORCE_ATS)
2124 	dgid_ored = 0ULL;
2125 #endif /* DAPLKA_DEBUG_FORCE_ATS */
2126 	/* check for unidentified dgid */
2127 	if (dgid_ored == 0ULL) {
2128 		/*
2129 		 * setup for ibt_query_ar()
2130 		 */
2131 		sgid = &ia_rp->ia_hca_sgid;
2132 		ar_query_s.ar_gid.gid_guid = 0ULL;
2133 		ar_query_s.ar_gid.gid_prefix = 0ULL;
2134 		ar_query_s.ar_pkey = 0;
2135 		bcopy(args.epc_raddr_sadata.iad_sadata,
2136 		    ar_query_s.ar_data, DAPL_ATS_NBYTES);
2137 #define	UR(b) ar_query_s.ar_data[(b)]
2138 		D3("daplka_ep_connect: SA[8] %d.%d.%d.%d\n",
2139 		    UR(8), UR(9), UR(10), UR(11));
2140 		D3("daplka_ep_connect: SA[12] %d.%d.%d.%d\n",
2141 		    UR(12), UR(13), UR(14), UR(15));
2142 		status = ibt_query_ar(sgid, &ar_query_s, &ar_result_s);
2143 		if (status != IBT_SUCCESS) {
2144 			DERR("ep_connect: ibt_query_ar returned %d\n", status);
2145 			*rvalp = (int)status;
2146 			retval = 0;
2147 			goto cleanup;
2148 		}
2149 		/*
2150 		 * dgid identified from SA record
2151 		 */
2152 		dgid = &ar_result_s.ar_gid;
2153 		D2("daplka_ep_connect: ATS dgid=%llx:%llx\n",
2154 		    (longlong_t)dgid->gid_prefix, (longlong_t)dgid->gid_guid);
2155 	}
2156 
2157 	bzero(&path_info, sizeof (ibt_path_info_t));
2158 	bzero(&path_attr, sizeof (ibt_path_attr_t));
2159 	bzero(&chan_args, sizeof (ibt_chan_open_args_t));
2160 
2161 	path_attr.pa_dgids = dgid;
2162 	path_attr.pa_num_dgids = 1;
2163 	/*
2164 	 * don't set sid in path_attr saves 1 SA query
2165 	 * Also makes server side not to write the service record
2166 	 */
2167 	path_attr.pa_sgid = ia_rp->ia_hca_sgid;
2168 	path_attr.pa_pkey = ia_rp->ia_port_pkey;
2169 
2170 	/* save the connection ep  - struct copy */
2171 	ep_rp->ep_sgid = ia_rp->ia_hca_sgid;
2172 	ep_rp->ep_dgid = *dgid;
2173 
2174 	num_paths = 0;
2175 	pathflags = IBT_PATH_PKEY;
2176 	/* enable APM on remote port but not on loopback case */
2177 	if (daplka_apm && ((dgid->gid_prefix != path_attr.pa_sgid.gid_prefix) ||
2178 	    (dgid->gid_guid != path_attr.pa_sgid.gid_guid))) {
2179 		pathflags |= IBT_PATH_APM;
2180 	}
2181 	status = ibt_get_paths(daplka_dev->daplka_clnt_hdl,
2182 	    pathflags, &path_attr, 1, &path_info, &num_paths);
2183 
2184 	if (status != IBT_SUCCESS && status != IBT_INSUFF_DATA) {
2185 		DERR("ep_connect: ibt_get_paths returned %d paths %d\n",
2186 		    status, num_paths);
2187 		*rvalp = (int)status;
2188 		retval = 0;
2189 		goto cleanup;
2190 	}
2191 	/* fill in the sid directly to path_info */
2192 	path_info.pi_sid = args.epc_sid;
2193 	hca_attrp = &ia_rp->ia_hca->hca_attr;
2194 
2195 	/* fill in open channel args */
2196 	chan_args.oc_path = &path_info;
2197 	chan_args.oc_cm_handler = daplka_cm_rc_handler;
2198 	chan_args.oc_cm_clnt_private = (void *)ep_rp;
2199 	chan_args.oc_rdma_ra_out = hca_attrp->hca_max_rdma_out_chan;
2200 	chan_args.oc_rdma_ra_in = hca_attrp->hca_max_rdma_in_chan;
2201 	chan_args.oc_path_retry_cnt = 7;	/* 3-bit field */
2202 	chan_args.oc_path_rnr_retry_cnt = IBT_RNR_INFINITE_RETRY;
2203 
2204 	ASSERT(args.epc_priv_sz > 0);
2205 	priv_data = (void *)args.epc_priv;
2206 
2207 	chan_args.oc_priv_data_len = args.epc_priv_sz;
2208 	chan_args.oc_priv_data = priv_data;
2209 
2210 	/*
2211 	 * calculate checksum value of hello message and
2212 	 * put hello message in networking byte order
2213 	 */
2214 	dp = (DAPL_PRIVATE *)priv_data;
2215 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dp))
2216 	dp->hello_msg.hi_port = htons(dp->hello_msg.hi_port);
2217 	dp->hello_msg.hi_checksum = 0;
2218 	dp->hello_msg.hi_checksum = htons(daplka_hellomsg_cksum(dp));
2219 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*dp))
2220 
2221 	if (args.epc_timeout > 0) {
2222 		/*
2223 		 * increment refcnt before passing reference to
2224 		 * timer_info_alloc.
2225 		 */
2226 		DAPLKA_RS_REF(ep_rp);
2227 		timerp = daplka_timer_info_alloc(ep_rp);
2228 		if (timerp == NULL) {
2229 			DERR("ep_connect: cannot allocate timer\n");
2230 			/*
2231 			 * we need to remove the reference if
2232 			 * allocation failed.
2233 			 */
2234 			DAPLKA_RS_UNREF(ep_rp);
2235 			retval = ENOMEM;
2236 			goto cleanup;
2237 		}
2238 		/*
2239 		 * We generate our own hkeys so that timer_hkey can fit
2240 		 * into a pointer and passed as an arg to timeout()
2241 		 */
2242 		timer_hkey = (uint64_t)daplka_timer_hkey_gen();
2243 		retval = daplka_hash_insert(&daplka_timer_info_htbl,
2244 		    &timer_hkey, (void *)timerp);
2245 		if (retval != 0) {
2246 			DERR("ep_connect: cannot insert timer info\n");
2247 			goto cleanup;
2248 		}
2249 		ASSERT(ep_rp->ep_timer_hkey == 0);
2250 		ep_rp->ep_timer_hkey = timer_hkey;
2251 		timer_inserted = B_TRUE;
2252 		D2("ep_connect: timer_hkey = 0x%llx\n",
2253 		    (longlong_t)timer_hkey);
2254 	}
2255 	status = ibt_open_rc_channel(ep_rp->ep_chan_hdl, IBT_OCHAN_NO_FLAGS,
2256 	    IBT_NONBLOCKING, &chan_args, NULL);
2257 
2258 	if (status != IBT_SUCCESS) {
2259 		DERR("ep_connect: ibt_open_rc_channel returned %d\n", status);
2260 		*rvalp = (int)status;
2261 		retval = 0;
2262 		goto cleanup;
2263 	}
2264 	/*
2265 	 * if a cm callback gets called at this point, it'll have to wait until
2266 	 * ep_state becomes connecting (or some other state if another thread
2267 	 * manages to get ahead of the callback). this guarantees that the
2268 	 * callback will not touch the timer until it gets set.
2269 	 */
2270 	if (timerp != NULL) {
2271 		clock_t		tmo;
2272 
2273 		tmo = drv_usectohz((clock_t)args.epc_timeout);
2274 		/*
2275 		 * We generate our own 32 bit timer_hkey so that it can fit
2276 		 * into a pointer
2277 		 */
2278 		ASSERT(timer_hkey != 0);
2279 		timerp->ti_tmo_id = timeout(daplka_timer_handler,
2280 		    (void *)(uintptr_t)timer_hkey, tmo);
2281 	}
2282 	new_state = DAPLKA_EP_STATE_CONNECTING;
2283 
2284 cleanup:;
2285 	if (timerp != NULL && (retval != 0 || status != IBT_SUCCESS)) {
2286 		/*
2287 		 * if ibt_open_rc_channel failed, the timerp must still
2288 		 * be in daplka_timer_info_htbl because neither the cm
2289 		 * callback nor the timer_handler will be called.
2290 		 */
2291 		if (timer_inserted) {
2292 			daplka_timer_info_t	*new_timerp = NULL;
2293 
2294 			ASSERT(timer_hkey != 0);
2295 			(void) daplka_hash_remove(&daplka_timer_info_htbl,
2296 			    timer_hkey, (void **)&new_timerp);
2297 			ASSERT(new_timerp == timerp);
2298 			ep_rp->ep_timer_hkey = 0;
2299 		}
2300 		daplka_timer_info_free(timerp);
2301 	}
2302 	daplka_ep_set_state(ep_rp, old_state, new_state);
2303 	DAPLKA_RS_UNREF(ep_rp);
2304 	D3("ep_connect: exit\n");
2305 	return (retval);
2306 }
2307 
2308 /*
2309  * ep_disconnect closes a connection with a remote peer.
2310  * if a connection has not been established, ep_disconnect
2311  * will instead flush all recv bufs posted to this channel.
2312  * if the EP state is CONNECTED, CONNECTING or ACCEPTING upon
2313  * entry to ep_disconnect, the EP state will transition to
2314  * DISCONNECTING upon exit. the CM callbacks triggered by
2315  * ibt_close_rc_channel will cause EP state to become
2316  * DISCONNECTED. This function is a no-op if EP state is
2317  * DISCONNECTED.
2318  */
2319 /* ARGSUSED */
2320 static int
daplka_ep_disconnect(daplka_ia_resource_t * ia_rp,intptr_t arg,int mode,cred_t * cred,int * rvalp)2321 daplka_ep_disconnect(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
2322 	cred_t *cred, int *rvalp)
2323 {
2324 	daplka_ep_resource_t	*ep_rp = NULL;
2325 	dapl_ep_disconnect_t	args;
2326 	ibt_status_t		status;
2327 	uint32_t		old_state, new_state;
2328 	int			retval = 0;
2329 
2330 	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_disconnect_t),
2331 	    mode);
2332 	if (retval != 0) {
2333 		DERR("ep_disconnect: copyin error %d\n", retval);
2334 		return (EFAULT);
2335 	}
2336 	ep_rp = (daplka_ep_resource_t *)
2337 	    daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epd_hkey);
2338 	if (ep_rp == NULL) {
2339 		DERR("ep_disconnect: cannot find ep resource\n");
2340 		return (EINVAL);
2341 	}
2342 	ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
2343 
2344 	new_state = old_state = daplka_ep_get_state(ep_rp);
2345 	if (old_state != DAPLKA_EP_STATE_CONNECTED &&
2346 	    old_state != DAPLKA_EP_STATE_CONNECTING &&
2347 	    old_state != DAPLKA_EP_STATE_ACCEPTING &&
2348 	    old_state != DAPLKA_EP_STATE_DISCONNECTED &&
2349 	    old_state != DAPLKA_EP_STATE_DISCONNECTING &&
2350 	    old_state != DAPLKA_EP_STATE_CLOSED) {
2351 		DERR("ep_disconnect: invalid state %d\n", old_state);
2352 		retval = EINVAL;
2353 		goto cleanup;
2354 	}
2355 
2356 	if ((old_state == DAPLKA_EP_STATE_DISCONNECTED) ||
2357 	    (old_state == DAPLKA_EP_STATE_DISCONNECTING)) {
2358 		D2("ep_disconnect: ep already disconnected\n");
2359 		retval = 0;
2360 		/* we leave the state as DISCONNECTED */
2361 		goto cleanup;
2362 	}
2363 	if (old_state == DAPLKA_EP_STATE_CONNECTING ||
2364 	    old_state == DAPLKA_EP_STATE_ACCEPTING) {
2365 		D2("ep_disconnect: aborting, old_state = %d\n", old_state);
2366 	}
2367 
2368 	/*
2369 	 * according to the udapl spec, ep_disconnect should
2370 	 * flush the channel if the channel is not CONNECTED.
2371 	 */
2372 	if (old_state == DAPLKA_EP_STATE_CLOSED) {
2373 		status = ibt_flush_channel(ep_rp->ep_chan_hdl);
2374 		if (status != IBT_SUCCESS) {
2375 			DERR("ep_disconnect: ibt_flush_channel failed %d\n",
2376 			    status);
2377 			*rvalp = (int)status;
2378 		}
2379 		retval = 0;
2380 		/* we leave the state as CLOSED */
2381 		goto cleanup;
2382 	}
2383 
2384 	new_state = DAPLKA_EP_STATE_DISCONNECTING;
2385 	daplka_ep_set_state(ep_rp, old_state, new_state);
2386 	status = ibt_close_rc_channel(ep_rp->ep_chan_hdl, IBT_NONBLOCKING,
2387 	    NULL, 0, NULL, NULL, NULL);
2388 
2389 	if (status == IBT_SUCCESS) {
2390 		DAPLKA_RS_UNREF(ep_rp);
2391 		return (retval);
2392 	} else {
2393 		DERR("ep_disconnect: ibt_close_rc_channel returned %d\n",
2394 		    status);
2395 		*rvalp = (int)status;
2396 		retval = 0;
2397 		new_state = old_state;
2398 	}
2399 
2400 cleanup:;
2401 	daplka_ep_set_state(ep_rp, old_state, new_state);
2402 	DAPLKA_RS_UNREF(ep_rp);
2403 	return (retval);
2404 }
2405 
2406 /*
2407  * this function resets the EP to a usable state (ie. from
2408  * DISCONNECTED to CLOSED). this function is best implemented using
2409  * the ibt_recycle_channel interface. until that is available, we will
2410  * instead clone and tear down the existing channel and replace the
2411  * existing channel with the cloned one.
2412  */
2413 /* ARGSUSED */
2414 static int
daplka_ep_reinit(daplka_ia_resource_t * ia_rp,intptr_t arg,int mode,cred_t * cred,int * rvalp)2415 daplka_ep_reinit(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
2416 	cred_t *cred, int *rvalp)
2417 {
2418 	daplka_ep_resource_t		*ep_rp = NULL;
2419 	dapl_ep_reinit_t		args;
2420 	ibt_status_t			status;
2421 	uint32_t			old_state, new_state;
2422 	int				retval = 0;
2423 
2424 	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ep_reinit_t),
2425 	    mode);
2426 	if (retval != 0) {
2427 		DERR("reinit: copyin error %d\n", retval);
2428 		return (EFAULT);
2429 	}
2430 	ep_rp = (daplka_ep_resource_t *)
2431 	    daplka_hash_lookup(&ia_rp->ia_ep_htbl, args.epri_hkey);
2432 	if (ep_rp == NULL) {
2433 		DERR("reinit: cannot find ep resource\n");
2434 		return (EINVAL);
2435 	}
2436 	ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
2437 	new_state = old_state = daplka_ep_get_state(ep_rp);
2438 	if ((old_state != DAPLKA_EP_STATE_CLOSED) &&
2439 	    (old_state != DAPLKA_EP_STATE_DISCONNECTED)) {
2440 		DERR("reinit: invalid state %d\n", old_state);
2441 		retval = EINVAL;
2442 		goto cleanup;
2443 	}
2444 
2445 	status = ibt_recycle_rc(ep_rp->ep_chan_hdl,
2446 	    IBT_CEP_RDMA_RD|IBT_CEP_RDMA_WR,
2447 	    ia_rp->ia_port_num, NULL, NULL);
2448 	if (status != IBT_SUCCESS) {
2449 		DERR("reinit: unable to clone channel\n");
2450 		*rvalp = (int)status;
2451 		retval = 0;
2452 		goto cleanup;
2453 	}
2454 	new_state = DAPLKA_EP_STATE_CLOSED;
2455 
2456 cleanup:;
2457 	daplka_ep_set_state(ep_rp, old_state, new_state);
2458 	DAPLKA_RS_UNREF(ep_rp);
2459 	return (retval);
2460 }
2461 
2462 /*
2463  * destroys a EP resource.
2464  * called when refcnt drops to zero.
2465  */
2466 static int
daplka_ep_destroy(daplka_resource_t * gen_rp)2467 daplka_ep_destroy(daplka_resource_t *gen_rp)
2468 {
2469 	daplka_ep_resource_t	*ep_rp = (daplka_ep_resource_t *)gen_rp;
2470 	ibt_status_t		status;
2471 
2472 	ASSERT(DAPLKA_RS_REFCNT(ep_rp) == 0);
2473 	ASSERT(ep_rp->ep_state == DAPLKA_EP_STATE_FREED);
2474 
2475 	/*
2476 	 * by the time we get here, we can be sure that
2477 	 * there is no outstanding timer.
2478 	 */
2479 	ASSERT(ep_rp->ep_timer_hkey == 0);
2480 
2481 	D3("ep_destroy: entering, ep_rp 0x%p, rnum %d\n",
2482 	    ep_rp, DAPLKA_RS_RNUM(ep_rp));
2483 	/*
2484 	 * free rc channel
2485 	 */
2486 	if (ep_rp->ep_chan_hdl != NULL) {
2487 		mutex_enter(&daplka_dev->daplka_mutex);
2488 		ibt_set_chan_private(ep_rp->ep_chan_hdl, NULL);
2489 		mutex_exit(&daplka_dev->daplka_mutex);
2490 		status = daplka_ibt_free_channel(ep_rp, ep_rp->ep_chan_hdl);
2491 		if (status != IBT_SUCCESS) {
2492 			DERR("ep_free: ibt_free_channel returned %d\n",
2493 			    status);
2494 		}
2495 		ep_rp->ep_chan_hdl = NULL;
2496 		D3("ep_destroy: qp freed, rnum %d\n", DAPLKA_RS_RNUM(ep_rp));
2497 	}
2498 	/*
2499 	 * release all references
2500 	 */
2501 	if (ep_rp->ep_snd_evd != NULL) {
2502 		DAPLKA_RS_UNREF(ep_rp->ep_snd_evd);
2503 		ep_rp->ep_snd_evd = NULL;
2504 	}
2505 	if (ep_rp->ep_rcv_evd != NULL) {
2506 		DAPLKA_RS_UNREF(ep_rp->ep_rcv_evd);
2507 		ep_rp->ep_rcv_evd = NULL;
2508 	}
2509 	if (ep_rp->ep_conn_evd != NULL) {
2510 		DAPLKA_RS_UNREF(ep_rp->ep_conn_evd);
2511 		ep_rp->ep_conn_evd = NULL;
2512 	}
2513 	if (ep_rp->ep_srq_res != NULL) {
2514 		DAPLKA_RS_UNREF(ep_rp->ep_srq_res);
2515 		ep_rp->ep_srq_res = NULL;
2516 	}
2517 	if (ep_rp->ep_pd_res != NULL) {
2518 		DAPLKA_RS_UNREF(ep_rp->ep_pd_res);
2519 		ep_rp->ep_pd_res = NULL;
2520 	}
2521 	cv_destroy(&ep_rp->ep_cv);
2522 	mutex_destroy(&ep_rp->ep_lock);
2523 
2524 	DAPLKA_RS_FINI(ep_rp);
2525 	kmem_free(ep_rp, sizeof (daplka_ep_resource_t));
2526 	D3("ep_destroy: exiting, ep_rp 0x%p\n", ep_rp);
2527 	return (0);
2528 }
2529 
2530 /*
2531  * this function is called by daplka_hash_destroy for
2532  * freeing EP resource objects
2533  */
2534 static void
daplka_hash_ep_free(void * obj)2535 daplka_hash_ep_free(void *obj)
2536 {
2537 	daplka_ep_resource_t	*ep_rp = (daplka_ep_resource_t *)obj;
2538 	ibt_status_t		status;
2539 	uint32_t		old_state, new_state;
2540 	int			retval;
2541 
2542 	old_state = daplka_ep_get_state(ep_rp);
2543 	retval = daplka_cancel_timer(ep_rp);
2544 	new_state = DAPLKA_EP_STATE_FREED;
2545 	daplka_ep_set_state(ep_rp, old_state, new_state);
2546 
2547 	if (retval != 0) {
2548 		D2("hash_ep_free: ep_rp 0x%p "
2549 		    "timer is still being processed\n", ep_rp);
2550 		mutex_enter(&ep_rp->ep_lock);
2551 		if (ep_rp->ep_timer_hkey != 0) {
2552 			D2("hash_ep_free: ep_rp 0x%p "
2553 			    "waiting for timer_hkey to be 0\n", ep_rp);
2554 			cv_wait(&ep_rp->ep_cv, &ep_rp->ep_lock);
2555 		}
2556 		mutex_exit(&ep_rp->ep_lock);
2557 	}
2558 
2559 	/* call ibt_close_rc_channel regardless of what state we are in */
2560 	status = ibt_close_rc_channel(ep_rp->ep_chan_hdl, IBT_BLOCKING,
2561 	    NULL, 0, NULL, NULL, NULL);
2562 	if (status != IBT_SUCCESS) {
2563 		if (old_state == DAPLKA_EP_STATE_CONNECTED ||
2564 		    old_state == DAPLKA_EP_STATE_CONNECTING ||
2565 		    old_state == DAPLKA_EP_STATE_ACCEPTING) {
2566 			DERR("hash_ep_free: ep_rp 0x%p state %d "
2567 			    "unexpected error %d from close_rc_channel\n",
2568 			    ep_rp, old_state, status);
2569 		}
2570 		D2("hash_ep_free: close_rc_channel, status %d\n", status);
2571 	}
2572 
2573 	DAPLKA_RS_UNREF(ep_rp);
2574 }
2575 
2576 /*
2577  * creates a EVD resource.
2578  * a EVD is used by the client to wait for events from one
2579  * or more sources.
2580  */
2581 /* ARGSUSED */
2582 static int
daplka_evd_create(daplka_ia_resource_t * ia_rp,intptr_t arg,int mode,cred_t * cred,int * rvalp)2583 daplka_evd_create(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
2584 	cred_t *cred, int *rvalp)
2585 {
2586 	daplka_evd_resource_t		*evd_rp = NULL;
2587 	daplka_async_evd_hkey_t		*async_evd;
2588 	ibt_hca_attr_t			*hca_attrp;
2589 	ibt_cq_attr_t			cq_attr;
2590 	dapl_evd_create_t		args;
2591 	uint64_t			evd_hkey = 0;
2592 	boolean_t			inserted = B_FALSE;
2593 	int				retval = 0;
2594 	ibt_status_t			status;
2595 
2596 	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_evd_create_t),
2597 	    mode);
2598 	if (retval != 0) {
2599 		DERR("evd_create: copyin error %d", retval);
2600 		return (EFAULT);
2601 	}
2602 	if ((args.evd_flags &
2603 	    ~(DAT_EVD_DEFAULT_FLAG | DAT_EVD_SOFTWARE_FLAG)) != 0) {
2604 		DERR("evd_create: invalid flags 0x%x\n", args.evd_flags);
2605 		return (EINVAL);
2606 	}
2607 
2608 	evd_rp = kmem_zalloc(sizeof (daplka_evd_resource_t), daplka_km_flags);
2609 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*evd_rp))
2610 	DAPLKA_RS_INIT(evd_rp, DAPL_TYPE_EVD,
2611 	    DAPLKA_RS_RNUM(ia_rp), daplka_evd_destroy);
2612 
2613 	mutex_init(&evd_rp->evd_lock, NULL, MUTEX_DRIVER, NULL);
2614 	cv_init(&evd_rp->evd_cv, NULL, CV_DRIVER, NULL);
2615 	evd_rp->evd_hca = ia_rp->ia_hca;
2616 	evd_rp->evd_flags = args.evd_flags;
2617 	evd_rp->evd_hca_hdl = ia_rp->ia_hca_hdl;
2618 	evd_rp->evd_cookie = args.evd_cookie;
2619 	evd_rp->evd_cno_res = NULL;
2620 	evd_rp->evd_cr_events.eel_event_type = DAPLKA_EVD_CM_EVENTS;
2621 	evd_rp->evd_conn_events.eel_event_type = DAPLKA_EVD_CM_EVENTS;
2622 	evd_rp->evd_async_events.eel_event_type = DAPLKA_EVD_ASYNC_EVENTS;
2623 
2624 	/*
2625 	 * if the client specified a non-zero cno_hkey, we
2626 	 * lookup the cno and save the reference for later use.
2627 	 */
2628 	if (args.evd_cno_hkey > 0) {
2629 		daplka_cno_resource_t *cno_rp;
2630 
2631 		cno_rp = (daplka_cno_resource_t *)
2632 		    daplka_hash_lookup(&ia_rp->ia_cno_htbl,
2633 		    args.evd_cno_hkey);
2634 		if (cno_rp == NULL) {
2635 			DERR("evd_create: cannot find cno resource\n");
2636 			goto cleanup;
2637 		}
2638 		ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);
2639 		evd_rp->evd_cno_res = cno_rp;
2640 	}
2641 	hca_attrp = &ia_rp->ia_hca->hca_attr;
2642 	if ((evd_rp->evd_flags &
2643 	    (DAT_EVD_DTO_FLAG | DAT_EVD_RMR_BIND_FLAG)) != 0) {
2644 		if (args.evd_cq_size > hca_attrp->hca_max_cq_sz) {
2645 			DERR("evd_create: invalid cq size %d",
2646 			    args.evd_cq_size);
2647 			retval = EINVAL;
2648 			goto cleanup;
2649 		}
2650 		cq_attr.cq_size = args.evd_cq_size;
2651 		cq_attr.cq_sched = NULL;
2652 		cq_attr.cq_flags = IBT_CQ_USER_MAP;
2653 
2654 		status = daplka_ibt_alloc_cq(evd_rp, evd_rp->evd_hca_hdl,
2655 		    &cq_attr, &evd_rp->evd_cq_hdl, &evd_rp->evd_cq_real_size);
2656 
2657 		if (status != IBT_SUCCESS) {
2658 			DERR("evd_create: ibt_alloc_cq returned %d", status);
2659 			*rvalp = (int)status;
2660 			retval = 0;
2661 			goto cleanup;
2662 		}
2663 
2664 		/*
2665 		 * store evd ptr with cq_hdl
2666 		 * mutex is only needed for race of "destroy" and "async"
2667 		 */
2668 		mutex_enter(&daplka_dev->daplka_mutex);
2669 		ibt_set_cq_private(evd_rp->evd_cq_hdl, (void *)evd_rp);
2670 		mutex_exit(&daplka_dev->daplka_mutex);
2671 
2672 		/* Get HCA-specific data_out info */
2673 		status = ibt_ci_data_out(evd_rp->evd_hca_hdl,
2674 		    IBT_CI_NO_FLAGS, IBT_HDL_CQ, (void *)evd_rp->evd_cq_hdl,
2675 		    &args.evd_cq_data_out, sizeof (args.evd_cq_data_out));
2676 
2677 		if (status != IBT_SUCCESS) {
2678 			DERR("evd_create: ibt_ci_data_out error(%d)", status);
2679 			*rvalp = (int)status;
2680 			retval = 0;
2681 			goto cleanup;
2682 		}
2683 
2684 		args.evd_cq_real_size = evd_rp->evd_cq_real_size;
2685 
2686 		ibt_set_cq_handler(evd_rp->evd_cq_hdl, daplka_cq_handler,
2687 		    (void *)evd_rp);
2688 	}
2689 
2690 	retval = daplka_hash_insert(&ia_rp->ia_evd_htbl,
2691 	    &evd_hkey, (void *)evd_rp);
2692 	if (retval != 0) {
2693 		DERR("evd_ceate: cannot insert evd %d\n", retval);
2694 		goto cleanup;
2695 	}
2696 	inserted = B_TRUE;
2697 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*evd_rp))
2698 
2699 	/*
2700 	 * If this evd handles async events need to add to the IA resource
2701 	 * async evd list
2702 	 */
2703 	if (evd_rp->evd_flags & DAT_EVD_ASYNC_FLAG) {
2704 		async_evd = kmem_zalloc(sizeof (daplka_async_evd_hkey_t),
2705 		    daplka_km_flags);
2706 		/* add the evd to the head of the list */
2707 		mutex_enter(&ia_rp->ia_lock);
2708 		async_evd->aeh_evd_hkey = evd_hkey;
2709 		async_evd->aeh_next = ia_rp->ia_async_evd_hkeys;
2710 		ia_rp->ia_async_evd_hkeys = async_evd;
2711 		mutex_exit(&ia_rp->ia_lock);
2712 	}
2713 
2714 	args.evd_hkey = evd_hkey;
2715 	retval = copyout(&args, (void *)arg, sizeof (dapl_evd_create_t));
2716 	if (retval != 0) {
2717 		DERR("evd_create: copyout error %d\n", retval);
2718 		retval = EFAULT;
2719 		goto cleanup;
2720 	}
2721 	return (0);
2722 
2723 cleanup:;
2724 	if (inserted) {
2725 		daplka_evd_resource_t *free_rp = NULL;
2726 
2727 		(void) daplka_hash_remove(&ia_rp->ia_evd_htbl, evd_hkey,
2728 		    (void **)&free_rp);
2729 		if (free_rp != evd_rp) {
2730 			DERR("evd_create: cannot remove evd\n");
2731 			/*
2732 			 * we can only get here if another thread
2733 			 * has completed the cleanup in evd_free
2734 			 */
2735 			return (retval);
2736 		}
2737 	}
2738 	DAPLKA_RS_UNREF(evd_rp);
2739 	return (retval);
2740 }
2741 
2742 /*
2743  * resizes CQ and returns new mapping info to library.
2744  */
2745 /* ARGSUSED */
2746 static int
daplka_cq_resize(daplka_ia_resource_t * ia_rp,intptr_t arg,int mode,cred_t * cred,int * rvalp)2747 daplka_cq_resize(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
2748 	cred_t *cred, int *rvalp)
2749 {
2750 	daplka_evd_resource_t		*evd_rp = NULL;
2751 	ibt_hca_attr_t			*hca_attrp;
2752 	dapl_cq_resize_t		args;
2753 	ibt_status_t			status;
2754 	int				retval = 0;
2755 
2756 	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cq_resize_t),
2757 	    mode);
2758 	if (retval != 0) {
2759 		DERR("cq_resize: copyin error %d\n", retval);
2760 		return (EFAULT);
2761 	}
2762 
2763 	/* get evd resource */
2764 	evd_rp = (daplka_evd_resource_t *)
2765 	    daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.cqr_evd_hkey);
2766 	if (evd_rp == NULL) {
2767 		DERR("cq_resize: cannot find evd resource\n");
2768 		return (EINVAL);
2769 	}
2770 	ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);
2771 
2772 	hca_attrp = &ia_rp->ia_hca->hca_attr;
2773 	if (args.cqr_cq_new_size > hca_attrp->hca_max_cq_sz) {
2774 		DERR("cq_resize: invalid cq size %d", args.cqr_cq_new_size);
2775 		retval = EINVAL;
2776 		goto cleanup;
2777 	}
2778 	/*
2779 	 * If ibt_resize_cq fails that it is primarily due to resource
2780 	 * shortage. Per IB spec resize will never loose events and
2781 	 * a resize error leaves the CQ intact. Therefore even if the
2782 	 * resize request fails we proceed and get the mapping data
2783 	 * from the CQ so that the library can mmap it.
2784 	 */
2785 	status = ibt_resize_cq(evd_rp->evd_cq_hdl, args.cqr_cq_new_size,
2786 	    &args.cqr_cq_real_size);
2787 	if (status != IBT_SUCCESS) {
2788 		/* we return the size of the old CQ if resize fails */
2789 		args.cqr_cq_real_size = evd_rp->evd_cq_real_size;
2790 		ASSERT(status != IBT_CQ_HDL_INVALID);
2791 		DERR("cq_resize: ibt_resize_cq failed:%d\n", status);
2792 	} else {
2793 		mutex_enter(&evd_rp->evd_lock);
2794 		evd_rp->evd_cq_real_size = args.cqr_cq_real_size;
2795 		mutex_exit(&evd_rp->evd_lock);
2796 	}
2797 
2798 	D2("cq_resize(%d): done new_sz(%u) real_sz(%u)\n",
2799 	    DAPLKA_RS_RNUM(evd_rp),
2800 	    args.cqr_cq_new_size, args.cqr_cq_real_size);
2801 
2802 	/* Get HCA-specific data_out info */
2803 	status = ibt_ci_data_out(evd_rp->evd_hca_hdl,
2804 	    IBT_CI_NO_FLAGS, IBT_HDL_CQ, (void *)evd_rp->evd_cq_hdl,
2805 	    &args.cqr_cq_data_out, sizeof (args.cqr_cq_data_out));
2806 	if (status != IBT_SUCCESS) {
2807 		DERR("cq_resize: ibt_ci_data_out error(%d)\n", status);
2808 		/* return ibt_ci_data_out status */
2809 		*rvalp = (int)status;
2810 		retval = 0;
2811 		goto cleanup;
2812 	}
2813 
2814 	retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_cq_resize_t),
2815 	    mode);
2816 	if (retval != 0) {
2817 		DERR("cq_resize: copyout error %d\n", retval);
2818 		retval = EFAULT;
2819 		goto cleanup;
2820 	}
2821 
2822 cleanup:;
2823 	if (evd_rp != NULL) {
2824 		DAPLKA_RS_UNREF(evd_rp);
2825 	}
2826 	return (retval);
2827 }
2828 
2829 /*
2830  * Routine to copyin the event poll message so that 32 bit libraries
2831  * can be safely supported
2832  */
2833 int
daplka_event_poll_copyin(intptr_t inarg,dapl_event_poll_t * outarg,int mode)2834 daplka_event_poll_copyin(intptr_t inarg, dapl_event_poll_t *outarg, int mode)
2835 {
2836 	int	retval;
2837 
2838 #ifdef _MULTI_DATAMODEL
2839 	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
2840 		dapl_event_poll32_t	args32;
2841 
2842 		retval = ddi_copyin((void *)inarg, &args32,
2843 		    sizeof (dapl_event_poll32_t), mode);
2844 		if (retval != 0) {
2845 			DERR("event_poll_copyin: 32bit error %d\n", retval);
2846 			return (EFAULT);
2847 		}
2848 
2849 		outarg->evp_evd_hkey = args32.evp_evd_hkey;
2850 		outarg->evp_threshold = args32.evp_threshold;
2851 		outarg->evp_timeout = args32.evp_timeout;
2852 		outarg->evp_ep = (dapl_ib_event_t *)(uintptr_t)args32.evp_ep;
2853 		outarg->evp_num_ev = args32.evp_num_ev;
2854 		outarg->evp_num_polled = args32.evp_num_polled;
2855 		return (0);
2856 	}
2857 #endif
2858 	retval = ddi_copyin((void *)inarg, outarg, sizeof (dapl_event_poll_t),
2859 	    mode);
2860 	if (retval != 0) {
2861 		DERR("event_poll: copyin error %d\n", retval);
2862 		return (EFAULT);
2863 	}
2864 
2865 	return (0);
2866 }
2867 
2868 /*
2869  * Routine to copyout the event poll message so that 32 bit libraries
2870  * can be safely supported
2871  */
2872 int
daplka_event_poll_copyout(dapl_event_poll_t * inarg,intptr_t outarg,int mode)2873 daplka_event_poll_copyout(dapl_event_poll_t *inarg, intptr_t outarg, int mode)
2874 {
2875 	int	retval;
2876 
2877 #ifdef _MULTI_DATAMODEL
2878 	if ((mode & DATAMODEL_MASK) == DATAMODEL_ILP32) {
2879 		dapl_event_poll32_t	args32;
2880 
2881 		args32.evp_evd_hkey = inarg->evp_evd_hkey;
2882 		args32.evp_threshold = inarg->evp_threshold;
2883 		args32.evp_timeout = inarg->evp_timeout;
2884 		args32.evp_ep = (caddr32_t)(uintptr_t)inarg->evp_ep;
2885 		args32.evp_num_ev = inarg->evp_num_ev;
2886 		args32.evp_num_polled = inarg->evp_num_polled;
2887 
2888 		retval = ddi_copyout((void *)&args32, (void *)outarg,
2889 		    sizeof (dapl_event_poll32_t), mode);
2890 		if (retval != 0) {
2891 			DERR("event_poll_copyout: 32bit error %d\n", retval);
2892 			return (EFAULT);
2893 		}
2894 		return (0);
2895 	}
2896 #endif
2897 	retval = ddi_copyout((void *)inarg, (void *)outarg,
2898 	    sizeof (dapl_event_poll_t), mode);
2899 	if (retval != 0) {
2900 		DERR("event_poll_copyout: error %d\n", retval);
2901 		return (EFAULT);
2902 	}
2903 
2904 	return (0);
2905 }
2906 
2907 /*
2908  * fucntion to handle CM REQ RCV private data from Solaris or third parties
2909  */
2910 /* ARGSUSED */
2911 static void
daplka_crevent_privdata_post(daplka_ia_resource_t * ia_rp,dapl_ib_event_t * evd_rp,daplka_evd_event_t * cr_ev)2912 daplka_crevent_privdata_post(daplka_ia_resource_t *ia_rp,
2913 	dapl_ib_event_t *evd_rp, daplka_evd_event_t *cr_ev)
2914 {
2915 	DAPL_PRIVATE	*dp;
2916 	ib_gid_t	*lgid;
2917 	ibt_ar_t	ar_query_s;
2918 	ibt_ar_t	ar_result_s;
2919 	DAPL_HELLO_MSG	*hip;
2920 	uint32_t	ipaddr_ord;
2921 	ibt_priv_data_len_t clen;
2922 	ibt_priv_data_len_t olen;
2923 	ibt_status_t	status;
2924 	uint16_t	cksum;
2925 
2926 	/*
2927 	 * get private data and len
2928 	 */
2929 	dp = (DAPL_PRIVATE *)cr_ev->ee_cmev.ec_cm_ev_priv_data;
2930 	clen = cr_ev->ee_cmev.ec_cm_ev_priv_data_len;
2931 #if defined(DAPLKA_DEBUG_FORCE_ATS)
2932 	/* skip the DAPL_PRIVATE chekcsum check */
2933 #else
2934 	/* for remote connects */
2935 	/* look up hello message in the CM private data area */
2936 	if (clen >= sizeof (DAPL_PRIVATE) &&
2937 	    (dp->hello_msg.hi_vers == DAPL_HELLO_MSG_VERS)) {
2938 		cksum = ntohs(dp->hello_msg.hi_checksum);
2939 		dp->hello_msg.hi_checksum = 0;
2940 		if (daplka_hellomsg_cksum(dp) == cksum) {
2941 			D2("daplka_crevent_privdata_post: Solaris msg\n");
2942 			evd_rp->ibe_ce.ibce_priv_data_size = clen;
2943 			dp->hello_msg.hi_checksum = DAPL_CHECKSUM;
2944 			dp->hello_msg.hi_port = ntohs(dp->hello_msg.hi_port);
2945 			bcopy(dp, evd_rp->ibe_ce.ibce_priv_data_ptr, clen);
2946 			kmem_free(dp, clen);
2947 			return;
2948 		}
2949 	}
2950 #endif /* DAPLKA_DEBUG_FORCE_ATS */
2951 
2952 	D2("daplka_crevent_privdata_post: 3rd party msg\n");
2953 	/* transpose CM private data into hello message */
2954 	if (clen) {
2955 		olen = clen;
2956 		if (clen > DAPL_CONSUMER_MAX_PRIVATE_DATA_SIZE) {
2957 			clen = DAPL_CONSUMER_MAX_PRIVATE_DATA_SIZE;
2958 		}
2959 		bcopy(dp, evd_rp->ibe_ce.ibce_priv_data_ptr, clen);
2960 		kmem_free(dp, olen);
2961 	} else {
2962 		bzero(evd_rp->ibe_ce.ibce_priv_data_ptr,
2963 		    DAPL_CONSUMER_MAX_PRIVATE_DATA_SIZE);
2964 	}
2965 	evd_rp->ibe_ce.ibce_priv_data_size = sizeof (DAPL_PRIVATE);
2966 	dp = (DAPL_PRIVATE *)evd_rp->ibe_ce.ibce_priv_data_ptr;
2967 	/*
2968 	 * fill in hello message
2969 	 */
2970 	hip = &dp->hello_msg;
2971 	hip->hi_checksum = DAPL_CHECKSUM;
2972 	hip->hi_clen = clen;
2973 	hip->hi_mid = 0;
2974 	hip->hi_vers = DAPL_HELLO_MSG_VERS;
2975 	hip->hi_port = 0;
2976 
2977 	/* assign sgid and dgid */
2978 	lgid = &ia_rp->ia_hca_sgid;
2979 	ar_query_s.ar_gid.gid_prefix =
2980 	    cr_ev->ee_cmev.ec_cm_req_prim_addr.gid_prefix;
2981 	ar_query_s.ar_gid.gid_guid =
2982 	    cr_ev->ee_cmev.ec_cm_req_prim_addr.gid_guid;
2983 	ar_query_s.ar_pkey = ia_rp->ia_port_pkey;
2984 	bzero(ar_query_s.ar_data, DAPL_ATS_NBYTES);
2985 
2986 	/* reverse ip address lookup through ATS */
2987 	status = ibt_query_ar(lgid, &ar_query_s, &ar_result_s);
2988 	if (status == IBT_SUCCESS) {
2989 		bcopy(ar_result_s.ar_data, hip->hi_saaddr, DAPL_ATS_NBYTES);
2990 		/* determine the address families */
2991 		ipaddr_ord = hip->hi_v4pad[0] | hip->hi_v4pad[1] |
2992 		    hip->hi_v4pad[2];
2993 		if (ipaddr_ord == 0) {
2994 			hip->hi_ipv = AF_INET;
2995 		} else {
2996 			hip->hi_ipv = AF_INET6;
2997 		}
2998 
2999 #define	UL(b) ar_result_s.ar_data[(b)]
3000 		D3("daplka_privdata_post: family=%d :SA[8] %d.%d.%d.%d\n",
3001 		    hip->hi_ipv, UL(8), UL(9), UL(10), UL(11));
3002 		D3("daplka_privdata_post: SA[12] %d.%d.%d.%d\n",
3003 		    UL(12), UL(13), UL(14), UL(15));
3004 	} else {
3005 		/* non-conformed third parties */
3006 		hip->hi_ipv = AF_UNSPEC;
3007 		bzero(hip->hi_saaddr, DAPL_ATS_NBYTES);
3008 	}
3009 }
3010 
3011 /*
3012  * this function is called by evd_wait and evd_dequeue to wait for
3013  * connection events and CQ notifications. typically this function
3014  * is called when the userland CQ is empty and the client has
3015  * specified a non-zero timeout to evd_wait. if the client is
3016  * interested in CQ events, the CQ must be armed in userland prior
3017  * to calling this function.
3018  */
3019 /* ARGSUSED */
3020 static int
daplka_event_poll(daplka_ia_resource_t * ia_rp,intptr_t arg,int mode,cred_t * cred,int * rvalp)3021 daplka_event_poll(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3022 	cred_t *cred, int *rvalp)
3023 {
3024 	daplka_evd_resource_t	*evd_rp = NULL;
3025 	dapl_event_poll_t	args;
3026 	daplka_evd_event_t	*head;
3027 	dapl_ib_event_t		evp_arr[NUM_EVENTS_PER_POLL];
3028 	dapl_ib_event_t		*evp;
3029 	dapl_ib_event_t		*evp_start;
3030 	size_t			evp_size;
3031 	int			threshold;
3032 	clock_t			timeout;
3033 	uint32_t		max_events;
3034 	uint32_t		num_events = 0;
3035 	void			*pd;
3036 	ibt_priv_data_len_t	n;
3037 	int			retval = 0;
3038 	int			rc;
3039 
3040 	retval = daplka_event_poll_copyin(arg, &args, mode);
3041 	if (retval != 0) {
3042 		return (EFAULT);
3043 	}
3044 
3045 	if ((args.evp_num_ev > 0) && (args.evp_ep == NULL)) {
3046 		DERR("event_poll: evp_ep cannot be NULL if num_wc=%d",
3047 		    args.evp_num_ev);
3048 		return (EINVAL);
3049 	}
3050 	/*
3051 	 * Note: dequeue requests have a threshold = 0, timeout = 0
3052 	 */
3053 	threshold = args.evp_threshold;
3054 
3055 	max_events = args.evp_num_ev;
3056 	/* ensure library is passing sensible values */
3057 	if (max_events < threshold) {
3058 		DERR("event_poll: max_events(%d) < threshold(%d)\n",
3059 		    max_events, threshold);
3060 		return (EINVAL);
3061 	}
3062 	/* Do a sanity check to avoid excessive memory allocation */
3063 	if (max_events > DAPL_EVD_MAX_EVENTS) {
3064 		DERR("event_poll: max_events(%d) > %d",
3065 		    max_events, DAPL_EVD_MAX_EVENTS);
3066 		return (EINVAL);
3067 	}
3068 	D4("event_poll: threshold(%d) timeout(0x%llx) max_events(%d)\n",
3069 	    threshold, (longlong_t)args.evp_timeout, max_events);
3070 
3071 	/* get evd resource */
3072 	evd_rp = (daplka_evd_resource_t *)
3073 	    daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.evp_evd_hkey);
3074 	if (evd_rp == NULL) {
3075 		DERR("event_poll: cannot find evd resource\n");
3076 		return (EINVAL);
3077 	}
3078 	ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);
3079 
3080 	/*
3081 	 * Use event array on the stack if possible
3082 	 */
3083 	if (max_events <= NUM_EVENTS_PER_POLL) {
3084 		evp_start = evp = &evp_arr[0];
3085 	} else {
3086 		evp_size = max_events * sizeof (dapl_ib_event_t);
3087 		evp_start = evp = kmem_zalloc(evp_size, daplka_km_flags);
3088 		if (evp == NULL) {
3089 			DERR("event_poll: kmem_zalloc failed, evp_size %d",
3090 			    evp_size);
3091 			retval = ENOMEM;
3092 			goto cleanup;
3093 		}
3094 	}
3095 
3096 	/*
3097 	 * The Event poll algorithm is as follows -
3098 	 * The library passes a buffer big enough to hold "max_events"
3099 	 * events. max_events is >= threshold. If at any stage we get
3100 	 * max_events no. of events we bail. The events are polled in
3101 	 * the following order -
3102 	 * 1) Check for CR events in the evd_cr_events list
3103 	 * 2) Check for Connection events in the evd_connection_events list
3104 	 *
3105 	 * If after the above 2 steps we don't have enough(>= threshold) events
3106 	 * we block for CQ notification and sleep. Upon being woken up we start
3107 	 * at step 1 again.
3108 	 */
3109 
3110 	/*
3111 	 * Note: this could be 0 or INFINITE or anyother value in microsec
3112 	 */
3113 	if (args.evp_timeout > 0) {
3114 		if (args.evp_timeout >= LONG_MAX) {
3115 			timeout = LONG_MAX;
3116 		} else {
3117 			clock_t	curr_time = ddi_get_lbolt();
3118 
3119 			timeout = curr_time +
3120 			    drv_usectohz((clock_t)args.evp_timeout);
3121 			/*
3122 			 * use the max value if we wrapped around
3123 			 */
3124 			if (timeout <= curr_time) {
3125 				timeout = LONG_MAX;
3126 			}
3127 		}
3128 	} else {
3129 		timeout = 0;
3130 	}
3131 
3132 	mutex_enter(&evd_rp->evd_lock);
3133 	for (;;) {
3134 		/*
3135 		 * If this evd is waiting for CM events check that now.
3136 		 */
3137 		if ((evd_rp->evd_flags & DAT_EVD_CR_FLAG) &&
3138 		    (evd_rp->evd_cr_events.eel_num_elements > 0)) {
3139 			/* dequeue events from evd_cr_events list */
3140 			while (head = daplka_evd_event_dequeue(
3141 			    &evd_rp->evd_cr_events)) {
3142 				/*
3143 				 * populate the evp array
3144 				 */
3145 				evp[num_events].ibe_ev_family = DAPL_CR_EVENTS;
3146 				evp[num_events].ibe_ce.ibce_event =
3147 				    head->ee_cmev.ec_cm_ev_type;
3148 				evp[num_events].ibe_ce.ibce_cookie =
3149 				    (uint64_t)head->ee_cmev.ec_cm_cookie;
3150 				evp[num_events].ibe_ce.ibce_psep_cookie =
3151 				    head->ee_cmev.ec_cm_psep_cookie;
3152 				daplka_crevent_privdata_post(ia_rp,
3153 				    &evp[num_events], head);
3154 				kmem_free(head, sizeof (daplka_evd_event_t));
3155 
3156 				if (++num_events == max_events) {
3157 					mutex_exit(&evd_rp->evd_lock);
3158 					goto maxevent_reached;
3159 				}
3160 			}
3161 		}
3162 
3163 		if ((evd_rp->evd_flags & DAT_EVD_CONNECTION_FLAG) &&
3164 		    (evd_rp->evd_conn_events.eel_num_elements > 0)) {
3165 			/* dequeue events from evd_connection_events list */
3166 			while ((head = daplka_evd_event_dequeue
3167 			    (&evd_rp->evd_conn_events))) {
3168 				/*
3169 				 * populate the evp array -
3170 				 *
3171 				 */
3172 				if (head->ee_cmev.ec_cm_is_passive) {
3173 					evp[num_events].ibe_ev_family =
3174 					    DAPL_PASSIVE_CONNECTION_EVENTS;
3175 				} else {
3176 					evp[num_events].ibe_ev_family =
3177 					    DAPL_ACTIVE_CONNECTION_EVENTS;
3178 				}
3179 				evp[num_events].ibe_ce.ibce_event =
3180 				    head->ee_cmev.ec_cm_ev_type;
3181 				evp[num_events].ibe_ce.ibce_cookie =
3182 				    (uint64_t)head->ee_cmev.ec_cm_cookie;
3183 				evp[num_events].ibe_ce.ibce_psep_cookie =
3184 				    head->ee_cmev.ec_cm_psep_cookie;
3185 
3186 				if (head->ee_cmev.ec_cm_ev_priv_data_len > 0) {
3187 					pd = head->ee_cmev.ec_cm_ev_priv_data;
3188 					n = head->
3189 					    ee_cmev.ec_cm_ev_priv_data_len;
3190 					bcopy(pd, (void *)evp[num_events].
3191 					    ibe_ce.ibce_priv_data_ptr, n);
3192 					evp[num_events].ibe_ce.
3193 					    ibce_priv_data_size = n;
3194 					kmem_free(pd, n);
3195 				}
3196 
3197 				kmem_free(head, sizeof (daplka_evd_event_t));
3198 
3199 				if (++num_events == max_events) {
3200 					mutex_exit(&evd_rp->evd_lock);
3201 					goto maxevent_reached;
3202 				}
3203 			}
3204 		}
3205 
3206 		if ((evd_rp->evd_flags & DAT_EVD_ASYNC_FLAG) &&
3207 		    (evd_rp->evd_async_events.eel_num_elements > 0)) {
3208 			/* dequeue events from evd_async_events list */
3209 			while (head = daplka_evd_event_dequeue(
3210 			    &evd_rp->evd_async_events)) {
3211 				/*
3212 				 * populate the evp array
3213 				 */
3214 				evp[num_events].ibe_ev_family =
3215 				    DAPL_ASYNC_EVENTS;
3216 				evp[num_events].ibe_async.ibae_type =
3217 				    head->ee_aev.ibae_type;
3218 				evp[num_events].ibe_async.ibae_hca_guid =
3219 				    head->ee_aev.ibae_hca_guid;
3220 				evp[num_events].ibe_async.ibae_cookie =
3221 				    head->ee_aev.ibae_cookie;
3222 				evp[num_events].ibe_async.ibae_port =
3223 				    head->ee_aev.ibae_port;
3224 
3225 				kmem_free(head, sizeof (daplka_evd_event_t));
3226 
3227 				if (++num_events == max_events) {
3228 					break;
3229 				}
3230 			}
3231 		}
3232 
3233 		/*
3234 		 * We have sufficient events for this call so no need to wait
3235 		 */
3236 		if ((threshold > 0) && (num_events >= threshold)) {
3237 			mutex_exit(&evd_rp->evd_lock);
3238 			break;
3239 		}
3240 
3241 		evd_rp->evd_waiters++;
3242 		/*
3243 		 * There are no new events and a timeout was specified.
3244 		 * Note: for CQ events threshold is 0 but timeout is
3245 		 * not necessarily 0.
3246 		 */
3247 		while ((evd_rp->evd_newevents == DAPLKA_EVD_NO_EVENTS) &&
3248 		    timeout) {
3249 			retval = DAPLKA_EVD_WAIT(&evd_rp->evd_cv,
3250 			    &evd_rp->evd_lock, timeout);
3251 			if (retval == 0) {
3252 				retval = EINTR;
3253 				break;
3254 			} else if (retval == -1) {
3255 				retval = ETIME;
3256 				break;
3257 			} else {
3258 				retval = 0;
3259 				continue;
3260 			}
3261 		}
3262 		evd_rp->evd_waiters--;
3263 		if (evd_rp->evd_newevents != DAPLKA_EVD_NO_EVENTS) {
3264 			/*
3265 			 * If we got woken up by the CQ handler due to events
3266 			 * in the CQ. Need to go to userland to check for
3267 			 * CQ events. Or if we were woken up due to S/W events
3268 			 */
3269 
3270 			/* check for userland events only */
3271 			if (!(evd_rp->evd_newevents &
3272 			    ~DAPLKA_EVD_ULAND_EVENTS)) {
3273 				evd_rp->evd_newevents = DAPLKA_EVD_NO_EVENTS;
3274 				mutex_exit(&evd_rp->evd_lock);
3275 				break;
3276 			}
3277 			/*
3278 			 * Clear newevents since we are going to loopback
3279 			 * back and check for both CM and CQ events
3280 			 */
3281 			evd_rp->evd_newevents = DAPLKA_EVD_NO_EVENTS;
3282 		} else { /* error */
3283 			mutex_exit(&evd_rp->evd_lock);
3284 			break;
3285 		}
3286 	}
3287 
3288 maxevent_reached:
3289 	args.evp_num_polled = num_events;
3290 
3291 	/*
3292 	 * At this point retval might have a value that we want to return
3293 	 * back to the user. So the copyouts shouldn't tamper retval.
3294 	 */
3295 	if (args.evp_num_polled > 0) { /* copyout the events */
3296 		rc = ddi_copyout(evp, args.evp_ep, args.evp_num_polled *
3297 		    sizeof (dapl_ib_event_t), mode);
3298 		if (rc != 0) { /* XXX: we are losing events here */
3299 			DERR("event_poll: event array copyout error %d", rc);
3300 			retval = EFAULT;
3301 			goto cleanup;
3302 		}
3303 		rc = daplka_event_poll_copyout(&args, arg, mode);
3304 		if (rc != 0) {  /* XXX: we are losing events here */
3305 			DERR("event_poll: copyout error %d\n", rc);
3306 			retval = EFAULT;
3307 			goto cleanup;
3308 		}
3309 	}
3310 
3311 cleanup:;
3312 	if ((max_events > NUM_EVENTS_PER_POLL) && (evp_start != NULL)) {
3313 		kmem_free(evp_start, evp_size);
3314 	}
3315 
3316 	if (evd_rp != NULL) {
3317 		DAPLKA_RS_UNREF(evd_rp);
3318 	}
3319 	return (retval);
3320 }
3321 
3322 /* ARGSUSED */
3323 static int
daplka_event_wakeup(daplka_ia_resource_t * ia_rp,intptr_t arg,int mode,cred_t * cred,int * rvalp)3324 daplka_event_wakeup(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3325 	cred_t *cred, int *rvalp)
3326 {
3327 	dapl_event_wakeup_t	args;
3328 	daplka_evd_resource_t	*evd_rp;
3329 	int			retval;
3330 
3331 	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_event_wakeup_t),
3332 	    mode);
3333 	if (retval != 0) {
3334 		DERR("event_wakeup: copyin error %d\n", retval);
3335 		return (EFAULT);
3336 	}
3337 
3338 	/* get evd resource */
3339 	evd_rp = (daplka_evd_resource_t *)
3340 	    daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.evw_hkey);
3341 	if (evd_rp == NULL) {
3342 		DERR("event_wakeup: cannot find evd resource\n");
3343 		return (EINVAL);
3344 	}
3345 	ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);
3346 
3347 	daplka_evd_wakeup(evd_rp, NULL, NULL);
3348 
3349 	DAPLKA_RS_UNREF(evd_rp);
3350 
3351 	return (retval);
3352 }
3353 
3354 /* ARGSUSED */
3355 static int
daplka_evd_modify_cno(daplka_ia_resource_t * ia_rp,intptr_t arg,int mode,cred_t * cred,int * rvalp)3356 daplka_evd_modify_cno(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3357 	cred_t *cred, int *rvalp)
3358 {
3359 	dapl_evd_modify_cno_t	args;
3360 	daplka_evd_resource_t	*evd_rp;
3361 	daplka_cno_resource_t	*cno_rp;
3362 	daplka_cno_resource_t	*old_cno_rp;
3363 	int			retval;
3364 
3365 	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_evd_modify_cno_t),
3366 	    mode);
3367 	if (retval != 0) {
3368 		DERR("evd_modify_cno: copyin error %d\n", retval);
3369 		return (EFAULT);
3370 	}
3371 
3372 	/* get evd resource */
3373 	evd_rp = (daplka_evd_resource_t *)
3374 	    daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.evmc_hkey);
3375 	if (evd_rp == NULL) {
3376 		DERR("evd_modify_cno: cannot find evd resource\n");
3377 		retval = EINVAL;
3378 		goto cleanup;
3379 	}
3380 	ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);
3381 
3382 	if (args.evmc_cno_hkey > 0) {
3383 		/* get cno resource corresponding to the new CNO */
3384 		cno_rp = (daplka_cno_resource_t *)
3385 		    daplka_hash_lookup(&ia_rp->ia_cno_htbl,
3386 		    args.evmc_cno_hkey);
3387 		if (cno_rp == NULL) {
3388 			DERR("evd_modify_cno: cannot find CNO resource\n");
3389 			retval = EINVAL;
3390 			goto cleanup;
3391 		}
3392 		ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);
3393 	} else {
3394 		cno_rp = NULL;
3395 	}
3396 
3397 	mutex_enter(&evd_rp->evd_lock);
3398 	old_cno_rp = evd_rp->evd_cno_res;
3399 	evd_rp->evd_cno_res = cno_rp;
3400 	mutex_exit(&evd_rp->evd_lock);
3401 
3402 	/*
3403 	 * drop the refcnt on the old CNO, the refcnt on the new CNO is
3404 	 * retained since the evd holds a reference to it.
3405 	 */
3406 	if (old_cno_rp) {
3407 		DAPLKA_RS_UNREF(old_cno_rp);
3408 	}
3409 
3410 cleanup:
3411 	if (evd_rp) {
3412 		DAPLKA_RS_UNREF(evd_rp);
3413 	}
3414 
3415 	return (retval);
3416 }
3417 
3418 /*
3419  * Frees the EVD and associated resources.
3420  * If there are other threads still using this EVD, the destruction
3421  * will defer until the EVD's refcnt drops to zero.
3422  */
3423 /* ARGSUSED */
3424 static int
daplka_evd_free(daplka_ia_resource_t * ia_rp,intptr_t arg,int mode,cred_t * cred,int * rvalp)3425 daplka_evd_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3426 	cred_t *cred, int *rvalp)
3427 {
3428 	daplka_evd_resource_t	*evd_rp = NULL;
3429 	daplka_async_evd_hkey_t	*curr;
3430 	daplka_async_evd_hkey_t	*prev;
3431 	dapl_evd_free_t		args;
3432 	int			retval = 0;
3433 
3434 	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_evd_free_t), mode);
3435 	if (retval != 0) {
3436 		DERR("evd_free: copyin error %d\n", retval);
3437 		return (EFAULT);
3438 	}
3439 	retval = daplka_hash_remove(&ia_rp->ia_evd_htbl, args.evf_hkey,
3440 	    (void **)&evd_rp);
3441 	if (retval != 0 || evd_rp == NULL) {
3442 		DERR("evd_free: cannot find evd resource\n");
3443 		return (EINVAL);
3444 	}
3445 	ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);
3446 
3447 	/* If this is an async evd remove it from the IA's async evd list */
3448 	if (evd_rp->evd_flags & DAT_EVD_ASYNC_FLAG) {
3449 		mutex_enter(&ia_rp->ia_lock);
3450 		curr = prev = ia_rp->ia_async_evd_hkeys;
3451 		while (curr != NULL) {
3452 			if (curr->aeh_evd_hkey == args.evf_hkey) {
3453 				/* unlink curr from the list */
3454 				if (curr == prev) {
3455 					/*
3456 					 * if first element in the list update
3457 					 * the list head
3458 					 */
3459 					ia_rp->ia_async_evd_hkeys =
3460 					    curr->aeh_next;
3461 				} else {
3462 					prev->aeh_next = curr->aeh_next;
3463 				}
3464 				break;
3465 			}
3466 			prev = curr;
3467 			curr = curr->aeh_next;
3468 		}
3469 		mutex_exit(&ia_rp->ia_lock);
3470 		/* free the curr entry */
3471 		kmem_free(curr, sizeof (daplka_async_evd_hkey_t));
3472 	}
3473 
3474 	/* UNREF calls the actual free function when refcnt is zero */
3475 	DAPLKA_RS_UNREF(evd_rp);
3476 	return (0);
3477 }
3478 
3479 /*
3480  * destroys EVD resource.
3481  * called when refcnt drops to zero.
3482  */
3483 static int
daplka_evd_destroy(daplka_resource_t * gen_rp)3484 daplka_evd_destroy(daplka_resource_t *gen_rp)
3485 {
3486 	daplka_evd_resource_t	*evd_rp = (daplka_evd_resource_t *)gen_rp;
3487 	ibt_status_t		status;
3488 	daplka_evd_event_t	*evt;
3489 	ibt_priv_data_len_t	len;
3490 
3491 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*evd_rp))
3492 	D3("evd_destroy: entering, evd_rp 0x%p, rnum %d\n",
3493 	    evd_rp, DAPLKA_RS_RNUM(evd_rp));
3494 	/*
3495 	 * free CQ
3496 	 */
3497 	if (evd_rp->evd_cq_hdl) {
3498 		ibt_set_cq_handler(evd_rp->evd_cq_hdl, NULL, NULL);
3499 		mutex_enter(&daplka_dev->daplka_mutex);
3500 		ibt_set_cq_private(evd_rp->evd_cq_hdl, NULL);
3501 		mutex_exit(&daplka_dev->daplka_mutex);
3502 
3503 		status = daplka_ibt_free_cq(evd_rp, evd_rp->evd_cq_hdl);
3504 		if (status != IBT_SUCCESS) {
3505 			DERR("evd_destroy: ibt_free_cq returned %d\n", status);
3506 		}
3507 		evd_rp->evd_cq_hdl = NULL;
3508 		D2("evd_destroy: cq freed, rnum %d\n", DAPLKA_RS_RNUM(evd_rp));
3509 	}
3510 
3511 	/*
3512 	 * release reference on CNO
3513 	 */
3514 	if (evd_rp->evd_cno_res != NULL) {
3515 		mutex_enter(&evd_rp->evd_cno_res->cno_lock);
3516 		if (evd_rp->evd_cno_res->cno_evd_cookie ==
3517 		    evd_rp->evd_cookie) {
3518 			evd_rp->evd_cno_res->cno_evd_cookie = 0;
3519 		}
3520 		mutex_exit(&evd_rp->evd_cno_res->cno_lock);
3521 		DAPLKA_RS_UNREF(evd_rp->evd_cno_res);
3522 		evd_rp->evd_cno_res = NULL;
3523 	}
3524 
3525 	/*
3526 	 * discard all remaining events
3527 	 */
3528 	mutex_enter(&evd_rp->evd_lock);
3529 	while ((evt = daplka_evd_event_dequeue(&evd_rp->evd_cr_events))) {
3530 		D2("evd_destroy: discarding CR event: %d\n",
3531 		    evt->ee_cmev.ec_cm_ev_type);
3532 		len = evt->ee_cmev.ec_cm_ev_priv_data_len;
3533 		if (len > 0) {
3534 			kmem_free(evt->ee_cmev.ec_cm_ev_priv_data, len);
3535 			evt->ee_cmev.ec_cm_ev_priv_data = NULL;
3536 			evt->ee_cmev.ec_cm_ev_priv_data_len = 0;
3537 		}
3538 		kmem_free(evt, sizeof (*evt));
3539 	}
3540 	ASSERT(evd_rp->evd_cr_events.eel_num_elements == 0);
3541 
3542 	while ((evt = daplka_evd_event_dequeue(&evd_rp->evd_conn_events))) {
3543 		D2("evd_destroy: discarding CONN event: %d\n",
3544 		    evt->ee_cmev.ec_cm_ev_type);
3545 		len = evt->ee_cmev.ec_cm_ev_priv_data_len;
3546 		if (len > 0) {
3547 			kmem_free(evt->ee_cmev.ec_cm_ev_priv_data, len);
3548 			evt->ee_cmev.ec_cm_ev_priv_data = NULL;
3549 			evt->ee_cmev.ec_cm_ev_priv_data_len = 0;
3550 		}
3551 		kmem_free(evt, sizeof (*evt));
3552 	}
3553 	ASSERT(evd_rp->evd_conn_events.eel_num_elements == 0);
3554 
3555 	while ((evt = daplka_evd_event_dequeue(&evd_rp->evd_async_events))) {
3556 		DERR("evd_destroy: discarding ASYNC event: %d\n",
3557 		    evt->ee_aev.ibae_type);
3558 		kmem_free(evt, sizeof (*evt));
3559 	}
3560 	ASSERT(evd_rp->evd_async_events.eel_num_elements == 0);
3561 	mutex_exit(&evd_rp->evd_lock);
3562 
3563 	mutex_destroy(&evd_rp->evd_lock);
3564 	DAPLKA_RS_FINI(evd_rp);
3565 	kmem_free(evd_rp, sizeof (daplka_evd_resource_t));
3566 	D3("evd_destroy: exiting, evd_rp 0x%p\n", evd_rp);
3567 	return (0);
3568 }
3569 
3570 static void
daplka_hash_evd_free(void * obj)3571 daplka_hash_evd_free(void *obj)
3572 {
3573 	daplka_evd_resource_t *evd_rp = (daplka_evd_resource_t *)obj;
3574 
3575 	ASSERT(DAPLKA_RS_TYPE(evd_rp) == DAPL_TYPE_EVD);
3576 	DAPLKA_RS_UNREF(evd_rp);
3577 }
3578 
3579 /*
3580  * this handler fires when new completions arrive.
3581  */
3582 /* ARGSUSED */
3583 static void
daplka_cq_handler(ibt_cq_hdl_t ibt_cq,void * arg)3584 daplka_cq_handler(ibt_cq_hdl_t ibt_cq, void *arg)
3585 {
3586 	D3("cq_handler: fired setting evd_newevents\n");
3587 	daplka_evd_wakeup((daplka_evd_resource_t *)arg, NULL, NULL);
3588 }
3589 
3590 /*
3591  * this routine wakes up a client from evd_wait. if evtq and evt
3592  * are non-null, the event evt will be enqueued prior to waking
3593  * up the client. if the evd is associated with a CNO and if there
3594  * are no waiters on the evd, the CNO will be notified.
3595  */
3596 static void
daplka_evd_wakeup(daplka_evd_resource_t * evd_rp,daplka_evd_event_list_t * evtq,daplka_evd_event_t * evt)3597 daplka_evd_wakeup(daplka_evd_resource_t *evd_rp, daplka_evd_event_list_t *evtq,
3598 	daplka_evd_event_t *evt)
3599 {
3600 	uint32_t waiters = 0;
3601 
3602 	mutex_enter(&evd_rp->evd_lock);
3603 	if (evtq != NULL && evt != NULL) {
3604 		ASSERT(evtq == &evd_rp->evd_cr_events ||
3605 		    evtq == &evd_rp->evd_conn_events ||
3606 		    evtq == &evd_rp->evd_async_events);
3607 		daplka_evd_event_enqueue(evtq, evt);
3608 		ASSERT((evtq->eel_event_type == DAPLKA_EVD_CM_EVENTS) ||
3609 		    (evtq->eel_event_type == DAPLKA_EVD_ASYNC_EVENTS));
3610 		evd_rp->evd_newevents |= evtq->eel_event_type;
3611 	} else {
3612 		evd_rp->evd_newevents |= DAPLKA_EVD_ULAND_EVENTS;
3613 	}
3614 	waiters = evd_rp->evd_waiters;
3615 	cv_broadcast(&evd_rp->evd_cv);
3616 	mutex_exit(&evd_rp->evd_lock);
3617 
3618 	/*
3619 	 * only wakeup the CNO if there are no waiters on this evd.
3620 	 */
3621 	if (evd_rp->evd_cno_res != NULL && waiters == 0) {
3622 		mutex_enter(&evd_rp->evd_cno_res->cno_lock);
3623 		evd_rp->evd_cno_res->cno_evd_cookie = evd_rp->evd_cookie;
3624 		cv_broadcast(&evd_rp->evd_cno_res->cno_cv);
3625 		mutex_exit(&evd_rp->evd_cno_res->cno_lock);
3626 	}
3627 }
3628 
3629 /*
3630  * daplka_evd_event_enqueue adds elem to the end of the event list
3631  * The caller is expected to acquire appropriate locks before
3632  * calling enqueue
3633  */
3634 static void
daplka_evd_event_enqueue(daplka_evd_event_list_t * evlist,daplka_evd_event_t * elem)3635 daplka_evd_event_enqueue(daplka_evd_event_list_t *evlist,
3636     daplka_evd_event_t *elem)
3637 {
3638 	if (evlist->eel_tail) {
3639 		evlist->eel_tail->ee_next = elem;
3640 		evlist->eel_tail = elem;
3641 	} else {
3642 		/* list is empty */
3643 		ASSERT(evlist->eel_head == NULL);
3644 		evlist->eel_head = elem;
3645 		evlist->eel_tail = elem;
3646 	}
3647 	evlist->eel_num_elements++;
3648 }
3649 
3650 /*
3651  * daplka_evd_event_dequeue removes and returns the first element of event
3652  * list. NULL is returned if the list is empty. The caller is expected to
3653  * acquire appropriate locks before calling enqueue.
3654  */
3655 static daplka_evd_event_t *
daplka_evd_event_dequeue(daplka_evd_event_list_t * evlist)3656 daplka_evd_event_dequeue(daplka_evd_event_list_t *evlist)
3657 {
3658 	daplka_evd_event_t *head;
3659 
3660 	head = evlist->eel_head;
3661 	if (head == NULL) {
3662 		return (NULL);
3663 	}
3664 
3665 	evlist->eel_head = head->ee_next;
3666 	evlist->eel_num_elements--;
3667 	/* if it was the last element update the tail pointer too */
3668 	if (evlist->eel_head == NULL) {
3669 		ASSERT(evlist->eel_num_elements == 0);
3670 		evlist->eel_tail = NULL;
3671 	}
3672 	return (head);
3673 }
3674 
3675 /*
3676  * A CNO allows the client to wait for notifications from multiple EVDs.
3677  * To use a CNO, the client needs to follow the procedure below:
3678  * 1. allocate a CNO. this returns a cno_hkey that identifies the CNO.
3679  * 2. create one or more EVDs using the returned cno_hkey.
3680  * 3. call cno_wait. when one of the associated EVDs get notified, the
3681  *    CNO will also get notified. cno_wait will then return with a
3682  *    evd_cookie identifying the EVD that triggered the event.
3683  *
3684  * A note about cno_wait:
3685  * -unlike a EVD, a CNO does not maintain a queue of notifications. For
3686  *  example, suppose multiple EVDs triggered a CNO before the client calls
3687  *  cno_wait; when the client calls cno_wait, it will return with the
3688  *  evd_cookie that identifies the *last* EVD that triggered the CNO. It
3689  *  is the responsibility of the client, upon returning from cno_wait, to
3690  *  check on all EVDs that can potentially trigger the CNO. the returned
3691  *  evd_cookie is only meant to be a hint. there is no guarantee that the
3692  *  EVD identified by the evd_cookie still contains an event or still
3693  *  exists by the time cno_wait returns.
3694  */
3695 
3696 /*
3697  * allocates a CNO.
3698  * the returned cno_hkey may subsequently be used in evd_create.
3699  */
3700 /* ARGSUSED */
3701 static int
daplka_cno_alloc(daplka_ia_resource_t * ia_rp,intptr_t arg,int mode,cred_t * cred,int * rvalp)3702 daplka_cno_alloc(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3703 	cred_t *cred, int *rvalp)
3704 {
3705 	dapl_cno_alloc_t	args;
3706 	daplka_cno_resource_t	*cno_rp = NULL;
3707 	uint64_t		cno_hkey = 0;
3708 	boolean_t		inserted = B_FALSE;
3709 	int			retval = 0;
3710 
3711 	cno_rp = kmem_zalloc(sizeof (*cno_rp), daplka_km_flags);
3712 	if (cno_rp == NULL) {
3713 		DERR("cno_alloc: cannot allocate cno resource\n");
3714 		return (ENOMEM);
3715 	}
3716 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*cno_rp))
3717 	DAPLKA_RS_INIT(cno_rp, DAPL_TYPE_CNO,
3718 	    DAPLKA_RS_RNUM(ia_rp), daplka_cno_destroy);
3719 
3720 	mutex_init(&cno_rp->cno_lock, NULL, MUTEX_DRIVER, NULL);
3721 	cv_init(&cno_rp->cno_cv, NULL, CV_DRIVER, NULL);
3722 	cno_rp->cno_evd_cookie = 0;
3723 
3724 	/* insert into cno hash table */
3725 	retval = daplka_hash_insert(&ia_rp->ia_cno_htbl,
3726 	    &cno_hkey, (void *)cno_rp);
3727 	if (retval != 0) {
3728 		DERR("cno_alloc: cannot insert cno resource\n");
3729 		goto cleanup;
3730 	}
3731 	inserted = B_TRUE;
3732 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*cno_rp))
3733 
3734 	/* return hkey to library */
3735 	args.cno_hkey = cno_hkey;
3736 
3737 	retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_cno_alloc_t),
3738 	    mode);
3739 	if (retval != 0) {
3740 		DERR("cno_alloc: copyout error %d\n", retval);
3741 		retval = EFAULT;
3742 		goto cleanup;
3743 	}
3744 	return (0);
3745 
3746 cleanup:;
3747 	if (inserted) {
3748 		daplka_cno_resource_t *free_rp = NULL;
3749 
3750 		(void) daplka_hash_remove(&ia_rp->ia_cno_htbl, cno_hkey,
3751 		    (void **)&free_rp);
3752 		if (free_rp != cno_rp) {
3753 			DERR("cno_alloc: cannot remove cno\n");
3754 			/*
3755 			 * we can only get here if another thread
3756 			 * has completed the cleanup in cno_free
3757 			 */
3758 			return (retval);
3759 		}
3760 	}
3761 	DAPLKA_RS_UNREF(cno_rp);
3762 	return (retval);
3763 }
3764 
3765 /*
3766  * destroys a CNO.
3767  * this gets called when a CNO resource's refcnt drops to zero.
3768  */
3769 static int
daplka_cno_destroy(daplka_resource_t * gen_rp)3770 daplka_cno_destroy(daplka_resource_t *gen_rp)
3771 {
3772 	daplka_cno_resource_t *cno_rp = (daplka_cno_resource_t *)gen_rp;
3773 
3774 	ASSERT(DAPLKA_RS_REFCNT(cno_rp) == 0);
3775 	D2("cno_destroy: entering, cno_rp %p, rnum %d\n",
3776 	    cno_rp, DAPLKA_RS_RNUM(cno_rp));
3777 
3778 	ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);
3779 	cv_destroy(&cno_rp->cno_cv);
3780 	mutex_destroy(&cno_rp->cno_lock);
3781 
3782 	DAPLKA_RS_FINI(cno_rp);
3783 	kmem_free(cno_rp, sizeof (daplka_cno_resource_t));
3784 	D2("cno_destroy: exiting, cno_rp %p\n", cno_rp);
3785 	return (0);
3786 }
3787 
3788 static void
daplka_hash_cno_free(void * obj)3789 daplka_hash_cno_free(void *obj)
3790 {
3791 	daplka_cno_resource_t *cno_rp = (daplka_cno_resource_t *)obj;
3792 
3793 	ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);
3794 	DAPLKA_RS_UNREF(cno_rp);
3795 }
3796 
3797 /*
3798  * removes the CNO from the cno hash table and frees the CNO
3799  * if there are no references to it. if there are references to
3800  * it, the CNO will be destroyed when the last of the references
3801  * is released. once the CNO is removed from the cno hash table,
3802  * the client will no longer be able to call cno_wait on the CNO.
3803  */
3804 /* ARGSUSED */
3805 static int
daplka_cno_free(daplka_ia_resource_t * ia_rp,intptr_t arg,int mode,cred_t * cred,int * rvalp)3806 daplka_cno_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3807 	cred_t *cred, int *rvalp)
3808 {
3809 	daplka_cno_resource_t	*cno_rp = NULL;
3810 	dapl_cno_free_t		args;
3811 	int			retval = 0;
3812 
3813 	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cno_free_t), mode);
3814 	if (retval != 0) {
3815 		DERR("cno_free: copyin error %d\n", retval);
3816 		return (EINVAL);
3817 	}
3818 
3819 	retval = daplka_hash_remove(&ia_rp->ia_cno_htbl,
3820 	    args.cnf_hkey, (void **)&cno_rp);
3821 	if (retval != 0 || cno_rp == NULL) {
3822 		DERR("cno_free: cannot find cno resource\n");
3823 		return (EINVAL);
3824 	}
3825 	ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);
3826 
3827 	/* UNREF calls the actual free function when refcnt is zero */
3828 	DAPLKA_RS_UNREF(cno_rp);
3829 	return (0);
3830 }
3831 
3832 /*
3833  * wait for a notification from one of the associated EVDs.
3834  */
3835 /* ARGSUSED */
3836 static int
daplka_cno_wait(daplka_ia_resource_t * ia_rp,intptr_t arg,int mode,cred_t * cred,int * rvalp)3837 daplka_cno_wait(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3838 	cred_t *cred, int *rvalp)
3839 {
3840 	daplka_cno_resource_t	*cno_rp = NULL;
3841 	dapl_cno_wait_t		args;
3842 	int			retval = 0;
3843 	uint64_t		evd_cookie = 0;
3844 	clock_t			timeout, curr_time;
3845 
3846 	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cno_wait_t), mode);
3847 	if (retval != 0) {
3848 		DERR("cno_wait: copyin error %d\n", retval);
3849 		return (EINVAL);
3850 	}
3851 	/* get cno resource */
3852 	cno_rp = (daplka_cno_resource_t *)
3853 	    daplka_hash_lookup(&ia_rp->ia_cno_htbl, args.cnw_hkey);
3854 	if (cno_rp == NULL) {
3855 		DERR("cno_wait: cannot find cno resource\n");
3856 		return (EINVAL);
3857 	}
3858 	ASSERT(DAPLKA_RS_TYPE(cno_rp) == DAPL_TYPE_CNO);
3859 
3860 	curr_time = ddi_get_lbolt();
3861 	timeout = curr_time + drv_usectohz(args.cnw_timeout);
3862 
3863 	/*
3864 	 * use the max value if we wrapped around
3865 	 */
3866 	if (args.cnw_timeout > 0 && timeout <= curr_time) {
3867 		/*
3868 		 * clock_t (size long) changes between 32 and 64-bit kernels
3869 		 */
3870 		timeout = LONG_MAX >> 4;
3871 	}
3872 	mutex_enter(&cno_rp->cno_lock);
3873 	while (cno_rp->cno_evd_cookie == 0) {
3874 		int rval = 0;
3875 
3876 		rval = cv_timedwait_sig(&cno_rp->cno_cv,
3877 		    &cno_rp->cno_lock, timeout);
3878 		if (rval == 0) {
3879 			DERR("cno_wait: interrupted\n");
3880 			mutex_exit(&cno_rp->cno_lock);
3881 			retval = EINTR;
3882 			goto cleanup;
3883 		} else if (rval == -1) {
3884 			DERR("cno_wait: timed out\n");
3885 			mutex_exit(&cno_rp->cno_lock);
3886 			retval = ETIME;
3887 			goto cleanup;
3888 		}
3889 	}
3890 	evd_cookie = cno_rp->cno_evd_cookie;
3891 	cno_rp->cno_evd_cookie = 0;
3892 	mutex_exit(&cno_rp->cno_lock);
3893 
3894 	ASSERT(evd_cookie != 0);
3895 	D2("cno_wait: returning evd_cookie 0x%p\n",
3896 	    (void *)(uintptr_t)evd_cookie);
3897 	args.cnw_evd_cookie = evd_cookie;
3898 	retval = ddi_copyout((void *)&args, (void *)arg,
3899 	    sizeof (dapl_cno_wait_t), mode);
3900 	if (retval != 0) {
3901 		DERR("cno_wait: copyout error %d\n", retval);
3902 		retval = EFAULT;
3903 		goto cleanup;
3904 	}
3905 
3906 cleanup:;
3907 	if (cno_rp != NULL) {
3908 		DAPLKA_RS_UNREF(cno_rp);
3909 	}
3910 	return (retval);
3911 }
3912 
3913 /*
3914  * this function is called by the client when it decides to
3915  * accept a connection request. a connection request is generated
3916  * when the active side generates REQ MAD to a service point on
3917  * the destination node. this causes the CM service handler
3918  * (daplka_cm_service_req) on the passive side to be callee. This
3919  * handler will then enqueue this connection request to the backlog
3920  * array of the service point. A connection event containing the
3921  * backlog array index and connection request private data is passed
3922  * to the client's service point EVD (sp_evd_res). once the event
3923  * is passed up to the userland, the client may examine the request
3924  * to decide whether to call daplka_cr_accept or dapka_cr_reject.
3925  */
3926 /* ARGSUSED */
3927 static int
daplka_cr_accept(daplka_ia_resource_t * ia_rp,intptr_t arg,int mode,cred_t * cred,int * rvalp)3928 daplka_cr_accept(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
3929 	cred_t *cred, int *rvalp)
3930 {
3931 	daplka_ep_resource_t		*ep_rp = NULL;
3932 	daplka_sp_resource_t		*sp_rp = NULL;
3933 	dapl_cr_accept_t		args;
3934 	daplka_sp_conn_pend_t		*conn;
3935 	ibt_cm_proceed_reply_t		proc_reply;
3936 	ibt_status_t			status;
3937 	uint16_t			bkl_index;
3938 	uint32_t			old_state, new_state;
3939 	int				retval = 0;
3940 	void				*priv_data = NULL, *sid;
3941 
3942 	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cr_accept_t),
3943 	    mode);
3944 	if (retval != 0) {
3945 		DERR("cr_accept: copyin error %d\n", retval);
3946 		return (EFAULT);
3947 	}
3948 	if (args.cra_priv_sz > DAPL_MAX_PRIVATE_DATA_SIZE) {
3949 		DERR("cr_accept: private data len (%d) exceeded "
3950 		    "max size %d\n", args.cra_priv_sz,
3951 		    DAPL_MAX_PRIVATE_DATA_SIZE);
3952 		return (EINVAL);
3953 	}
3954 	priv_data = (args.cra_priv_sz > 0) ? (void *)args.cra_priv : NULL;
3955 
3956 	D2("cr_accept: priv(0x%p) priv_len(%u) psep(0x%llx)\n", priv_data,
3957 	    args.cra_priv_sz, (longlong_t)args.cra_bkl_cookie);
3958 
3959 	/* get sp resource */
3960 	sp_rp = (daplka_sp_resource_t *)daplka_hash_lookup(&ia_rp->ia_sp_htbl,
3961 	    args.cra_sp_hkey);
3962 	if (sp_rp == NULL) {
3963 		DERR("cr_accept: cannot find sp resource\n");
3964 		return (EINVAL);
3965 	}
3966 	ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);
3967 
3968 	/* get ep resource */
3969 	ep_rp = (daplka_ep_resource_t *)daplka_hash_lookup(&ia_rp->ia_ep_htbl,
3970 	    args.cra_ep_hkey);
3971 	if (ep_rp == NULL) {
3972 		DERR("cr_accept: cannot find ep resource\n");
3973 		retval = EINVAL;
3974 		goto cleanup;
3975 	}
3976 	ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
3977 
3978 	/*
3979 	 * accept is only allowed if ep_state is CLOSED.
3980 	 * note that after this point, the ep_state is frozen
3981 	 * (i.e. TRANSITIONING) until we transition ep_state
3982 	 * to ACCEPTING or back to CLOSED if we get an error.
3983 	 */
3984 	new_state = old_state = daplka_ep_get_state(ep_rp);
3985 	if (old_state != DAPLKA_EP_STATE_CLOSED) {
3986 		DERR("cr_accept: invalid ep state %d\n", old_state);
3987 		retval = EINVAL;
3988 		goto cleanup;
3989 	}
3990 
3991 	mutex_enter(&sp_rp->sp_lock);
3992 	bkl_index = DAPLKA_GET_PSEP_INDEX(args.cra_bkl_cookie);
3993 	/*
3994 	 * make sure the backlog index is not bogus.
3995 	 */
3996 	if (bkl_index >= sp_rp->sp_backlog_size) {
3997 		DERR("cr_accept: invalid backlog index 0x%llx %d\n",
3998 		    (longlong_t)args.cra_bkl_cookie, bkl_index);
3999 		mutex_exit(&sp_rp->sp_lock);
4000 		retval = EINVAL;
4001 		goto cleanup;
4002 	}
4003 	/*
4004 	 * make sure the backlog index indeed refers
4005 	 * to a pending connection.
4006 	 */
4007 	conn = &sp_rp->sp_backlog[bkl_index];
4008 	if (conn->spcp_state != DAPLKA_SPCP_PENDING) {
4009 		DERR("cr_accept: invalid conn state %d\n",
4010 		    conn->spcp_state);
4011 		mutex_exit(&sp_rp->sp_lock);
4012 		retval = EINVAL;
4013 		goto cleanup;
4014 	}
4015 	if (conn->spcp_sid == NULL) {
4016 		DERR("cr_accept: sid == NULL\n");
4017 		mutex_exit(&sp_rp->sp_lock);
4018 		retval = EINVAL;
4019 		goto cleanup;
4020 	}
4021 	if (ep_rp->ep_chan_hdl == NULL) {
4022 		/*
4023 		 * a ep_rp with a NULL chan_hdl is impossible.
4024 		 */
4025 		DERR("cr_accept: ep_chan_hdl == NULL\n");
4026 		mutex_exit(&sp_rp->sp_lock);
4027 		ASSERT(B_FALSE);
4028 		retval = EINVAL;
4029 		goto cleanup;
4030 	}
4031 	proc_reply.rep.cm_channel = ep_rp->ep_chan_hdl;
4032 	proc_reply.rep.cm_rdma_ra_out = conn->spcp_rdma_ra_out;
4033 	proc_reply.rep.cm_rdma_ra_in = conn->spcp_rdma_ra_in;
4034 	proc_reply.rep.cm_rnr_retry_cnt = IBT_RNR_INFINITE_RETRY;
4035 	sid = conn->spcp_sid;
4036 
4037 	/*
4038 	 * this clears our slot in the backlog array.
4039 	 * this slot may now be used by other pending connections.
4040 	 */
4041 	conn->spcp_sid = NULL;
4042 	conn->spcp_state = DAPLKA_SPCP_INIT;
4043 	conn->spcp_req_len = 0;
4044 	mutex_exit(&sp_rp->sp_lock);
4045 
4046 	/*
4047 	 * Set the unique cookie corresponding to the CR to this EP
4048 	 * so that is can be used in passive side CM callbacks
4049 	 */
4050 	ep_rp->ep_psep_cookie = args.cra_bkl_cookie;
4051 
4052 	status = ibt_cm_proceed(IBT_CM_EVENT_REQ_RCV, sid, IBT_CM_ACCEPT,
4053 	    &proc_reply, priv_data, (ibt_priv_data_len_t)args.cra_priv_sz);
4054 
4055 	if (status != IBT_SUCCESS) {
4056 		DERR("cr_accept: ibt_cm_proceed returned %d\n", status);
4057 		*rvalp = (int)status;
4058 		retval = 0;
4059 	}
4060 	/*
4061 	 * note that the CM handler may actually be called at this
4062 	 * point. but since ep_state is still in TRANSITIONING, the
4063 	 * handler will wait until we transition to ACCEPTING. this
4064 	 * prevents the case where we set ep_state to ACCEPTING after
4065 	 * daplka_service_conn_est sets ep_state to CONNECTED.
4066 	 */
4067 	new_state = DAPLKA_EP_STATE_ACCEPTING;
4068 
4069 cleanup:;
4070 	if (sp_rp != NULL) {
4071 		DAPLKA_RS_UNREF(sp_rp);
4072 	}
4073 	if (ep_rp != NULL) {
4074 		daplka_ep_set_state(ep_rp, old_state, new_state);
4075 		DAPLKA_RS_UNREF(ep_rp);
4076 	}
4077 	return (retval);
4078 }
4079 
4080 /*
4081  * this function is called by the client to reject a
4082  * connection request.
4083  */
4084 /* ARGSUSED */
4085 static int
daplka_cr_reject(daplka_ia_resource_t * ia_rp,intptr_t arg,int mode,cred_t * cred,int * rvalp)4086 daplka_cr_reject(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
4087 	cred_t *cred, int *rvalp)
4088 {
4089 	dapl_cr_reject_t	args;
4090 	daplka_sp_resource_t	*sp_rp = NULL;
4091 	daplka_sp_conn_pend_t	*conn;
4092 	ibt_cm_proceed_reply_t	proc_reply;
4093 	ibt_cm_status_t		proc_status;
4094 	ibt_status_t		status;
4095 	uint16_t		bkl_index;
4096 	int			retval = 0;
4097 	void			*sid;
4098 
4099 	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cr_reject_t),
4100 	    mode);
4101 	if (retval != 0) {
4102 		DERR("cr_reject: copyin error %d\n", retval);
4103 		return (EFAULT);
4104 	}
4105 	/* get sp resource */
4106 	sp_rp = (daplka_sp_resource_t *)daplka_hash_lookup(&ia_rp->ia_sp_htbl,
4107 	    args.crr_sp_hkey);
4108 	if (sp_rp == NULL) {
4109 		DERR("cr_reject: cannot find sp resource\n");
4110 		return (EINVAL);
4111 	}
4112 	ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);
4113 
4114 	D2("cr_reject: psep(0x%llx)\n", (longlong_t)args.crr_bkl_cookie);
4115 
4116 	mutex_enter(&sp_rp->sp_lock);
4117 	bkl_index = DAPLKA_GET_PSEP_INDEX(args.crr_bkl_cookie);
4118 	/*
4119 	 * make sure the backlog index is not bogus.
4120 	 */
4121 	if (bkl_index >= sp_rp->sp_backlog_size) {
4122 		DERR("cr_reject: invalid backlog index 0x%llx %d\n",
4123 		    (longlong_t)args.crr_bkl_cookie, bkl_index);
4124 		mutex_exit(&sp_rp->sp_lock);
4125 		retval = EINVAL;
4126 		goto cleanup;
4127 	}
4128 	/*
4129 	 * make sure the backlog index indeed refers
4130 	 * to a pending connection.
4131 	 */
4132 	conn = &sp_rp->sp_backlog[bkl_index];
4133 	if (conn->spcp_state != DAPLKA_SPCP_PENDING) {
4134 		DERR("cr_reject: invalid conn state %d\n",
4135 		    conn->spcp_state);
4136 		mutex_exit(&sp_rp->sp_lock);
4137 		retval = EINVAL;
4138 		goto cleanup;
4139 	}
4140 	if (conn->spcp_sid == NULL) {
4141 		DERR("cr_reject: sid == NULL\n");
4142 		mutex_exit(&sp_rp->sp_lock);
4143 		retval = EINVAL;
4144 		goto cleanup;
4145 	}
4146 	bzero(&proc_reply, sizeof (proc_reply));
4147 	sid = conn->spcp_sid;
4148 
4149 	/*
4150 	 * this clears our slot in the backlog array.
4151 	 * this slot may now be used by other pending connections.
4152 	 */
4153 	conn->spcp_sid = NULL;
4154 	conn->spcp_state = DAPLKA_SPCP_INIT;
4155 	conn->spcp_req_len = 0;
4156 
4157 	switch (args.crr_reason) {
4158 	case DAPL_IB_CM_REJ_REASON_CONSUMER_REJ:
4159 		/* results in IBT_CM_CONSUMER as the reason for reject */
4160 		proc_status = IBT_CM_REJECT;
4161 		break;
4162 	case DAPL_IB_CME_LOCAL_FAILURE:
4163 		/*FALLTHRU*/
4164 	case DAPL_IB_CME_DESTINATION_UNREACHABLE:
4165 		/* results in IBT_CM_NO_RESC as the reason for reject */
4166 		proc_status = IBT_CM_NO_RESOURCE;
4167 		break;
4168 	default:
4169 		/* unexpect reason code */
4170 		ASSERT(!"unexpected reject reason code");
4171 		proc_status = IBT_CM_NO_RESOURCE;
4172 		break;
4173 	}
4174 
4175 	mutex_exit(&sp_rp->sp_lock);
4176 
4177 	status = ibt_cm_proceed(IBT_CM_EVENT_REQ_RCV, sid, proc_status,
4178 	    &proc_reply, NULL, 0);
4179 
4180 	if (status != IBT_SUCCESS) {
4181 		DERR("cr_reject: ibt_cm_proceed returned %d\n", status);
4182 		*rvalp = (int)status;
4183 		retval = 0;
4184 	}
4185 
4186 cleanup:;
4187 	if (sp_rp != NULL) {
4188 		DAPLKA_RS_UNREF(sp_rp);
4189 	}
4190 	return (retval);
4191 }
4192 
4193 
4194 /*
4195  * daplka_sp_match is used by daplka_hash_walk for finding SPs
4196  */
4197 typedef struct daplka_sp_match_s {
4198 	uint64_t		spm_conn_qual;
4199 	daplka_sp_resource_t	*spm_sp_rp;
4200 } daplka_sp_match_t;
4201 _NOTE(SCHEME_PROTECTS_DATA("daplka", daplka_sp_match_s::spm_sp_rp))
4202 
4203 static int
daplka_sp_match(void * objp,void * arg)4204 daplka_sp_match(void *objp, void *arg)
4205 {
4206 	daplka_sp_resource_t	*sp_rp = (daplka_sp_resource_t *)objp;
4207 
4208 	ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);
4209 	if (sp_rp->sp_conn_qual ==
4210 	    ((daplka_sp_match_t *)arg)->spm_conn_qual) {
4211 		((daplka_sp_match_t *)arg)->spm_sp_rp = sp_rp;
4212 		D2("daplka_sp_match: found sp, conn_qual %016llu\n",
4213 		    (longlong_t)((daplka_sp_match_t *)arg)->spm_conn_qual);
4214 		DAPLKA_RS_REF(sp_rp);
4215 		return (1);
4216 	}
4217 	return (0);
4218 }
4219 
4220 /*
4221  * cr_handoff allows the client to handoff a connection request from
4222  * one service point to another.
4223  */
4224 /* ARGSUSED */
4225 static int
daplka_cr_handoff(daplka_ia_resource_t * ia_rp,intptr_t arg,int mode,cred_t * cred,int * rvalp)4226 daplka_cr_handoff(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
4227 	cred_t *cred, int *rvalp)
4228 {
4229 	dapl_cr_handoff_t		args;
4230 	daplka_sp_resource_t		*sp_rp = NULL, *new_sp_rp = NULL;
4231 	daplka_sp_conn_pend_t		*conn;
4232 	daplka_sp_match_t		sp_match;
4233 	ibt_cm_event_t			fake_event;
4234 	ibt_cm_status_t			cm_status;
4235 	ibt_status_t			status;
4236 	uint16_t			bkl_index;
4237 	void				*sid, *priv = NULL;
4238 	int				retval = 0, priv_len = 0;
4239 
4240 	D3("cr_handoff: entering\n");
4241 	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_cr_handoff_t),
4242 	    mode);
4243 	if (retval != 0) {
4244 		DERR("cr_handoff: copyin error %d\n", retval);
4245 		return (EFAULT);
4246 	}
4247 	/* get sp resource */
4248 	sp_rp = (daplka_sp_resource_t *)daplka_hash_lookup(&ia_rp->ia_sp_htbl,
4249 	    args.crh_sp_hkey);
4250 	if (sp_rp == NULL) {
4251 		DERR("cr_handoff: cannot find sp resource\n");
4252 		return (EINVAL);
4253 	}
4254 	ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);
4255 
4256 	/*
4257 	 * find the destination service point.
4258 	 */
4259 	sp_match.spm_conn_qual = args.crh_conn_qual;
4260 	sp_match.spm_sp_rp = NULL;
4261 	daplka_hash_walk(&daplka_global_sp_htbl, daplka_sp_match,
4262 	    (void *)&sp_match, RW_READER);
4263 
4264 	/*
4265 	 * return if we cannot find the service point
4266 	 */
4267 	if (sp_match.spm_sp_rp == NULL) {
4268 		DERR("cr_handoff: new sp not found, conn qual = %llu\n",
4269 		    (longlong_t)args.crh_conn_qual);
4270 		retval = EINVAL;
4271 		goto cleanup;
4272 	}
4273 	new_sp_rp = sp_match.spm_sp_rp;
4274 
4275 	/*
4276 	 * the spec does not discuss the security implications of this
4277 	 * function. to be safe, we currently only allow processes
4278 	 * owned by the same user to handoff connection requests
4279 	 * to each other.
4280 	 */
4281 	if (crgetruid(cred) != new_sp_rp->sp_ruid) {
4282 		DERR("cr_handoff: permission denied\n");
4283 		retval = EPERM;
4284 		goto cleanup;
4285 	}
4286 
4287 	D2("cr_handoff: psep(0x%llx)\n", (longlong_t)args.crh_bkl_cookie);
4288 
4289 	mutex_enter(&sp_rp->sp_lock);
4290 	bkl_index = DAPLKA_GET_PSEP_INDEX(args.crh_bkl_cookie);
4291 	/*
4292 	 * make sure the backlog index is not bogus.
4293 	 */
4294 	if (bkl_index >= sp_rp->sp_backlog_size) {
4295 		DERR("cr_handoff: invalid backlog index 0x%llx %d\n",
4296 		    (longlong_t)args.crh_bkl_cookie, bkl_index);
4297 		mutex_exit(&sp_rp->sp_lock);
4298 		retval = EINVAL;
4299 		goto cleanup;
4300 	}
4301 	/*
4302 	 * make sure the backlog index indeed refers
4303 	 * to a pending connection.
4304 	 */
4305 	conn = &sp_rp->sp_backlog[bkl_index];
4306 	if (conn->spcp_state != DAPLKA_SPCP_PENDING) {
4307 		DERR("cr_handoff: invalid conn state %d\n",
4308 		    conn->spcp_state);
4309 		mutex_exit(&sp_rp->sp_lock);
4310 		retval = EINVAL;
4311 		goto cleanup;
4312 	}
4313 	if (conn->spcp_sid == NULL) {
4314 		DERR("cr_handoff: sid == NULL\n");
4315 		mutex_exit(&sp_rp->sp_lock);
4316 		retval = EINVAL;
4317 		goto cleanup;
4318 	}
4319 	sid = conn->spcp_sid;
4320 	priv = NULL;
4321 	priv_len = conn->spcp_req_len;
4322 	if (priv_len > 0) {
4323 		priv = kmem_zalloc(priv_len, daplka_km_flags);
4324 		if (priv == NULL) {
4325 			mutex_exit(&sp_rp->sp_lock);
4326 			retval = ENOMEM;
4327 			goto cleanup;
4328 		}
4329 		bcopy(conn->spcp_req_data, priv, priv_len);
4330 	}
4331 	/*
4332 	 * this clears our slot in the backlog array.
4333 	 * this slot may now be used by other pending connections.
4334 	 */
4335 	conn->spcp_sid = NULL;
4336 	conn->spcp_state = DAPLKA_SPCP_INIT;
4337 	conn->spcp_req_len = 0;
4338 	mutex_exit(&sp_rp->sp_lock);
4339 
4340 	/* fill fake_event and call service_req handler */
4341 	bzero(&fake_event, sizeof (fake_event));
4342 	fake_event.cm_type = IBT_CM_EVENT_REQ_RCV;
4343 	fake_event.cm_session_id = sid;
4344 	fake_event.cm_priv_data_len = priv_len;
4345 	fake_event.cm_priv_data = priv;
4346 
4347 	cm_status = daplka_cm_service_req(new_sp_rp,
4348 	    &fake_event, NULL, priv, (ibt_priv_data_len_t)priv_len);
4349 	if (cm_status != IBT_CM_DEFER) {
4350 		ibt_cm_proceed_reply_t	proc_reply;
4351 
4352 		DERR("cr_handoff: service_req returned %d\n", cm_status);
4353 		/*
4354 		 * if for some reason cm_service_req failed, we
4355 		 * reject the connection.
4356 		 */
4357 		bzero(&proc_reply, sizeof (proc_reply));
4358 
4359 		status = ibt_cm_proceed(IBT_CM_EVENT_REQ_RCV, sid,
4360 		    IBT_CM_NO_RESOURCE, &proc_reply, NULL, 0);
4361 		if (status != IBT_SUCCESS) {
4362 			DERR("cr_handoff: ibt_cm_proceed returned %d\n",
4363 			    status);
4364 		}
4365 		*rvalp = (int)status;
4366 		retval = 0;
4367 	}
4368 
4369 cleanup:;
4370 	if (priv_len > 0 && priv != NULL) {
4371 		kmem_free(priv, priv_len);
4372 	}
4373 	if (new_sp_rp != NULL) {
4374 		DAPLKA_RS_UNREF(new_sp_rp);
4375 	}
4376 	if (sp_rp != NULL) {
4377 		DAPLKA_RS_UNREF(sp_rp);
4378 	}
4379 	D3("cr_handoff: exiting\n");
4380 	return (retval);
4381 }
4382 
4383 /*
4384  * returns a list of hca attributes
4385  */
4386 /* ARGSUSED */
4387 static int
daplka_ia_query(daplka_ia_resource_t * ia_rp,intptr_t arg,int mode,cred_t * cred,int * rvalp)4388 daplka_ia_query(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
4389 	cred_t *cred, int *rvalp)
4390 {
4391 	dapl_ia_query_t		args;
4392 	int			retval;
4393 	ibt_hca_attr_t		*hcap;
4394 
4395 	hcap = &ia_rp->ia_hca->hca_attr;
4396 
4397 	/*
4398 	 * Take the ibt_hca_attr_t and stuff them into dapl_hca_attr_t
4399 	 */
4400 	args.hca_attr.dhca_vendor_id = hcap->hca_vendor_id;
4401 	args.hca_attr.dhca_device_id = hcap->hca_device_id;
4402 	args.hca_attr.dhca_version_id = hcap->hca_version_id;
4403 	args.hca_attr.dhca_max_chans = hcap->hca_max_chans;
4404 	args.hca_attr.dhca_max_chan_sz = hcap->hca_max_chan_sz;
4405 	args.hca_attr.dhca_max_sgl = hcap->hca_max_sgl;
4406 	args.hca_attr.dhca_max_cq = hcap->hca_max_cq;
4407 	args.hca_attr.dhca_max_cq_sz = hcap->hca_max_cq_sz;
4408 	args.hca_attr.dhca_max_memr = hcap->hca_max_memr;
4409 	args.hca_attr.dhca_max_memr_len = hcap->hca_max_memr_len;
4410 	args.hca_attr.dhca_max_mem_win = hcap->hca_max_mem_win;
4411 	args.hca_attr.dhca_max_rdma_in_chan = hcap->hca_max_rdma_in_chan;
4412 	args.hca_attr.dhca_max_rdma_out_chan = hcap->hca_max_rdma_out_chan;
4413 	args.hca_attr.dhca_max_partitions  = hcap->hca_max_partitions;
4414 	args.hca_attr.dhca_nports  = hcap->hca_nports;
4415 	args.hca_attr.dhca_node_guid  = hcap->hca_node_guid;
4416 	args.hca_attr.dhca_max_pd = hcap->hca_max_pd;
4417 	args.hca_attr.dhca_max_srqs = hcap->hca_max_srqs;
4418 	args.hca_attr.dhca_max_srqs_sz = hcap->hca_max_srqs_sz;
4419 	args.hca_attr.dhca_max_srq_sgl = hcap->hca_max_srq_sgl;
4420 
4421 	retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_ia_query_t),
4422 	    mode);
4423 	if (retval != 0) {
4424 		DERR("ia_query: copyout error %d\n", retval);
4425 		return (EFAULT);
4426 	}
4427 	return (0);
4428 }
4429 
4430 /*
4431  * This routine is passed to hash walk in the daplka_pre_mr_cleanup_callback,
4432  * it frees the mw embedded in the mw resource object.
4433  */
4434 
4435 /* ARGSUSED */
4436 static int
daplka_mr_cb_freemw(void * objp,void * arg)4437 daplka_mr_cb_freemw(void *objp, void *arg)
4438 {
4439 	daplka_mw_resource_t	*mw_rp = (daplka_mw_resource_t *)objp;
4440 	ibt_mw_hdl_t		mw_hdl;
4441 	ibt_status_t		status;
4442 
4443 	D3("mr_cb_freemw: entering, mw_rp 0x%p\n", mw_rp);
4444 	DAPLKA_RS_REF(mw_rp);
4445 
4446 	mutex_enter(&mw_rp->mw_lock);
4447 	mw_hdl = mw_rp->mw_hdl;
4448 	/*
4449 	 * we set mw_hdl to NULL so it won't get freed again
4450 	 */
4451 	mw_rp->mw_hdl = NULL;
4452 	mutex_exit(&mw_rp->mw_lock);
4453 
4454 	if (mw_hdl != NULL) {
4455 		status = daplka_ibt_free_mw(mw_rp, mw_rp->mw_hca_hdl, mw_hdl);
4456 		if (status != IBT_SUCCESS) {
4457 			DERR("mr_cb_freemw: ibt_free_mw returned %d\n", status);
4458 		}
4459 		D3("mr_cb_freemw: mw freed\n");
4460 	}
4461 
4462 	DAPLKA_RS_UNREF(mw_rp);
4463 	return (0);
4464 }
4465 
4466 /*
4467  * This routine is called from HCA driver's umem lock undo callback
4468  * when the memory associated with an MR is being unmapped. In this callback
4469  * we free all the MW associated with the IA and post an unaffiliated
4470  * async event to tell the app that there was a catastrophic event.
4471  * This allows the HCA to deregister the MR in its callback processing.
4472  */
4473 static void
daplka_pre_mr_cleanup_callback(void * arg1,void * arg2)4474 daplka_pre_mr_cleanup_callback(void *arg1, void *arg2 /*ARGSUSED*/)
4475 {
4476 	daplka_mr_resource_t	*mr_rp;
4477 	daplka_ia_resource_t	*ia_rp;
4478 #ifdef	_THROW_ASYNC_EVENT_FROM_MRUNLOCKCB
4479 	ibt_async_event_t	event;
4480 	ibt_hca_attr_t		*hca_attrp;
4481 #endif
4482 	minor_t			rnum;
4483 
4484 	mr_rp = (daplka_mr_resource_t *)arg1;
4485 	rnum = DAPLKA_RS_RNUM(mr_rp);
4486 	daplka_shared_mr_free(mr_rp);
4487 
4488 	ia_rp = (daplka_ia_resource_t *)daplka_resource_lookup(rnum);
4489 	if (ia_rp == NULL) {
4490 		DERR("daplka_mr_unlock_callback: resource not found, rnum %d\n",
4491 		    rnum);
4492 		return;
4493 	}
4494 
4495 	DERR("daplka_mr_unlock_callback: resource(%p) rnum(%d)\n", ia_rp, rnum);
4496 
4497 	mutex_enter(&ia_rp->ia_lock);
4498 	/*
4499 	 * MW is being alloced OR MW freeze has already begun. In
4500 	 * both these cases we wait for that to complete before
4501 	 * continuing.
4502 	 */
4503 	while ((ia_rp->ia_state == DAPLKA_IA_MW_ALLOC_IN_PROGRESS) ||
4504 	    (ia_rp->ia_state == DAPLKA_IA_MW_FREEZE_IN_PROGRESS)) {
4505 		cv_wait(&ia_rp->ia_cv, &ia_rp->ia_lock);
4506 	}
4507 
4508 	switch (ia_rp->ia_state) {
4509 	case DAPLKA_IA_INIT:
4510 		ia_rp->ia_state = DAPLKA_IA_MW_FREEZE_IN_PROGRESS;
4511 		mutex_exit(&ia_rp->ia_lock);
4512 		break;
4513 	case DAPLKA_IA_MW_FROZEN:
4514 		/* the mw on this ia have been freed */
4515 		D2("daplka_mr_unlock_callback: ia_state %d nothing to do\n",
4516 		    ia_rp->ia_state);
4517 		mutex_exit(&ia_rp->ia_lock);
4518 		goto cleanup;
4519 	default:
4520 		ASSERT(!"daplka_mr_unlock_callback: IA state invalid");
4521 		DERR("daplka_mr_unlock_callback: invalid ia_state %d\n",
4522 		    ia_rp->ia_state);
4523 		mutex_exit(&ia_rp->ia_lock);
4524 		goto cleanup;
4525 	}
4526 
4527 	/*
4528 	 * Walk the mw hash table and free the mws. Acquire a writer
4529 	 * lock since we don't want anyone else traversing this tree
4530 	 * while we are freeing the MW.
4531 	 */
4532 	daplka_hash_walk(&ia_rp->ia_mw_htbl, daplka_mr_cb_freemw, NULL,
4533 	    RW_WRITER);
4534 
4535 	mutex_enter(&ia_rp->ia_lock);
4536 	ASSERT(ia_rp->ia_state == DAPLKA_IA_MW_FREEZE_IN_PROGRESS);
4537 	ia_rp->ia_state = DAPLKA_IA_MW_FROZEN;
4538 	cv_broadcast(&ia_rp->ia_cv);
4539 	mutex_exit(&ia_rp->ia_lock);
4540 
4541 	/*
4542 	 * Currently commented out because Oracle skgxp is incapable
4543 	 * of handling async events correctly.
4544 	 */
4545 #ifdef	_THROW_ASYNC_EVENT_FROM_MRUNLOCKCB
4546 	/*
4547 	 * Enqueue an unaffiliated async error event to indicate this
4548 	 * IA has encountered a problem that caused the MW to freed up
4549 	 */
4550 
4551 	/* Create a fake event, only relevant field is the hca_guid */
4552 	bzero(&event, sizeof (ibt_async_event_t));
4553 	hca_attrp = &ia_rp->ia_hca->hca_attr;
4554 	event.ev_hca_guid = hca_attrp->hca_node_guid;
4555 
4556 	daplka_async_event_create(IBT_ERROR_LOCAL_CATASTROPHIC, &event, 0,
4557 	    ia_rp);
4558 #endif	/* _THROW_ASYNC_EVENT_FROM_MRUNLOCKCB */
4559 
4560 cleanup:;
4561 	D2("daplka_mr_unlock_callback: resource(%p) done\n", ia_rp);
4562 	DAPLKA_RS_UNREF(ia_rp);
4563 }
4564 
4565 /*
4566  * registers a memory region.
4567  * memory locking will be done by the HCA driver.
4568  */
4569 /* ARGSUSED */
4570 static int
daplka_mr_register(daplka_ia_resource_t * ia_rp,intptr_t arg,int mode,cred_t * cred,int * rvalp)4571 daplka_mr_register(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
4572 	cred_t *cred, int *rvalp)
4573 {
4574 	boolean_t			inserted = B_FALSE;
4575 	daplka_mr_resource_t		*mr_rp;
4576 	daplka_pd_resource_t		*pd_rp;
4577 	dapl_mr_register_t		args;
4578 	ibt_mr_data_in_t		mr_cb_data_in;
4579 	uint64_t			mr_hkey = 0;
4580 	ibt_status_t			status;
4581 	int				retval;
4582 
4583 	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mr_register_t),
4584 	    mode);
4585 	if (retval != 0) {
4586 		DERR("mr_register: copyin error %d\n", retval);
4587 		return (EINVAL);
4588 	}
4589 	mr_rp = kmem_zalloc(sizeof (daplka_mr_resource_t), daplka_km_flags);
4590 	if (mr_rp == NULL) {
4591 		DERR("mr_register: cannot allocate mr resource\n");
4592 		return (ENOMEM);
4593 	}
4594 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr_rp))
4595 	DAPLKA_RS_INIT(mr_rp, DAPL_TYPE_MR,
4596 	    DAPLKA_RS_RNUM(ia_rp), daplka_mr_destroy);
4597 
4598 	mutex_init(&mr_rp->mr_lock, NULL, MUTEX_DRIVER, NULL);
4599 	mr_rp->mr_hca = ia_rp->ia_hca;
4600 	mr_rp->mr_hca_hdl = ia_rp->ia_hca_hdl;
4601 	mr_rp->mr_next = NULL;
4602 	mr_rp->mr_shared_mr = NULL;
4603 
4604 	/* get pd handle */
4605 	pd_rp = (daplka_pd_resource_t *)
4606 	    daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.mr_pd_hkey);
4607 	if (pd_rp == NULL) {
4608 		DERR("mr_register: cannot find pd resource\n");
4609 		retval = EINVAL;
4610 		goto cleanup;
4611 	}
4612 	ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
4613 	mr_rp->mr_pd_res = pd_rp;
4614 
4615 	mr_rp->mr_attr.mr_vaddr = args.mr_vaddr;
4616 	mr_rp->mr_attr.mr_len = args.mr_len;
4617 	mr_rp->mr_attr.mr_as = curproc->p_as;
4618 	mr_rp->mr_attr.mr_flags = args.mr_flags | IBT_MR_NOSLEEP;
4619 
4620 	D3("mr_register: mr_vaddr %p, mr_len %llu, mr_flags 0x%x\n",
4621 	    (void *)(uintptr_t)mr_rp->mr_attr.mr_vaddr,
4622 	    (longlong_t)mr_rp->mr_attr.mr_len,
4623 	    mr_rp->mr_attr.mr_flags);
4624 
4625 	status = daplka_ibt_register_mr(mr_rp, ia_rp->ia_hca_hdl,
4626 	    mr_rp->mr_pd_res->pd_hdl, &mr_rp->mr_attr, &mr_rp->mr_hdl,
4627 	    &mr_rp->mr_desc);
4628 
4629 	if (status != IBT_SUCCESS) {
4630 		DERR("mr_register: ibt_register_mr error %d\n", status);
4631 		*rvalp = (int)status;
4632 		retval = 0;
4633 		goto cleanup;
4634 	}
4635 
4636 	mr_cb_data_in.mr_rev = IBT_MR_DATA_IN_IF_VERSION;
4637 	mr_cb_data_in.mr_func = daplka_pre_mr_cleanup_callback;
4638 	mr_cb_data_in.mr_arg1 = (void *)mr_rp;
4639 	mr_cb_data_in.mr_arg2 = NULL;
4640 
4641 	/* Pass the service driver mr cleanup handler to the hca driver */
4642 	status = ibt_ci_data_in(ia_rp->ia_hca_hdl,
4643 	    IBT_CI_NO_FLAGS, IBT_HDL_MR, (void *)mr_rp->mr_hdl,
4644 	    &mr_cb_data_in, sizeof (mr_cb_data_in));
4645 
4646 	if (status != IBT_SUCCESS) {
4647 		DERR("mr_register: ibt_ci_data_in error(%d) ver(%d)",
4648 		    status, mr_cb_data_in.mr_rev);
4649 		*rvalp = (int)status;
4650 		retval = 0;
4651 		goto cleanup;
4652 	}
4653 
4654 	/* insert into mr hash table */
4655 	retval = daplka_hash_insert(&ia_rp->ia_mr_htbl,
4656 	    &mr_hkey, (void *)mr_rp);
4657 	if (retval != 0) {
4658 		DERR("mr_register: cannot insert mr resource into mr_htbl\n");
4659 		goto cleanup;
4660 	}
4661 	inserted = B_TRUE;
4662 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*mr_rp))
4663 
4664 	args.mr_lkey = mr_rp->mr_desc.md_lkey;
4665 	args.mr_rkey = mr_rp->mr_desc.md_rkey;
4666 	args.mr_hkey = mr_hkey;
4667 
4668 	retval = ddi_copyout((void *)&args, (void *)arg,
4669 	    sizeof (dapl_mr_register_t), mode);
4670 	if (retval != 0) {
4671 		DERR("mr_register: copyout error %d\n", retval);
4672 		retval = EFAULT;
4673 		goto cleanup;
4674 	}
4675 	return (0);
4676 
4677 cleanup:;
4678 	if (inserted) {
4679 		daplka_mr_resource_t *free_rp = NULL;
4680 
4681 		(void) daplka_hash_remove(&ia_rp->ia_mr_htbl, mr_hkey,
4682 		    (void **)&free_rp);
4683 		if (free_rp != mr_rp) {
4684 			DERR("mr_register: cannot remove mr from hash table\n");
4685 			/*
4686 			 * we can only get here if another thread
4687 			 * has completed the cleanup in mr_deregister
4688 			 */
4689 			return (retval);
4690 		}
4691 	}
4692 	DAPLKA_RS_UNREF(mr_rp);
4693 	return (retval);
4694 }
4695 
4696 /*
4697  * registers a shared memory region.
4698  * the client calls this function with the intention to share the memory
4699  * region with other clients. it is assumed that, prior to calling this
4700  * function, the client(s) are already sharing parts of their address
4701  * space using a mechanism such as SYSV shared memory. the first client
4702  * that calls this function will create and insert a daplka_shared_mr_t
4703  * object into the global daplka_shared_mr_tree. this shared mr object
4704  * will be identified by a unique 40-byte key and will maintain a list
4705  * of mr resources. every time this function gets called with the same
4706  * 40-byte key, a new mr resource (containing a new mr handle generated
4707  * by ibt_register_mr or ibt_register_shared_mr) is created and inserted
4708  * into this list. similarly, every time a shared mr gets deregistered
4709  * or invalidated by a callback, the mr resource gets removed from this
4710  * list. the shared mr object has a reference count. when it drops to
4711  * zero, the shared mr object will be removed from the global avl tree
4712  * and be freed.
4713  */
4714 /* ARGSUSED */
4715 static int
daplka_mr_register_shared(daplka_ia_resource_t * ia_rp,intptr_t arg,int mode,cred_t * cred,int * rvalp)4716 daplka_mr_register_shared(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
4717 	cred_t *cred, int *rvalp)
4718 {
4719 	dapl_mr_register_shared_t	args;
4720 	daplka_shared_mr_t		*smrp = NULL;
4721 	daplka_shared_mr_t		tmp_smr;
4722 	ibt_mr_data_in_t		mr_cb_data_in;
4723 	avl_index_t			where;
4724 	boolean_t			inserted = B_FALSE;
4725 	daplka_mr_resource_t		*mr_rp = NULL;
4726 	daplka_pd_resource_t		*pd_rp;
4727 	uint64_t			mr_hkey = 0;
4728 	ibt_status_t			status;
4729 	int				retval;
4730 
4731 	retval = ddi_copyin((void *)arg, &args,
4732 	    sizeof (dapl_mr_register_shared_t), mode);
4733 	if (retval != 0) {
4734 		DERR("mr_register_shared: copyin error %d\n", retval);
4735 		return (EINVAL);
4736 	}
4737 
4738 	mutex_enter(&daplka_shared_mr_lock);
4739 	/*
4740 	 * find smrp from the global avl tree.
4741 	 * the 40-byte key is used as the lookup key.
4742 	 */
4743 	tmp_smr.smr_cookie = args.mrs_shm_cookie;
4744 	smrp = (daplka_shared_mr_t *)
4745 	    avl_find(&daplka_shared_mr_tree, &tmp_smr, &where);
4746 	if (smrp != NULL) {
4747 		D2("mr_register_shared: smrp 0x%p, found cookie:\n"
4748 		    "0x%016llx%016llx%016llx%016llx%016llx\n", smrp,
4749 		    (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[4],
4750 		    (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[3],
4751 		    (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[2],
4752 		    (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[1],
4753 		    (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[0]);
4754 
4755 		/*
4756 		 * if the smrp exists, other threads could still be
4757 		 * accessing it. we wait until they are done before
4758 		 * we continue.
4759 		 */
4760 		smrp->smr_refcnt++;
4761 		while (smrp->smr_state == DAPLKA_SMR_TRANSITIONING) {
4762 			D2("mr_register_shared: smrp 0x%p, "
4763 			    "waiting in transitioning state, refcnt %d\n",
4764 			    smrp, smrp->smr_refcnt);
4765 			cv_wait(&smrp->smr_cv, &daplka_shared_mr_lock);
4766 		}
4767 		ASSERT(smrp->smr_state == DAPLKA_SMR_READY);
4768 		D2("mr_register_shared: smrp 0x%p, refcnt %d, ready\n",
4769 		    smrp, smrp->smr_refcnt);
4770 
4771 		/*
4772 		 * we set smr_state to TRANSITIONING to temporarily
4773 		 * prevent other threads from trying to access smrp.
4774 		 */
4775 		smrp->smr_state = DAPLKA_SMR_TRANSITIONING;
4776 	} else {
4777 		D2("mr_register_shared: cannot find cookie:\n"
4778 		    "0x%016llx%016llx%016llx%016llx%016llx\n",
4779 		    (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[4],
4780 		    (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[3],
4781 		    (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[2],
4782 		    (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[1],
4783 		    (longlong_t)tmp_smr.smr_cookie.mc_uint_arr[0]);
4784 
4785 		/*
4786 		 * if we cannot find smrp, we need to create and
4787 		 * insert one into daplka_shared_mr_tree
4788 		 */
4789 		smrp = kmem_zalloc(sizeof (daplka_shared_mr_t),
4790 		    daplka_km_flags);
4791 		if (smrp == NULL) {
4792 			retval = ENOMEM;
4793 			mutex_exit(&daplka_shared_mr_lock);
4794 			goto cleanup;
4795 		}
4796 		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*smrp))
4797 		smrp->smr_refcnt = 1;
4798 		smrp->smr_cookie = args.mrs_shm_cookie;
4799 		smrp->smr_state = DAPLKA_SMR_TRANSITIONING;
4800 		smrp->smr_mr_list = NULL;
4801 		cv_init(&smrp->smr_cv, NULL, CV_DRIVER, NULL);
4802 		avl_insert(&daplka_shared_mr_tree, smrp, where);
4803 		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*smrp))
4804 	}
4805 	mutex_exit(&daplka_shared_mr_lock);
4806 
4807 	mr_rp = kmem_zalloc(sizeof (daplka_mr_resource_t), daplka_km_flags);
4808 	if (mr_rp == NULL) {
4809 		DERR("mr_register_shared: cannot allocate mr resource\n");
4810 		goto cleanup;
4811 	}
4812 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr_rp))
4813 	DAPLKA_RS_INIT(mr_rp, DAPL_TYPE_MR,
4814 	    DAPLKA_RS_RNUM(ia_rp), daplka_mr_destroy);
4815 
4816 	mutex_init(&mr_rp->mr_lock, NULL, MUTEX_DRIVER, NULL);
4817 	mr_rp->mr_hca = ia_rp->ia_hca;
4818 	mr_rp->mr_hca_hdl = ia_rp->ia_hca_hdl;
4819 	mr_rp->mr_next = NULL;
4820 	mr_rp->mr_shared_mr = NULL;
4821 
4822 	/* get pd handle */
4823 	pd_rp = (daplka_pd_resource_t *)
4824 	    daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.mrs_pd_hkey);
4825 	if (pd_rp == NULL) {
4826 		DERR("mr_register_shared: cannot find pd resource\n");
4827 		retval = EINVAL;
4828 		goto cleanup;
4829 	}
4830 	ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
4831 	mr_rp->mr_pd_res = pd_rp;
4832 
4833 	mr_rp->mr_attr.mr_vaddr = args.mrs_vaddr;
4834 	mr_rp->mr_attr.mr_len = args.mrs_len;
4835 	mr_rp->mr_attr.mr_flags = args.mrs_flags | IBT_MR_NOSLEEP;
4836 	mr_rp->mr_attr.mr_as = curproc->p_as;
4837 
4838 	D2("mr_register_shared: mr_vaddr 0x%p, mr_len %llu, "
4839 	    "mr_flags 0x%x, mr_as 0x%p, mr_exists %d, smrp 0x%p\n",
4840 	    (void *)(uintptr_t)mr_rp->mr_attr.mr_vaddr,
4841 	    (longlong_t)mr_rp->mr_attr.mr_len,
4842 	    mr_rp->mr_attr.mr_flags, mr_rp->mr_attr.mr_as,
4843 	    (int)(smrp->smr_mr_list != NULL), smrp);
4844 
4845 	/*
4846 	 * since we are in TRANSITIONING state, we are guaranteed
4847 	 * that we have exclusive access to smr_mr_list.
4848 	 */
4849 	if (smrp->smr_mr_list != NULL) {
4850 		ibt_smr_attr_t	mem_sattr;
4851 
4852 		/*
4853 		 * a non-null smr_mr_list indicates that someone
4854 		 * else has already inserted an mr_resource into
4855 		 * smr_mr_list. we use the mr_handle from the first
4856 		 * element as an arg to ibt_register_shared_mr.
4857 		 */
4858 		mem_sattr.mr_vaddr = smrp->smr_mr_list->mr_desc.md_vaddr;
4859 		mem_sattr.mr_flags = mr_rp->mr_attr.mr_flags;
4860 
4861 		D2("mr_register_shared: mem_sattr vaddr 0x%p flags 0x%x\n",
4862 		    (void *)(uintptr_t)mem_sattr.mr_vaddr, mem_sattr.mr_flags);
4863 		status = daplka_ibt_register_shared_mr(mr_rp, ia_rp->ia_hca_hdl,
4864 		    smrp->smr_mr_list->mr_hdl, mr_rp->mr_pd_res->pd_hdl,
4865 		    &mem_sattr, &mr_rp->mr_hdl, &mr_rp->mr_desc);
4866 
4867 		if (status != IBT_SUCCESS) {
4868 			DERR("mr_register_shared: "
4869 			    "ibt_register_shared_mr error %d\n", status);
4870 			*rvalp = (int)status;
4871 			retval = 0;
4872 			goto cleanup;
4873 		}
4874 	} else {
4875 		/*
4876 		 * an mr does not exist yet. we need to create one
4877 		 * using ibt_register_mr.
4878 		 */
4879 		status = daplka_ibt_register_mr(mr_rp, ia_rp->ia_hca_hdl,
4880 		    mr_rp->mr_pd_res->pd_hdl, &mr_rp->mr_attr,
4881 		    &mr_rp->mr_hdl, &mr_rp->mr_desc);
4882 
4883 		if (status != IBT_SUCCESS) {
4884 			DERR("mr_register_shared: "
4885 			    "ibt_register_mr error %d\n", status);
4886 			*rvalp = (int)status;
4887 			retval = 0;
4888 			goto cleanup;
4889 		}
4890 	}
4891 
4892 	mr_cb_data_in.mr_rev = IBT_MR_DATA_IN_IF_VERSION;
4893 	mr_cb_data_in.mr_func = daplka_pre_mr_cleanup_callback;
4894 	mr_cb_data_in.mr_arg1 = (void *)mr_rp;
4895 	mr_cb_data_in.mr_arg2 = NULL;
4896 
4897 	/* Pass the service driver mr cleanup handler to the hca driver */
4898 	status = ibt_ci_data_in(ia_rp->ia_hca_hdl,
4899 	    IBT_CI_NO_FLAGS, IBT_HDL_MR, (void *)mr_rp->mr_hdl,
4900 	    &mr_cb_data_in, sizeof (mr_cb_data_in));
4901 
4902 	if (status != IBT_SUCCESS) {
4903 		DERR("mr_register_shared: ibt_ci_data_in error(%d) ver(%d)",
4904 		    status, mr_cb_data_in.mr_rev);
4905 		*rvalp = (int)status;
4906 		retval = 0;
4907 		goto cleanup;
4908 	}
4909 
4910 	/*
4911 	 * we bump reference of mr_rp and enqueue it onto smrp.
4912 	 */
4913 	DAPLKA_RS_REF(mr_rp);
4914 	mr_rp->mr_next = smrp->smr_mr_list;
4915 	smrp->smr_mr_list = mr_rp;
4916 	mr_rp->mr_shared_mr = smrp;
4917 
4918 	/* insert into mr hash table */
4919 	retval = daplka_hash_insert(&ia_rp->ia_mr_htbl,
4920 	    &mr_hkey, (void *)mr_rp);
4921 	if (retval != 0) {
4922 		DERR("mr_register_shared: cannot insert mr resource\n");
4923 		goto cleanup;
4924 	}
4925 	inserted = B_TRUE;
4926 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*mr_rp))
4927 
4928 	/*
4929 	 * at this point, there are two references to our mr resource.
4930 	 * one is kept in ia_mr_htbl. the other is kept in the list
4931 	 * within this shared mr object (smrp). when we deregister this
4932 	 * mr or when a callback invalidates this mr, the reference kept
4933 	 * by this shared mr object will be removed.
4934 	 */
4935 
4936 	args.mrs_lkey = mr_rp->mr_desc.md_lkey;
4937 	args.mrs_rkey = mr_rp->mr_desc.md_rkey;
4938 	args.mrs_hkey = mr_hkey;
4939 
4940 	retval = ddi_copyout((void *)&args, (void *)arg,
4941 	    sizeof (dapl_mr_register_shared_t), mode);
4942 	if (retval != 0) {
4943 		DERR("mr_register_shared: copyout error %d\n", retval);
4944 		retval = EFAULT;
4945 		goto cleanup;
4946 	}
4947 
4948 	/*
4949 	 * set the state to READY to allow others to continue
4950 	 */
4951 	mutex_enter(&daplka_shared_mr_lock);
4952 	smrp->smr_state = DAPLKA_SMR_READY;
4953 	cv_broadcast(&smrp->smr_cv);
4954 	mutex_exit(&daplka_shared_mr_lock);
4955 	return (0);
4956 
4957 cleanup:;
4958 	if (inserted) {
4959 		daplka_mr_resource_t *free_rp = NULL;
4960 
4961 		(void) daplka_hash_remove(&ia_rp->ia_mr_htbl, mr_hkey,
4962 		    (void **)&free_rp);
4963 		if (free_rp != mr_rp) {
4964 			DERR("mr_register_shared: "
4965 			    "cannot remove mr from hash table\n");
4966 			/*
4967 			 * we can only get here if another thread
4968 			 * has completed the cleanup in mr_deregister
4969 			 */
4970 			return (retval);
4971 		}
4972 	}
4973 	if (smrp != NULL) {
4974 		mutex_enter(&daplka_shared_mr_lock);
4975 		ASSERT(smrp->smr_refcnt > 0);
4976 		smrp->smr_refcnt--;
4977 
4978 		if (smrp->smr_refcnt == 0) {
4979 			DERR("mr_register_shared: freeing smrp 0x%p\n", smrp);
4980 			avl_remove(&daplka_shared_mr_tree, smrp);
4981 			_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*smrp))
4982 			if (smrp->smr_mr_list != NULL) {
4983 				/*
4984 				 * the refcnt is 0. if there is anything
4985 				 * left on the list, it must be ours.
4986 				 */
4987 				_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr_rp))
4988 				ASSERT(smrp->smr_mr_list == mr_rp);
4989 				DAPLKA_RS_UNREF(mr_rp);
4990 				smrp->smr_mr_list = NULL;
4991 				ASSERT(mr_rp->mr_shared_mr == smrp);
4992 				mr_rp->mr_shared_mr = NULL;
4993 				ASSERT(mr_rp->mr_next == NULL);
4994 			}
4995 			smrp->smr_state = DAPLKA_SMR_FREED;
4996 			cv_destroy(&smrp->smr_cv);
4997 			kmem_free(smrp, sizeof (daplka_shared_mr_t));
4998 		} else {
4999 			DERR("mr_register_shared: resetting smr_state "
5000 			    "smrp 0x%p, %d waiters remain\n", smrp,
5001 			    smrp->smr_refcnt);
5002 			ASSERT(smrp->smr_state == DAPLKA_SMR_TRANSITIONING);
5003 			if (smrp->smr_mr_list != NULL && mr_rp != NULL) {
5004 				daplka_mr_resource_t	**mpp;
5005 
5006 				/*
5007 				 * search and remove mr_rp from smr_mr_list
5008 				 */
5009 				_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr_rp))
5010 				mpp = &smrp->smr_mr_list;
5011 				while (*mpp != NULL) {
5012 					if (*mpp == mr_rp) {
5013 						*mpp = (*mpp)->mr_next;
5014 						DAPLKA_RS_UNREF(mr_rp);
5015 						ASSERT(mr_rp->mr_shared_mr ==
5016 						    smrp);
5017 						mr_rp->mr_shared_mr = NULL;
5018 						mr_rp->mr_next = NULL;
5019 						break;
5020 					}
5021 					mpp = &(*mpp)->mr_next;
5022 				}
5023 			}
5024 			/*
5025 			 * note that smr_state == READY does not necessarily
5026 			 * mean that smr_mr_list is non empty. for this case,
5027 			 * we are doing cleanup because of a failure. we set
5028 			 * the state to READY to allow other threads to
5029 			 * continue.
5030 			 */
5031 			smrp->smr_state = DAPLKA_SMR_READY;
5032 			cv_broadcast(&smrp->smr_cv);
5033 		}
5034 		mutex_exit(&daplka_shared_mr_lock);
5035 	}
5036 	if (mr_rp != NULL) {
5037 		DAPLKA_RS_UNREF(mr_rp);
5038 	}
5039 	return (retval);
5040 }
5041 
5042 /*
5043  * registers a memory region using the attributes of an
5044  * existing region.
5045  */
5046 /* ARGSUSED */
5047 static int
daplka_mr_register_lmr(daplka_ia_resource_t * ia_rp,intptr_t arg,int mode,cred_t * cred,int * rvalp)5048 daplka_mr_register_lmr(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5049 	cred_t *cred, int *rvalp)
5050 {
5051 	boolean_t 			inserted = B_FALSE;
5052 	dapl_mr_register_lmr_t		args;
5053 	ibt_mr_data_in_t		mr_cb_data_in;
5054 	daplka_mr_resource_t		*orig_mr_rp = NULL;
5055 	daplka_mr_resource_t		*mr_rp;
5056 	ibt_smr_attr_t			mem_sattr;
5057 	uint64_t			mr_hkey = 0;
5058 	ibt_status_t			status;
5059 	int				retval;
5060 
5061 	retval = ddi_copyin((void *)arg, &args,
5062 	    sizeof (dapl_mr_register_lmr_t), mode);
5063 	if (retval != 0) {
5064 		DERR("mr_register_lmr: copyin error %d\n", retval);
5065 		return (EINVAL);
5066 	}
5067 	orig_mr_rp = (daplka_mr_resource_t *)
5068 	    daplka_hash_lookup(&ia_rp->ia_mr_htbl, args.mrl_orig_hkey);
5069 	if (orig_mr_rp == NULL) {
5070 		DERR("mr_register_lmr: cannot find mr resource\n");
5071 		return (EINVAL);
5072 	}
5073 	ASSERT(DAPLKA_RS_TYPE(orig_mr_rp) == DAPL_TYPE_MR);
5074 
5075 	mr_rp = kmem_zalloc(sizeof (daplka_mr_resource_t), daplka_km_flags);
5076 	if (mr_rp == NULL) {
5077 		DERR("mr_register_lmr: cannot allocate mr resource\n");
5078 		retval = ENOMEM;
5079 		goto cleanup;
5080 	}
5081 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr_rp))
5082 	DAPLKA_RS_INIT(mr_rp, DAPL_TYPE_MR,
5083 	    DAPLKA_RS_RNUM(ia_rp), daplka_mr_destroy);
5084 
5085 	mutex_init(&mr_rp->mr_lock, NULL, MUTEX_DRIVER, NULL);
5086 	mr_rp->mr_hca = ia_rp->ia_hca;
5087 	mr_rp->mr_hca_hdl = ia_rp->ia_hca_hdl;
5088 	mr_rp->mr_next = NULL;
5089 	mr_rp->mr_shared_mr = NULL;
5090 
5091 	DAPLKA_RS_REF(orig_mr_rp->mr_pd_res);
5092 	mr_rp->mr_pd_res = orig_mr_rp->mr_pd_res;
5093 	mr_rp->mr_attr = orig_mr_rp->mr_attr;
5094 
5095 	/* Pass the IO addr that was returned while allocating the orig MR */
5096 	mem_sattr.mr_vaddr = orig_mr_rp->mr_desc.md_vaddr;
5097 	mem_sattr.mr_flags = args.mrl_flags | IBT_MR_NOSLEEP;
5098 
5099 	status = daplka_ibt_register_shared_mr(mr_rp, ia_rp->ia_hca_hdl,
5100 	    orig_mr_rp->mr_hdl, mr_rp->mr_pd_res->pd_hdl, &mem_sattr,
5101 	    &mr_rp->mr_hdl, &mr_rp->mr_desc);
5102 
5103 	if (status != IBT_SUCCESS) {
5104 		DERR("mr_register_lmr: ibt_register_shared_mr error %d\n",
5105 		    status);
5106 		*rvalp = (int)status;
5107 		retval = 0;
5108 		goto cleanup;
5109 	}
5110 
5111 	mr_cb_data_in.mr_rev = IBT_MR_DATA_IN_IF_VERSION;
5112 	mr_cb_data_in.mr_func = daplka_pre_mr_cleanup_callback;
5113 	mr_cb_data_in.mr_arg1 = (void *)mr_rp;
5114 	mr_cb_data_in.mr_arg2 = NULL;
5115 
5116 	/* Pass the service driver mr cleanup handler to the hca driver */
5117 	status = ibt_ci_data_in(ia_rp->ia_hca_hdl,
5118 	    IBT_CI_NO_FLAGS, IBT_HDL_MR, (void *)mr_rp->mr_hdl,
5119 	    &mr_cb_data_in, sizeof (mr_cb_data_in));
5120 
5121 	if (status != IBT_SUCCESS) {
5122 		DERR("mr_register_lmr: ibt_ci_data_in error(%d) ver(%d)",
5123 		    status, mr_cb_data_in.mr_rev);
5124 		*rvalp = (int)status;
5125 		retval = 0;
5126 		goto cleanup;
5127 	}
5128 	mr_rp->mr_attr.mr_len = orig_mr_rp->mr_attr.mr_len;
5129 	mr_rp->mr_attr.mr_flags = mem_sattr.mr_flags;
5130 
5131 	/* insert into mr hash table */
5132 	retval = daplka_hash_insert(&ia_rp->ia_mr_htbl, &mr_hkey,
5133 	    (void *)mr_rp);
5134 	if (retval != 0) {
5135 		DERR("mr_register: cannot insert mr resource into mr_htbl\n");
5136 		goto cleanup;
5137 	}
5138 	inserted = B_TRUE;
5139 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*mr_rp))
5140 
5141 	args.mrl_lkey = mr_rp->mr_desc.md_lkey;
5142 	args.mrl_rkey = mr_rp->mr_desc.md_rkey;
5143 	args.mrl_hkey = mr_hkey;
5144 
5145 	retval = ddi_copyout((void *)&args, (void *)arg,
5146 	    sizeof (dapl_mr_register_lmr_t), mode);
5147 	if (retval != 0) {
5148 		DERR("mr_register_lmr: copyout error %d\n", retval);
5149 		retval = EFAULT;
5150 		goto cleanup;
5151 	}
5152 	if (orig_mr_rp != NULL) {
5153 		DAPLKA_RS_UNREF(orig_mr_rp);
5154 	}
5155 	return (0);
5156 
5157 cleanup:;
5158 	if (inserted) {
5159 		daplka_mr_resource_t *free_rp = NULL;
5160 
5161 		(void) daplka_hash_remove(&ia_rp->ia_mr_htbl, mr_hkey,
5162 		    (void **)&free_rp);
5163 		if (free_rp != mr_rp) {
5164 			DERR("mr_register: cannot remove mr from hash table\n");
5165 			/*
5166 			 * we can only get here if another thread
5167 			 * has completed the cleanup in mr_deregister
5168 			 */
5169 			return (retval);
5170 		}
5171 	}
5172 	if (orig_mr_rp != NULL) {
5173 		DAPLKA_RS_UNREF(orig_mr_rp);
5174 	}
5175 	if (mr_rp != NULL) {
5176 		DAPLKA_RS_UNREF(mr_rp);
5177 	}
5178 	return (retval);
5179 }
5180 
5181 /*
5182  * this function is called by mr_deregister and mr_cleanup_callback to
5183  * remove a mr resource from the shared mr object mr_rp->mr_shared_mr.
5184  * if mr_shared_mr is already NULL, that means the region being
5185  * deregistered or invalidated is not a shared mr region and we can
5186  * return immediately.
5187  */
5188 static void
daplka_shared_mr_free(daplka_mr_resource_t * mr_rp)5189 daplka_shared_mr_free(daplka_mr_resource_t *mr_rp)
5190 {
5191 	daplka_shared_mr_t	*smrp;
5192 
5193 	/*
5194 	 * we need a lock because mr_callback also checks this field.
5195 	 * for the rare case that mr_deregister and mr_cleanup_callback
5196 	 * gets called simultaneously, we are guaranteed that smrp won't
5197 	 * be dereferenced twice because either function will find
5198 	 * mr_shared_mr to be NULL.
5199 	 */
5200 	mutex_enter(&mr_rp->mr_lock);
5201 	smrp = mr_rp->mr_shared_mr;
5202 	mr_rp->mr_shared_mr = NULL;
5203 	mutex_exit(&mr_rp->mr_lock);
5204 
5205 	if (smrp != NULL) {
5206 		daplka_mr_resource_t	**mpp;
5207 		boolean_t		mr_found = B_FALSE;
5208 
5209 		mutex_enter(&daplka_shared_mr_lock);
5210 		ASSERT(smrp->smr_refcnt > 0);
5211 		while (smrp->smr_state == DAPLKA_SMR_TRANSITIONING) {
5212 			cv_wait(&smrp->smr_cv, &daplka_shared_mr_lock);
5213 		}
5214 		ASSERT(smrp->smr_state == DAPLKA_SMR_READY);
5215 		smrp->smr_state = DAPLKA_SMR_TRANSITIONING;
5216 		smrp->smr_refcnt--;
5217 
5218 		/*
5219 		 * search and remove mr_rp from smr_mr_list.
5220 		 * also UNREF mr_rp because it is no longer
5221 		 * on the list.
5222 		 */
5223 		mpp = &smrp->smr_mr_list;
5224 		while (*mpp != NULL) {
5225 			if (*mpp == mr_rp) {
5226 				*mpp = (*mpp)->mr_next;
5227 				DAPLKA_RS_UNREF(mr_rp);
5228 				mr_rp->mr_next = NULL;
5229 				mr_found = B_TRUE;
5230 				break;
5231 			}
5232 			mpp = &(*mpp)->mr_next;
5233 		}
5234 		/*
5235 		 * since mr_clean_callback may not touch smr_mr_list
5236 		 * at this time (due to smr_state), we can be sure
5237 		 * that we can find and remove mr_rp from smr_mr_list
5238 		 */
5239 		ASSERT(mr_found);
5240 		if (smrp->smr_refcnt == 0) {
5241 			D3("shared_mr_free: freeing smrp 0x%p\n", smrp);
5242 			avl_remove(&daplka_shared_mr_tree, smrp);
5243 			ASSERT(smrp->smr_mr_list == NULL);
5244 			smrp->smr_state = DAPLKA_SMR_FREED;
5245 			cv_destroy(&smrp->smr_cv);
5246 			kmem_free(smrp, sizeof (daplka_shared_mr_t));
5247 		} else {
5248 			D3("shared_mr_free: smrp 0x%p, refcnt %d\n",
5249 			    smrp, smrp->smr_refcnt);
5250 			smrp->smr_state = DAPLKA_SMR_READY;
5251 			cv_broadcast(&smrp->smr_cv);
5252 		}
5253 		mutex_exit(&daplka_shared_mr_lock);
5254 	}
5255 }
5256 
5257 /*
5258  * deregisters a memory region.
5259  * if mr is shared, remove reference from global shared mr object.
5260  * release the initial reference to the mr. if the mr's refcnt is
5261  * zero, call mr_destroy to free mr.
5262  */
5263 /* ARGSUSED */
5264 static int
daplka_mr_deregister(daplka_ia_resource_t * ia_rp,intptr_t arg,int mode,cred_t * cred,int * rvalp)5265 daplka_mr_deregister(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5266 	cred_t *cred, int *rvalp)
5267 {
5268 	daplka_mr_resource_t	*mr_rp;
5269 	dapl_mr_deregister_t	args;
5270 	int 			retval;
5271 
5272 	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mr_deregister_t),
5273 	    mode);
5274 	if (retval != 0) {
5275 		DERR("mr_deregister: copyin error %d\n", retval);
5276 		return (EINVAL);
5277 	}
5278 	retval = daplka_hash_remove(&ia_rp->ia_mr_htbl,
5279 	    args.mrd_hkey, (void **)&mr_rp);
5280 	if (retval != 0 || mr_rp == NULL) {
5281 		DERR("mr_deregister: cannot find mr resource\n");
5282 		return (EINVAL);
5283 	}
5284 	ASSERT(DAPLKA_RS_TYPE(mr_rp) == DAPL_TYPE_MR);
5285 
5286 	daplka_shared_mr_free(mr_rp);
5287 	DAPLKA_RS_UNREF(mr_rp);
5288 	return (0);
5289 }
5290 
5291 /*
5292  * sync local memory regions on RDMA read or write.
5293  */
5294 /* ARGSUSED */
5295 static int
daplka_mr_sync(daplka_ia_resource_t * ia_rp,intptr_t arg,int mode,cred_t * cred,int * rvalp)5296 daplka_mr_sync(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5297 	cred_t *cred, int *rvalp)
5298 {
5299 	dapl_mr_sync_t	args;
5300 	daplka_mr_resource_t *mr_rp[DAPL_MR_PER_SYNC];
5301 	ibt_mr_sync_t	mrs[DAPL_MR_PER_SYNC];
5302 	uint32_t	sync_direction_flags;
5303 	ibt_status_t	status;
5304 	int		i, j;
5305 	int		retval;
5306 
5307 	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mr_sync_t), mode);
5308 	if (retval != 0) {
5309 		DERR("mr_sync: copyin error %d\n", retval);
5310 		return (EFAULT);
5311 	}
5312 
5313 	/* number of segments bound check */
5314 	if (args.mrs_numseg > DAPL_MR_PER_SYNC) {
5315 		DERR("mr_sync: number of segments too large\n");
5316 		return (EINVAL);
5317 	}
5318 
5319 	/* translate MR sync direction flag */
5320 	if (args.mrs_flags == DAPL_MR_SYNC_RDMA_RD) {
5321 		sync_direction_flags = IBT_SYNC_READ;
5322 	} else if (args.mrs_flags == DAPL_MR_SYNC_RDMA_WR) {
5323 		sync_direction_flags = IBT_SYNC_WRITE;
5324 	} else {
5325 		DERR("mr_sync: unknown flags\n");
5326 		return (EINVAL);
5327 	}
5328 
5329 	/*
5330 	 * all the segments are going to be sync'd by ibtl together
5331 	 */
5332 	for (i = 0; i < args.mrs_numseg; i++) {
5333 		mr_rp[i] = (daplka_mr_resource_t *)daplka_hash_lookup(
5334 		    &ia_rp->ia_mr_htbl, args.mrs_vec[i].mrsv_hkey);
5335 		if (mr_rp[i] == NULL) {
5336 			for (j = 0; j < i; j++) {
5337 				DAPLKA_RS_UNREF(mr_rp[j]);
5338 			}
5339 			DERR("mr_sync: lookup error\n");
5340 			return (EINVAL);
5341 		}
5342 		ASSERT(DAPLKA_RS_TYPE(mr_rp[i]) == DAPL_TYPE_MR);
5343 		mrs[i].ms_handle = mr_rp[i]->mr_hdl;
5344 		mrs[i].ms_vaddr = args.mrs_vec[i].mrsv_va;
5345 		mrs[i].ms_len = args.mrs_vec[i].mrsv_len;
5346 		mrs[i].ms_flags = sync_direction_flags;
5347 	}
5348 
5349 	status = ibt_sync_mr(ia_rp->ia_hca_hdl, mrs, args.mrs_numseg);
5350 	if (status != IBT_SUCCESS) {
5351 		DERR("mr_sync: ibt_sync_mr error %d\n", status);
5352 		*rvalp = (int)status;
5353 	}
5354 	for (i = 0; i < args.mrs_numseg; i++) {
5355 		DAPLKA_RS_UNREF(mr_rp[i]);
5356 	}
5357 	return (0);
5358 }
5359 
5360 /*
5361  * destroys a memory region.
5362  * called when refcnt drops to zero.
5363  */
5364 static int
daplka_mr_destroy(daplka_resource_t * gen_rp)5365 daplka_mr_destroy(daplka_resource_t *gen_rp)
5366 {
5367 	daplka_mr_resource_t	*mr_rp = (daplka_mr_resource_t *)gen_rp;
5368 	ibt_status_t		status;
5369 
5370 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr_rp))
5371 	ASSERT(DAPLKA_RS_REFCNT(mr_rp) == 0);
5372 	ASSERT(mr_rp->mr_shared_mr == NULL);
5373 	D3("mr_destroy: entering, mr_rp 0x%p, rnum %d\n",
5374 	    mr_rp, DAPLKA_RS_RNUM(mr_rp));
5375 
5376 	/*
5377 	 * deregister mr
5378 	 */
5379 	if (mr_rp->mr_hdl) {
5380 		status = daplka_ibt_deregister_mr(mr_rp, mr_rp->mr_hca_hdl,
5381 		    mr_rp->mr_hdl);
5382 		if (status != IBT_SUCCESS) {
5383 			DERR("mr_destroy: ibt_deregister_mr returned %d\n",
5384 			    status);
5385 		}
5386 		mr_rp->mr_hdl = NULL;
5387 		D3("mr_destroy: mr deregistered\n");
5388 	}
5389 	mr_rp->mr_attr.mr_vaddr = NULL;
5390 
5391 	/*
5392 	 * release reference on PD
5393 	 */
5394 	if (mr_rp->mr_pd_res != NULL) {
5395 		DAPLKA_RS_UNREF(mr_rp->mr_pd_res);
5396 		mr_rp->mr_pd_res = NULL;
5397 	}
5398 	mutex_destroy(&mr_rp->mr_lock);
5399 	DAPLKA_RS_FINI(mr_rp);
5400 	kmem_free(mr_rp, sizeof (daplka_mr_resource_t));
5401 	D3("mr_destroy: exiting, mr_rp 0x%p\n", mr_rp);
5402 	return (0);
5403 }
5404 
5405 /*
5406  * this function is called by daplka_hash_destroy for
5407  * freeing MR resource objects
5408  */
5409 static void
daplka_hash_mr_free(void * obj)5410 daplka_hash_mr_free(void *obj)
5411 {
5412 	daplka_mr_resource_t	*mr_rp = (daplka_mr_resource_t *)obj;
5413 
5414 	daplka_shared_mr_free(mr_rp);
5415 	DAPLKA_RS_UNREF(mr_rp);
5416 }
5417 
5418 /*
5419  * comparison function used for finding a shared mr object
5420  * from the global shared mr avl tree.
5421  */
5422 static int
daplka_shared_mr_cmp(const void * smr1,const void * smr2)5423 daplka_shared_mr_cmp(const void *smr1, const void *smr2)
5424 {
5425 	daplka_shared_mr_t	*s1 = (daplka_shared_mr_t *)smr1;
5426 	daplka_shared_mr_t	*s2 = (daplka_shared_mr_t *)smr2;
5427 	int i;
5428 
5429 	for (i = 4; i >= 0; i--) {
5430 		if (s1->smr_cookie.mc_uint_arr[i] <
5431 		    s2->smr_cookie.mc_uint_arr[i]) {
5432 			return (-1);
5433 		}
5434 		if (s1->smr_cookie.mc_uint_arr[i] >
5435 		    s2->smr_cookie.mc_uint_arr[i]) {
5436 			return (1);
5437 		}
5438 	}
5439 	return (0);
5440 }
5441 
5442 /*
5443  * allocates a protection domain.
5444  */
5445 /* ARGSUSED */
5446 static int
daplka_pd_alloc(daplka_ia_resource_t * ia_rp,intptr_t arg,int mode,cred_t * cred,int * rvalp)5447 daplka_pd_alloc(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5448 	cred_t *cred, int *rvalp)
5449 {
5450 	dapl_pd_alloc_t		args;
5451 	daplka_pd_resource_t	*pd_rp;
5452 	ibt_status_t		status;
5453 	uint64_t		pd_hkey = 0;
5454 	boolean_t		inserted = B_FALSE;
5455 	int			retval;
5456 
5457 	pd_rp = kmem_zalloc(sizeof (*pd_rp), daplka_km_flags);
5458 	if (pd_rp == NULL) {
5459 		DERR("pd_alloc: cannot allocate pd resource\n");
5460 		return (ENOMEM);
5461 	}
5462 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd_rp))
5463 	DAPLKA_RS_INIT(pd_rp, DAPL_TYPE_PD,
5464 	    DAPLKA_RS_RNUM(ia_rp), daplka_pd_destroy);
5465 
5466 	pd_rp->pd_hca = ia_rp->ia_hca;
5467 	pd_rp->pd_hca_hdl = ia_rp->ia_hca_hdl;
5468 	status = daplka_ibt_alloc_pd(pd_rp, pd_rp->pd_hca_hdl,
5469 	    IBT_PD_NO_FLAGS, &pd_rp->pd_hdl);
5470 	if (status != IBT_SUCCESS) {
5471 		DERR("pd_alloc: ibt_alloc_pd returned %d\n", status);
5472 		*rvalp = (int)status;
5473 		retval = 0;
5474 		goto cleanup;
5475 	}
5476 
5477 	/* insert into pd hash table */
5478 	retval = daplka_hash_insert(&ia_rp->ia_pd_htbl,
5479 	    &pd_hkey, (void *)pd_rp);
5480 	if (retval != 0) {
5481 		DERR("pd_alloc: cannot insert pd resource into pd_htbl\n");
5482 		goto cleanup;
5483 	}
5484 	inserted = B_TRUE;
5485 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*pd_rp))
5486 
5487 	/* return hkey to library */
5488 	args.pda_hkey = pd_hkey;
5489 
5490 	retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_pd_alloc_t),
5491 	    mode);
5492 	if (retval != 0) {
5493 		DERR("pd_alloc: copyout error %d\n", retval);
5494 		retval = EFAULT;
5495 		goto cleanup;
5496 	}
5497 	return (0);
5498 
5499 cleanup:;
5500 	if (inserted) {
5501 		daplka_pd_resource_t *free_rp = NULL;
5502 
5503 		(void) daplka_hash_remove(&ia_rp->ia_pd_htbl, pd_hkey,
5504 		    (void **)&free_rp);
5505 		if (free_rp != pd_rp) {
5506 			DERR("pd_alloc: cannot remove pd from hash table\n");
5507 			/*
5508 			 * we can only get here if another thread
5509 			 * has completed the cleanup in pd_free
5510 			 */
5511 			return (retval);
5512 		}
5513 	}
5514 	DAPLKA_RS_UNREF(pd_rp);
5515 	return (retval);
5516 }
5517 
5518 /*
5519  * destroys a protection domain.
5520  * called when refcnt drops to zero.
5521  */
5522 static int
daplka_pd_destroy(daplka_resource_t * gen_rp)5523 daplka_pd_destroy(daplka_resource_t *gen_rp)
5524 {
5525 	daplka_pd_resource_t *pd_rp = (daplka_pd_resource_t *)gen_rp;
5526 	ibt_status_t status;
5527 
5528 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*pd_rp))
5529 	ASSERT(DAPLKA_RS_REFCNT(pd_rp) == 0);
5530 	D3("pd_destroy: entering, pd_rp %p, rnum %d\n",
5531 	    pd_rp, DAPLKA_RS_RNUM(pd_rp));
5532 
5533 	ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
5534 	if (pd_rp->pd_hdl != NULL) {
5535 		status = daplka_ibt_free_pd(pd_rp, pd_rp->pd_hca_hdl,
5536 		    pd_rp->pd_hdl);
5537 		if (status != IBT_SUCCESS) {
5538 			DERR("pd_destroy: ibt_free_pd returned %d\n", status);
5539 		}
5540 	}
5541 	DAPLKA_RS_FINI(pd_rp);
5542 	kmem_free(pd_rp, sizeof (daplka_pd_resource_t));
5543 	D3("pd_destroy: exiting, pd_rp %p\n", pd_rp);
5544 	return (0);
5545 }
5546 
5547 static void
daplka_hash_pd_free(void * obj)5548 daplka_hash_pd_free(void *obj)
5549 {
5550 	daplka_pd_resource_t *pd_rp = (daplka_pd_resource_t *)obj;
5551 
5552 	ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
5553 	DAPLKA_RS_UNREF(pd_rp);
5554 }
5555 
5556 /*
5557  * removes the pd reference from ia_pd_htbl and releases the
5558  * initial reference to the pd. also destroys the pd if the refcnt
5559  * is zero.
5560  */
5561 /* ARGSUSED */
5562 static int
daplka_pd_free(daplka_ia_resource_t * ia_rp,intptr_t arg,int mode,cred_t * cred,int * rvalp)5563 daplka_pd_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5564 	cred_t *cred, int *rvalp)
5565 {
5566 	daplka_pd_resource_t *pd_rp;
5567 	dapl_pd_free_t args;
5568 	int retval;
5569 
5570 	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_pd_free_t), mode);
5571 	if (retval != 0) {
5572 		DERR("pd_free: copyin error %d\n", retval);
5573 		return (EINVAL);
5574 	}
5575 
5576 	retval = daplka_hash_remove(&ia_rp->ia_pd_htbl,
5577 	    args.pdf_hkey, (void **)&pd_rp);
5578 	if (retval != 0 || pd_rp == NULL) {
5579 		DERR("pd_free: cannot find pd resource\n");
5580 		return (EINVAL);
5581 	}
5582 	ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
5583 
5584 	/* UNREF calls the actual free function when refcnt is zero */
5585 	DAPLKA_RS_UNREF(pd_rp);
5586 	return (0);
5587 }
5588 
5589 /*
5590  * allocates a memory window
5591  */
5592 /* ARGSUSED */
5593 static int
daplka_mw_alloc(daplka_ia_resource_t * ia_rp,intptr_t arg,int mode,cred_t * cred,int * rvalp)5594 daplka_mw_alloc(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5595 	cred_t *cred, int *rvalp)
5596 {
5597 	daplka_pd_resource_t	*pd_rp;
5598 	daplka_mw_resource_t	*mw_rp;
5599 	dapl_mw_alloc_t		args;
5600 	ibt_status_t		status;
5601 	boolean_t		inserted = B_FALSE;
5602 	uint64_t		mw_hkey;
5603 	ibt_rkey_t		mw_rkey;
5604 	int			retval;
5605 
5606 	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mw_alloc_t), mode);
5607 	if (retval != 0) {
5608 		DERR("mw_alloc: copyin error %d\n", retval);
5609 		return (EFAULT);
5610 	}
5611 
5612 	/*
5613 	 * Allocate and initialize a MW resource
5614 	 */
5615 	mw_rp = kmem_zalloc(sizeof (daplka_mw_resource_t), daplka_km_flags);
5616 	if (mw_rp == NULL) {
5617 		DERR("mw_alloc: cannot allocate mw resource\n");
5618 		return (ENOMEM);
5619 	}
5620 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw_rp))
5621 	DAPLKA_RS_INIT(mw_rp, DAPL_TYPE_MW,
5622 	    DAPLKA_RS_RNUM(ia_rp), daplka_mw_destroy);
5623 
5624 	mutex_init(&mw_rp->mw_lock, NULL, MUTEX_DRIVER, NULL);
5625 	mw_rp->mw_hca = ia_rp->ia_hca;
5626 	mw_rp->mw_hca_hdl = ia_rp->ia_hca_hdl;
5627 
5628 	/* get pd handle */
5629 	pd_rp = (daplka_pd_resource_t *)
5630 	    daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.mw_pd_hkey);
5631 	if (pd_rp == NULL) {
5632 		DERR("mw_alloc: cannot find pd resource\n");
5633 		goto cleanup;
5634 	}
5635 	ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
5636 
5637 	mw_rp->mw_pd_res = pd_rp;
5638 
5639 	status = daplka_ibt_alloc_mw(mw_rp, mw_rp->mw_hca_hdl,
5640 	    pd_rp->pd_hdl, IBT_MW_NOSLEEP, &mw_rp->mw_hdl, &mw_rkey);
5641 
5642 	if (status != IBT_SUCCESS) {
5643 		DERR("mw_alloc: ibt_alloc_mw returned %d\n", status);
5644 		*rvalp = (int)status;
5645 		retval = 0;
5646 		goto cleanup;
5647 	}
5648 
5649 	mutex_enter(&ia_rp->ia_lock);
5650 	switch (ia_rp->ia_state) {
5651 	case DAPLKA_IA_INIT:
5652 		ia_rp->ia_state = DAPLKA_IA_MW_ALLOC_IN_PROGRESS;
5653 		ia_rp->ia_mw_alloccnt++;
5654 		retval = 0;
5655 		break;
5656 	case DAPLKA_IA_MW_ALLOC_IN_PROGRESS:
5657 		/* another mw_alloc is already in progress increase cnt */
5658 		ia_rp->ia_mw_alloccnt++;
5659 		retval = 0;
5660 		break;
5661 	case DAPLKA_IA_MW_FREEZE_IN_PROGRESS:
5662 		/* FALLTHRU */
5663 	case DAPLKA_IA_MW_FROZEN:
5664 		/*
5665 		 * IA is being or already frozen don't allow more MWs to be
5666 		 * allocated.
5667 		 */
5668 		DERR("mw_alloc:	IA is freezing MWs (state=%d)\n",
5669 		    ia_rp->ia_state);
5670 		retval = EINVAL;
5671 		break;
5672 	default:
5673 		ASSERT(!"Invalid IA state in mw_alloc");
5674 		DERR("mw_alloc:	IA state=%d invalid\n", ia_rp->ia_state);
5675 		retval = EINVAL;
5676 		break;
5677 	}
5678 	mutex_exit(&ia_rp->ia_lock);
5679 	/* retval is 0 when ia_mw_alloccnt is incremented */
5680 	if (retval != 0) {
5681 		goto cleanup;
5682 	}
5683 
5684 	/* insert into mw hash table */
5685 	mw_hkey = 0;
5686 	retval = daplka_hash_insert(&ia_rp->ia_mw_htbl, &mw_hkey,
5687 	    (void *)mw_rp);
5688 	if (retval != 0) {
5689 		DERR("mw_alloc: cannot insert mw resource into mw_htbl\n");
5690 		mutex_enter(&ia_rp->ia_lock);
5691 		ASSERT(ia_rp->ia_state == DAPLKA_IA_MW_ALLOC_IN_PROGRESS);
5692 		ia_rp->ia_mw_alloccnt--;
5693 		if (ia_rp->ia_mw_alloccnt == 0) {
5694 			ia_rp->ia_state = DAPLKA_IA_INIT;
5695 			cv_broadcast(&ia_rp->ia_cv);
5696 		}
5697 		mutex_exit(&ia_rp->ia_lock);
5698 		goto cleanup;
5699 	}
5700 	inserted = B_TRUE;
5701 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*mw_rp))
5702 
5703 	D3("mw_alloc: ibt_alloc_mw mw_hdl(%p) mw_rkey(0x%llx)\n",
5704 	    mw_rp->mw_hdl, (longlong_t)mw_rkey);
5705 
5706 	mutex_enter(&ia_rp->ia_lock);
5707 	/*
5708 	 * We are done with mw_alloc if this was the last mw_alloc
5709 	 * change state back to DAPLKA_IA_INIT and wake up waiters
5710 	 * specifically the unlock callback.
5711 	 */
5712 	ASSERT(ia_rp->ia_state == DAPLKA_IA_MW_ALLOC_IN_PROGRESS);
5713 	ia_rp->ia_mw_alloccnt--;
5714 	if (ia_rp->ia_mw_alloccnt == 0) {
5715 		ia_rp->ia_state = DAPLKA_IA_INIT;
5716 		cv_broadcast(&ia_rp->ia_cv);
5717 	}
5718 	mutex_exit(&ia_rp->ia_lock);
5719 
5720 	args.mw_hkey = mw_hkey;
5721 	args.mw_rkey = mw_rkey;
5722 
5723 	retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_mw_alloc_t),
5724 	    mode);
5725 	if (retval != 0) {
5726 		DERR("mw_alloc: copyout error %d\n", retval);
5727 		retval = EFAULT;
5728 		goto cleanup;
5729 	}
5730 	return (0);
5731 
5732 cleanup:;
5733 	if (inserted) {
5734 		daplka_mw_resource_t *free_rp = NULL;
5735 
5736 		(void) daplka_hash_remove(&ia_rp->ia_mw_htbl, mw_hkey,
5737 		    (void **)&free_rp);
5738 		if (free_rp != mw_rp) {
5739 			DERR("mw_alloc: cannot remove mw from hash table\n");
5740 			/*
5741 			 * we can only get here if another thread
5742 			 * has completed the cleanup in mw_free
5743 			 */
5744 			return (retval);
5745 		}
5746 	}
5747 	DAPLKA_RS_UNREF(mw_rp);
5748 	return (retval);
5749 }
5750 
5751 /*
5752  * removes the mw reference from ia_mw_htbl and releases the
5753  * initial reference to the mw. also destroys the mw if the refcnt
5754  * is zero.
5755  */
5756 /* ARGSUSED */
5757 static int
daplka_mw_free(daplka_ia_resource_t * ia_rp,intptr_t arg,int mode,cred_t * cred,int * rvalp)5758 daplka_mw_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5759 	cred_t *cred, int *rvalp)
5760 {
5761 	daplka_mw_resource_t	*mw_rp = NULL;
5762 	dapl_mw_free_t		args;
5763 	int			retval = 0;
5764 
5765 	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_mw_free_t), mode);
5766 	if (retval != 0) {
5767 		DERR("mw_free: copyin error %d\n", retval);
5768 		return (EFAULT);
5769 	}
5770 
5771 	retval = daplka_hash_remove(&ia_rp->ia_mw_htbl, args.mw_hkey,
5772 	    (void **)&mw_rp);
5773 	if (retval != 0 || mw_rp == NULL) {
5774 		DERR("mw_free: cannot find mw resrc (0x%llx)\n",
5775 		    (longlong_t)args.mw_hkey);
5776 		return (EINVAL);
5777 	}
5778 
5779 	ASSERT(DAPLKA_RS_TYPE(mw_rp) == DAPL_TYPE_MW);
5780 
5781 	/* UNREF calls the actual free function when refcnt is zero */
5782 	DAPLKA_RS_UNREF(mw_rp);
5783 	return (retval);
5784 }
5785 
5786 /*
5787  * destroys the memory window.
5788  * called when refcnt drops to zero.
5789  */
5790 static int
daplka_mw_destroy(daplka_resource_t * gen_rp)5791 daplka_mw_destroy(daplka_resource_t *gen_rp)
5792 {
5793 	daplka_mw_resource_t	*mw_rp = (daplka_mw_resource_t *)gen_rp;
5794 	ibt_status_t		status;
5795 
5796 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw_rp))
5797 	ASSERT(DAPLKA_RS_REFCNT(mw_rp) == 0);
5798 	D3("mw_destroy: entering, mw_rp 0x%p, rnum %d\n",
5799 	    mw_rp, DAPLKA_RS_RNUM(mw_rp));
5800 
5801 	/*
5802 	 * free memory window
5803 	 */
5804 	if (mw_rp->mw_hdl) {
5805 		status = daplka_ibt_free_mw(mw_rp, mw_rp->mw_hca_hdl,
5806 		    mw_rp->mw_hdl);
5807 		if (status != IBT_SUCCESS) {
5808 			DERR("mw_destroy: ibt_free_mw returned %d\n", status);
5809 		}
5810 		mw_rp->mw_hdl = NULL;
5811 		D3("mw_destroy: mw freed\n");
5812 	}
5813 
5814 	/*
5815 	 * release reference on PD
5816 	 */
5817 	if (mw_rp->mw_pd_res != NULL) {
5818 		DAPLKA_RS_UNREF(mw_rp->mw_pd_res);
5819 		mw_rp->mw_pd_res = NULL;
5820 	}
5821 	mutex_destroy(&mw_rp->mw_lock);
5822 	DAPLKA_RS_FINI(mw_rp);
5823 	kmem_free(mw_rp, sizeof (daplka_mw_resource_t));
5824 	D3("mw_destroy: exiting, mw_rp 0x%p\n", mw_rp);
5825 	return (0);
5826 }
5827 
5828 static void
daplka_hash_mw_free(void * obj)5829 daplka_hash_mw_free(void *obj)
5830 {
5831 	daplka_mw_resource_t *mw_rp = (daplka_mw_resource_t *)obj;
5832 
5833 	ASSERT(DAPLKA_RS_TYPE(mw_rp) == DAPL_TYPE_MW);
5834 	DAPLKA_RS_UNREF(mw_rp);
5835 }
5836 
5837 /*
5838  * SRQ ioctls and supporting functions
5839  */
5840 /* ARGSUSED */
5841 static int
daplka_srq_create(daplka_ia_resource_t * ia_rp,intptr_t arg,int mode,cred_t * cred,int * rvalp)5842 daplka_srq_create(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5843     cred_t *cred, int *rvalp)
5844 {
5845 	daplka_srq_resource_t		*srq_rp;
5846 	daplka_pd_resource_t		*pd_rp;
5847 	dapl_srq_create_t		args;
5848 	ibt_srq_sizes_t			srq_sizes;
5849 	ibt_srq_sizes_t			srq_real_sizes;
5850 	ibt_hca_attr_t			*hca_attrp;
5851 	uint64_t			srq_hkey = 0;
5852 	boolean_t			inserted = B_FALSE;
5853 	int				retval;
5854 	ibt_status_t			status;
5855 
5856 	D3("srq_create: enter\n");
5857 	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_srq_create_t),
5858 	    mode);
5859 	if (retval != 0) {
5860 		DERR("srq_create: copyin error %d\n", retval);
5861 		return (EFAULT);
5862 	}
5863 	srq_rp = kmem_zalloc(sizeof (daplka_srq_resource_t), daplka_km_flags);
5864 	if (srq_rp == NULL) {
5865 		DERR("srq_create: cannot allocate ep_rp\n");
5866 		return (ENOMEM);
5867 	}
5868 	DAPLKA_RS_INIT(srq_rp, DAPL_TYPE_SRQ,
5869 	    DAPLKA_RS_RNUM(ia_rp), daplka_srq_destroy);
5870 
5871 	srq_rp->srq_hca = ia_rp->ia_hca;
5872 	srq_rp->srq_hca_hdl = ia_rp->ia_hca_hdl;
5873 	mutex_init(&srq_rp->srq_lock, NULL, MUTEX_DRIVER, NULL);
5874 
5875 	/* get pd handle */
5876 	pd_rp = (daplka_pd_resource_t *)
5877 	    daplka_hash_lookup(&ia_rp->ia_pd_htbl, args.srqc_pd_hkey);
5878 	if (pd_rp == NULL) {
5879 		DERR("srq_create: cannot find pd resource\n");
5880 		retval = EINVAL;
5881 		goto cleanup;
5882 	}
5883 	ASSERT(DAPLKA_RS_TYPE(pd_rp) == DAPL_TYPE_PD);
5884 	srq_rp->srq_pd_res = pd_rp;
5885 
5886 	/*
5887 	 * these checks ensure that the requested SRQ sizes
5888 	 * are within the limits supported by the chosen HCA.
5889 	 */
5890 	hca_attrp = &ia_rp->ia_hca->hca_attr;
5891 	if (args.srqc_sizes.srqs_sz > hca_attrp->hca_max_srqs_sz) {
5892 		DERR("srq_create: invalid srqs_sz %d\n",
5893 		    args.srqc_sizes.srqs_sz);
5894 		retval = EINVAL;
5895 		goto cleanup;
5896 	}
5897 	if (args.srqc_sizes.srqs_sgl > hca_attrp->hca_max_srq_sgl) {
5898 		DERR("srq_create: invalid srqs_sgl %d\n",
5899 		    args.srqc_sizes.srqs_sgl);
5900 		retval = EINVAL;
5901 		goto cleanup;
5902 	}
5903 
5904 	D3("srq_create: srq_sgl %d, srq_sz %d\n",
5905 	    args.srqc_sizes.srqs_sgl, args.srqc_sizes.srqs_sz);
5906 
5907 	srq_sizes.srq_wr_sz = args.srqc_sizes.srqs_sz;
5908 	srq_sizes.srq_sgl_sz = args.srqc_sizes.srqs_sgl;
5909 
5910 	/* create srq */
5911 	status = daplka_ibt_alloc_srq(srq_rp, ia_rp->ia_hca_hdl,
5912 	    IBT_SRQ_USER_MAP, pd_rp->pd_hdl, &srq_sizes, &srq_rp->srq_hdl,
5913 	    &srq_real_sizes);
5914 	if (status != IBT_SUCCESS) {
5915 		DERR("srq_create: alloc_srq returned %d\n", status);
5916 		*rvalp = (int)status;
5917 		retval = 0;
5918 		goto cleanup;
5919 	}
5920 
5921 	args.srqc_real_sizes.srqs_sz = srq_real_sizes.srq_wr_sz;
5922 	args.srqc_real_sizes.srqs_sgl = srq_real_sizes.srq_sgl_sz;
5923 
5924 	/* Get HCA-specific data_out info */
5925 	status = ibt_ci_data_out(ia_rp->ia_hca_hdl,
5926 	    IBT_CI_NO_FLAGS, IBT_HDL_SRQ, (void *)srq_rp->srq_hdl,
5927 	    &args.srqc_data_out, sizeof (args.srqc_data_out));
5928 
5929 	if (status != IBT_SUCCESS) {
5930 		DERR("srq_create: ibt_ci_data_out error(%d)\n", status);
5931 		*rvalp = (int)status;
5932 		retval = 0;
5933 		goto cleanup;
5934 	}
5935 
5936 	srq_rp->srq_real_size = srq_real_sizes.srq_wr_sz;
5937 
5938 	/* preparing to copyout map_data back to the library */
5939 	args.srqc_real_sizes.srqs_sz = srq_real_sizes.srq_wr_sz;
5940 	args.srqc_real_sizes.srqs_sgl = srq_real_sizes.srq_sgl_sz;
5941 
5942 	/* insert into srq hash table */
5943 	retval = daplka_hash_insert(&ia_rp->ia_srq_htbl,
5944 	    &srq_hkey, (void *)srq_rp);
5945 	if (retval != 0) {
5946 		DERR("srq_create: cannot insert srq resource into srq_htbl\n");
5947 		goto cleanup;
5948 	}
5949 	inserted = B_TRUE;
5950 
5951 	/* return hkey to library */
5952 	args.srqc_hkey = srq_hkey;
5953 
5954 	retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_srq_create_t),
5955 	    mode);
5956 	if (retval != 0) {
5957 		DERR("srq_create: copyout error %d\n", retval);
5958 		retval = EFAULT;
5959 		goto cleanup;
5960 	}
5961 
5962 	D3("srq_create: %p, 0x%llx\n", srq_rp->srq_hdl, (longlong_t)srq_hkey);
5963 	D3("	sz(%d) sgl(%d)\n",
5964 	    args.srqc_real_sizes.srqs_sz, args.srqc_real_sizes.srqs_sgl);
5965 	D3("srq_create: exit\n");
5966 	return (0);
5967 
5968 cleanup:
5969 	if (inserted) {
5970 		daplka_srq_resource_t *free_rp = NULL;
5971 
5972 		(void) daplka_hash_remove(&ia_rp->ia_srq_htbl, srq_hkey,
5973 		    (void **)&free_rp);
5974 		if (free_rp != srq_rp) {
5975 			/*
5976 			 * this case is impossible because ep_free will
5977 			 * wait until our state transition is complete.
5978 			 */
5979 			DERR("srq_create: cannot remove srq from hash table\n");
5980 			ASSERT(B_FALSE);
5981 			return (retval);
5982 		}
5983 	}
5984 	DAPLKA_RS_UNREF(srq_rp);
5985 	return (retval);
5986 }
5987 
5988 /*
5989  * Resize an existing SRQ
5990  */
5991 /* ARGSUSED */
5992 static int
daplka_srq_resize(daplka_ia_resource_t * ia_rp,intptr_t arg,int mode,cred_t * cred,int * rvalp)5993 daplka_srq_resize(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
5994     cred_t *cred, int *rvalp)
5995 {
5996 	daplka_srq_resource_t		*srq_rp = NULL;
5997 	ibt_hca_attr_t			*hca_attrp;
5998 	dapl_srq_resize_t		args;
5999 	ibt_status_t			status;
6000 	int				retval = 0;
6001 
6002 	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_srq_resize_t),
6003 	    mode);
6004 	if (retval != 0) {
6005 		DERR("srq_resize: copyin error %d\n", retval);
6006 		return (EFAULT);
6007 	}
6008 
6009 	/* get srq resource */
6010 	srq_rp = (daplka_srq_resource_t *)
6011 	    daplka_hash_lookup(&ia_rp->ia_srq_htbl, args.srqr_hkey);
6012 	if (srq_rp == NULL) {
6013 		DERR("srq_resize: cannot find srq resource\n");
6014 		return (EINVAL);
6015 	}
6016 	ASSERT(DAPLKA_RS_TYPE(srq_rp) == DAPL_TYPE_SRQ);
6017 
6018 	hca_attrp = &ia_rp->ia_hca->hca_attr;
6019 	if (args.srqr_new_size > hca_attrp->hca_max_srqs_sz) {
6020 		DERR("srq_resize: invalid srq size %d", args.srqr_new_size);
6021 		retval = EINVAL;
6022 		goto cleanup;
6023 	}
6024 
6025 	mutex_enter(&srq_rp->srq_lock);
6026 	/*
6027 	 * If ibt_resize_srq fails that it is primarily due to resource
6028 	 * shortage. Per IB spec resize will never loose events and
6029 	 * a resize error leaves the SRQ intact. Therefore even if the
6030 	 * resize request fails we proceed and get the mapping data
6031 	 * from the SRQ so that the library can mmap it.
6032 	 */
6033 	status = ibt_modify_srq(srq_rp->srq_hdl, IBT_SRQ_SET_SIZE,
6034 	    args.srqr_new_size, 0, &args.srqr_real_size);
6035 	if (status != IBT_SUCCESS) {
6036 		/* we return the size of the old CQ if resize fails */
6037 		args.srqr_real_size = srq_rp->srq_real_size;
6038 		ASSERT(status != IBT_SRQ_HDL_INVALID);
6039 		DERR("srq_resize: ibt_modify_srq failed:%d\n", status);
6040 	} else {
6041 		srq_rp->srq_real_size = args.srqr_real_size;
6042 	}
6043 	mutex_exit(&srq_rp->srq_lock);
6044 
6045 
6046 	D2("srq_resize(%d): done new_sz(%u) real_sz(%u)\n",
6047 	    DAPLKA_RS_RNUM(srq_rp), args.srqr_new_size, args.srqr_real_size);
6048 
6049 	/* Get HCA-specific data_out info */
6050 	status = ibt_ci_data_out(srq_rp->srq_hca_hdl,
6051 	    IBT_CI_NO_FLAGS, IBT_HDL_SRQ, (void *)srq_rp->srq_hdl,
6052 	    &args.srqr_data_out, sizeof (args.srqr_data_out));
6053 	if (status != IBT_SUCCESS) {
6054 		DERR("srq_resize: ibt_ci_data_out error(%d)\n", status);
6055 		/* return ibt_ci_data_out status */
6056 		*rvalp = (int)status;
6057 		retval = 0;
6058 		goto cleanup;
6059 	}
6060 
6061 	retval = ddi_copyout(&args, (void *)arg, sizeof (dapl_srq_resize_t),
6062 	    mode);
6063 	if (retval != 0) {
6064 		DERR("srq_resize: copyout error %d\n", retval);
6065 		retval = EFAULT;
6066 		goto cleanup;
6067 	}
6068 
6069 cleanup:;
6070 	if (srq_rp != NULL) {
6071 		DAPLKA_RS_UNREF(srq_rp);
6072 	}
6073 	return (retval);
6074 }
6075 
6076 /*
6077  * Frees an SRQ resource.
6078  */
6079 /* ARGSUSED */
6080 static int
daplka_srq_free(daplka_ia_resource_t * ia_rp,intptr_t arg,int mode,cred_t * cred,int * rvalp)6081 daplka_srq_free(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
6082     cred_t *cred, int *rvalp)
6083 {
6084 	daplka_srq_resource_t	*srq_rp = NULL;
6085 	dapl_srq_free_t		args;
6086 	int			retval;
6087 
6088 	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_srq_free_t), mode);
6089 	if (retval != 0) {
6090 		DERR("srq_free: copyin error %d\n", retval);
6091 		return (EFAULT);
6092 	}
6093 
6094 	retval = daplka_hash_remove(&ia_rp->ia_srq_htbl,
6095 	    args.srqf_hkey, (void **)&srq_rp);
6096 	if (retval != 0 || srq_rp == NULL) {
6097 		/*
6098 		 * this is only possible if we have two threads
6099 		 * calling ep_free in parallel.
6100 		 */
6101 		DERR("srq_free: cannot find resource retval(%d) 0x%llx\n",
6102 		    retval, args.srqf_hkey);
6103 		return (EINVAL);
6104 	}
6105 
6106 	/* UNREF calls the actual free function when refcnt is zero */
6107 	DAPLKA_RS_UNREF(srq_rp);
6108 	return (0);
6109 }
6110 
6111 /*
6112  * destroys a SRQ resource.
6113  * called when refcnt drops to zero.
6114  */
6115 static int
daplka_srq_destroy(daplka_resource_t * gen_rp)6116 daplka_srq_destroy(daplka_resource_t *gen_rp)
6117 {
6118 	daplka_srq_resource_t	*srq_rp = (daplka_srq_resource_t *)gen_rp;
6119 	ibt_status_t		status;
6120 
6121 	ASSERT(DAPLKA_RS_REFCNT(srq_rp) == 0);
6122 
6123 	D3("srq_destroy: entering, srq_rp 0x%p, rnum %d\n",
6124 	    srq_rp, DAPLKA_RS_RNUM(srq_rp));
6125 	/*
6126 	 * destroy the srq
6127 	 */
6128 	if (srq_rp->srq_hdl != NULL) {
6129 		status = daplka_ibt_free_srq(srq_rp, srq_rp->srq_hdl);
6130 		if (status != IBT_SUCCESS) {
6131 			DERR("srq_destroy: ibt_free_srq returned %d\n",
6132 			    status);
6133 		}
6134 		srq_rp->srq_hdl = NULL;
6135 		D3("srq_destroy: srq freed, rnum %d\n", DAPLKA_RS_RNUM(srq_rp));
6136 	}
6137 	/*
6138 	 * release all references
6139 	 */
6140 	if (srq_rp->srq_pd_res != NULL) {
6141 		DAPLKA_RS_UNREF(srq_rp->srq_pd_res);
6142 		srq_rp->srq_pd_res = NULL;
6143 	}
6144 
6145 	mutex_destroy(&srq_rp->srq_lock);
6146 	DAPLKA_RS_FINI(srq_rp);
6147 	kmem_free(srq_rp, sizeof (daplka_srq_resource_t));
6148 	D3("srq_destroy: exiting, srq_rp 0x%p\n", srq_rp);
6149 	return (0);
6150 }
6151 
6152 static void
daplka_hash_srq_free(void * obj)6153 daplka_hash_srq_free(void *obj)
6154 {
6155 	daplka_srq_resource_t *srq_rp = (daplka_srq_resource_t *)obj;
6156 
6157 	ASSERT(DAPLKA_RS_TYPE(srq_rp) == DAPL_TYPE_SRQ);
6158 	DAPLKA_RS_UNREF(srq_rp);
6159 }
6160 
6161 /*
6162  * This function tells the CM to start listening on a service id.
6163  * It must be called by the passive side client before the client
6164  * can receive connection requests from remote endpoints. If the
6165  * client specifies a non-zero service id (connection qualifier in
6166  * dapl terms), this function will attempt to bind to this service
6167  * id and return an error if the id is already in use. If the client
6168  * specifies zero as the service id, this function will try to find
6169  * the next available service id and return it back to the client.
6170  * To support the cr_handoff function, this function will, in addition
6171  * to creating and inserting an SP resource into the per-IA SP hash
6172  * table, insert the SP resource into a global SP table. This table
6173  * maintains all active service points created by all dapl clients.
6174  * CR handoff locates the target SP by iterating through this global
6175  * table.
6176  */
6177 /* ARGSUSED */
6178 static int
daplka_service_register(daplka_ia_resource_t * ia_rp,intptr_t arg,int mode,cred_t * cred,int * rvalp)6179 daplka_service_register(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
6180 	cred_t *cred, int *rvalp)
6181 {
6182 	daplka_evd_resource_t	*evd_rp = NULL;
6183 	daplka_sp_resource_t	*sp_rp = NULL;
6184 	dapl_service_register_t	args;
6185 	ibt_srv_desc_t		sd_args;
6186 	ibt_srv_bind_t		sb_args;
6187 	ibt_status_t		status;
6188 	ib_svc_id_t		retsid = 0;
6189 	uint64_t		sp_hkey = 0;
6190 	boolean_t		bumped = B_FALSE;
6191 	int			backlog_size;
6192 	int			retval = 0;
6193 
6194 	retval = ddi_copyin((void *)arg, &args,
6195 	    sizeof (dapl_service_register_t), mode);
6196 	if (retval != 0) {
6197 		DERR("service_register: copyin error %d\n", retval);
6198 		return (EINVAL);
6199 	}
6200 
6201 	sp_rp = kmem_zalloc(sizeof (*sp_rp), daplka_km_flags);
6202 	if (sp_rp == NULL) {
6203 		DERR("service_register: cannot allocate sp resource\n");
6204 		return (ENOMEM);
6205 	}
6206 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sp_rp))
6207 	DAPLKA_RS_INIT(sp_rp, DAPL_TYPE_SP,
6208 	    DAPLKA_RS_RNUM(ia_rp), daplka_sp_destroy);
6209 
6210 	/* check if evd exists */
6211 	evd_rp = (daplka_evd_resource_t *)
6212 	    daplka_hash_lookup(&ia_rp->ia_evd_htbl, args.sr_evd_hkey);
6213 	if (evd_rp == NULL) {
6214 		DERR("service_register: evd resource not found\n");
6215 		retval = EINVAL;
6216 		goto cleanup;
6217 	}
6218 	/*
6219 	 * initialize backlog size
6220 	 */
6221 	if (evd_rp && evd_rp->evd_cq_real_size > 0) {
6222 		backlog_size = evd_rp->evd_cq_real_size + 1;
6223 	} else {
6224 		backlog_size = DAPLKA_DEFAULT_SP_BACKLOG;
6225 	}
6226 	D2("service_register: args.sr_sid = %llu\n", (longlong_t)args.sr_sid);
6227 
6228 	/* save the userland sp ptr */
6229 	sp_rp->sp_cookie = args.sr_sp_cookie;
6230 	sp_rp->sp_backlog_size = backlog_size;
6231 	D3("service_register: backlog set to %d\n", sp_rp->sp_backlog_size);
6232 	sp_rp->sp_backlog = kmem_zalloc(sp_rp->sp_backlog_size *
6233 	    sizeof (daplka_sp_conn_pend_t), daplka_km_flags);
6234 
6235 	/* save evd resource pointer */
6236 	sp_rp->sp_evd_res = evd_rp;
6237 
6238 	/*
6239 	 * save ruid here so that we can do a comparison later
6240 	 * when someone does cr_handoff. the check will prevent
6241 	 * a malicious app from passing a CR to us.
6242 	 */
6243 	sp_rp->sp_ruid = crgetruid(cred);
6244 
6245 	/* fill in args for register_service */
6246 	sd_args.sd_ud_handler = NULL;
6247 	sd_args.sd_handler = daplka_cm_service_handler;
6248 	sd_args.sd_flags = IBT_SRV_NO_FLAGS;
6249 
6250 	status = ibt_register_service(daplka_dev->daplka_clnt_hdl,
6251 	    &sd_args, args.sr_sid, 1, &sp_rp->sp_srv_hdl, &retsid);
6252 
6253 	if (status != IBT_SUCCESS) {
6254 		DERR("service_register: ibt_register_service returned %d\n",
6255 		    status);
6256 		*rvalp = (int)status;
6257 		retval = 0;
6258 		goto cleanup;
6259 	}
6260 	/* save returned sid */
6261 	sp_rp->sp_conn_qual = retsid;
6262 	args.sr_retsid = retsid;
6263 
6264 	/* fill in args for bind_service */
6265 	sb_args.sb_pkey = ia_rp->ia_port_pkey;
6266 	sb_args.sb_lease = 0xffffffff;
6267 	sb_args.sb_key[0] = 0x1234;
6268 	sb_args.sb_key[1] = 0x5678;
6269 	sb_args.sb_name = DAPLKA_DRV_NAME;
6270 
6271 	D2("service_register: bind(0x%llx:0x%llx)\n",
6272 	    (longlong_t)ia_rp->ia_hca_sgid.gid_prefix,
6273 	    (longlong_t)ia_rp->ia_hca_sgid.gid_guid);
6274 
6275 	status = ibt_bind_service(sp_rp->sp_srv_hdl, ia_rp->ia_hca_sgid,
6276 	    &sb_args, (void *)sp_rp, &sp_rp->sp_bind_hdl);
6277 	if (status != IBT_SUCCESS) {
6278 		DERR("service_register: ibt_bind_service returned %d\n",
6279 		    status);
6280 		*rvalp = (int)status;
6281 		retval = 0;
6282 		goto cleanup;
6283 	}
6284 
6285 	/*
6286 	 * need to bump refcnt because the global hash table will
6287 	 * have a reference to sp_rp
6288 	 */
6289 	DAPLKA_RS_REF(sp_rp);
6290 	bumped = B_TRUE;
6291 
6292 	/* insert into global sp hash table */
6293 	sp_rp->sp_global_hkey = 0;
6294 	retval = daplka_hash_insert(&daplka_global_sp_htbl,
6295 	    &sp_rp->sp_global_hkey, (void *)sp_rp);
6296 	if (retval != 0) {
6297 		DERR("service_register: cannot insert sp resource\n");
6298 		goto cleanup;
6299 	}
6300 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*sp_rp))
6301 
6302 	/* insert into per-IA sp hash table */
6303 	retval = daplka_hash_insert(&ia_rp->ia_sp_htbl,
6304 	    &sp_hkey, (void *)sp_rp);
6305 	if (retval != 0) {
6306 		DERR("service_register: cannot insert sp resource\n");
6307 		goto cleanup;
6308 	}
6309 
6310 	/* pass index to application */
6311 	args.sr_sp_hkey = sp_hkey;
6312 	retval = ddi_copyout(&args, (void *)arg,
6313 	    sizeof (dapl_service_register_t), mode);
6314 	if (retval != 0) {
6315 		DERR("service_register: copyout error %d\n", retval);
6316 		retval = EFAULT;
6317 		goto cleanup;
6318 	}
6319 	return (0);
6320 
6321 cleanup:;
6322 	ASSERT(sp_rp != NULL);
6323 	/* remove from ia table */
6324 	if (sp_hkey != 0) {
6325 		daplka_sp_resource_t *free_rp = NULL;
6326 
6327 		(void) daplka_hash_remove(&ia_rp->ia_sp_htbl,
6328 		    sp_hkey, (void **)&free_rp);
6329 		if (free_rp != sp_rp) {
6330 			DERR("service_register: cannot remove sp\n");
6331 			/*
6332 			 * we can only get here if another thread
6333 			 * has completed the cleanup in svc_deregister
6334 			 */
6335 			return (retval);
6336 		}
6337 	}
6338 
6339 	/* remove from global table */
6340 	if (sp_rp->sp_global_hkey != 0) {
6341 		daplka_sp_resource_t *free_rp = NULL;
6342 
6343 		/*
6344 		 * we get here if either the hash_insert into
6345 		 * ia_sp_htbl failed or the ddi_copyout failed.
6346 		 * hash_insert failure implies that we are the
6347 		 * only thread with a reference to sp. ddi_copyout
6348 		 * failure implies that svc_deregister could have
6349 		 * picked up the sp and destroyed it. but since
6350 		 * we got to this point, we must have removed
6351 		 * the sp ourselves in hash_remove above and
6352 		 * that the sp can be destroyed by us.
6353 		 */
6354 		(void) daplka_hash_remove(&daplka_global_sp_htbl,
6355 		    sp_rp->sp_global_hkey, (void **)&free_rp);
6356 		if (free_rp != sp_rp) {
6357 			DERR("service_register: cannot remove sp\n");
6358 			/*
6359 			 * this case is impossible. see explanation above.
6360 			 */
6361 			ASSERT(B_FALSE);
6362 			return (retval);
6363 		}
6364 		sp_rp->sp_global_hkey = 0;
6365 	}
6366 	/* unreference sp */
6367 	if (bumped) {
6368 		DAPLKA_RS_UNREF(sp_rp);
6369 	}
6370 
6371 	/* destroy sp resource */
6372 	DAPLKA_RS_UNREF(sp_rp);
6373 	return (retval);
6374 }
6375 
6376 /*
6377  * deregisters the service and removes SP from the global table.
6378  */
6379 /* ARGSUSED */
6380 static int
daplka_service_deregister(daplka_ia_resource_t * ia_rp,intptr_t arg,int mode,cred_t * cred,int * rvalp)6381 daplka_service_deregister(daplka_ia_resource_t *ia_rp, intptr_t arg, int mode,
6382 	cred_t *cred, int *rvalp)
6383 {
6384 	dapl_service_deregister_t	args;
6385 	daplka_sp_resource_t		*sp_rp = NULL, *g_sp_rp = NULL;
6386 	int				retval;
6387 
6388 	retval = ddi_copyin((void *)arg, &args,
6389 	    sizeof (dapl_service_deregister_t), mode);
6390 
6391 	if (retval != 0) {
6392 		DERR("service_deregister: copyin error %d\n", retval);
6393 		return (EINVAL);
6394 	}
6395 
6396 	retval = daplka_hash_remove(&ia_rp->ia_sp_htbl,
6397 	    args.sdr_sp_hkey, (void **)&sp_rp);
6398 	if (retval != 0 || sp_rp == NULL) {
6399 		DERR("service_deregister: cannot find sp resource\n");
6400 		return (EINVAL);
6401 	}
6402 
6403 	retval = daplka_hash_remove(&daplka_global_sp_htbl,
6404 	    sp_rp->sp_global_hkey, (void **)&g_sp_rp);
6405 	if (retval != 0 || g_sp_rp == NULL) {
6406 		DERR("service_deregister: cannot find sp resource\n");
6407 	}
6408 
6409 	/* remove the global reference */
6410 	if (g_sp_rp == sp_rp) {
6411 		DAPLKA_RS_UNREF(g_sp_rp);
6412 	}
6413 
6414 	DAPLKA_RS_UNREF(sp_rp);
6415 	return (0);
6416 }
6417 
6418 /*
6419  * destroys a service point.
6420  * called when the refcnt drops to zero.
6421  */
6422 static int
daplka_sp_destroy(daplka_resource_t * gen_rp)6423 daplka_sp_destroy(daplka_resource_t *gen_rp)
6424 {
6425 	daplka_sp_resource_t *sp_rp = (daplka_sp_resource_t *)gen_rp;
6426 	ibt_status_t status;
6427 
6428 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*sp_rp))
6429 	ASSERT(DAPLKA_RS_REFCNT(sp_rp) == 0);
6430 	D3("sp_destroy: entering, sp_rp %p, rnum %d\n",
6431 	    sp_rp, DAPLKA_RS_RNUM(sp_rp));
6432 
6433 	/*
6434 	 * it is possible for pending connections to remain
6435 	 * on an SP. We need to clean them up here.
6436 	 */
6437 	if (sp_rp->sp_backlog != NULL) {
6438 		ibt_cm_proceed_reply_t proc_reply;
6439 		int i, cnt = 0;
6440 		void *spcp_sidp;
6441 
6442 		for (i = 0; i < sp_rp->sp_backlog_size; i++) {
6443 			if (sp_rp->sp_backlog[i].spcp_state ==
6444 			    DAPLKA_SPCP_PENDING) {
6445 				cnt++;
6446 				if (sp_rp->sp_backlog[i].spcp_sid == NULL) {
6447 					DERR("sp_destroy: "
6448 					    "spcp_sid == NULL!\n");
6449 					continue;
6450 				}
6451 				mutex_enter(&sp_rp->sp_lock);
6452 				spcp_sidp = sp_rp->sp_backlog[i].spcp_sid;
6453 				sp_rp->sp_backlog[i].spcp_state =
6454 				    DAPLKA_SPCP_INIT;
6455 				sp_rp->sp_backlog[i].spcp_sid = NULL;
6456 				sp_rp->sp_backlog[i].spcp_req_len = 0;
6457 				mutex_exit(&sp_rp->sp_lock);
6458 				status = ibt_cm_proceed(IBT_CM_EVENT_REQ_RCV,
6459 				    spcp_sidp,
6460 				    IBT_CM_NO_RESOURCE, &proc_reply, NULL, 0);
6461 				if (status != IBT_SUCCESS) {
6462 					DERR("sp_destroy: proceed failed %d\n",
6463 					    status);
6464 				}
6465 			}
6466 		}
6467 		if (cnt > 0) {
6468 			DERR("sp_destroy: found %d pending "
6469 			    "connections\n", cnt);
6470 		}
6471 	}
6472 
6473 	if (sp_rp->sp_srv_hdl != NULL && sp_rp->sp_bind_hdl != NULL) {
6474 		status = ibt_unbind_service(sp_rp->sp_srv_hdl,
6475 		    sp_rp->sp_bind_hdl);
6476 		if (status != IBT_SUCCESS) {
6477 			DERR("sp_destroy: ibt_unbind_service "
6478 			    "failed: %d\n", status);
6479 		}
6480 	}
6481 
6482 	if (sp_rp->sp_srv_hdl != NULL) {
6483 		status = ibt_deregister_service(daplka_dev->daplka_clnt_hdl,
6484 		    sp_rp->sp_srv_hdl);
6485 		if (status != IBT_SUCCESS) {
6486 			DERR("sp_destroy: ibt_deregister_service "
6487 			    "failed: %d\n", status);
6488 		}
6489 	}
6490 	if (sp_rp->sp_backlog != NULL) {
6491 		kmem_free(sp_rp->sp_backlog,
6492 		    sp_rp->sp_backlog_size * sizeof (daplka_sp_conn_pend_t));
6493 		sp_rp->sp_backlog = NULL;
6494 		sp_rp->sp_backlog_size = 0;
6495 	}
6496 
6497 	/*
6498 	 * release reference to evd
6499 	 */
6500 	if (sp_rp->sp_evd_res != NULL) {
6501 		DAPLKA_RS_UNREF(sp_rp->sp_evd_res);
6502 	}
6503 	sp_rp->sp_bind_hdl = NULL;
6504 	sp_rp->sp_srv_hdl = NULL;
6505 	DAPLKA_RS_FINI(sp_rp);
6506 	kmem_free(sp_rp, sizeof (*sp_rp));
6507 	D3("sp_destroy: exiting, sp_rp %p\n", sp_rp);
6508 	return (0);
6509 }
6510 
6511 /*
6512  * this function is called by daplka_hash_destroy for
6513  * freeing SP resource objects
6514  */
6515 static void
daplka_hash_sp_free(void * obj)6516 daplka_hash_sp_free(void *obj)
6517 {
6518 	daplka_sp_resource_t *sp_rp = (daplka_sp_resource_t *)obj;
6519 	daplka_sp_resource_t *g_sp_rp;
6520 	int retval;
6521 
6522 	ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);
6523 
6524 	retval = daplka_hash_remove(&daplka_global_sp_htbl,
6525 	    sp_rp->sp_global_hkey, (void **)&g_sp_rp);
6526 	if (retval != 0 || g_sp_rp == NULL) {
6527 		DERR("sp_free: cannot find sp resource\n");
6528 	}
6529 	if (g_sp_rp == sp_rp) {
6530 		DAPLKA_RS_UNREF(g_sp_rp);
6531 	}
6532 
6533 	DAPLKA_RS_UNREF(sp_rp);
6534 }
6535 
6536 static void
daplka_hash_sp_unref(void * obj)6537 daplka_hash_sp_unref(void *obj)
6538 {
6539 	daplka_sp_resource_t *sp_rp = (daplka_sp_resource_t *)obj;
6540 
6541 	ASSERT(DAPLKA_RS_TYPE(sp_rp) == DAPL_TYPE_SP);
6542 	DAPLKA_RS_UNREF(sp_rp);
6543 }
6544 
6545 /*
6546  * Passive side CM handlers
6547  */
6548 
6549 /*
6550  * processes the REQ_RCV event
6551  */
6552 /* ARGSUSED */
6553 static ibt_cm_status_t
daplka_cm_service_req(daplka_sp_resource_t * spp,ibt_cm_event_t * event,ibt_cm_return_args_t * ret_args,void * pr_data,ibt_priv_data_len_t pr_len)6554 daplka_cm_service_req(daplka_sp_resource_t *spp, ibt_cm_event_t *event,
6555     ibt_cm_return_args_t *ret_args, void *pr_data, ibt_priv_data_len_t pr_len)
6556 {
6557 	daplka_sp_conn_pend_t	*conn = NULL;
6558 	daplka_evd_event_t	*cr_ev = NULL;
6559 	ibt_cm_status_t		cm_status = IBT_CM_DEFAULT;
6560 	uint16_t		bkl_index;
6561 	ibt_status_t		status;
6562 
6563 	/*
6564 	 * acquire a slot in the connection backlog of this service point
6565 	 */
6566 	mutex_enter(&spp->sp_lock);
6567 	for (bkl_index = 0; bkl_index < spp->sp_backlog_size; bkl_index++) {
6568 		if (spp->sp_backlog[bkl_index].spcp_state == DAPLKA_SPCP_INIT) {
6569 			conn = &spp->sp_backlog[bkl_index];
6570 			ASSERT(conn->spcp_sid == NULL);
6571 			conn->spcp_state = DAPLKA_SPCP_PENDING;
6572 			conn->spcp_sid = event->cm_session_id;
6573 			break;
6574 		}
6575 	}
6576 	mutex_exit(&spp->sp_lock);
6577 
6578 	/*
6579 	 * too many pending connections
6580 	 */
6581 	if (bkl_index == spp->sp_backlog_size) {
6582 		DERR("service_req: connection pending exceeded %d limit\n",
6583 		    spp->sp_backlog_size);
6584 		return (IBT_CM_NO_RESOURCE);
6585 	}
6586 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*conn))
6587 
6588 	/*
6589 	 * save data for cr_handoff
6590 	 */
6591 	if (pr_data != NULL && pr_len > 0) {
6592 		int trunc_len = pr_len;
6593 
6594 		if (trunc_len > DAPL_MAX_PRIVATE_DATA_SIZE) {
6595 			DERR("service_req: private data truncated\n");
6596 			trunc_len = DAPL_MAX_PRIVATE_DATA_SIZE;
6597 		}
6598 		conn->spcp_req_len = trunc_len;
6599 		bcopy(pr_data, conn->spcp_req_data, trunc_len);
6600 	} else {
6601 		conn->spcp_req_len = 0;
6602 	}
6603 	conn->spcp_rdma_ra_in = event->cm_event.req.req_rdma_ra_in;
6604 	conn->spcp_rdma_ra_out = event->cm_event.req.req_rdma_ra_out;
6605 
6606 	/*
6607 	 * create a CR event
6608 	 */
6609 	cr_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
6610 	if (cr_ev == NULL) {
6611 		DERR("service_req: could not alloc cr_ev\n");
6612 		cm_status = IBT_CM_NO_RESOURCE;
6613 		goto cleanup;
6614 	}
6615 
6616 	cr_ev->ee_next = NULL;
6617 	cr_ev->ee_cmev.ec_cm_cookie = spp->sp_cookie;
6618 	cr_ev->ee_cmev.ec_cm_is_passive = B_TRUE;
6619 	cr_ev->ee_cmev.ec_cm_psep_cookie = DAPLKA_CREATE_PSEP_COOKIE(bkl_index);
6620 	/*
6621 	 * save the requestor gid
6622 	 * daplka_event_poll needs this if this is a third party REQ_RCV
6623 	 */
6624 	cr_ev->ee_cmev.ec_cm_req_prim_addr.gid_prefix =
6625 	    event->cm_event.req.req_prim_addr.av_dgid.gid_prefix;
6626 	cr_ev->ee_cmev.ec_cm_req_prim_addr.gid_guid =
6627 	    event->cm_event.req.req_prim_addr.av_dgid.gid_guid;
6628 
6629 	/*
6630 	 * set event type
6631 	 */
6632 	if (pr_len == 0) {
6633 		cr_ev->ee_cmev.ec_cm_ev_type =
6634 		    DAPL_IB_CME_CONNECTION_REQUEST_PENDING;
6635 	} else {
6636 		cr_ev->ee_cmev.ec_cm_ev_priv_data =
6637 		    kmem_zalloc(pr_len, KM_NOSLEEP);
6638 		if (cr_ev->ee_cmev.ec_cm_ev_priv_data == NULL) {
6639 			DERR("service_req: could not alloc priv\n");
6640 			cm_status = IBT_CM_NO_RESOURCE;
6641 			goto cleanup;
6642 		}
6643 		bcopy(pr_data, cr_ev->ee_cmev.ec_cm_ev_priv_data, pr_len);
6644 		cr_ev->ee_cmev.ec_cm_ev_type =
6645 		    DAPL_IB_CME_CONNECTION_REQUEST_PENDING_PRIVATE_DATA;
6646 	}
6647 	cr_ev->ee_cmev.ec_cm_ev_priv_data_len = pr_len;
6648 
6649 	/*
6650 	 * tell the active side to expect the processing time to be
6651 	 * at most equal to daplka_cm_delay
6652 	 */
6653 	status = ibt_cm_delay(IBT_CM_DELAY_REQ, event->cm_session_id,
6654 	    daplka_cm_delay, NULL, 0);
6655 	if (status != IBT_SUCCESS) {
6656 		DERR("service_req: ibt_cm_delay failed %d\n", status);
6657 		cm_status = IBT_CM_NO_RESOURCE;
6658 		goto cleanup;
6659 	}
6660 
6661 	/*
6662 	 * enqueue cr_ev onto the cr_events list of the EVD
6663 	 * corresponding to the SP
6664 	 */
6665 	D2("service_req: enqueue event(%p) evdp(%p) priv_data(%p) "
6666 	    "priv_len(%d) psep(0x%llx)\n", cr_ev, spp->sp_evd_res,
6667 	    cr_ev->ee_cmev.ec_cm_ev_priv_data,
6668 	    (int)cr_ev->ee_cmev.ec_cm_ev_priv_data_len,
6669 	    (longlong_t)cr_ev->ee_cmev.ec_cm_psep_cookie);
6670 
6671 	daplka_evd_wakeup(spp->sp_evd_res,
6672 	    &spp->sp_evd_res->evd_cr_events, cr_ev);
6673 
6674 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*conn))
6675 	return (IBT_CM_DEFER);
6676 
6677 cleanup:;
6678 	/*
6679 	 * free the cr event
6680 	 */
6681 	if (cr_ev != NULL) {
6682 		if (cr_ev->ee_cmev.ec_cm_ev_priv_data != NULL) {
6683 			kmem_free(cr_ev->ee_cmev.ec_cm_ev_priv_data, pr_len);
6684 			cr_ev->ee_cmev.ec_cm_ev_priv_data = NULL;
6685 			cr_ev->ee_cmev.ec_cm_ev_priv_data_len = 0;
6686 		}
6687 		kmem_free(cr_ev, sizeof (daplka_evd_event_t));
6688 	}
6689 	/*
6690 	 * release our slot in the backlog array
6691 	 */
6692 	if (conn != NULL) {
6693 		mutex_enter(&spp->sp_lock);
6694 		ASSERT(conn->spcp_state == DAPLKA_SPCP_PENDING);
6695 		ASSERT(conn->spcp_sid == event->cm_session_id);
6696 		conn->spcp_state = DAPLKA_SPCP_INIT;
6697 		conn->spcp_req_len = 0;
6698 		conn->spcp_sid = NULL;
6699 		mutex_exit(&spp->sp_lock);
6700 	}
6701 	return (cm_status);
6702 }
6703 
6704 /*
6705  * processes the CONN_CLOSED event
6706  */
6707 /* ARGSUSED */
6708 static ibt_cm_status_t
daplka_cm_service_conn_closed(daplka_sp_resource_t * sp_rp,ibt_cm_event_t * event,ibt_cm_return_args_t * ret_args,void * priv_data,ibt_priv_data_len_t len)6709 daplka_cm_service_conn_closed(daplka_sp_resource_t *sp_rp,
6710     ibt_cm_event_t *event, ibt_cm_return_args_t *ret_args,
6711     void *priv_data, ibt_priv_data_len_t len)
6712 {
6713 	daplka_ep_resource_t	*ep_rp;
6714 	daplka_evd_event_t	*disc_ev;
6715 	uint32_t		old_state, new_state;
6716 
6717 	ep_rp = (daplka_ep_resource_t *)
6718 	    ibt_get_chan_private(event->cm_channel);
6719 	if (ep_rp == NULL) {
6720 		DERR("service_conn_closed: ep_rp == NULL\n");
6721 		return (IBT_CM_ACCEPT);
6722 	}
6723 
6724 	/*
6725 	 * verify that the ep_state is either CONNECTED or
6726 	 * DISCONNECTING. if it is not in either states return
6727 	 * without generating an event.
6728 	 */
6729 	new_state = old_state = daplka_ep_get_state(ep_rp);
6730 	if (old_state != DAPLKA_EP_STATE_CONNECTED &&
6731 	    old_state != DAPLKA_EP_STATE_DISCONNECTING) {
6732 		/*
6733 		 * we can get here if the connection is being aborted
6734 		 */
6735 		D2("service_conn_closed: conn aborted, state = %d, "
6736 		    "closed = %d\n", old_state, (int)event->cm_event.closed);
6737 		daplka_ep_set_state(ep_rp, old_state, new_state);
6738 		return (IBT_CM_ACCEPT);
6739 	}
6740 
6741 	/*
6742 	 * create a DAPL_IB_CME_DISCONNECTED event
6743 	 */
6744 	disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
6745 	if (disc_ev == NULL) {
6746 		DERR("service_conn_closed: cannot alloc disc_ev\n");
6747 		daplka_ep_set_state(ep_rp, old_state, new_state);
6748 		return (IBT_CM_ACCEPT);
6749 	}
6750 
6751 	disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_DISCONNECTED;
6752 	disc_ev->ee_cmev.ec_cm_cookie = sp_rp->sp_cookie;
6753 	disc_ev->ee_cmev.ec_cm_is_passive = B_TRUE;
6754 	disc_ev->ee_cmev.ec_cm_psep_cookie = ep_rp->ep_psep_cookie;
6755 	disc_ev->ee_cmev.ec_cm_ev_priv_data = NULL;
6756 	disc_ev->ee_cmev.ec_cm_ev_priv_data_len = 0;
6757 
6758 	D2("service_conn_closed: enqueue event(%p) evdp(%p) psep(0x%llx)\n",
6759 	    disc_ev, sp_rp->sp_evd_res, (longlong_t)ep_rp->ep_psep_cookie);
6760 
6761 	/*
6762 	 * transition ep_state to DISCONNECTED
6763 	 */
6764 	new_state = DAPLKA_EP_STATE_DISCONNECTED;
6765 	daplka_ep_set_state(ep_rp, old_state, new_state);
6766 
6767 	/*
6768 	 * enqueue event onto the conn_evd owned by ep_rp
6769 	 */
6770 	daplka_evd_wakeup(ep_rp->ep_conn_evd,
6771 	    &ep_rp->ep_conn_evd->evd_conn_events, disc_ev);
6772 
6773 	return (IBT_CM_ACCEPT);
6774 }
6775 
6776 /*
6777  * processes the CONN_EST event
6778  */
6779 /* ARGSUSED */
6780 static ibt_cm_status_t
daplka_cm_service_conn_est(daplka_sp_resource_t * sp_rp,ibt_cm_event_t * event,ibt_cm_return_args_t * ret_args,void * priv_data,ibt_priv_data_len_t len)6781 daplka_cm_service_conn_est(daplka_sp_resource_t *sp_rp, ibt_cm_event_t *event,
6782     ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
6783 {
6784 	daplka_ep_resource_t	*ep_rp;
6785 	daplka_evd_event_t	*conn_ev;
6786 	void			*pr_data = event->cm_priv_data;
6787 	ibt_priv_data_len_t	pr_len = event->cm_priv_data_len;
6788 	uint32_t		old_state, new_state;
6789 
6790 	ep_rp = (daplka_ep_resource_t *)
6791 	    ibt_get_chan_private(event->cm_channel);
6792 	if (ep_rp == NULL) {
6793 		DERR("service_conn_est: ep_rp == NULL\n");
6794 		return (IBT_CM_ACCEPT);
6795 	}
6796 
6797 	/*
6798 	 * verify that ep_state is ACCEPTING. if it is not in this
6799 	 * state, return without generating an event.
6800 	 */
6801 	new_state = old_state = daplka_ep_get_state(ep_rp);
6802 	if (old_state != DAPLKA_EP_STATE_ACCEPTING) {
6803 		/*
6804 		 * we can get here if the connection is being aborted
6805 		 */
6806 		DERR("service_conn_est: conn aborted, state = %d\n",
6807 		    old_state);
6808 		daplka_ep_set_state(ep_rp, old_state, new_state);
6809 		return (IBT_CM_ACCEPT);
6810 	}
6811 
6812 	/*
6813 	 * create a DAPL_IB_CME_CONNECTED event
6814 	 */
6815 	conn_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
6816 	if (conn_ev == NULL) {
6817 		DERR("service_conn_est: conn_ev alloc failed\n");
6818 		daplka_ep_set_state(ep_rp, old_state, new_state);
6819 		return (IBT_CM_ACCEPT);
6820 	}
6821 
6822 	conn_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_CONNECTED;
6823 	conn_ev->ee_cmev.ec_cm_cookie = sp_rp->sp_cookie;
6824 	conn_ev->ee_cmev.ec_cm_is_passive = B_TRUE;
6825 	conn_ev->ee_cmev.ec_cm_psep_cookie = ep_rp->ep_psep_cookie;
6826 
6827 	/*
6828 	 * copy private data into event
6829 	 */
6830 	if (pr_len > 0) {
6831 		conn_ev->ee_cmev.ec_cm_ev_priv_data =
6832 		    kmem_zalloc(pr_len, KM_NOSLEEP);
6833 		if (conn_ev->ee_cmev.ec_cm_ev_priv_data == NULL) {
6834 			DERR("service_conn_est: pr_data alloc failed\n");
6835 			daplka_ep_set_state(ep_rp, old_state, new_state);
6836 			kmem_free(conn_ev, sizeof (daplka_evd_event_t));
6837 			return (IBT_CM_ACCEPT);
6838 		}
6839 		bcopy(pr_data, conn_ev->ee_cmev.ec_cm_ev_priv_data, pr_len);
6840 	}
6841 	conn_ev->ee_cmev.ec_cm_ev_priv_data_len = pr_len;
6842 
6843 	D2("service_conn_est: enqueue event(%p) evdp(%p)\n",
6844 	    conn_ev, ep_rp->ep_conn_evd);
6845 
6846 	/*
6847 	 * transition ep_state to CONNECTED
6848 	 */
6849 	new_state = DAPLKA_EP_STATE_CONNECTED;
6850 	daplka_ep_set_state(ep_rp, old_state, new_state);
6851 
6852 	/*
6853 	 * enqueue event onto the conn_evd owned by ep_rp
6854 	 */
6855 	daplka_evd_wakeup(ep_rp->ep_conn_evd,
6856 	    &ep_rp->ep_conn_evd->evd_conn_events, conn_ev);
6857 
6858 	return (IBT_CM_ACCEPT);
6859 }
6860 
6861 /*
6862  * processes the FAILURE event
6863  */
6864 /* ARGSUSED */
6865 static ibt_cm_status_t
daplka_cm_service_event_failure(daplka_sp_resource_t * sp_rp,ibt_cm_event_t * event,ibt_cm_return_args_t * ret_args,void * priv_data,ibt_priv_data_len_t len)6866 daplka_cm_service_event_failure(daplka_sp_resource_t *sp_rp,
6867     ibt_cm_event_t *event, ibt_cm_return_args_t *ret_args, void *priv_data,
6868     ibt_priv_data_len_t len)
6869 {
6870 	daplka_evd_event_t	*disc_ev;
6871 	daplka_ep_resource_t	*ep_rp;
6872 	uint32_t		old_state, new_state;
6873 	ibt_rc_chan_query_attr_t chan_attrs;
6874 	ibt_status_t		status;
6875 
6876 	/*
6877 	 * check that we still have a valid cm_channel before continuing
6878 	 */
6879 	if (event->cm_channel == NULL) {
6880 		DERR("serice_event_failure: event->cm_channel == NULL\n");
6881 		return (IBT_CM_ACCEPT);
6882 	}
6883 	ep_rp = (daplka_ep_resource_t *)
6884 	    ibt_get_chan_private(event->cm_channel);
6885 	if (ep_rp == NULL) {
6886 		DERR("service_event_failure: ep_rp == NULL\n");
6887 		return (IBT_CM_ACCEPT);
6888 	}
6889 
6890 	/*
6891 	 * verify that ep_state is ACCEPTING or DISCONNECTING. if it
6892 	 * is not in either state, return without generating an event.
6893 	 */
6894 	new_state = old_state = daplka_ep_get_state(ep_rp);
6895 	if (old_state != DAPLKA_EP_STATE_ACCEPTING &&
6896 	    old_state != DAPLKA_EP_STATE_DISCONNECTING) {
6897 		/*
6898 		 * we can get here if the connection is being aborted
6899 		 */
6900 		DERR("service_event_failure: conn aborted, state = %d, "
6901 		    "cf_code = %d, cf_msg = %d, cf_reason = %d\n", old_state,
6902 		    (int)event->cm_event.failed.cf_code,
6903 		    (int)event->cm_event.failed.cf_msg,
6904 		    (int)event->cm_event.failed.cf_reason);
6905 
6906 		daplka_ep_set_state(ep_rp, old_state, new_state);
6907 		return (IBT_CM_ACCEPT);
6908 	}
6909 
6910 	bzero(&chan_attrs, sizeof (ibt_rc_chan_query_attr_t));
6911 	status = ibt_query_rc_channel(ep_rp->ep_chan_hdl, &chan_attrs);
6912 
6913 	if ((status == IBT_SUCCESS) &&
6914 	    (chan_attrs.rc_state != IBT_STATE_ERROR)) {
6915 		DERR("service_event_failure: conn abort qpn %d state %d\n",
6916 		    chan_attrs.rc_qpn, chan_attrs.rc_state);
6917 
6918 		/* explicit transition the QP to ERROR state */
6919 		status = ibt_flush_channel(ep_rp->ep_chan_hdl);
6920 	}
6921 
6922 	/*
6923 	 * create an event
6924 	 */
6925 	disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
6926 	if (disc_ev == NULL) {
6927 		DERR("service_event_failure: cannot alloc disc_ev\n");
6928 		daplka_ep_set_state(ep_rp, old_state, new_state);
6929 		return (IBT_CM_ACCEPT);
6930 	}
6931 
6932 	/*
6933 	 * fill in the appropriate event type
6934 	 */
6935 	if (event->cm_event.failed.cf_code == IBT_CM_FAILURE_TIMEOUT) {
6936 		disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_TIMED_OUT;
6937 	} else if (event->cm_event.failed.cf_code == IBT_CM_FAILURE_REJ_RCV) {
6938 		switch (event->cm_event.failed.cf_reason) {
6939 		case IBT_CM_INVALID_CID:
6940 			disc_ev->ee_cmev.ec_cm_ev_type =
6941 			    DAPL_IB_CME_DESTINATION_REJECT;
6942 			break;
6943 		default:
6944 			disc_ev->ee_cmev.ec_cm_ev_type =
6945 			    DAPL_IB_CME_LOCAL_FAILURE;
6946 			break;
6947 		}
6948 	} else {
6949 		disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_LOCAL_FAILURE;
6950 	}
6951 	disc_ev->ee_cmev.ec_cm_cookie = sp_rp->sp_cookie;
6952 	disc_ev->ee_cmev.ec_cm_is_passive = B_TRUE;
6953 	disc_ev->ee_cmev.ec_cm_psep_cookie = ep_rp->ep_psep_cookie;
6954 	disc_ev->ee_cmev.ec_cm_ev_priv_data_len = 0;
6955 	disc_ev->ee_cmev.ec_cm_ev_priv_data = NULL;
6956 
6957 	D2("service_event_failure: enqueue event(%p) evdp(%p) cf_code(%d) "
6958 	    "cf_msg(%d) cf_reason(%d) psep(0x%llx)\n", disc_ev,
6959 	    ep_rp->ep_conn_evd, (int)event->cm_event.failed.cf_code,
6960 	    (int)event->cm_event.failed.cf_msg,
6961 	    (int)event->cm_event.failed.cf_reason,
6962 	    (longlong_t)ep_rp->ep_psep_cookie);
6963 
6964 	/*
6965 	 * transition ep_state to DISCONNECTED
6966 	 */
6967 	new_state = DAPLKA_EP_STATE_DISCONNECTED;
6968 	daplka_ep_set_state(ep_rp, old_state, new_state);
6969 
6970 	/*
6971 	 * enqueue event onto the conn_evd owned by ep_rp
6972 	 */
6973 	daplka_evd_wakeup(ep_rp->ep_conn_evd,
6974 	    &ep_rp->ep_conn_evd->evd_conn_events, disc_ev);
6975 
6976 	return (IBT_CM_ACCEPT);
6977 }
6978 
6979 /*
6980  * this is the passive side CM handler. it gets registered
6981  * when an SP resource is created in daplka_service_register.
6982  */
6983 static ibt_cm_status_t
daplka_cm_service_handler(void * cm_private,ibt_cm_event_t * event,ibt_cm_return_args_t * ret_args,void * priv_data,ibt_priv_data_len_t len)6984 daplka_cm_service_handler(void *cm_private, ibt_cm_event_t *event,
6985 ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
6986 {
6987 	daplka_sp_resource_t	*sp_rp = (daplka_sp_resource_t *)cm_private;
6988 
6989 	if (sp_rp == NULL) {
6990 		DERR("service_handler: sp_rp == NULL\n");
6991 		return (IBT_CM_NO_RESOURCE);
6992 	}
6993 	/*
6994 	 * default is not to return priv data
6995 	 */
6996 	if (ret_args != NULL) {
6997 		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ret_args))
6998 		ret_args->cm_ret_len = 0;
6999 	}
7000 
7001 	switch (event->cm_type) {
7002 	case IBT_CM_EVENT_REQ_RCV:
7003 		D2("service_handler: IBT_CM_EVENT_REQ_RCV\n");
7004 		return (daplka_cm_service_req(sp_rp, event, ret_args,
7005 		    event->cm_priv_data, event->cm_priv_data_len));
7006 
7007 	case IBT_CM_EVENT_REP_RCV:
7008 		/* passive side should not receive this event */
7009 		D2("service_handler: IBT_CM_EVENT_REP_RCV\n");
7010 		return (IBT_CM_DEFAULT);
7011 
7012 	case IBT_CM_EVENT_CONN_CLOSED:
7013 		D2("service_handler: IBT_CM_EVENT_CONN_CLOSED %d\n",
7014 		    event->cm_event.closed);
7015 		return (daplka_cm_service_conn_closed(sp_rp, event, ret_args,
7016 		    priv_data, len));
7017 
7018 	case IBT_CM_EVENT_MRA_RCV:
7019 		/* passive side does default processing MRA event */
7020 		D2("service_handler: IBT_CM_EVENT_MRA_RCV\n");
7021 		return (IBT_CM_DEFAULT);
7022 
7023 	case IBT_CM_EVENT_CONN_EST:
7024 		D2("service_handler: IBT_CM_EVENT_CONN_EST\n");
7025 		return (daplka_cm_service_conn_est(sp_rp, event, ret_args,
7026 		    priv_data, len));
7027 
7028 	case IBT_CM_EVENT_FAILURE:
7029 		D2("service_handler: IBT_CM_EVENT_FAILURE\n");
7030 		return (daplka_cm_service_event_failure(sp_rp, event, ret_args,
7031 		    priv_data, len));
7032 	case IBT_CM_EVENT_LAP_RCV:
7033 		/* active side had initiated a path migration operation */
7034 		D2("service_handler: IBT_CM_EVENT_LAP_RCV\n");
7035 		return (IBT_CM_ACCEPT);
7036 	default:
7037 		DERR("service_handler: invalid event %d\n", event->cm_type);
7038 		break;
7039 	}
7040 	return (IBT_CM_DEFAULT);
7041 }
7042 
7043 /*
7044  * Active side CM handlers
7045  */
7046 
7047 /*
7048  * Processes the REP_RCV event. When the passive side accepts the
7049  * connection, this handler is called. We make a copy of the private
7050  * data into the ep so that it can be passed back to userland in when
7051  * the CONN_EST event occurs.
7052  */
7053 /* ARGSUSED */
7054 static ibt_cm_status_t
daplka_cm_rc_rep_rcv(daplka_ep_resource_t * ep_rp,ibt_cm_event_t * event,ibt_cm_return_args_t * ret_args,void * priv_data,ibt_priv_data_len_t len)7055 daplka_cm_rc_rep_rcv(daplka_ep_resource_t *ep_rp, ibt_cm_event_t *event,
7056     ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
7057 {
7058 	void			*pr_data = event->cm_priv_data;
7059 	ibt_priv_data_len_t	pr_len = event->cm_priv_data_len;
7060 	uint32_t		old_state, new_state;
7061 
7062 	D2("rc_rep_rcv: pr_data(0x%p), pr_len(%d)\n", pr_data,
7063 	    (int)pr_len);
7064 
7065 	ASSERT(ep_rp != NULL);
7066 	new_state = old_state = daplka_ep_get_state(ep_rp);
7067 	if (old_state != DAPLKA_EP_STATE_CONNECTING) {
7068 		/*
7069 		 * we can get here if the connection is being aborted
7070 		 */
7071 		DERR("rc_rep_rcv: conn aborted, state = %d\n", old_state);
7072 		daplka_ep_set_state(ep_rp, old_state, new_state);
7073 		return (IBT_CM_NO_CHANNEL);
7074 	}
7075 
7076 	/*
7077 	 * we do not cancel the timer here because the connection
7078 	 * handshake is still in progress.
7079 	 */
7080 
7081 	/*
7082 	 * save the private data. it will be passed up when
7083 	 * the connection is established.
7084 	 */
7085 	if (pr_len > 0) {
7086 		ep_rp->ep_priv_len = pr_len;
7087 		bcopy(pr_data, ep_rp->ep_priv_data, (size_t)pr_len);
7088 	}
7089 
7090 	/*
7091 	 * we do not actually transition to a different state.
7092 	 * the state will change when we get a conn_est, failure,
7093 	 * closed, or timeout event.
7094 	 */
7095 	daplka_ep_set_state(ep_rp, old_state, new_state);
7096 	return (IBT_CM_ACCEPT);
7097 }
7098 
7099 /*
7100  * Processes the CONN_CLOSED event. This gets called when either
7101  * the active or passive side closes the rc channel.
7102  */
7103 /* ARGSUSED */
7104 static ibt_cm_status_t
daplka_cm_rc_conn_closed(daplka_ep_resource_t * ep_rp,ibt_cm_event_t * event,ibt_cm_return_args_t * ret_args,void * priv_data,ibt_priv_data_len_t len)7105 daplka_cm_rc_conn_closed(daplka_ep_resource_t *ep_rp, ibt_cm_event_t *event,
7106     ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
7107 {
7108 	daplka_evd_event_t	*disc_ev;
7109 	uint32_t		old_state, new_state;
7110 
7111 	ASSERT(ep_rp != NULL);
7112 	old_state = new_state = daplka_ep_get_state(ep_rp);
7113 	if (old_state != DAPLKA_EP_STATE_CONNECTED &&
7114 	    old_state != DAPLKA_EP_STATE_DISCONNECTING) {
7115 		/*
7116 		 * we can get here if the connection is being aborted
7117 		 */
7118 		D2("rc_conn_closed: conn aborted, state = %d, "
7119 		    "closed = %d\n", old_state, (int)event->cm_event.closed);
7120 		daplka_ep_set_state(ep_rp, old_state, new_state);
7121 		return (IBT_CM_ACCEPT);
7122 	}
7123 
7124 	/*
7125 	 * it's ok for the timer to fire at this point. the
7126 	 * taskq thread that processes the timer will just wait
7127 	 * until we are done with our state transition.
7128 	 */
7129 	if (daplka_cancel_timer(ep_rp) != 0) {
7130 		/*
7131 		 * daplka_cancel_timer returns -1 if the timer is
7132 		 * being processed and 0 for all other cases.
7133 		 * we need to reset ep_state to allow timer processing
7134 		 * to continue.
7135 		 */
7136 		DERR("rc_conn_closed: timer is being processed\n");
7137 		daplka_ep_set_state(ep_rp, old_state, new_state);
7138 		return (IBT_CM_ACCEPT);
7139 	}
7140 
7141 	/*
7142 	 * create a DAPL_IB_CME_DISCONNECTED event
7143 	 */
7144 	disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
7145 	if (disc_ev == NULL) {
7146 		DERR("rc_conn_closed: could not alloc ev\n");
7147 		daplka_ep_set_state(ep_rp, old_state, new_state);
7148 		return (IBT_CM_ACCEPT);
7149 	}
7150 
7151 	disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_DISCONNECTED;
7152 	disc_ev->ee_cmev.ec_cm_cookie = ep_rp->ep_cookie;
7153 	disc_ev->ee_cmev.ec_cm_is_passive = B_FALSE;
7154 	disc_ev->ee_cmev.ec_cm_psep_cookie = 0;
7155 	disc_ev->ee_cmev.ec_cm_ev_priv_data = NULL;
7156 	disc_ev->ee_cmev.ec_cm_ev_priv_data_len = 0;
7157 
7158 	D2("rc_conn_closed: enqueue event(%p) evdp(%p) closed(%d)\n",
7159 	    disc_ev, ep_rp->ep_conn_evd, (int)event->cm_event.closed);
7160 
7161 	/*
7162 	 * transition ep_state to DISCONNECTED
7163 	 */
7164 	new_state = DAPLKA_EP_STATE_DISCONNECTED;
7165 	daplka_ep_set_state(ep_rp, old_state, new_state);
7166 
7167 	/*
7168 	 * enqueue event onto the conn_evd owned by ep_rp
7169 	 */
7170 	daplka_evd_wakeup(ep_rp->ep_conn_evd,
7171 	    &ep_rp->ep_conn_evd->evd_conn_events, disc_ev);
7172 
7173 	return (IBT_CM_ACCEPT);
7174 }
7175 
7176 /*
7177  * processes the CONN_EST event
7178  */
7179 /* ARGSUSED */
7180 static ibt_cm_status_t
daplka_cm_rc_conn_est(daplka_ep_resource_t * ep_rp,ibt_cm_event_t * event,ibt_cm_return_args_t * ret_args,void * priv_data,ibt_priv_data_len_t len)7181 daplka_cm_rc_conn_est(daplka_ep_resource_t *ep_rp, ibt_cm_event_t *event,
7182     ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
7183 {
7184 	daplka_evd_event_t	*conn_ev;
7185 	uint32_t		old_state, new_state;
7186 
7187 	ASSERT(ep_rp != NULL);
7188 	old_state = new_state = daplka_ep_get_state(ep_rp);
7189 	if (old_state != DAPLKA_EP_STATE_CONNECTING) {
7190 		/*
7191 		 * we can get here if the connection is being aborted
7192 		 */
7193 		DERR("rc_conn_est: conn aborted, state = %d\n", old_state);
7194 		daplka_ep_set_state(ep_rp, old_state, new_state);
7195 		return (IBT_CM_ACCEPT);
7196 	}
7197 
7198 	/*
7199 	 * it's ok for the timer to fire at this point. the
7200 	 * taskq thread that processes the timer will just wait
7201 	 * until we are done with our state transition.
7202 	 */
7203 	if (daplka_cancel_timer(ep_rp) != 0) {
7204 		/*
7205 		 * daplka_cancel_timer returns -1 if the timer is
7206 		 * being processed and 0 for all other cases.
7207 		 * we need to reset ep_state to allow timer processing
7208 		 * to continue.
7209 		 */
7210 		DERR("rc_conn_est: timer is being processed\n");
7211 		daplka_ep_set_state(ep_rp, old_state, new_state);
7212 		return (IBT_CM_ACCEPT);
7213 	}
7214 
7215 	/*
7216 	 * create a DAPL_IB_CME_CONNECTED event
7217 	 */
7218 	conn_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
7219 	if (conn_ev == NULL) {
7220 		DERR("rc_conn_est: could not alloc ev\n");
7221 		daplka_ep_set_state(ep_rp, old_state, new_state);
7222 		return (IBT_CM_ACCEPT);
7223 	}
7224 
7225 	conn_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_CONNECTED;
7226 	conn_ev->ee_cmev.ec_cm_cookie = ep_rp->ep_cookie;
7227 	conn_ev->ee_cmev.ec_cm_is_passive = B_FALSE;
7228 	conn_ev->ee_cmev.ec_cm_psep_cookie = 0;
7229 
7230 	/*
7231 	 * The private data passed back in the connection established
7232 	 * event is what was recvd in the daplka_cm_rc_rep_rcv handler and
7233 	 * saved in ep resource structure.
7234 	 */
7235 	if (ep_rp->ep_priv_len > 0) {
7236 		conn_ev->ee_cmev.ec_cm_ev_priv_data =
7237 		    kmem_zalloc(ep_rp->ep_priv_len, KM_NOSLEEP);
7238 
7239 		if (conn_ev->ee_cmev.ec_cm_ev_priv_data == NULL) {
7240 			DERR("rc_conn_est: could not alloc pr_data\n");
7241 			kmem_free(conn_ev, sizeof (daplka_evd_event_t));
7242 			daplka_ep_set_state(ep_rp, old_state, new_state);
7243 			return (IBT_CM_ACCEPT);
7244 		}
7245 		bcopy(ep_rp->ep_priv_data, conn_ev->ee_cmev.ec_cm_ev_priv_data,
7246 		    ep_rp->ep_priv_len);
7247 	}
7248 	conn_ev->ee_cmev.ec_cm_ev_priv_data_len = ep_rp->ep_priv_len;
7249 
7250 	D2("rc_conn_est: enqueue event(%p) evdp(%p) pr_data(0x%p), "
7251 	    "pr_len(%d)\n", conn_ev, ep_rp->ep_conn_evd,
7252 	    conn_ev->ee_cmev.ec_cm_ev_priv_data,
7253 	    (int)conn_ev->ee_cmev.ec_cm_ev_priv_data_len);
7254 
7255 	/*
7256 	 * transition ep_state to CONNECTED
7257 	 */
7258 	new_state = DAPLKA_EP_STATE_CONNECTED;
7259 	daplka_ep_set_state(ep_rp, old_state, new_state);
7260 
7261 	/*
7262 	 * enqueue event onto the conn_evd owned by ep_rp
7263 	 */
7264 	daplka_evd_wakeup(ep_rp->ep_conn_evd,
7265 	    &ep_rp->ep_conn_evd->evd_conn_events, conn_ev);
7266 
7267 	return (IBT_CM_ACCEPT);
7268 }
7269 
7270 /*
7271  * processes the FAILURE event
7272  */
7273 /* ARGSUSED */
7274 static ibt_cm_status_t
daplka_cm_rc_event_failure(daplka_ep_resource_t * ep_rp,ibt_cm_event_t * event,ibt_cm_return_args_t * ret_args,void * priv_data,ibt_priv_data_len_t len)7275 daplka_cm_rc_event_failure(daplka_ep_resource_t *ep_rp, ibt_cm_event_t *event,
7276     ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
7277 {
7278 	daplka_evd_event_t	*disc_ev;
7279 	ibt_priv_data_len_t	pr_len = event->cm_priv_data_len;
7280 	void			*pr_data = event->cm_priv_data;
7281 	uint32_t		old_state, new_state;
7282 	ibt_rc_chan_query_attr_t chan_attrs;
7283 	ibt_status_t		status;
7284 
7285 	ASSERT(ep_rp != NULL);
7286 	old_state = new_state = daplka_ep_get_state(ep_rp);
7287 	if (old_state != DAPLKA_EP_STATE_CONNECTING &&
7288 	    old_state != DAPLKA_EP_STATE_DISCONNECTING) {
7289 		/*
7290 		 * we can get here if the connection is being aborted
7291 		 */
7292 		DERR("rc_event_failure: conn aborted, state = %d, "
7293 		    "cf_code = %d, cf_msg = %d, cf_reason = %d\n", old_state,
7294 		    (int)event->cm_event.failed.cf_code,
7295 		    (int)event->cm_event.failed.cf_msg,
7296 		    (int)event->cm_event.failed.cf_reason);
7297 
7298 		daplka_ep_set_state(ep_rp, old_state, new_state);
7299 		return (IBT_CM_ACCEPT);
7300 	}
7301 
7302 	/*
7303 	 * it's ok for the timer to fire at this point. the
7304 	 * taskq thread that processes the timer will just wait
7305 	 * until we are done with our state transition.
7306 	 */
7307 	if (daplka_cancel_timer(ep_rp) != 0) {
7308 		/*
7309 		 * daplka_cancel_timer returns -1 if the timer is
7310 		 * being processed and 0 for all other cases.
7311 		 * we need to reset ep_state to allow timer processing
7312 		 * to continue.
7313 		 */
7314 		DERR("rc_event_failure: timer is being processed\n");
7315 		daplka_ep_set_state(ep_rp, old_state, new_state);
7316 		return (IBT_CM_ACCEPT);
7317 	}
7318 
7319 	bzero(&chan_attrs, sizeof (ibt_rc_chan_query_attr_t));
7320 	status = ibt_query_rc_channel(ep_rp->ep_chan_hdl, &chan_attrs);
7321 
7322 	if ((status == IBT_SUCCESS) &&
7323 	    (chan_attrs.rc_state != IBT_STATE_ERROR)) {
7324 		DERR("rc_event_failure: conn abort qpn %d state %d\n",
7325 		    chan_attrs.rc_qpn, chan_attrs.rc_state);
7326 
7327 		/* explicit transition the QP to ERROR state */
7328 		status = ibt_flush_channel(ep_rp->ep_chan_hdl);
7329 	}
7330 
7331 	/*
7332 	 * create an event
7333 	 */
7334 	disc_ev = kmem_zalloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
7335 	if (disc_ev == NULL) {
7336 		DERR("rc_event_failure: cannot alloc disc_ev\n");
7337 		daplka_ep_set_state(ep_rp, old_state, new_state);
7338 		return (IBT_CM_ACCEPT);
7339 	}
7340 
7341 	/*
7342 	 * copy private data into event
7343 	 */
7344 	if (pr_len > 0) {
7345 		disc_ev->ee_cmev.ec_cm_ev_priv_data =
7346 		    kmem_zalloc(pr_len, KM_NOSLEEP);
7347 
7348 		if (disc_ev->ee_cmev.ec_cm_ev_priv_data == NULL) {
7349 			DERR("rc_event_failure: cannot alloc pr data\n");
7350 			kmem_free(disc_ev, sizeof (daplka_evd_event_t));
7351 			daplka_ep_set_state(ep_rp, old_state, new_state);
7352 			return (IBT_CM_ACCEPT);
7353 		}
7354 		bcopy(pr_data, disc_ev->ee_cmev.ec_cm_ev_priv_data, pr_len);
7355 	}
7356 	disc_ev->ee_cmev.ec_cm_ev_priv_data_len = pr_len;
7357 
7358 	/*
7359 	 * fill in the appropriate event type
7360 	 */
7361 	if (event->cm_event.failed.cf_code == IBT_CM_FAILURE_REJ_RCV) {
7362 		switch (event->cm_event.failed.cf_reason) {
7363 		case IBT_CM_CONSUMER:
7364 			disc_ev->ee_cmev.ec_cm_ev_type =
7365 			    DAPL_IB_CME_DESTINATION_REJECT_PRIVATE_DATA;
7366 			break;
7367 		case IBT_CM_NO_CHAN:
7368 		case IBT_CM_NO_RESC:
7369 			disc_ev->ee_cmev.ec_cm_ev_type =
7370 			    DAPL_IB_CME_DESTINATION_REJECT;
7371 			break;
7372 		default:
7373 			disc_ev->ee_cmev.ec_cm_ev_type =
7374 			    DAPL_IB_CME_DESTINATION_REJECT;
7375 			break;
7376 		}
7377 	} else if (event->cm_event.failed.cf_code == IBT_CM_FAILURE_TIMEOUT) {
7378 		disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_TIMED_OUT;
7379 	} else {
7380 		/* others we'll mark as local failure */
7381 		disc_ev->ee_cmev.ec_cm_ev_type = DAPL_IB_CME_LOCAL_FAILURE;
7382 	}
7383 	disc_ev->ee_cmev.ec_cm_cookie = ep_rp->ep_cookie;
7384 	disc_ev->ee_cmev.ec_cm_is_passive = B_FALSE;
7385 	disc_ev->ee_cmev.ec_cm_psep_cookie = 0;
7386 
7387 	D2("rc_event_failure: enqueue event(%p) evdp(%p) cf_code(%d) "
7388 	    "cf_msg(%d) cf_reason(%d)\n", disc_ev, ep_rp->ep_conn_evd,
7389 	    (int)event->cm_event.failed.cf_code,
7390 	    (int)event->cm_event.failed.cf_msg,
7391 	    (int)event->cm_event.failed.cf_reason);
7392 
7393 	/*
7394 	 * transition ep_state to DISCONNECTED
7395 	 */
7396 	new_state = DAPLKA_EP_STATE_DISCONNECTED;
7397 	daplka_ep_set_state(ep_rp, old_state, new_state);
7398 
7399 	/*
7400 	 * enqueue event onto the conn_evd owned by ep_rp
7401 	 */
7402 	daplka_evd_wakeup(ep_rp->ep_conn_evd,
7403 	    &ep_rp->ep_conn_evd->evd_conn_events, disc_ev);
7404 
7405 	return (IBT_CM_ACCEPT);
7406 }
7407 
7408 /*
7409  * This is the active side CM handler. It gets registered when
7410  * ibt_open_rc_channel is called.
7411  */
7412 static ibt_cm_status_t
daplka_cm_rc_handler(void * cm_private,ibt_cm_event_t * event,ibt_cm_return_args_t * ret_args,void * priv_data,ibt_priv_data_len_t len)7413 daplka_cm_rc_handler(void *cm_private, ibt_cm_event_t *event,
7414     ibt_cm_return_args_t *ret_args, void *priv_data, ibt_priv_data_len_t len)
7415 {
7416 	daplka_ep_resource_t *ep_rp = (daplka_ep_resource_t *)cm_private;
7417 
7418 	if (ep_rp == NULL) {
7419 		DERR("rc_handler: ep_rp == NULL\n");
7420 		return (IBT_CM_NO_CHANNEL);
7421 	}
7422 	/*
7423 	 * default is not to return priv data
7424 	 */
7425 	if (ret_args != NULL) {
7426 		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ret_args))
7427 		ret_args->cm_ret_len = 0;
7428 	}
7429 
7430 	switch (event->cm_type) {
7431 	case IBT_CM_EVENT_REQ_RCV:
7432 		/* active side should not receive this event */
7433 		D2("rc_handler: IBT_CM_EVENT_REQ_RCV\n");
7434 		break;
7435 
7436 	case IBT_CM_EVENT_REP_RCV:
7437 		/* connection accepted by passive side */
7438 		D2("rc_handler: IBT_CM_EVENT_REP_RCV\n");
7439 		return (daplka_cm_rc_rep_rcv(ep_rp, event, ret_args,
7440 		    priv_data, len));
7441 
7442 	case IBT_CM_EVENT_CONN_CLOSED:
7443 		D2("rc_handler: IBT_CM_EVENT_CONN_CLOSED %d\n",
7444 		    event->cm_event.closed);
7445 		return (daplka_cm_rc_conn_closed(ep_rp, event, ret_args,
7446 		    priv_data, len));
7447 
7448 	case IBT_CM_EVENT_MRA_RCV:
7449 		/* passive side does default processing MRA event */
7450 		D2("rc_handler: IBT_CM_EVENT_MRA_RCV\n");
7451 		return (IBT_CM_DEFAULT);
7452 
7453 	case IBT_CM_EVENT_CONN_EST:
7454 		D2("rc_handler: IBT_CM_EVENT_CONN_EST\n");
7455 		return (daplka_cm_rc_conn_est(ep_rp, event, ret_args,
7456 		    priv_data, len));
7457 
7458 	case IBT_CM_EVENT_FAILURE:
7459 		D2("rc_handler: IBT_CM_EVENT_FAILURE\n");
7460 		return (daplka_cm_rc_event_failure(ep_rp, event, ret_args,
7461 		    priv_data, len));
7462 
7463 	default:
7464 		D2("rc_handler: invalid event %d\n", event->cm_type);
7465 		break;
7466 	}
7467 	return (IBT_CM_DEFAULT);
7468 }
7469 
7470 /*
7471  * creates an IA resource and inserts it into the global resource table.
7472  */
7473 /* ARGSUSED */
7474 static int
daplka_ia_create(minor_t rnum,intptr_t arg,int mode,cred_t * cred,int * rvalp)7475 daplka_ia_create(minor_t rnum, intptr_t arg, int mode,
7476 	cred_t *cred, int *rvalp)
7477 {
7478 	daplka_ia_resource_t	*ia_rp, *tmp_rp;
7479 	boolean_t		inserted = B_FALSE;
7480 	dapl_ia_create_t	args;
7481 	ibt_hca_hdl_t		hca_hdl;
7482 	ibt_status_t		status;
7483 	ib_gid_t		sgid;
7484 	int			retval;
7485 	ibt_hca_portinfo_t	*pinfop;
7486 	uint_t			pinfon;
7487 	uint_t			size;
7488 	ibt_ar_t		ar_s;
7489 	daplka_hca_t		*hca;
7490 
7491 	retval = ddi_copyin((void *)arg, &args, sizeof (dapl_ia_create_t),
7492 	    mode);
7493 	if (retval != 0) {
7494 		DERR("ia_create: copyin error %d\n", retval);
7495 		return (EFAULT);
7496 	}
7497 	if (args.ia_version != DAPL_IF_VERSION) {
7498 		DERR("ia_create: invalid version %d, expected version %d\n",
7499 		    args.ia_version, DAPL_IF_VERSION);
7500 		return (EINVAL);
7501 	}
7502 
7503 	/*
7504 	 * find the hca with the matching guid
7505 	 */
7506 	mutex_enter(&daplka_dev->daplka_mutex);
7507 	for (hca = daplka_dev->daplka_hca_list_head; hca != NULL;
7508 	    hca = hca->hca_next) {
7509 		if (hca->hca_guid == args.ia_guid) {
7510 			DAPLKA_HOLD_HCA_WITHOUT_LOCK(hca);
7511 			break;
7512 		}
7513 	}
7514 	mutex_exit(&daplka_dev->daplka_mutex);
7515 
7516 	if (hca == NULL) {
7517 		DERR("ia_create: guid 0x%016llx not found\n",
7518 		    (longlong_t)args.ia_guid);
7519 		return (EINVAL);
7520 	}
7521 
7522 	/*
7523 	 * check whether port number is valid and whether it is up
7524 	 */
7525 	if (args.ia_port > hca->hca_nports) {
7526 		DERR("ia_create: invalid hca_port %d\n", args.ia_port);
7527 		DAPLKA_RELE_HCA(daplka_dev, hca);
7528 		return (EINVAL);
7529 	}
7530 	hca_hdl = hca->hca_hdl;
7531 	if (hca_hdl == NULL) {
7532 		DERR("ia_create: hca_hdl == NULL\n");
7533 		DAPLKA_RELE_HCA(daplka_dev, hca);
7534 		return (EINVAL);
7535 	}
7536 	status = ibt_query_hca_ports(hca_hdl, (uint8_t)args.ia_port,
7537 	    &pinfop, &pinfon, &size);
7538 	if (status != IBT_SUCCESS) {
7539 		DERR("ia_create: ibt_query_hca_ports returned %d\n", status);
7540 		*rvalp = (int)status;
7541 		DAPLKA_RELE_HCA(daplka_dev, hca);
7542 		return (0);
7543 	}
7544 	sgid = pinfop->p_sgid_tbl[0];
7545 	ibt_free_portinfo(pinfop, size);
7546 
7547 	ia_rp = kmem_zalloc(sizeof (daplka_ia_resource_t), daplka_km_flags);
7548 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ia_rp))
7549 	DAPLKA_RS_INIT(ia_rp, DAPL_TYPE_IA, rnum, daplka_ia_destroy);
7550 
7551 	mutex_init(&ia_rp->ia_lock, NULL, MUTEX_DRIVER, NULL);
7552 	cv_init(&ia_rp->ia_cv, NULL, CV_DRIVER, NULL);
7553 	ia_rp->ia_hca_hdl = hca_hdl;
7554 	ia_rp->ia_hca_sgid = sgid;
7555 	ia_rp->ia_hca = hca;
7556 	ia_rp->ia_port_num = args.ia_port;
7557 	ia_rp->ia_port_pkey = args.ia_pkey;
7558 	ia_rp->ia_pid = ddi_get_pid();
7559 	ia_rp->ia_async_evd_hkeys = NULL;
7560 	ia_rp->ia_ar_registered = B_FALSE;
7561 	bcopy(args.ia_sadata, ia_rp->ia_sadata, DAPL_ATS_NBYTES);
7562 
7563 	/* register Address Record */
7564 	ar_s.ar_gid = ia_rp->ia_hca_sgid;
7565 	ar_s.ar_pkey = ia_rp->ia_port_pkey;
7566 	bcopy(ia_rp->ia_sadata, ar_s.ar_data, DAPL_ATS_NBYTES);
7567 #define	UC(b) ar_s.ar_data[(b)]
7568 	D3("daplka_ia_create: SA[8] %d.%d.%d.%d\n",
7569 	    UC(8), UC(9), UC(10), UC(11));
7570 	D3("daplka_ia_create: SA[12] %d.%d.%d.%d\n",
7571 	    UC(12), UC(13), UC(14), UC(15));
7572 	retval = ibt_register_ar(daplka_dev->daplka_clnt_hdl, &ar_s);
7573 	if (retval != IBT_SUCCESS) {
7574 		DERR("ia_create: failed to register Address Record.\n");
7575 		retval = EINVAL;
7576 		goto cleanup;
7577 	}
7578 	ia_rp->ia_ar_registered = B_TRUE;
7579 
7580 	/*
7581 	 * create hash tables for all object types
7582 	 */
7583 	retval = daplka_hash_create(&ia_rp->ia_ep_htbl, DAPLKA_EP_HTBL_SZ,
7584 	    daplka_hash_ep_free, daplka_hash_generic_lookup);
7585 	if (retval != 0) {
7586 		DERR("ia_create: cannot create ep hash table\n");
7587 		goto cleanup;
7588 	}
7589 	retval = daplka_hash_create(&ia_rp->ia_mr_htbl, DAPLKA_MR_HTBL_SZ,
7590 	    daplka_hash_mr_free, daplka_hash_generic_lookup);
7591 	if (retval != 0) {
7592 		DERR("ia_create: cannot create mr hash table\n");
7593 		goto cleanup;
7594 	}
7595 	retval = daplka_hash_create(&ia_rp->ia_mw_htbl, DAPLKA_MW_HTBL_SZ,
7596 	    daplka_hash_mw_free, daplka_hash_generic_lookup);
7597 	if (retval != 0) {
7598 		DERR("ia_create: cannot create mw hash table\n");
7599 		goto cleanup;
7600 	}
7601 	retval = daplka_hash_create(&ia_rp->ia_pd_htbl, DAPLKA_PD_HTBL_SZ,
7602 	    daplka_hash_pd_free, daplka_hash_generic_lookup);
7603 	if (retval != 0) {
7604 		DERR("ia_create: cannot create pd hash table\n");
7605 		goto cleanup;
7606 	}
7607 	retval = daplka_hash_create(&ia_rp->ia_evd_htbl, DAPLKA_EVD_HTBL_SZ,
7608 	    daplka_hash_evd_free, daplka_hash_generic_lookup);
7609 	if (retval != 0) {
7610 		DERR("ia_create: cannot create evd hash table\n");
7611 		goto cleanup;
7612 	}
7613 	retval = daplka_hash_create(&ia_rp->ia_cno_htbl, DAPLKA_CNO_HTBL_SZ,
7614 	    daplka_hash_cno_free, daplka_hash_generic_lookup);
7615 	if (retval != 0) {
7616 		DERR("ia_create: cannot create cno hash table\n");
7617 		goto cleanup;
7618 	}
7619 	retval = daplka_hash_create(&ia_rp->ia_sp_htbl, DAPLKA_SP_HTBL_SZ,
7620 	    daplka_hash_sp_free, daplka_hash_generic_lookup);
7621 	if (retval != 0) {
7622 		DERR("ia_create: cannot create sp hash table\n");
7623 		goto cleanup;
7624 	}
7625 	retval = daplka_hash_create(&ia_rp->ia_srq_htbl, DAPLKA_SRQ_HTBL_SZ,
7626 	    daplka_hash_srq_free, daplka_hash_generic_lookup);
7627 	if (retval != 0) {
7628 		DERR("ia_create: cannot create srq hash table\n");
7629 		goto cleanup;
7630 	}
7631 	/*
7632 	 * insert ia_rp into the global resource table
7633 	 */
7634 	retval = daplka_resource_insert(rnum, (daplka_resource_t *)ia_rp);
7635 	if (retval != 0) {
7636 		DERR("ia_create: cannot insert resource\n");
7637 		goto cleanup;
7638 	}
7639 	inserted = B_TRUE;
7640 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*ia_rp))
7641 
7642 	args.ia_resnum = rnum;
7643 	retval = copyout(&args, (void *)arg, sizeof (dapl_ia_create_t));
7644 	if (retval != 0) {
7645 		DERR("ia_create: copyout error %d\n", retval);
7646 		retval = EFAULT;
7647 		goto cleanup;
7648 	}
7649 	return (0);
7650 
7651 cleanup:;
7652 	if (inserted) {
7653 		tmp_rp = (daplka_ia_resource_t *)daplka_resource_remove(rnum);
7654 		if (tmp_rp != ia_rp) {
7655 			/*
7656 			 * we can return here because another thread must
7657 			 * have freed up the resource
7658 			 */
7659 			DERR("ia_create: cannot remove resource\n");
7660 			return (retval);
7661 		}
7662 	}
7663 	DAPLKA_RS_UNREF(ia_rp);
7664 	return (retval);
7665 }
7666 
7667 /*
7668  * destroys an IA resource
7669  */
7670 static int
daplka_ia_destroy(daplka_resource_t * gen_rp)7671 daplka_ia_destroy(daplka_resource_t *gen_rp)
7672 {
7673 	daplka_ia_resource_t	*ia_rp = (daplka_ia_resource_t *)gen_rp;
7674 	daplka_async_evd_hkey_t *hkp;
7675 	int			cnt;
7676 	ibt_ar_t		ar_s;
7677 
7678 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ia_rp))
7679 	D3("ia_destroy: entering, ia_rp 0x%p\n", ia_rp);
7680 
7681 	/* deregister Address Record */
7682 	if (ia_rp->ia_ar_registered) {
7683 		ar_s.ar_gid = ia_rp->ia_hca_sgid;
7684 		ar_s.ar_pkey = ia_rp->ia_port_pkey;
7685 		bcopy(ia_rp->ia_sadata, ar_s.ar_data, DAPL_ATS_NBYTES);
7686 		(void) ibt_deregister_ar(daplka_dev->daplka_clnt_hdl, &ar_s);
7687 		ia_rp->ia_ar_registered = B_FALSE;
7688 	}
7689 
7690 	/*
7691 	 * destroy hash tables. make sure resources are
7692 	 * destroyed in the correct order.
7693 	 */
7694 	daplka_hash_destroy(&ia_rp->ia_mw_htbl);
7695 	daplka_hash_destroy(&ia_rp->ia_mr_htbl);
7696 	daplka_hash_destroy(&ia_rp->ia_ep_htbl);
7697 	daplka_hash_destroy(&ia_rp->ia_srq_htbl);
7698 	daplka_hash_destroy(&ia_rp->ia_evd_htbl);
7699 	daplka_hash_destroy(&ia_rp->ia_cno_htbl);
7700 	daplka_hash_destroy(&ia_rp->ia_pd_htbl);
7701 	daplka_hash_destroy(&ia_rp->ia_sp_htbl);
7702 
7703 	/*
7704 	 * free the async evd list
7705 	 */
7706 	cnt = 0;
7707 	hkp = ia_rp->ia_async_evd_hkeys;
7708 	while (hkp != NULL) {
7709 		daplka_async_evd_hkey_t	*free_hkp;
7710 
7711 		cnt++;
7712 		free_hkp = hkp;
7713 		hkp = hkp->aeh_next;
7714 		kmem_free(free_hkp, sizeof (*free_hkp));
7715 	}
7716 	if (cnt > 0) {
7717 		D3("ia_destroy: freed %d hkeys\n", cnt);
7718 	}
7719 	mutex_destroy(&ia_rp->ia_lock);
7720 	cv_destroy(&ia_rp->ia_cv);
7721 	ia_rp->ia_hca_hdl = NULL;
7722 
7723 	DAPLKA_RS_FINI(ia_rp);
7724 
7725 	if (ia_rp->ia_hca)
7726 		DAPLKA_RELE_HCA(daplka_dev, ia_rp->ia_hca);
7727 
7728 	kmem_free(ia_rp, sizeof (daplka_ia_resource_t));
7729 	D3("ia_destroy: exiting, ia_rp 0x%p\n", ia_rp);
7730 	return (0);
7731 }
7732 
7733 static void
daplka_async_event_create(ibt_async_code_t code,ibt_async_event_t * event,uint64_t cookie,daplka_ia_resource_t * ia_rp)7734 daplka_async_event_create(ibt_async_code_t code, ibt_async_event_t *event,
7735     uint64_t cookie, daplka_ia_resource_t *ia_rp)
7736 {
7737 	daplka_evd_event_t	*evp;
7738 	daplka_evd_resource_t	*async_evd;
7739 	daplka_async_evd_hkey_t	*curr;
7740 
7741 	mutex_enter(&ia_rp->ia_lock);
7742 	curr = ia_rp->ia_async_evd_hkeys;
7743 	while (curr != NULL) {
7744 		/*
7745 		 * Note: this allocation does not zero out the buffer
7746 		 * since we init all the fields.
7747 		 */
7748 		evp = kmem_alloc(sizeof (daplka_evd_event_t), KM_NOSLEEP);
7749 		if (evp == NULL) {
7750 			DERR("async_event_enqueue: event alloc failed"
7751 			    "!found\n", ia_rp, curr->aeh_evd_hkey);
7752 			curr = curr->aeh_next;
7753 			continue;
7754 		}
7755 		evp->ee_next = NULL;
7756 		evp->ee_aev.ibae_type = code;
7757 		evp->ee_aev.ibae_hca_guid = event->ev_hca_guid;
7758 		evp->ee_aev.ibae_cookie = cookie;
7759 		evp->ee_aev.ibae_port = event->ev_port;
7760 
7761 		/*
7762 		 * Lookup the async evd corresponding to this ia and enqueue
7763 		 * evp and wakeup any waiter.
7764 		 */
7765 		async_evd = (daplka_evd_resource_t *)
7766 		    daplka_hash_lookup(&ia_rp->ia_evd_htbl, curr->aeh_evd_hkey);
7767 		if (async_evd == NULL) { /* async evd is being freed */
7768 			DERR("async_event_enqueue: ia_rp(%p) asycn_evd %llx "
7769 			    "!found\n", ia_rp, (longlong_t)curr->aeh_evd_hkey);
7770 			kmem_free(evp, sizeof (daplka_evd_event_t));
7771 			curr = curr->aeh_next;
7772 			continue;
7773 		}
7774 		daplka_evd_wakeup(async_evd, &async_evd->evd_async_events, evp);
7775 
7776 		/* decrement refcnt on async_evd */
7777 		DAPLKA_RS_UNREF(async_evd);
7778 		curr = curr->aeh_next;
7779 	}
7780 	mutex_exit(&ia_rp->ia_lock);
7781 }
7782 /*
7783  * This routine is called in kernel context
7784  */
7785 
7786 /* ARGSUSED */
7787 static void
daplka_rc_async_handler(void * clnt_private,ibt_hca_hdl_t hca_hdl,ibt_async_code_t code,ibt_async_event_t * event)7788 daplka_rc_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
7789     ibt_async_code_t code, ibt_async_event_t *event)
7790 {
7791 	daplka_ep_resource_t		*epp;
7792 	daplka_ia_resource_t		*ia_rp;
7793 	minor_t				ia_rnum;
7794 
7795 	if (event->ev_chan_hdl == NULL) {
7796 		DERR("daplka_rc_async_handler: ev_chan_hdl is NULL\n");
7797 		return;
7798 	}
7799 
7800 	mutex_enter(&daplka_dev->daplka_mutex);
7801 	epp = ibt_get_chan_private(event->ev_chan_hdl);
7802 	if (epp == NULL) {
7803 		mutex_exit(&daplka_dev->daplka_mutex);
7804 		DERR("daplka_rc_async_handler: chan_private is NULL\n");
7805 		return;
7806 	}
7807 
7808 	/* grab a reference to this ep */
7809 	DAPLKA_RS_REF(epp);
7810 	mutex_exit(&daplka_dev->daplka_mutex);
7811 
7812 	/*
7813 	 * The endpoint resource has the resource number corresponding to
7814 	 * the IA resource. Use that to lookup the ia resource entry
7815 	 */
7816 	ia_rnum = DAPLKA_RS_RNUM(epp);
7817 	ia_rp = (daplka_ia_resource_t *)daplka_resource_lookup(ia_rnum);
7818 	if ((ia_rp == NULL) || DAPLKA_RS_RESERVED(ia_rp)) {
7819 		D2("daplka_rc_async_handler: resource (%d) not found\n",
7820 		    ia_rnum);
7821 		DAPLKA_RS_UNREF(epp);
7822 		return;
7823 	}
7824 
7825 	/*
7826 	 * Create an async event and chain it to the async evd
7827 	 */
7828 	daplka_async_event_create(code, event, epp->ep_cookie, ia_rp);
7829 
7830 	DAPLKA_RS_UNREF(ia_rp);
7831 	DAPLKA_RS_UNREF(epp);
7832 }
7833 
7834 /*
7835  * This routine is called in kernel context
7836  */
7837 
7838 /* ARGSUSED */
7839 static void
daplka_cq_async_handler(void * clnt_private,ibt_hca_hdl_t hca_hdl,ibt_async_code_t code,ibt_async_event_t * event)7840 daplka_cq_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
7841     ibt_async_code_t code, ibt_async_event_t *event)
7842 {
7843 	daplka_evd_resource_t		*evdp;
7844 	daplka_ia_resource_t		*ia_rp;
7845 	minor_t				ia_rnum;
7846 
7847 	if (event->ev_cq_hdl == NULL)
7848 		return;
7849 
7850 	mutex_enter(&daplka_dev->daplka_mutex);
7851 	evdp = ibt_get_cq_private(event->ev_cq_hdl);
7852 	if (evdp == NULL) {
7853 		mutex_exit(&daplka_dev->daplka_mutex);
7854 		DERR("daplka_cq_async_handler: get cq private(%p) failed\n",
7855 		    event->ev_cq_hdl);
7856 		return;
7857 	}
7858 	/* grab a reference to this evd resource */
7859 	DAPLKA_RS_REF(evdp);
7860 	mutex_exit(&daplka_dev->daplka_mutex);
7861 
7862 	/*
7863 	 * The endpoint resource has the resource number corresponding to
7864 	 * the IA resource. Use that to lookup the ia resource entry
7865 	 */
7866 	ia_rnum = DAPLKA_RS_RNUM(evdp);
7867 	ia_rp = (daplka_ia_resource_t *)daplka_resource_lookup(ia_rnum);
7868 	if ((ia_rp == NULL) || DAPLKA_RS_RESERVED(ia_rp)) {
7869 		DERR("daplka_cq_async_handler: resource (%d) not found\n",
7870 		    ia_rnum);
7871 		DAPLKA_RS_UNREF(evdp);
7872 		return;
7873 	}
7874 
7875 	/*
7876 	 * Create an async event and chain it to the async evd
7877 	 */
7878 	daplka_async_event_create(code, event, evdp->evd_cookie, ia_rp);
7879 
7880 	/* release all the refcount that were acquired */
7881 	DAPLKA_RS_UNREF(ia_rp);
7882 	DAPLKA_RS_UNREF(evdp);
7883 }
7884 
7885 /*
7886  * This routine is called in kernel context, handles unaffiliated async errors
7887  */
7888 
7889 /* ARGSUSED */
7890 static void
daplka_un_async_handler(void * clnt_private,ibt_hca_hdl_t hca_hdl,ibt_async_code_t code,ibt_async_event_t * event)7891 daplka_un_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
7892     ibt_async_code_t code, ibt_async_event_t *event)
7893 {
7894 	int			i, j;
7895 	daplka_resource_blk_t	*blk;
7896 	daplka_resource_t	*rp;
7897 	daplka_ia_resource_t	*ia_rp;
7898 
7899 	/*
7900 	 * Walk the resource table looking for an ia that matches the
7901 	 * hca_hdl.
7902 	 */
7903 	rw_enter(&daplka_resource.daplka_rct_lock, RW_READER);
7904 	for (i = 0; i < daplka_resource.daplka_rc_len; i++) {
7905 		blk = daplka_resource.daplka_rc_root[i];
7906 		if (blk == NULL)
7907 			continue;
7908 		for (j = 0; j < DAPLKA_RC_BLKSZ; j++) {
7909 			rp = blk->daplka_rcblk_blks[j];
7910 			if ((rp == NULL) ||
7911 			    ((intptr_t)rp == DAPLKA_RC_RESERVED) ||
7912 			    (rp->rs_type != DAPL_TYPE_IA)) {
7913 				continue;
7914 			}
7915 			/*
7916 			 * rp is an IA resource check if it belongs
7917 			 * to the hca/port for which we got the event
7918 			 */
7919 			ia_rp = (daplka_ia_resource_t *)rp;
7920 			DAPLKA_RS_REF(ia_rp);
7921 			if ((hca_hdl == ia_rp->ia_hca_hdl) &&
7922 			    (event->ev_port == ia_rp->ia_port_num)) {
7923 				/*
7924 				 * walk the ep hash table. Acquire a
7925 				 * reader lock. NULL dgid indicates
7926 				 * local port up event.
7927 				 */
7928 				daplka_hash_walk(&ia_rp->ia_ep_htbl,
7929 				    daplka_ep_failback, NULL, RW_READER);
7930 			}
7931 			DAPLKA_RS_UNREF(ia_rp);
7932 		}
7933 	}
7934 	rw_exit(&daplka_resource.daplka_rct_lock);
7935 }
7936 
7937 static int
daplka_handle_hca_detach_event(ibt_async_event_t * event)7938 daplka_handle_hca_detach_event(ibt_async_event_t *event)
7939 {
7940 	daplka_hca_t	*hca;
7941 
7942 	/*
7943 	 * find the hca with the matching guid
7944 	 */
7945 	mutex_enter(&daplka_dev->daplka_mutex);
7946 	for (hca = daplka_dev->daplka_hca_list_head; hca != NULL;
7947 	    hca = hca->hca_next) {
7948 		if (hca->hca_guid == event->ev_hca_guid) {
7949 			if (DAPLKA_HCA_BUSY(hca)) {
7950 				mutex_exit(&daplka_dev->daplka_mutex);
7951 				return (IBT_HCA_RESOURCES_NOT_FREED);
7952 			}
7953 			daplka_dequeue_hca(daplka_dev, hca);
7954 			break;
7955 		}
7956 	}
7957 	mutex_exit(&daplka_dev->daplka_mutex);
7958 
7959 	if (hca == NULL)
7960 		return (IBT_FAILURE);
7961 
7962 	return (daplka_fini_hca(daplka_dev, hca));
7963 }
7964 
7965 /*
7966  * This routine is called in kernel context
7967  */
7968 static void
daplka_async_handler(void * clnt_private,ibt_hca_hdl_t hca_hdl,ibt_async_code_t code,ibt_async_event_t * event)7969 daplka_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl,
7970     ibt_async_code_t code, ibt_async_event_t *event)
7971 {
7972 	switch (code) {
7973 	case IBT_ERROR_CATASTROPHIC_CHAN:
7974 	case IBT_ERROR_INVALID_REQUEST_CHAN:
7975 	case IBT_ERROR_ACCESS_VIOLATION_CHAN:
7976 	case IBT_ERROR_PATH_MIGRATE_REQ:
7977 		D2("daplka_async_handler(): Channel affiliated=0x%x\n", code);
7978 		/* These events are affiliated with a the RC channel */
7979 		daplka_rc_async_handler(clnt_private, hca_hdl, code, event);
7980 		break;
7981 	case IBT_ERROR_CQ:
7982 		/* This event is affiliated with a the CQ */
7983 		D2("daplka_async_handler(): IBT_ERROR_CQ\n");
7984 		daplka_cq_async_handler(clnt_private, hca_hdl, code, event);
7985 		break;
7986 	case IBT_ERROR_PORT_DOWN:
7987 		D2("daplka_async_handler(): IBT_PORT_DOWN\n");
7988 		break;
7989 	case IBT_EVENT_PORT_UP:
7990 		D2("daplka_async_handler(): IBT_PORT_UP\n");
7991 		if (daplka_apm) {
7992 			daplka_un_async_handler(clnt_private, hca_hdl, code,
7993 			    event);
7994 		}
7995 		break;
7996 	case IBT_HCA_ATTACH_EVENT:
7997 		/*
7998 		 * NOTE: In some error recovery paths, it is possible to
7999 		 * receive IBT_HCA_ATTACH_EVENTs on already known HCAs.
8000 		 */
8001 		D2("daplka_async_handler(): IBT_HCA_ATTACH\n");
8002 		(void) daplka_init_hca(daplka_dev, event->ev_hca_guid);
8003 		break;
8004 	case IBT_HCA_DETACH_EVENT:
8005 		D2("daplka_async_handler(): IBT_HCA_DETACH\n");
8006 		/* Free all hca resources and close the HCA. */
8007 		(void) daplka_handle_hca_detach_event(event);
8008 		break;
8009 	case IBT_EVENT_PATH_MIGRATED:
8010 		/* This event is affiliated with APM */
8011 		D2("daplka_async_handler(): IBT_PATH_MIGRATED.\n");
8012 		break;
8013 	default:
8014 		D2("daplka_async_handler(): unhandled code = 0x%x\n", code);
8015 		break;
8016 	}
8017 }
8018 
8019 /*
8020  * This routine is called in kernel context related to Subnet events
8021  */
8022 /*ARGSUSED*/
8023 static void
daplka_sm_notice_handler(void * arg,ib_gid_t gid,ibt_subnet_event_code_t code,ibt_subnet_event_t * event)8024 daplka_sm_notice_handler(void *arg, ib_gid_t gid, ibt_subnet_event_code_t code,
8025 	ibt_subnet_event_t *event)
8026 {
8027 	ib_gid_t *sgid = &gid;
8028 	ib_gid_t *dgid;
8029 
8030 	dgid = &event->sm_notice_gid;
8031 	switch (code) {
8032 	case IBT_SM_EVENT_GID_AVAIL:
8033 		/* This event is affiliated with remote port up */
8034 		D2("daplka_sm_notice_handler(): IBT_SM_EVENT_GID_AVAIL\n");
8035 		if (daplka_apm)
8036 			daplka_sm_gid_avail(sgid, dgid);
8037 		return;
8038 	case IBT_SM_EVENT_GID_UNAVAIL:
8039 		/* This event is affiliated with remote port down */
8040 		D2("daplka_sm_notice_handler(): IBT_SM_EVENT_GID_UNAVAIL\n");
8041 		return;
8042 	default:
8043 		D2("daplka_sm_notice_handler(): unhandled IBT_SM_EVENT_[%d]\n",
8044 		    code);
8045 		return;
8046 	}
8047 }
8048 
8049 /*
8050  * This routine is called in kernel context, handles Subnet GID avail events
8051  * which correspond to remote port up. Setting up alternate path or path
8052  * migration (failback) has to be initiated from the active side of the
8053  * original connect.
8054  */
8055 static void
daplka_sm_gid_avail(ib_gid_t * sgid,ib_gid_t * dgid)8056 daplka_sm_gid_avail(ib_gid_t *sgid, ib_gid_t *dgid)
8057 {
8058 	int			i, j;
8059 	daplka_resource_blk_t	*blk;
8060 	daplka_resource_t	*rp;
8061 	daplka_ia_resource_t	*ia_rp;
8062 
8063 	D2("daplka_sm_gid_avail: sgid=%llx:%llx dgid=%llx:%llx\n",
8064 	    (longlong_t)sgid->gid_prefix, (longlong_t)sgid->gid_guid,
8065 	    (longlong_t)dgid->gid_prefix, (longlong_t)dgid->gid_guid);
8066 
8067 	/*
8068 	 * Walk the resource table looking for an ia that matches the sgid
8069 	 */
8070 	rw_enter(&daplka_resource.daplka_rct_lock, RW_READER);
8071 	for (i = 0; i < daplka_resource.daplka_rc_len; i++) {
8072 		blk = daplka_resource.daplka_rc_root[i];
8073 		if (blk == NULL)
8074 			continue;
8075 		for (j = 0; j < DAPLKA_RC_BLKSZ; j++) {
8076 			rp = blk->daplka_rcblk_blks[j];
8077 			if ((rp == NULL) ||
8078 			    ((intptr_t)rp == DAPLKA_RC_RESERVED) ||
8079 			    (rp->rs_type != DAPL_TYPE_IA)) {
8080 				continue;
8081 			}
8082 			/*
8083 			 * rp is an IA resource check if its gid
8084 			 * matches with the calling sgid
8085 			 */
8086 			ia_rp = (daplka_ia_resource_t *)rp;
8087 			DAPLKA_RS_REF(ia_rp);
8088 			if ((sgid->gid_prefix ==
8089 			    ia_rp->ia_hca_sgid.gid_prefix) &&
8090 			    (sgid->gid_guid == ia_rp->ia_hca_sgid.gid_guid)) {
8091 				/*
8092 				 * walk the ep hash table. Acquire a
8093 				 * reader lock.
8094 				 */
8095 				daplka_hash_walk(&ia_rp->ia_ep_htbl,
8096 				    daplka_ep_failback,
8097 				    (void *)dgid, RW_READER);
8098 			}
8099 			DAPLKA_RS_UNREF(ia_rp);
8100 		}
8101 	}
8102 	rw_exit(&daplka_resource.daplka_rct_lock);
8103 }
8104 
8105 /*
8106  * This routine is called in kernel context to get and set an alternate path
8107  */
8108 static int
daplka_ep_altpath(daplka_ep_resource_t * ep_rp,ib_gid_t * dgid)8109 daplka_ep_altpath(daplka_ep_resource_t *ep_rp, ib_gid_t *dgid)
8110 {
8111 	ibt_alt_path_info_t path_info;
8112 	ibt_alt_path_attr_t path_attr;
8113 	ibt_ap_returns_t ap_rets;
8114 	ibt_status_t status;
8115 
8116 	D2("daplka_ep_altpath : ibt_get_alt_path()\n");
8117 	bzero(&path_info, sizeof (ibt_alt_path_info_t));
8118 	bzero(&path_attr, sizeof (ibt_alt_path_attr_t));
8119 	if (dgid != NULL) {
8120 		path_attr.apa_sgid = ep_rp->ep_sgid;
8121 		path_attr.apa_dgid = *dgid;
8122 	}
8123 	status = ibt_get_alt_path(ep_rp->ep_chan_hdl, IBT_PATH_AVAIL,
8124 	    &path_attr, &path_info);
8125 	if (status != IBT_SUCCESS) {
8126 		DERR("daplka_ep_altpath : ibt_get_alt_path failed %d\n",
8127 		    status);
8128 		return (1);
8129 	}
8130 
8131 	D2("daplka_ep_altpath : ibt_set_alt_path()\n");
8132 	bzero(&ap_rets, sizeof (ibt_ap_returns_t));
8133 	status = ibt_set_alt_path(ep_rp->ep_chan_hdl, IBT_BLOCKING,
8134 	    &path_info, NULL, 0, &ap_rets);
8135 	if ((status != IBT_SUCCESS) ||
8136 	    (ap_rets.ap_status != IBT_CM_AP_LOADED)) {
8137 		DERR("daplka_ep_altpath : ibt_set_alt_path failed "
8138 		    "status %d ap_status %d\n", status, ap_rets.ap_status);
8139 		return (1);
8140 	}
8141 	return (0);
8142 }
8143 
8144 /*
8145  * This routine is called in kernel context to failback to the original path
8146  */
8147 static int
daplka_ep_failback(void * objp,void * arg)8148 daplka_ep_failback(void *objp, void *arg)
8149 {
8150 	daplka_ep_resource_t *ep_rp = (daplka_ep_resource_t *)objp;
8151 	ib_gid_t *dgid;
8152 	ibt_status_t status;
8153 	ibt_rc_chan_query_attr_t chan_attrs;
8154 	int i;
8155 
8156 	ASSERT(DAPLKA_RS_TYPE(ep_rp) == DAPL_TYPE_EP);
8157 	D2("daplka_ep_failback ep : sgid=%llx:%llx dgid=%llx:%llx\n",
8158 	    (longlong_t)ep_rp->ep_sgid.gid_prefix,
8159 	    (longlong_t)ep_rp->ep_sgid.gid_guid,
8160 	    (longlong_t)ep_rp->ep_dgid.gid_prefix,
8161 	    (longlong_t)ep_rp->ep_dgid.gid_guid);
8162 
8163 	/*
8164 	 * daplka_ep_failback is called from daplka_hash_walk
8165 	 * which holds the read lock on hash table to protect
8166 	 * the endpoint resource from removal
8167 	 */
8168 	mutex_enter(&ep_rp->ep_lock);
8169 	/* check for unconnected endpoints */
8170 	/* first check for ep state */
8171 	if (ep_rp->ep_state != DAPLKA_EP_STATE_CONNECTED) {
8172 		mutex_exit(&ep_rp->ep_lock);
8173 		D2("daplka_ep_failback : endpoints not connected\n");
8174 		return (0);
8175 	}
8176 
8177 	/* second check for gids */
8178 	if (((ep_rp->ep_sgid.gid_prefix == 0) &&
8179 	    (ep_rp->ep_sgid.gid_guid == 0)) ||
8180 	    ((ep_rp->ep_dgid.gid_prefix == 0) &&
8181 	    (ep_rp->ep_dgid.gid_guid == 0))) {
8182 		mutex_exit(&ep_rp->ep_lock);
8183 		D2("daplka_ep_failback : skip unconnected endpoints\n");
8184 		return (0);
8185 	}
8186 
8187 	/*
8188 	 * matching destination ep
8189 	 * when dgid is NULL, the async event is a local port up.
8190 	 * dgid becomes wild card, i.e. all endpoints match
8191 	 */
8192 	dgid = (ib_gid_t *)arg;
8193 	if (dgid == NULL) {
8194 		/* ignore loopback ep */
8195 		if ((ep_rp->ep_sgid.gid_prefix == ep_rp->ep_dgid.gid_prefix) &&
8196 		    (ep_rp->ep_sgid.gid_guid == ep_rp->ep_dgid.gid_guid)) {
8197 			mutex_exit(&ep_rp->ep_lock);
8198 			D2("daplka_ep_failback : skip loopback endpoints\n");
8199 			return (0);
8200 		}
8201 	} else {
8202 		/* matching remote ep */
8203 		if ((ep_rp->ep_dgid.gid_prefix != dgid->gid_prefix) ||
8204 		    (ep_rp->ep_dgid.gid_guid != dgid->gid_guid)) {
8205 			mutex_exit(&ep_rp->ep_lock);
8206 			D2("daplka_ep_failback : unrelated endpoints\n");
8207 			return (0);
8208 		}
8209 	}
8210 
8211 	/* call get and set altpath with original dgid used in ep_connect */
8212 	if (daplka_ep_altpath(ep_rp, &ep_rp->ep_dgid)) {
8213 		mutex_exit(&ep_rp->ep_lock);
8214 		return (0);
8215 	}
8216 
8217 	/*
8218 	 * wait for migration state to be ARMed
8219 	 * e.g. a post_send msg will transit mig_state from REARM to ARM
8220 	 */
8221 	for (i = 0; i < daplka_query_aft_setaltpath; i++) {
8222 		bzero(&chan_attrs, sizeof (ibt_rc_chan_query_attr_t));
8223 		status = ibt_query_rc_channel(ep_rp->ep_chan_hdl, &chan_attrs);
8224 		if (status != IBT_SUCCESS) {
8225 			mutex_exit(&ep_rp->ep_lock);
8226 			DERR("daplka_ep_altpath : ibt_query_rc_channel err\n");
8227 			return (0);
8228 		}
8229 		if (chan_attrs.rc_mig_state == IBT_STATE_ARMED)
8230 			break;
8231 	}
8232 
8233 	D2("daplka_ep_altpath : query[%d] mig_st=%d\n",
8234 	    i, chan_attrs.rc_mig_state);
8235 	D2("daplka_ep_altpath : P sgid=%llx:%llx dgid=%llx:%llx\n",
8236 	    (longlong_t)
8237 	    chan_attrs.rc_prim_path.cep_adds_vect.av_sgid.gid_prefix,
8238 	    (longlong_t)chan_attrs.rc_prim_path.cep_adds_vect.av_sgid.gid_guid,
8239 	    (longlong_t)
8240 	    chan_attrs.rc_prim_path.cep_adds_vect.av_dgid.gid_prefix,
8241 	    (longlong_t)chan_attrs.rc_prim_path.cep_adds_vect.av_dgid.gid_guid);
8242 	D2("daplka_ep_altpath : A sgid=%llx:%llx dgid=%llx:%llx\n",
8243 	    (longlong_t)chan_attrs.rc_alt_path.cep_adds_vect.av_sgid.gid_prefix,
8244 	    (longlong_t)chan_attrs.rc_alt_path.cep_adds_vect.av_sgid.gid_guid,
8245 	    (longlong_t)chan_attrs.rc_alt_path.cep_adds_vect.av_dgid.gid_prefix,
8246 	    (longlong_t)chan_attrs.rc_alt_path.cep_adds_vect.av_dgid.gid_guid);
8247 
8248 	/* skip failback on ARMed state not reached or env override */
8249 	if ((i >= daplka_query_aft_setaltpath) || (daplka_failback == 0)) {
8250 		mutex_exit(&ep_rp->ep_lock);
8251 		DERR("daplka_ep_altpath : ARMed state not reached\n");
8252 		return (0);
8253 	}
8254 
8255 	D2("daplka_ep_failback : ibt_migrate_path() to original ep\n");
8256 	status = ibt_migrate_path(ep_rp->ep_chan_hdl);
8257 	if (status != IBT_SUCCESS) {
8258 		mutex_exit(&ep_rp->ep_lock);
8259 		DERR("daplka_ep_failback : migration failed "
8260 		    "status %d\n", status);
8261 		return (0);
8262 	}
8263 
8264 	/* call get and altpath with NULL dgid to indicate unspecified dgid */
8265 	(void) daplka_ep_altpath(ep_rp, NULL);
8266 	mutex_exit(&ep_rp->ep_lock);
8267 	return (0);
8268 }
8269 
8270 /*
8271  * IBTF wrappers used for resource accounting
8272  */
8273 static ibt_status_t
daplka_ibt_alloc_rc_channel(daplka_ep_resource_t * ep_rp,ibt_hca_hdl_t hca_hdl,ibt_chan_alloc_flags_t flags,ibt_rc_chan_alloc_args_t * args,ibt_channel_hdl_t * chan_hdl_p,ibt_chan_sizes_t * sizes)8274 daplka_ibt_alloc_rc_channel(daplka_ep_resource_t *ep_rp, ibt_hca_hdl_t hca_hdl,
8275     ibt_chan_alloc_flags_t flags, ibt_rc_chan_alloc_args_t *args,
8276     ibt_channel_hdl_t *chan_hdl_p, ibt_chan_sizes_t *sizes)
8277 {
8278 	daplka_hca_t	*hca_p;
8279 	uint32_t	max_qps;
8280 	boolean_t	acct_enabled;
8281 	ibt_status_t	status;
8282 
8283 	acct_enabled = daplka_accounting_enabled;
8284 	hca_p = ep_rp->ep_hca;
8285 	max_qps = daplka_max_qp_percent * hca_p->hca_attr.hca_max_chans / 100;
8286 
8287 	if (acct_enabled) {
8288 		if (daplka_max_qp_percent != 0 &&
8289 		    max_qps <= hca_p->hca_qp_count) {
8290 			DERR("ibt_alloc_rc_channel: resource limit exceeded "
8291 			    "(limit %d, count %d)\n", max_qps,
8292 			    hca_p->hca_qp_count);
8293 			return (IBT_INSUFF_RESOURCE);
8294 		}
8295 		DAPLKA_RS_ACCT_INC(ep_rp, 1);
8296 		atomic_inc_32(&hca_p->hca_qp_count);
8297 	}
8298 	status = ibt_alloc_rc_channel(hca_hdl, flags, args, chan_hdl_p, sizes);
8299 
8300 	if (status != IBT_SUCCESS && acct_enabled) {
8301 		DAPLKA_RS_ACCT_DEC(ep_rp, 1);
8302 		atomic_dec_32(&hca_p->hca_qp_count);
8303 	}
8304 	return (status);
8305 }
8306 
8307 static ibt_status_t
daplka_ibt_free_channel(daplka_ep_resource_t * ep_rp,ibt_channel_hdl_t chan_hdl)8308 daplka_ibt_free_channel(daplka_ep_resource_t *ep_rp, ibt_channel_hdl_t chan_hdl)
8309 {
8310 	daplka_hca_t	*hca_p;
8311 	ibt_status_t	status;
8312 
8313 	hca_p = ep_rp->ep_hca;
8314 
8315 	status = ibt_free_channel(chan_hdl);
8316 	if (status != IBT_SUCCESS) {
8317 		return (status);
8318 	}
8319 	if (DAPLKA_RS_ACCT_CHARGED(ep_rp) > 0) {
8320 		DAPLKA_RS_ACCT_DEC(ep_rp, 1);
8321 		atomic_dec_32(&hca_p->hca_qp_count);
8322 	}
8323 	return (status);
8324 }
8325 
8326 static ibt_status_t
daplka_ibt_alloc_cq(daplka_evd_resource_t * evd_rp,ibt_hca_hdl_t hca_hdl,ibt_cq_attr_t * cq_attr,ibt_cq_hdl_t * ibt_cq_p,uint32_t * real_size)8327 daplka_ibt_alloc_cq(daplka_evd_resource_t *evd_rp, ibt_hca_hdl_t hca_hdl,
8328     ibt_cq_attr_t *cq_attr, ibt_cq_hdl_t *ibt_cq_p, uint32_t *real_size)
8329 {
8330 	daplka_hca_t	*hca_p;
8331 	uint32_t	max_cqs;
8332 	boolean_t	acct_enabled;
8333 	ibt_status_t	status;
8334 
8335 	acct_enabled = daplka_accounting_enabled;
8336 	hca_p = evd_rp->evd_hca;
8337 	max_cqs = daplka_max_cq_percent * hca_p->hca_attr.hca_max_cq / 100;
8338 
8339 	if (acct_enabled) {
8340 		if (daplka_max_cq_percent != 0 &&
8341 		    max_cqs <= hca_p->hca_cq_count) {
8342 			DERR("ibt_alloc_cq: resource limit exceeded "
8343 			    "(limit %d, count %d)\n", max_cqs,
8344 			    hca_p->hca_cq_count);
8345 			return (IBT_INSUFF_RESOURCE);
8346 		}
8347 		DAPLKA_RS_ACCT_INC(evd_rp, 1);
8348 		atomic_inc_32(&hca_p->hca_cq_count);
8349 	}
8350 	status = ibt_alloc_cq(hca_hdl, cq_attr, ibt_cq_p, real_size);
8351 
8352 	if (status != IBT_SUCCESS && acct_enabled) {
8353 		DAPLKA_RS_ACCT_DEC(evd_rp, 1);
8354 		atomic_dec_32(&hca_p->hca_cq_count);
8355 	}
8356 	return (status);
8357 }
8358 
8359 static ibt_status_t
daplka_ibt_free_cq(daplka_evd_resource_t * evd_rp,ibt_cq_hdl_t cq_hdl)8360 daplka_ibt_free_cq(daplka_evd_resource_t *evd_rp, ibt_cq_hdl_t cq_hdl)
8361 {
8362 	daplka_hca_t	*hca_p;
8363 	ibt_status_t	status;
8364 
8365 	hca_p = evd_rp->evd_hca;
8366 
8367 	status = ibt_free_cq(cq_hdl);
8368 	if (status != IBT_SUCCESS) {
8369 		return (status);
8370 	}
8371 	if (DAPLKA_RS_ACCT_CHARGED(evd_rp) > 0) {
8372 		DAPLKA_RS_ACCT_DEC(evd_rp, 1);
8373 		atomic_dec_32(&hca_p->hca_cq_count);
8374 	}
8375 	return (status);
8376 }
8377 
8378 static ibt_status_t
daplka_ibt_alloc_pd(daplka_pd_resource_t * pd_rp,ibt_hca_hdl_t hca_hdl,ibt_pd_flags_t flags,ibt_pd_hdl_t * pd_hdl_p)8379 daplka_ibt_alloc_pd(daplka_pd_resource_t *pd_rp, ibt_hca_hdl_t hca_hdl,
8380     ibt_pd_flags_t flags, ibt_pd_hdl_t *pd_hdl_p)
8381 {
8382 	daplka_hca_t	*hca_p;
8383 	uint32_t	max_pds;
8384 	boolean_t	acct_enabled;
8385 	ibt_status_t	status;
8386 
8387 	acct_enabled = daplka_accounting_enabled;
8388 	hca_p = pd_rp->pd_hca;
8389 	max_pds = daplka_max_pd_percent * hca_p->hca_attr.hca_max_pd / 100;
8390 
8391 	if (acct_enabled) {
8392 		if (daplka_max_pd_percent != 0 &&
8393 		    max_pds <= hca_p->hca_pd_count) {
8394 			DERR("ibt_alloc_pd: resource limit exceeded "
8395 			    "(limit %d, count %d)\n", max_pds,
8396 			    hca_p->hca_pd_count);
8397 			return (IBT_INSUFF_RESOURCE);
8398 		}
8399 		DAPLKA_RS_ACCT_INC(pd_rp, 1);
8400 		atomic_inc_32(&hca_p->hca_pd_count);
8401 	}
8402 	status = ibt_alloc_pd(hca_hdl, flags, pd_hdl_p);
8403 
8404 	if (status != IBT_SUCCESS && acct_enabled) {
8405 		DAPLKA_RS_ACCT_DEC(pd_rp, 1);
8406 		atomic_dec_32(&hca_p->hca_pd_count);
8407 	}
8408 	return (status);
8409 }
8410 
8411 static ibt_status_t
daplka_ibt_free_pd(daplka_pd_resource_t * pd_rp,ibt_hca_hdl_t hca_hdl,ibt_pd_hdl_t pd_hdl)8412 daplka_ibt_free_pd(daplka_pd_resource_t *pd_rp, ibt_hca_hdl_t hca_hdl,
8413     ibt_pd_hdl_t pd_hdl)
8414 {
8415 	daplka_hca_t	*hca_p;
8416 	ibt_status_t	status;
8417 
8418 	hca_p = pd_rp->pd_hca;
8419 
8420 	status = ibt_free_pd(hca_hdl, pd_hdl);
8421 	if (status != IBT_SUCCESS) {
8422 		return (status);
8423 	}
8424 	if (DAPLKA_RS_ACCT_CHARGED(pd_rp) > 0) {
8425 		DAPLKA_RS_ACCT_DEC(pd_rp, 1);
8426 		atomic_dec_32(&hca_p->hca_pd_count);
8427 	}
8428 	return (status);
8429 }
8430 
8431 static ibt_status_t
daplka_ibt_alloc_mw(daplka_mw_resource_t * mw_rp,ibt_hca_hdl_t hca_hdl,ibt_pd_hdl_t pd_hdl,ibt_mw_flags_t flags,ibt_mw_hdl_t * mw_hdl_p,ibt_rkey_t * rkey_p)8432 daplka_ibt_alloc_mw(daplka_mw_resource_t *mw_rp, ibt_hca_hdl_t hca_hdl,
8433     ibt_pd_hdl_t pd_hdl, ibt_mw_flags_t flags, ibt_mw_hdl_t *mw_hdl_p,
8434     ibt_rkey_t *rkey_p)
8435 {
8436 	daplka_hca_t	*hca_p;
8437 	uint32_t	max_mws;
8438 	boolean_t	acct_enabled;
8439 	ibt_status_t	status;
8440 
8441 	acct_enabled = daplka_accounting_enabled;
8442 	hca_p = mw_rp->mw_hca;
8443 	max_mws = daplka_max_mw_percent * hca_p->hca_attr.hca_max_mem_win / 100;
8444 
8445 	if (acct_enabled) {
8446 		if (daplka_max_mw_percent != 0 &&
8447 		    max_mws <= hca_p->hca_mw_count) {
8448 			DERR("ibt_alloc_mw: resource limit exceeded "
8449 			    "(limit %d, count %d)\n", max_mws,
8450 			    hca_p->hca_mw_count);
8451 			return (IBT_INSUFF_RESOURCE);
8452 		}
8453 		DAPLKA_RS_ACCT_INC(mw_rp, 1);
8454 		atomic_inc_32(&hca_p->hca_mw_count);
8455 	}
8456 	status = ibt_alloc_mw(hca_hdl, pd_hdl, flags, mw_hdl_p, rkey_p);
8457 
8458 	if (status != IBT_SUCCESS && acct_enabled) {
8459 		DAPLKA_RS_ACCT_DEC(mw_rp, 1);
8460 		atomic_dec_32(&hca_p->hca_mw_count);
8461 	}
8462 	return (status);
8463 }
8464 
8465 static ibt_status_t
daplka_ibt_free_mw(daplka_mw_resource_t * mw_rp,ibt_hca_hdl_t hca_hdl,ibt_mw_hdl_t mw_hdl)8466 daplka_ibt_free_mw(daplka_mw_resource_t *mw_rp, ibt_hca_hdl_t hca_hdl,
8467     ibt_mw_hdl_t mw_hdl)
8468 {
8469 	daplka_hca_t	*hca_p;
8470 	ibt_status_t	status;
8471 
8472 	hca_p = mw_rp->mw_hca;
8473 
8474 	status = ibt_free_mw(hca_hdl, mw_hdl);
8475 	if (status != IBT_SUCCESS) {
8476 		return (status);
8477 	}
8478 	if (DAPLKA_RS_ACCT_CHARGED(mw_rp) > 0) {
8479 		DAPLKA_RS_ACCT_DEC(mw_rp, 1);
8480 		atomic_dec_32(&hca_p->hca_mw_count);
8481 	}
8482 	return (status);
8483 }
8484 
8485 static ibt_status_t
daplka_ibt_register_mr(daplka_mr_resource_t * mr_rp,ibt_hca_hdl_t hca_hdl,ibt_pd_hdl_t pd_hdl,ibt_mr_attr_t * mr_attr,ibt_mr_hdl_t * mr_hdl_p,ibt_mr_desc_t * mr_desc_p)8486 daplka_ibt_register_mr(daplka_mr_resource_t *mr_rp, ibt_hca_hdl_t hca_hdl,
8487     ibt_pd_hdl_t pd_hdl, ibt_mr_attr_t *mr_attr, ibt_mr_hdl_t *mr_hdl_p,
8488     ibt_mr_desc_t *mr_desc_p)
8489 {
8490 	daplka_hca_t	*hca_p;
8491 	uint32_t	max_mrs;
8492 	boolean_t	acct_enabled;
8493 	ibt_status_t	status;
8494 
8495 	acct_enabled = daplka_accounting_enabled;
8496 	hca_p = mr_rp->mr_hca;
8497 	max_mrs = daplka_max_mr_percent * hca_p->hca_attr.hca_max_memr / 100;
8498 
8499 	if (acct_enabled) {
8500 		if (daplka_max_mr_percent != 0 &&
8501 		    max_mrs <= hca_p->hca_mr_count) {
8502 			DERR("ibt_register_mr: resource limit exceeded "
8503 			    "(limit %d, count %d)\n", max_mrs,
8504 			    hca_p->hca_mr_count);
8505 			return (IBT_INSUFF_RESOURCE);
8506 		}
8507 		DAPLKA_RS_ACCT_INC(mr_rp, 1);
8508 		atomic_inc_32(&hca_p->hca_mr_count);
8509 	}
8510 	status = ibt_register_mr(hca_hdl, pd_hdl, mr_attr, mr_hdl_p, mr_desc_p);
8511 
8512 	if (status != IBT_SUCCESS && acct_enabled) {
8513 		DAPLKA_RS_ACCT_DEC(mr_rp, 1);
8514 		atomic_dec_32(&hca_p->hca_mr_count);
8515 	}
8516 	return (status);
8517 }
8518 
8519 static ibt_status_t
daplka_ibt_register_shared_mr(daplka_mr_resource_t * mr_rp,ibt_hca_hdl_t hca_hdl,ibt_mr_hdl_t mr_hdl,ibt_pd_hdl_t pd_hdl,ibt_smr_attr_t * smr_attr_p,ibt_mr_hdl_t * mr_hdl_p,ibt_mr_desc_t * mr_desc_p)8520 daplka_ibt_register_shared_mr(daplka_mr_resource_t *mr_rp,
8521     ibt_hca_hdl_t hca_hdl, ibt_mr_hdl_t mr_hdl, ibt_pd_hdl_t pd_hdl,
8522     ibt_smr_attr_t *smr_attr_p, ibt_mr_hdl_t *mr_hdl_p,
8523     ibt_mr_desc_t *mr_desc_p)
8524 {
8525 	daplka_hca_t	*hca_p;
8526 	uint32_t	max_mrs;
8527 	boolean_t	acct_enabled;
8528 	ibt_status_t	status;
8529 
8530 	acct_enabled = daplka_accounting_enabled;
8531 	hca_p = mr_rp->mr_hca;
8532 	max_mrs = daplka_max_mr_percent * hca_p->hca_attr.hca_max_memr / 100;
8533 
8534 	if (acct_enabled) {
8535 		if (daplka_max_mr_percent != 0 &&
8536 		    max_mrs <= hca_p->hca_mr_count) {
8537 			DERR("ibt_register_shared_mr: resource limit exceeded "
8538 			    "(limit %d, count %d)\n", max_mrs,
8539 			    hca_p->hca_mr_count);
8540 			return (IBT_INSUFF_RESOURCE);
8541 		}
8542 		DAPLKA_RS_ACCT_INC(mr_rp, 1);
8543 		atomic_inc_32(&hca_p->hca_mr_count);
8544 	}
8545 	status = ibt_register_shared_mr(hca_hdl, mr_hdl, pd_hdl,
8546 	    smr_attr_p, mr_hdl_p, mr_desc_p);
8547 
8548 	if (status != IBT_SUCCESS && acct_enabled) {
8549 		DAPLKA_RS_ACCT_DEC(mr_rp, 1);
8550 		atomic_dec_32(&hca_p->hca_mr_count);
8551 	}
8552 	return (status);
8553 }
8554 
8555 static ibt_status_t
daplka_ibt_deregister_mr(daplka_mr_resource_t * mr_rp,ibt_hca_hdl_t hca_hdl,ibt_mr_hdl_t mr_hdl)8556 daplka_ibt_deregister_mr(daplka_mr_resource_t *mr_rp, ibt_hca_hdl_t hca_hdl,
8557     ibt_mr_hdl_t mr_hdl)
8558 {
8559 	daplka_hca_t	*hca_p;
8560 	ibt_status_t	status;
8561 
8562 	hca_p = mr_rp->mr_hca;
8563 
8564 	status = ibt_deregister_mr(hca_hdl, mr_hdl);
8565 	if (status != IBT_SUCCESS) {
8566 		return (status);
8567 	}
8568 	if (DAPLKA_RS_ACCT_CHARGED(mr_rp) > 0) {
8569 		DAPLKA_RS_ACCT_DEC(mr_rp, 1);
8570 		atomic_dec_32(&hca_p->hca_mr_count);
8571 	}
8572 	return (status);
8573 }
8574 
8575 static ibt_status_t
daplka_ibt_alloc_srq(daplka_srq_resource_t * srq_rp,ibt_hca_hdl_t hca_hdl,ibt_srq_flags_t flags,ibt_pd_hdl_t pd,ibt_srq_sizes_t * reqsz,ibt_srq_hdl_t * srq_hdl_p,ibt_srq_sizes_t * realsz)8576 daplka_ibt_alloc_srq(daplka_srq_resource_t *srq_rp, ibt_hca_hdl_t hca_hdl,
8577     ibt_srq_flags_t flags, ibt_pd_hdl_t pd, ibt_srq_sizes_t *reqsz,
8578     ibt_srq_hdl_t *srq_hdl_p, ibt_srq_sizes_t *realsz)
8579 {
8580 	daplka_hca_t	*hca_p;
8581 	uint32_t	max_srqs;
8582 	boolean_t	acct_enabled;
8583 	ibt_status_t	status;
8584 
8585 	acct_enabled = daplka_accounting_enabled;
8586 	hca_p = srq_rp->srq_hca;
8587 	max_srqs = daplka_max_srq_percent * hca_p->hca_attr.hca_max_srqs / 100;
8588 
8589 	if (acct_enabled) {
8590 		if (daplka_max_srq_percent != 0 &&
8591 		    max_srqs <= hca_p->hca_srq_count) {
8592 			DERR("ibt_alloc_srq: resource limit exceeded "
8593 			    "(limit %d, count %d)\n", max_srqs,
8594 			    hca_p->hca_srq_count);
8595 			return (IBT_INSUFF_RESOURCE);
8596 		}
8597 		DAPLKA_RS_ACCT_INC(srq_rp, 1);
8598 		atomic_inc_32(&hca_p->hca_srq_count);
8599 	}
8600 	status = ibt_alloc_srq(hca_hdl, flags, pd, reqsz, srq_hdl_p, realsz);
8601 
8602 	if (status != IBT_SUCCESS && acct_enabled) {
8603 		DAPLKA_RS_ACCT_DEC(srq_rp, 1);
8604 		atomic_dec_32(&hca_p->hca_srq_count);
8605 	}
8606 	return (status);
8607 }
8608 
8609 static ibt_status_t
daplka_ibt_free_srq(daplka_srq_resource_t * srq_rp,ibt_srq_hdl_t srq_hdl)8610 daplka_ibt_free_srq(daplka_srq_resource_t *srq_rp, ibt_srq_hdl_t srq_hdl)
8611 {
8612 	daplka_hca_t	*hca_p;
8613 	ibt_status_t	status;
8614 
8615 	hca_p = srq_rp->srq_hca;
8616 
8617 	D3("ibt_free_srq: %p %p\n", srq_rp, srq_hdl);
8618 
8619 	status = ibt_free_srq(srq_hdl);
8620 	if (status != IBT_SUCCESS) {
8621 		return (status);
8622 	}
8623 	if (DAPLKA_RS_ACCT_CHARGED(srq_rp) > 0) {
8624 		DAPLKA_RS_ACCT_DEC(srq_rp, 1);
8625 		atomic_dec_32(&hca_p->hca_srq_count);
8626 	}
8627 	return (status);
8628 }
8629 
8630 
8631 static int
daplka_common_ioctl(int cmd,minor_t rnum,intptr_t arg,int mode,cred_t * cred,int * rvalp)8632 daplka_common_ioctl(int cmd, minor_t rnum, intptr_t arg, int mode,
8633 	cred_t *cred, int *rvalp)
8634 {
8635 	int error;
8636 
8637 	switch (cmd) {
8638 	case DAPL_IA_CREATE:
8639 		error = daplka_ia_create(rnum, arg, mode, cred, rvalp);
8640 		break;
8641 
8642 	/* can potentially add other commands here */
8643 
8644 	default:
8645 		DERR("daplka_common_ioctl: cmd not supported\n");
8646 		error = DDI_FAILURE;
8647 	}
8648 	return (error);
8649 }
8650 
8651 static int
daplka_evd_ioctl(int cmd,daplka_ia_resource_t * rp,intptr_t arg,int mode,cred_t * cred,int * rvalp)8652 daplka_evd_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8653 	cred_t *cred, int *rvalp)
8654 {
8655 	int error;
8656 
8657 	switch (cmd) {
8658 	case DAPL_EVD_CREATE:
8659 		error = daplka_evd_create(rp, arg, mode, cred, rvalp);
8660 		break;
8661 
8662 	case DAPL_CQ_RESIZE:
8663 		error = daplka_cq_resize(rp, arg, mode, cred, rvalp);
8664 		break;
8665 
8666 	case DAPL_EVENT_POLL:
8667 		error = daplka_event_poll(rp, arg, mode, cred, rvalp);
8668 		break;
8669 
8670 	case DAPL_EVENT_WAKEUP:
8671 		error = daplka_event_wakeup(rp, arg, mode, cred, rvalp);
8672 		break;
8673 
8674 	case DAPL_EVD_MODIFY_CNO:
8675 		error = daplka_evd_modify_cno(rp, arg, mode, cred, rvalp);
8676 		break;
8677 
8678 	case DAPL_EVD_FREE:
8679 		error = daplka_evd_free(rp, arg, mode, cred, rvalp);
8680 		break;
8681 
8682 	default:
8683 		DERR("daplka_evd_ioctl: cmd not supported\n");
8684 		error = DDI_FAILURE;
8685 	}
8686 	return (error);
8687 }
8688 
8689 static int
daplka_ep_ioctl(int cmd,daplka_ia_resource_t * rp,intptr_t arg,int mode,cred_t * cred,int * rvalp)8690 daplka_ep_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8691 	cred_t *cred, int *rvalp)
8692 {
8693 	int error;
8694 
8695 	switch (cmd) {
8696 	case DAPL_EP_MODIFY:
8697 		error = daplka_ep_modify(rp, arg, mode, cred, rvalp);
8698 		break;
8699 
8700 	case DAPL_EP_FREE:
8701 		error = daplka_ep_free(rp, arg, mode, cred, rvalp);
8702 		break;
8703 
8704 	case DAPL_EP_CONNECT:
8705 		error = daplka_ep_connect(rp, arg, mode, cred, rvalp);
8706 		break;
8707 
8708 	case DAPL_EP_DISCONNECT:
8709 		error = daplka_ep_disconnect(rp, arg, mode, cred, rvalp);
8710 		break;
8711 
8712 	case DAPL_EP_REINIT:
8713 		error = daplka_ep_reinit(rp, arg, mode, cred, rvalp);
8714 		break;
8715 
8716 	case DAPL_EP_CREATE:
8717 		error = daplka_ep_create(rp, arg, mode, cred, rvalp);
8718 		break;
8719 
8720 	default:
8721 		DERR("daplka_ep_ioctl: cmd not supported\n");
8722 		error = DDI_FAILURE;
8723 	}
8724 	return (error);
8725 }
8726 
8727 static int
daplka_mr_ioctl(int cmd,daplka_ia_resource_t * rp,intptr_t arg,int mode,cred_t * cred,int * rvalp)8728 daplka_mr_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8729 	cred_t *cred, int *rvalp)
8730 {
8731 	int error;
8732 
8733 	switch (cmd) {
8734 	case DAPL_MR_REGISTER:
8735 		error = daplka_mr_register(rp, arg, mode, cred, rvalp);
8736 		break;
8737 
8738 	case DAPL_MR_REGISTER_LMR:
8739 		error = daplka_mr_register_lmr(rp, arg, mode, cred, rvalp);
8740 		break;
8741 
8742 	case DAPL_MR_REGISTER_SHARED:
8743 		error = daplka_mr_register_shared(rp, arg, mode, cred, rvalp);
8744 		break;
8745 
8746 	case DAPL_MR_DEREGISTER:
8747 		error = daplka_mr_deregister(rp, arg, mode, cred, rvalp);
8748 		break;
8749 
8750 	case DAPL_MR_SYNC:
8751 		error = daplka_mr_sync(rp, arg, mode, cred, rvalp);
8752 		break;
8753 
8754 	default:
8755 		DERR("daplka_mr_ioctl: cmd not supported\n");
8756 		error = DDI_FAILURE;
8757 	}
8758 	return (error);
8759 }
8760 
8761 static int
daplka_mw_ioctl(int cmd,daplka_ia_resource_t * rp,intptr_t arg,int mode,cred_t * cred,int * rvalp)8762 daplka_mw_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8763 	cred_t *cred, int *rvalp)
8764 {
8765 	int error;
8766 
8767 	switch (cmd) {
8768 	case DAPL_MW_ALLOC:
8769 		error = daplka_mw_alloc(rp, arg, mode, cred, rvalp);
8770 		break;
8771 
8772 	case DAPL_MW_FREE:
8773 		error = daplka_mw_free(rp, arg, mode, cred, rvalp);
8774 		break;
8775 
8776 	default:
8777 		DERR("daplka_mw_ioctl: cmd not supported\n");
8778 		error = DDI_FAILURE;
8779 	}
8780 	return (error);
8781 }
8782 
8783 static int
daplka_cno_ioctl(int cmd,daplka_ia_resource_t * rp,intptr_t arg,int mode,cred_t * cred,int * rvalp)8784 daplka_cno_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8785 	cred_t *cred, int *rvalp)
8786 {
8787 	int error;
8788 
8789 	switch (cmd) {
8790 	case DAPL_CNO_ALLOC:
8791 		error = daplka_cno_alloc(rp, arg, mode, cred, rvalp);
8792 		break;
8793 
8794 	case DAPL_CNO_FREE:
8795 		error = daplka_cno_free(rp, arg, mode, cred, rvalp);
8796 		break;
8797 
8798 	case DAPL_CNO_WAIT:
8799 		error = daplka_cno_wait(rp, arg, mode, cred, rvalp);
8800 		break;
8801 
8802 	default:
8803 		DERR("daplka_cno_ioctl: cmd not supported\n");
8804 		error = DDI_FAILURE;
8805 	}
8806 	return (error);
8807 }
8808 
8809 static int
daplka_pd_ioctl(int cmd,daplka_ia_resource_t * rp,intptr_t arg,int mode,cred_t * cred,int * rvalp)8810 daplka_pd_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8811 	cred_t *cred, int *rvalp)
8812 {
8813 	int error;
8814 
8815 	switch (cmd) {
8816 	case DAPL_PD_ALLOC:
8817 		error = daplka_pd_alloc(rp, arg, mode, cred, rvalp);
8818 		break;
8819 
8820 	case DAPL_PD_FREE:
8821 		error = daplka_pd_free(rp, arg, mode, cred, rvalp);
8822 		break;
8823 
8824 	default:
8825 		DERR("daplka_pd_ioctl: cmd not supported\n");
8826 		error = DDI_FAILURE;
8827 	}
8828 	return (error);
8829 }
8830 
8831 static int
daplka_sp_ioctl(int cmd,daplka_ia_resource_t * rp,intptr_t arg,int mode,cred_t * cred,int * rvalp)8832 daplka_sp_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8833 	cred_t *cred, int *rvalp)
8834 {
8835 	int error;
8836 
8837 	switch (cmd) {
8838 	case DAPL_SERVICE_REGISTER:
8839 		error = daplka_service_register(rp, arg, mode, cred, rvalp);
8840 		break;
8841 
8842 	case DAPL_SERVICE_DEREGISTER:
8843 		error = daplka_service_deregister(rp, arg, mode, cred, rvalp);
8844 		break;
8845 
8846 	default:
8847 		DERR("daplka_sp_ioctl: cmd not supported\n");
8848 		error = DDI_FAILURE;
8849 	}
8850 	return (error);
8851 }
8852 
8853 static int
daplka_srq_ioctl(int cmd,daplka_ia_resource_t * rp,intptr_t arg,int mode,cred_t * cred,int * rvalp)8854 daplka_srq_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8855 	cred_t *cred, int *rvalp)
8856 {
8857 	int error;
8858 
8859 	switch (cmd) {
8860 	case DAPL_SRQ_CREATE:
8861 		error = daplka_srq_create(rp, arg, mode, cred, rvalp);
8862 		break;
8863 
8864 	case DAPL_SRQ_RESIZE:
8865 		error = daplka_srq_resize(rp, arg, mode, cred, rvalp);
8866 		break;
8867 
8868 	case DAPL_SRQ_FREE:
8869 		error = daplka_srq_free(rp, arg, mode, cred, rvalp);
8870 		break;
8871 
8872 	default:
8873 		DERR("daplka_srq_ioctl: cmd(%d) not supported\n", cmd);
8874 		error = DDI_FAILURE;
8875 		break;
8876 	}
8877 	return (error);
8878 }
8879 
8880 static int
daplka_misc_ioctl(int cmd,daplka_ia_resource_t * rp,intptr_t arg,int mode,cred_t * cred,int * rvalp)8881 daplka_misc_ioctl(int cmd, daplka_ia_resource_t *rp, intptr_t arg, int mode,
8882 	cred_t *cred, int *rvalp)
8883 {
8884 	int error;
8885 
8886 	switch (cmd) {
8887 	case DAPL_CR_ACCEPT:
8888 		error = daplka_cr_accept(rp, arg, mode, cred, rvalp);
8889 		break;
8890 
8891 	case DAPL_CR_REJECT:
8892 		error = daplka_cr_reject(rp, arg, mode, cred, rvalp);
8893 		break;
8894 
8895 	case DAPL_IA_QUERY:
8896 		error = daplka_ia_query(rp, arg, mode, cred, rvalp);
8897 		break;
8898 
8899 	case DAPL_CR_HANDOFF:
8900 		error = daplka_cr_handoff(rp, arg, mode, cred, rvalp);
8901 		break;
8902 
8903 	default:
8904 		DERR("daplka_misc_ioctl: cmd not supported\n");
8905 		error = DDI_FAILURE;
8906 	}
8907 	return (error);
8908 }
8909 
8910 /*ARGSUSED*/
8911 static int
daplka_ioctl(dev_t dev,int cmd,intptr_t arg,int mode,cred_t * cred,int * rvalp)8912 daplka_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred,
8913 	int *rvalp)
8914 {
8915 	daplka_ia_resource_t	*ia_rp;
8916 	minor_t			rnum;
8917 	int			error = 0;
8918 
8919 	rnum = getminor(dev);
8920 	ia_rp = (daplka_ia_resource_t *)daplka_resource_lookup(rnum);
8921 	if (ia_rp == NULL) {
8922 		DERR("ioctl: resource not found, rnum %d\n", rnum);
8923 		return (ENXIO);
8924 	}
8925 
8926 	D4("ioctl: rnum = %d, cmd = 0x%x\n", rnum, cmd);
8927 	if (DAPLKA_RS_RESERVED(ia_rp)) {
8928 		error = daplka_common_ioctl(cmd, rnum, arg, mode, cred, rvalp);
8929 		return (error);
8930 	}
8931 	if (DAPLKA_RS_TYPE(ia_rp) != DAPL_TYPE_IA) {
8932 		DERR("ioctl: invalid type %d\n", DAPLKA_RS_TYPE(ia_rp));
8933 		error = EINVAL;
8934 		goto cleanup;
8935 	}
8936 	if (ia_rp->ia_pid != ddi_get_pid()) {
8937 		DERR("ioctl: ia_pid %d != pid %d\n",
8938 		    ia_rp->ia_pid, ddi_get_pid());
8939 		error = EINVAL;
8940 		goto cleanup;
8941 	}
8942 
8943 	switch (cmd & DAPL_TYPE_MASK) {
8944 	case DAPL_TYPE_EVD:
8945 		error = daplka_evd_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
8946 		break;
8947 
8948 	case DAPL_TYPE_EP:
8949 		error = daplka_ep_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
8950 		break;
8951 
8952 	case DAPL_TYPE_MR:
8953 		error = daplka_mr_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
8954 		break;
8955 
8956 	case DAPL_TYPE_MW:
8957 		error = daplka_mw_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
8958 		break;
8959 
8960 	case DAPL_TYPE_PD:
8961 		error = daplka_pd_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
8962 		break;
8963 
8964 	case DAPL_TYPE_SP:
8965 		error = daplka_sp_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
8966 		break;
8967 
8968 	case DAPL_TYPE_CNO:
8969 		error = daplka_cno_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
8970 		break;
8971 
8972 	case DAPL_TYPE_MISC:
8973 		error = daplka_misc_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
8974 		break;
8975 
8976 	case DAPL_TYPE_SRQ:
8977 		error = daplka_srq_ioctl(cmd, ia_rp, arg, mode, cred, rvalp);
8978 		break;
8979 
8980 	default:
8981 		DERR("ioctl: invalid dapl type = %d\n", DAPLKA_RS_TYPE(ia_rp));
8982 		error = DDI_FAILURE;
8983 	}
8984 
8985 cleanup:;
8986 	DAPLKA_RS_UNREF(ia_rp);
8987 	return (error);
8988 }
8989 
8990 /* ARGSUSED */
8991 static int
daplka_open(dev_t * devp,int flag,int otyp,struct cred * cred)8992 daplka_open(dev_t *devp, int flag, int otyp, struct cred *cred)
8993 {
8994 	minor_t rnum;
8995 
8996 	/*
8997 	 * Char only
8998 	 */
8999 	if (otyp != OTYP_CHR) {
9000 		return (EINVAL);
9001 	}
9002 
9003 	/*
9004 	 * Only zero can be opened, clones are used for resources.
9005 	 */
9006 	if (getminor(*devp) != DAPLKA_DRIVER_MINOR) {
9007 		DERR("daplka_open: bad minor %d\n", getminor(*devp));
9008 		return (ENODEV);
9009 	}
9010 
9011 	/*
9012 	 * - allocate new minor number
9013 	 * - update devp argument to new device
9014 	 */
9015 	if (daplka_resource_reserve(&rnum) == 0) {
9016 		*devp = makedevice(getmajor(*devp), rnum);
9017 	} else {
9018 		return (ENOMEM);
9019 	}
9020 
9021 	return (DDI_SUCCESS);
9022 }
9023 
9024 /* ARGSUSED */
9025 static int
daplka_close(dev_t dev,int flag,int otyp,struct cred * cred)9026 daplka_close(dev_t dev, int flag, int otyp, struct cred *cred)
9027 {
9028 	daplka_ia_resource_t	*ia_rp;
9029 	minor_t			rnum = getminor(dev);
9030 
9031 	/*
9032 	 * Char only
9033 	 */
9034 	if (otyp != OTYP_CHR) {
9035 		return (EINVAL);
9036 	}
9037 	D2("daplka_close: closing rnum = %d\n", rnum);
9038 	atomic_inc_32(&daplka_pending_close);
9039 
9040 	/*
9041 	 * remove from resource table.
9042 	 */
9043 	ia_rp = (daplka_ia_resource_t *)daplka_resource_remove(rnum);
9044 
9045 	/*
9046 	 * remove the initial reference
9047 	 */
9048 	if (ia_rp != NULL) {
9049 		DAPLKA_RS_UNREF(ia_rp);
9050 	}
9051 	atomic_dec_32(&daplka_pending_close);
9052 	return (DDI_SUCCESS);
9053 }
9054 
9055 
9056 /*
9057  * Resource management routines
9058  *
9059  * We start with no resource array. Each time we run out of slots, we
9060  * reallocate a new larger array and copy the pointer to the new array and
9061  * a new resource blk is allocated and added to the hash table.
9062  *
9063  * The resource control block contains:
9064  *      root    - array of pointer of resource blks
9065  *      sz      - current size of array.
9066  *      len     - last valid entry in array.
9067  *
9068  * A search operation based on a resource number is as follows:
9069  *      index = rnum / RESOURCE_BLKSZ;
9070  *      ASSERT(index < resource_block.len);
9071  *      ASSERT(index < resource_block.sz);
9072  *      offset = rnum % RESOURCE_BLKSZ;
9073  *      ASSERT(offset >= resource_block.root[index]->base);
9074  *      ASSERT(offset < resource_block.root[index]->base + RESOURCE_BLKSZ);
9075  *      return resource_block.root[index]->blks[offset];
9076  *
9077  * A resource blk is freed when its used count reaches zero.
9078  */
9079 
9080 /*
9081  * initializes the global resource table
9082  */
9083 static void
daplka_resource_init(void)9084 daplka_resource_init(void)
9085 {
9086 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(daplka_resource))
9087 	rw_init(&daplka_resource.daplka_rct_lock, NULL, RW_DRIVER, NULL);
9088 	daplka_resource.daplka_rc_len = 0;
9089 	daplka_resource.daplka_rc_sz = 0;
9090 	daplka_resource.daplka_rc_cnt = 0;
9091 	daplka_resource.daplka_rc_flag = 0;
9092 	daplka_resource.daplka_rc_root = NULL;
9093 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(daplka_resource))
9094 }
9095 
9096 /*
9097  * destroys the global resource table
9098  */
9099 static void
daplka_resource_fini(void)9100 daplka_resource_fini(void)
9101 {
9102 	int	i;
9103 
9104 	rw_enter(&daplka_resource.daplka_rct_lock, RW_WRITER);
9105 	for (i = 0; i < daplka_resource.daplka_rc_len; i++) {
9106 		daplka_resource_blk_t	*blk;
9107 		int			j;
9108 
9109 		blk = daplka_resource.daplka_rc_root[i];
9110 		if (blk == NULL) {
9111 			continue;
9112 		}
9113 		for (j = 0; j < DAPLKA_RC_BLKSZ; j++) {
9114 			if (blk->daplka_rcblk_blks[j] != NULL) {
9115 				DERR("resource_fini: non-null slot %d, %p\n",
9116 				    j, blk->daplka_rcblk_blks[j]);
9117 			}
9118 		}
9119 		kmem_free(blk, sizeof (*blk));
9120 		daplka_resource.daplka_rc_root[i] = NULL;
9121 	}
9122 	if (daplka_resource.daplka_rc_root != NULL) {
9123 		uint_t	sz;
9124 
9125 		sz = daplka_resource.daplka_rc_sz *
9126 		    sizeof (daplka_resource_blk_t *);
9127 		kmem_free(daplka_resource.daplka_rc_root, (uint_t)sz);
9128 		daplka_resource.daplka_rc_root = NULL;
9129 		daplka_resource.daplka_rc_len = 0;
9130 		daplka_resource.daplka_rc_sz = 0;
9131 	}
9132 	rw_exit(&daplka_resource.daplka_rct_lock);
9133 	rw_destroy(&daplka_resource.daplka_rct_lock);
9134 }
9135 
9136 /*
9137  * reserves a slot in the global resource table.
9138  * this is called by the open() syscall. it is needed because
9139  * at open() time, we do not have sufficient information to
9140  * create an IA resource. the library needs to subsequently
9141  * call daplka_ia_create to insert an IA resource into this
9142  * reserved slot.
9143  */
9144 static int
daplka_resource_reserve(minor_t * rnum)9145 daplka_resource_reserve(minor_t *rnum)
9146 {
9147 	int i, j, empty = -1;
9148 	daplka_resource_blk_t *blk;
9149 
9150 	rw_enter(&daplka_resource.daplka_rct_lock, RW_WRITER);
9151 	/*
9152 	 * Try to find an empty slot
9153 	 */
9154 	for (i = 0; i < daplka_resource.daplka_rc_len; i++) {
9155 		blk = daplka_resource.daplka_rc_root[i];
9156 		if (blk != NULL && blk->daplka_rcblk_avail > 0) {
9157 
9158 			D3("resource_alloc: available blks %d\n",
9159 			    blk->daplka_rcblk_avail);
9160 
9161 			/*
9162 			 * found an empty slot in this blk
9163 			 */
9164 			for (j = 0; j < DAPLKA_RC_BLKSZ; j++) {
9165 				if (blk->daplka_rcblk_blks[j] == NULL) {
9166 					*rnum = (minor_t)
9167 					    (j + (i * DAPLKA_RC_BLKSZ));
9168 					blk->daplka_rcblk_blks[j] =
9169 					    (daplka_resource_t *)
9170 					    DAPLKA_RC_RESERVED;
9171 					blk->daplka_rcblk_avail--;
9172 					daplka_resource.daplka_rc_cnt++;
9173 					rw_exit(&daplka_resource.
9174 					    daplka_rct_lock);
9175 					return (0);
9176 				}
9177 			}
9178 		} else if (blk == NULL && empty < 0) {
9179 			/*
9180 			 * remember first empty slot
9181 			 */
9182 			empty = i;
9183 		}
9184 	}
9185 
9186 	/*
9187 	 * Couldn't find anything, allocate a new blk
9188 	 * Do we need to reallocate the root array
9189 	 */
9190 	if (empty < 0) {
9191 		if (daplka_resource.daplka_rc_len ==
9192 		    daplka_resource.daplka_rc_sz) {
9193 			/*
9194 			 * Allocate new array and copy current stuff into it
9195 			 */
9196 			daplka_resource_blk_t	**p;
9197 			uint_t newsz = (uint_t)daplka_resource.daplka_rc_sz +
9198 			    DAPLKA_RC_BLKSZ;
9199 
9200 			D3("resource_alloc: increasing no. of buckets to %d\n",
9201 			    newsz);
9202 
9203 			p = kmem_zalloc(newsz * sizeof (*p), daplka_km_flags);
9204 
9205 			if (daplka_resource.daplka_rc_root) {
9206 				uint_t oldsz;
9207 
9208 				oldsz = (uint_t)(daplka_resource.daplka_rc_sz *
9209 				    (int)sizeof (*p));
9210 
9211 				/*
9212 				 * Copy old data into new space and
9213 				 * free old stuff
9214 				 */
9215 				bcopy(daplka_resource.daplka_rc_root, p, oldsz);
9216 				kmem_free(daplka_resource.daplka_rc_root,
9217 				    oldsz);
9218 			}
9219 
9220 			daplka_resource.daplka_rc_root = p;
9221 			daplka_resource.daplka_rc_sz = (int)newsz;
9222 		}
9223 
9224 		empty = daplka_resource.daplka_rc_len;
9225 		daplka_resource.daplka_rc_len++;
9226 
9227 		D3("resource_alloc: daplka_rc_len %d\n",
9228 		    daplka_resource.daplka_rc_len);
9229 	}
9230 
9231 	/*
9232 	 * Allocate a new blk
9233 	 */
9234 	blk = kmem_zalloc(sizeof (*blk), daplka_km_flags);
9235 	ASSERT(daplka_resource.daplka_rc_root[empty] == NULL);
9236 	daplka_resource.daplka_rc_root[empty] = blk;
9237 	blk->daplka_rcblk_avail = DAPLKA_RC_BLKSZ - 1;
9238 
9239 	/*
9240 	 * Allocate slot
9241 	 */
9242 	*rnum = (minor_t)(empty * DAPLKA_RC_BLKSZ);
9243 	blk->daplka_rcblk_blks[0] = (daplka_resource_t *)DAPLKA_RC_RESERVED;
9244 	daplka_resource.daplka_rc_cnt++;
9245 	rw_exit(&daplka_resource.daplka_rct_lock);
9246 
9247 	return (0);
9248 }
9249 
9250 /*
9251  * removes resource from global resource table
9252  */
9253 static daplka_resource_t *
daplka_resource_remove(minor_t rnum)9254 daplka_resource_remove(minor_t rnum)
9255 {
9256 	int i, j;
9257 	daplka_resource_blk_t *blk;
9258 	daplka_resource_t *p;
9259 
9260 	i = (int)(rnum / DAPLKA_RC_BLKSZ);
9261 	j = (int)(rnum % DAPLKA_RC_BLKSZ);
9262 
9263 	rw_enter(&daplka_resource.daplka_rct_lock, RW_WRITER);
9264 	if (i >= daplka_resource.daplka_rc_len) {
9265 		rw_exit(&daplka_resource.daplka_rct_lock);
9266 		DERR("resource_remove: invalid rnum %d\n", rnum);
9267 		return (NULL);
9268 	}
9269 
9270 	ASSERT(daplka_resource.daplka_rc_root);
9271 	ASSERT(i < daplka_resource.daplka_rc_len);
9272 	ASSERT(i < daplka_resource.daplka_rc_sz);
9273 	blk = daplka_resource.daplka_rc_root[i];
9274 	if (blk == NULL) {
9275 		rw_exit(&daplka_resource.daplka_rct_lock);
9276 		DERR("resource_remove: invalid rnum %d\n", rnum);
9277 		return (NULL);
9278 	}
9279 
9280 	if (blk->daplka_rcblk_blks[j] == NULL) {
9281 		rw_exit(&daplka_resource.daplka_rct_lock);
9282 		DERR("resource_remove: blk->daplka_rcblk_blks[j] == NULL\n");
9283 		return (NULL);
9284 	}
9285 	p = blk->daplka_rcblk_blks[j];
9286 	blk->daplka_rcblk_blks[j] = NULL;
9287 	blk->daplka_rcblk_avail++;
9288 	if (blk->daplka_rcblk_avail == DAPLKA_RC_BLKSZ) {
9289 		/*
9290 		 * free this blk
9291 		 */
9292 		kmem_free(blk, sizeof (*blk));
9293 		daplka_resource.daplka_rc_root[i] = NULL;
9294 	}
9295 	daplka_resource.daplka_rc_cnt--;
9296 	rw_exit(&daplka_resource.daplka_rct_lock);
9297 
9298 	if ((intptr_t)p == DAPLKA_RC_RESERVED) {
9299 		return (NULL);
9300 	} else {
9301 		return (p);
9302 	}
9303 }
9304 
9305 /*
9306  * inserts resource into the slot designated by rnum
9307  */
9308 static int
daplka_resource_insert(minor_t rnum,daplka_resource_t * rp)9309 daplka_resource_insert(minor_t rnum, daplka_resource_t *rp)
9310 {
9311 	int i, j, error = -1;
9312 	daplka_resource_blk_t *blk;
9313 
9314 	/*
9315 	 * Find resource and lock it in WRITER mode
9316 	 * search for available resource slot
9317 	 */
9318 
9319 	i = (int)(rnum / DAPLKA_RC_BLKSZ);
9320 	j = (int)(rnum % DAPLKA_RC_BLKSZ);
9321 
9322 	rw_enter(&daplka_resource.daplka_rct_lock, RW_WRITER);
9323 	if (i >= daplka_resource.daplka_rc_len) {
9324 		rw_exit(&daplka_resource.daplka_rct_lock);
9325 		DERR("resource_insert: resource %d not found\n", rnum);
9326 		return (-1);
9327 	}
9328 
9329 	blk = daplka_resource.daplka_rc_root[i];
9330 	if (blk != NULL) {
9331 		ASSERT(i < daplka_resource.daplka_rc_len);
9332 		ASSERT(i < daplka_resource.daplka_rc_sz);
9333 
9334 		if ((intptr_t)blk->daplka_rcblk_blks[j] == DAPLKA_RC_RESERVED) {
9335 			blk->daplka_rcblk_blks[j] = rp;
9336 			error = 0;
9337 		} else {
9338 			DERR("resource_insert: %d not reserved, blk = %p\n",
9339 			    rnum, blk->daplka_rcblk_blks[j]);
9340 		}
9341 	} else {
9342 		DERR("resource_insert: resource %d not found\n", rnum);
9343 	}
9344 	rw_exit(&daplka_resource.daplka_rct_lock);
9345 	return (error);
9346 }
9347 
9348 /*
9349  * finds resource using minor device number
9350  */
9351 static daplka_resource_t *
daplka_resource_lookup(minor_t rnum)9352 daplka_resource_lookup(minor_t rnum)
9353 {
9354 	int i, j;
9355 	daplka_resource_blk_t *blk;
9356 	daplka_resource_t *rp;
9357 
9358 	/*
9359 	 * Find resource and lock it in READER mode
9360 	 * search for available resource slot
9361 	 */
9362 
9363 	i = (int)(rnum / DAPLKA_RC_BLKSZ);
9364 	j = (int)(rnum % DAPLKA_RC_BLKSZ);
9365 
9366 	rw_enter(&daplka_resource.daplka_rct_lock, RW_READER);
9367 	if (i >= daplka_resource.daplka_rc_len) {
9368 		rw_exit(&daplka_resource.daplka_rct_lock);
9369 		DERR("resource_lookup: resource %d not found\n", rnum);
9370 		return (NULL);
9371 	}
9372 
9373 	blk = daplka_resource.daplka_rc_root[i];
9374 	if (blk != NULL) {
9375 		ASSERT(i < daplka_resource.daplka_rc_len);
9376 		ASSERT(i < daplka_resource.daplka_rc_sz);
9377 
9378 		rp = blk->daplka_rcblk_blks[j];
9379 		if (rp == NULL || (intptr_t)rp == DAPLKA_RC_RESERVED) {
9380 			D3("resource_lookup: %d not found, blk = %p\n",
9381 			    rnum, blk->daplka_rcblk_blks[j]);
9382 		} else {
9383 			DAPLKA_RS_REF((daplka_ia_resource_t *)rp);
9384 		}
9385 	} else {
9386 		DERR("resource_lookup: resource %d not found\n", rnum);
9387 		rp = NULL;
9388 	}
9389 	rw_exit(&daplka_resource.daplka_rct_lock);
9390 	return (rp);
9391 }
9392 
9393 /*
9394  * generic hash table implementation
9395  */
9396 
9397 /*
9398  * daplka_hash_create:
9399  *	initializes a hash table with the specified parameters
9400  *
9401  * input:
9402  *	htblp			pointer to hash table
9403  *
9404  *	nbuckets		number of buckets (must be power of 2)
9405  *
9406  *	free_func		this function is called on each hash
9407  *				table element when daplka_hash_destroy
9408  *				is called
9409  *
9410  *	lookup_func		if daplka_hash_lookup is able to find
9411  *				the desired object, this function is
9412  *				applied on the object before
9413  *				daplka_hash_lookup returns
9414  * output:
9415  *	none
9416  *
9417  * return value(s):
9418  *	EINVAL			nbuckets is not a power of 2
9419  *	ENOMEM			cannot allocate buckets
9420  *	0			success
9421  */
9422 static int
daplka_hash_create(daplka_hash_table_t * htblp,uint_t nbuckets,void (* free_func)(void *),void (* lookup_func)(void *))9423 daplka_hash_create(daplka_hash_table_t *htblp, uint_t nbuckets,
9424 	void (*free_func)(void *), void (*lookup_func)(void *))
9425 {
9426 	int i;
9427 
9428 	if ((nbuckets & ~(nbuckets - 1)) != nbuckets) {
9429 		DERR("hash_create: nbuckets not power of 2\n");
9430 		return (EINVAL);
9431 	}
9432 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*htblp))
9433 
9434 	htblp->ht_buckets =
9435 	    kmem_zalloc(sizeof (daplka_hash_bucket_t) * nbuckets,
9436 	    daplka_km_flags);
9437 	if (htblp->ht_buckets == NULL) {
9438 		DERR("hash_create: cannot allocate buckets\n");
9439 		return (ENOMEM);
9440 	}
9441 	for (i = 0; i < nbuckets; i++) {
9442 		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(htblp->ht_buckets[i]))
9443 		htblp->ht_buckets[i].hb_count = 0;
9444 		htblp->ht_buckets[i].hb_entries = NULL;
9445 		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(htblp->ht_buckets[i]))
9446 	}
9447 	rw_init(&htblp->ht_table_lock, NULL, RW_DRIVER, NULL);
9448 	mutex_init(&htblp->ht_key_lock, NULL, MUTEX_DRIVER, NULL);
9449 
9450 	htblp->ht_count = 0;
9451 	htblp->ht_next_hkey = (uint64_t)gethrtime();
9452 	htblp->ht_nbuckets = nbuckets;
9453 	htblp->ht_free_func = free_func;
9454 	htblp->ht_lookup_func = lookup_func;
9455 	htblp->ht_initialized = B_TRUE;
9456 	D3("hash_create: done, buckets = %d\n", nbuckets);
9457 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*htblp))
9458 	return (0);
9459 }
9460 
9461 /*
9462  * daplka_hash_insert:
9463  *	inserts an object into a hash table
9464  *
9465  * input:
9466  *	htblp			pointer to hash table
9467  *
9468  *	hkeyp			pointer to hash key.
9469  *				*hkeyp being non-zero means that the caller
9470  *				has generated its own hkey. if *hkeyp is zero,
9471  *				this function will generate an hkey for the
9472  *				caller. it is recommended that the caller
9473  *				leave the hkey generation to this function
9474  *				because the hkey is more likely to be evenly
9475  *				distributed.
9476  *
9477  *	objp			pointer to object to be inserted into
9478  *				hash table
9479  *
9480  * output:
9481  *	hkeyp			the generated hkey is returned via this pointer
9482  *
9483  * return value(s):
9484  *	EINVAL			invalid parameter
9485  *	ENOMEM			cannot allocate hash entry
9486  *	0			successful
9487  */
9488 static int
daplka_hash_insert(daplka_hash_table_t * htblp,uint64_t * hkeyp,void * objp)9489 daplka_hash_insert(daplka_hash_table_t *htblp, uint64_t *hkeyp, void *objp)
9490 {
9491 	daplka_hash_entry_t *hep, *curr_hep;
9492 	daplka_hash_bucket_t *hbp;
9493 	uint32_t bucket;
9494 	uint64_t hkey;
9495 
9496 	if (hkeyp == NULL) {
9497 		DERR("hash_insert: hkeyp == NULL\n");
9498 		return (EINVAL);
9499 	}
9500 	hep = kmem_zalloc(sizeof (*hep), daplka_km_flags);
9501 	if (hep == NULL) {
9502 		DERR("hash_insert: cannot alloc hash_entry\n");
9503 		return (ENOMEM);
9504 	}
9505 	if (*hkeyp == 0) {
9506 		/* generate a new key */
9507 		mutex_enter(&htblp->ht_key_lock);
9508 		hkey = ++htblp->ht_next_hkey;
9509 		if (hkey == 0) {
9510 			hkey = htblp->ht_next_hkey = (uint64_t)gethrtime();
9511 		}
9512 		mutex_exit(&htblp->ht_key_lock);
9513 	} else {
9514 		/* use user generated key */
9515 		hkey = *hkeyp;
9516 	}
9517 
9518 	/* only works if ht_nbuckets is a power of 2 */
9519 	bucket = (uint32_t)(hkey & (htblp->ht_nbuckets - 1));
9520 	ASSERT(objp != NULL);
9521 	ASSERT(bucket < htblp->ht_nbuckets);
9522 
9523 	rw_enter(&htblp->ht_table_lock, RW_WRITER);
9524 	hep->he_hkey = hkey;
9525 	hep->he_objp = objp;
9526 
9527 	/* look for duplicate entries */
9528 	hbp = &htblp->ht_buckets[bucket];
9529 	curr_hep = hbp->hb_entries;
9530 	while (curr_hep != NULL) {
9531 		if (curr_hep->he_hkey == hep->he_hkey) {
9532 			break;
9533 		}
9534 		curr_hep = curr_hep->he_next;
9535 	}
9536 	if (curr_hep != NULL) {
9537 		DERR("hash_insert: found duplicate hash entry: "
9538 		    "bucket %d, hkey 0x%016llx\n",
9539 		    bucket, (longlong_t)hep->he_hkey);
9540 		kmem_free(hep, sizeof (*hep));
9541 		rw_exit(&htblp->ht_table_lock);
9542 		return (EINVAL);
9543 	}
9544 	hep->he_next = hbp->hb_entries;
9545 	hbp->hb_entries = hep;
9546 	hbp->hb_count++;
9547 	htblp->ht_count++;
9548 	rw_exit(&htblp->ht_table_lock);
9549 
9550 	if (*hkeyp == 0) {
9551 		*hkeyp = hkey;
9552 		ASSERT(*hkeyp != 0);
9553 	}
9554 	D3("hash_insert: htblp 0x%p, hkey = 0x%016llx, bucket = %d\n",
9555 	    htblp, (longlong_t)*hkeyp, bucket);
9556 	return (0);
9557 }
9558 
9559 /*
9560  * daplka_hash_remove:
9561  *	removes object identified by hkey from hash table
9562  *
9563  * input:
9564  *	htblp			pointer to hash table
9565  *
9566  *	hkey			hkey that identifies the object to be removed
9567  *
9568  * output:
9569  *	objpp			pointer to pointer to object.
9570  *				if remove is successful, the removed object
9571  *				will be returned via *objpp.
9572  *
9573  * return value(s):
9574  *	EINVAL			cannot find hash entry
9575  *	0			successful
9576  */
9577 static int
daplka_hash_remove(daplka_hash_table_t * htblp,uint64_t hkey,void ** objpp)9578 daplka_hash_remove(daplka_hash_table_t *htblp, uint64_t hkey, void **objpp)
9579 {
9580 	daplka_hash_entry_t	*free_hep, **curr_hepp;
9581 	daplka_hash_bucket_t	*hbp;
9582 	uint32_t		bucket;
9583 
9584 	bucket = (uint32_t)(hkey & (htblp->ht_nbuckets - 1));
9585 
9586 	rw_enter(&htblp->ht_table_lock, RW_WRITER);
9587 	hbp = &htblp->ht_buckets[bucket];
9588 
9589 	curr_hepp = &hbp->hb_entries;
9590 	while (*curr_hepp != NULL) {
9591 		if ((*curr_hepp)->he_hkey == hkey) {
9592 			break;
9593 		}
9594 		curr_hepp = &(*curr_hepp)->he_next;
9595 	}
9596 	if (*curr_hepp == NULL) {
9597 		DERR("hash_remove: cannot find hash entry: "
9598 		    "bucket %d, hkey 0x%016llx\n", bucket, (longlong_t)hkey);
9599 		rw_exit(&htblp->ht_table_lock);
9600 		return (EINVAL);
9601 	} else {
9602 		if (objpp != NULL) {
9603 			*objpp = (*curr_hepp)->he_objp;
9604 		}
9605 		free_hep = *curr_hepp;
9606 		*curr_hepp = (*curr_hepp)->he_next;
9607 		kmem_free(free_hep, sizeof (*free_hep));
9608 	}
9609 	hbp->hb_count--;
9610 	htblp->ht_count--;
9611 	D3("hash_remove: removed entry, hkey 0x%016llx, bucket %d, "
9612 	    "hb_count %d, hb_count %d\n",
9613 	    (longlong_t)hkey, bucket, hbp->hb_count, htblp->ht_count);
9614 	rw_exit(&htblp->ht_table_lock);
9615 	return (0);
9616 }
9617 
9618 /*
9619  * daplka_hash_walk:
9620  *	walks through the entire hash table. applying func on each of
9621  *	the inserted objects. stops walking if func returns non-zero.
9622  *
9623  * input:
9624  *	htblp			pointer to hash table
9625  *
9626  *	func			function to be applied on each object
9627  *
9628  *	farg			second argument to func
9629  *
9630  *	lockmode		can be RW_WRITER or RW_READER. this
9631  *				allows the caller to choose what type
9632  *				of lock to acquire before walking the
9633  *				table.
9634  *
9635  * output:
9636  *	none
9637  *
9638  * return value(s):
9639  *	none
9640  */
9641 static void
daplka_hash_walk(daplka_hash_table_t * htblp,int (* func)(void *,void *),void * farg,krw_t lockmode)9642 daplka_hash_walk(daplka_hash_table_t *htblp, int (*func)(void *, void *),
9643 	void *farg, krw_t lockmode)
9644 {
9645 	daplka_hash_entry_t *curr_hep;
9646 	daplka_hash_bucket_t *hbp;
9647 	uint32_t bucket, retval = 0;
9648 
9649 	ASSERT(lockmode == RW_WRITER || lockmode == RW_READER);
9650 
9651 	/* needed for warlock */
9652 	if (lockmode == RW_WRITER) {
9653 		rw_enter(&htblp->ht_table_lock, RW_WRITER);
9654 	} else {
9655 		rw_enter(&htblp->ht_table_lock, RW_READER);
9656 	}
9657 	for (bucket = 0; bucket < htblp->ht_nbuckets && retval == 0; bucket++) {
9658 		hbp = &htblp->ht_buckets[bucket];
9659 		curr_hep = hbp->hb_entries;
9660 		while (curr_hep != NULL) {
9661 			retval = (*func)(curr_hep->he_objp, farg);
9662 			if (retval != 0) {
9663 				break;
9664 			}
9665 			curr_hep = curr_hep->he_next;
9666 		}
9667 	}
9668 	rw_exit(&htblp->ht_table_lock);
9669 }
9670 
9671 /*
9672  * daplka_hash_lookup:
9673  *	finds object from hkey
9674  *
9675  * input:
9676  *	htblp			pointer to hash table
9677  *
9678  *	hkey			hkey that identifies the object to be looked up
9679  *
9680  * output:
9681  *	none
9682  *
9683  * return value(s):
9684  *	NULL			if not found
9685  *	object pointer		if found
9686  */
9687 static void *
daplka_hash_lookup(daplka_hash_table_t * htblp,uint64_t hkey)9688 daplka_hash_lookup(daplka_hash_table_t *htblp, uint64_t hkey)
9689 {
9690 	daplka_hash_entry_t *curr_hep;
9691 	uint32_t bucket;
9692 	void *objp;
9693 
9694 	bucket = (uint32_t)(hkey & (htblp->ht_nbuckets - 1));
9695 
9696 	rw_enter(&htblp->ht_table_lock, RW_READER);
9697 	curr_hep = htblp->ht_buckets[bucket].hb_entries;
9698 	while (curr_hep != NULL) {
9699 		if (curr_hep->he_hkey == hkey) {
9700 			break;
9701 		}
9702 		curr_hep = curr_hep->he_next;
9703 	}
9704 	if (curr_hep == NULL) {
9705 		DERR("hash_lookup: cannot find hash entry: "
9706 		    "bucket %d, hkey 0x%016llx\n", bucket, (longlong_t)hkey);
9707 		rw_exit(&htblp->ht_table_lock);
9708 		return (NULL);
9709 	}
9710 	objp = curr_hep->he_objp;
9711 	ASSERT(objp != NULL);
9712 	if (htblp->ht_lookup_func != NULL) {
9713 		(*htblp->ht_lookup_func)(objp);
9714 	}
9715 	rw_exit(&htblp->ht_table_lock);
9716 	return (objp);
9717 }
9718 
9719 /*
9720  * daplka_hash_destroy:
9721  *	destroys hash table. applies free_func on all inserted objects.
9722  *
9723  * input:
9724  *	htblp			pointer to hash table
9725  *
9726  * output:
9727  *	none
9728  *
9729  * return value(s):
9730  *	none
9731  */
9732 static void
daplka_hash_destroy(daplka_hash_table_t * htblp)9733 daplka_hash_destroy(daplka_hash_table_t *htblp)
9734 {
9735 	daplka_hash_entry_t *curr_hep, *free_hep;
9736 	daplka_hash_entry_t *free_list = NULL;
9737 	daplka_hash_bucket_t *hbp;
9738 	uint32_t bucket, cnt, total = 0;
9739 
9740 	if (!htblp->ht_initialized) {
9741 		DERR("hash_destroy: not initialized\n");
9742 		return;
9743 	}
9744 	/* free all elements from hash table */
9745 	rw_enter(&htblp->ht_table_lock, RW_WRITER);
9746 	for (bucket = 0; bucket < htblp->ht_nbuckets; bucket++) {
9747 		hbp = &htblp->ht_buckets[bucket];
9748 
9749 		/* build list of elements to be freed */
9750 		curr_hep = hbp->hb_entries;
9751 		cnt = 0;
9752 		while (curr_hep != NULL) {
9753 			cnt++;
9754 			free_hep = curr_hep;
9755 			curr_hep = curr_hep->he_next;
9756 
9757 			free_hep->he_next = free_list;
9758 			free_list = free_hep;
9759 		}
9760 		ASSERT(cnt == hbp->hb_count);
9761 		total += cnt;
9762 		hbp->hb_count = 0;
9763 		hbp->hb_entries = NULL;
9764 	}
9765 	ASSERT(total == htblp->ht_count);
9766 	D3("hash_destroy: htblp 0x%p, nbuckets %d, freed %d hash entries\n",
9767 	    htblp, htblp->ht_nbuckets, total);
9768 	rw_exit(&htblp->ht_table_lock);
9769 
9770 	/* free all objects, now without holding the hash table lock */
9771 	cnt = 0;
9772 	while (free_list != NULL) {
9773 		cnt++;
9774 		free_hep = free_list;
9775 		free_list = free_list->he_next;
9776 		if (htblp->ht_free_func != NULL) {
9777 			(*htblp->ht_free_func)(free_hep->he_objp);
9778 		}
9779 		kmem_free(free_hep, sizeof (*free_hep));
9780 	}
9781 	ASSERT(total == cnt);
9782 
9783 	/* free hash buckets and destroy locks */
9784 	kmem_free(htblp->ht_buckets,
9785 	    sizeof (daplka_hash_bucket_t) * htblp->ht_nbuckets);
9786 
9787 	rw_enter(&htblp->ht_table_lock, RW_WRITER);
9788 	htblp->ht_buckets = NULL;
9789 	htblp->ht_count = 0;
9790 	htblp->ht_nbuckets = 0;
9791 	htblp->ht_free_func = NULL;
9792 	htblp->ht_lookup_func = NULL;
9793 	htblp->ht_initialized = B_FALSE;
9794 	rw_exit(&htblp->ht_table_lock);
9795 
9796 	mutex_destroy(&htblp->ht_key_lock);
9797 	rw_destroy(&htblp->ht_table_lock);
9798 }
9799 
9800 /*
9801  * daplka_hash_getsize:
9802  *	return the number of objects in hash table
9803  *
9804  * input:
9805  *	htblp			pointer to hash table
9806  *
9807  * output:
9808  *	none
9809  *
9810  * return value(s):
9811  *	number of objects in hash table
9812  */
9813 static uint32_t
daplka_hash_getsize(daplka_hash_table_t * htblp)9814 daplka_hash_getsize(daplka_hash_table_t *htblp)
9815 {
9816 	uint32_t sz;
9817 
9818 	rw_enter(&htblp->ht_table_lock, RW_READER);
9819 	sz = htblp->ht_count;
9820 	rw_exit(&htblp->ht_table_lock);
9821 
9822 	return (sz);
9823 }
9824 
9825 /*
9826  * this function is used as ht_lookup_func above when lookup is called.
9827  * other types of objs may use a more elaborate lookup_func.
9828  */
9829 static void
daplka_hash_generic_lookup(void * obj)9830 daplka_hash_generic_lookup(void *obj)
9831 {
9832 	daplka_resource_t	*rp = (daplka_resource_t *)obj;
9833 
9834 	mutex_enter(&rp->rs_reflock);
9835 	rp->rs_refcnt++;
9836 	ASSERT(rp->rs_refcnt != 0);
9837 	mutex_exit(&rp->rs_reflock);
9838 }
9839 
9840 /*
9841  * Generates a non-zero 32 bit hash key used for the timer hash table.
9842  */
9843 static uint32_t
daplka_timer_hkey_gen()9844 daplka_timer_hkey_gen()
9845 {
9846 	uint32_t new_hkey;
9847 
9848 	do {
9849 		new_hkey = atomic_inc_32_nv(&daplka_timer_hkey);
9850 	} while (new_hkey == 0);
9851 
9852 	return (new_hkey);
9853 }
9854 
9855 
9856 /*
9857  * The DAPL KA debug logging routines
9858  */
9859 
9860 /*
9861  * Add the string str to the end of the debug log, followed by a newline.
9862  */
9863 static void
daplka_dbglog(char * str)9864 daplka_dbglog(char *str)
9865 {
9866 	size_t	length;
9867 	size_t	remlen;
9868 
9869 	/*
9870 	 * If this is the first time we've written to the log, initialize it.
9871 	 */
9872 	if (!daplka_dbginit) {
9873 		return;
9874 	}
9875 	mutex_enter(&daplka_dbglock);
9876 	/*
9877 	 * Note the log is circular; if this string would run over the end,
9878 	 * we copy the first piece to the end and then the last piece to
9879 	 * the beginning of the log.
9880 	 */
9881 	length = strlen(str);
9882 
9883 	remlen = (size_t)sizeof (daplka_dbgbuf) - daplka_dbgnext - 1;
9884 
9885 	if (length > remlen) {
9886 		if (remlen)
9887 			bcopy(str, daplka_dbgbuf + daplka_dbgnext, remlen);
9888 		daplka_dbgbuf[sizeof (daplka_dbgbuf) - 1] = (char)NULL;
9889 		str += remlen;
9890 		length -= remlen;
9891 		daplka_dbgnext = 0;
9892 	}
9893 	bcopy(str, daplka_dbgbuf + daplka_dbgnext, length);
9894 	daplka_dbgnext += length;
9895 
9896 	if (daplka_dbgnext >= sizeof (daplka_dbgbuf))
9897 		daplka_dbgnext = 0;
9898 	mutex_exit(&daplka_dbglock);
9899 }
9900 
9901 
9902 /*
9903  * Add a printf-style message to whichever debug logs we're currently using.
9904  */
9905 static void
daplka_debug(const char * fmt,...)9906 daplka_debug(const char *fmt, ...)
9907 {
9908 	char	buff[512];
9909 	va_list	ap;
9910 	/*
9911 	 * The system prepends the thread id and high resolution time
9912 	 * (nanoseconds are dropped and so are the upper digits)
9913 	 * to the specified string.
9914 	 * The unit for timestamp is 10 microseconds.
9915 	 * It wraps around every 10000 seconds.
9916 	 * Ex: gethrtime() = X ns = X/1000 us = X/10000 10 micro sec.
9917 	 */
9918 	int	micro_time = (int)((gethrtime() / 10000) % 1000000000);
9919 	(void) sprintf(buff, "th %p tm %9d: ", (void *)curthread, micro_time);
9920 
9921 	va_start(ap, fmt);
9922 	(void) vsprintf(buff+strlen(buff), fmt, ap);
9923 	va_end(ap);
9924 
9925 	daplka_dbglog(buff);
9926 }
9927 
9928 static void
daplka_console(const char * fmt,...)9929 daplka_console(const char *fmt, ...)
9930 {
9931 	char buff[512];
9932 	va_list ap;
9933 
9934 	va_start(ap, fmt);
9935 	(void) vsprintf(buff, fmt, ap);
9936 	va_end(ap);
9937 
9938 	cmn_err(CE_CONT, "%s", buff);
9939 }
9940