xref: /illumos-gate/usr/src/lib/udapl/udapl_tavor/tavor/dapl_tavor_ibtf_util.c (revision 41d4805f3c6abade283ec1c338ef8c94ef6f4fd4)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include "dapl.h"
28 #include "dapl_adapter_util.h"
29 #include "dapl_evd_util.h"
30 #include "dapl_cr_util.h"
31 #include "dapl_lmr_util.h"
32 #include "dapl_rmr_util.h"
33 #include "dapl_cookie.h"
34 #include "dapl_ring_buffer_util.h"
35 #include "dapl_vendor.h"
36 #include "dapl_tavor_ibtf_impl.h"
37 
38 /* Function prototypes */
39 static DAT_RETURN dapli_ib_cq_resize_internal(DAPL_EVD *, DAT_COUNT);
40 
41 /*
42  * The following declarations/functions are to be used by the base
43  * library; placeholder for now.
44  */
45 
46 int	g_loopback_connection = 0;
47 
48 /*
49  * dapl_ib_cq_alloc
50  *
51  * Alloc a CQ
52  *
53  * Input:
54  *	ia_handle		IA handle
55  *	evd_ptr			pointer to EVD struct
56  *	cno_ptr			pointer to CNO struct
57  *	cqlen			minimum CQ length requested
58  *
59  * Output:
60  * 	cqlen			actual CQ length allocated
61  *
62  * Returns:
63  * 	DAT_SUCCESS
64  *	DAT_INSUFFICIENT_RESOURCES
65  *
66  */
67 DAT_RETURN
68 dapls_ib_cq_alloc(
69 	IN  DAPL_IA		*ia_ptr,
70 	IN  DAPL_EVD		*evd_ptr,
71 	IN  DAPL_CNO		*cno_ptr,
72 	IN  DAT_COUNT		*cqlen)
73 {
74 	dapl_evd_create_t	create_msg;
75 	dapl_evd_free_t		free_msg;
76 	ib_cq_handle_t		cq_handle = IB_INVALID_HANDLE;
77 	int			ia_fd;
78 	int			hca_fd;
79 	int			retval;
80 	mlnx_umap_cq_data_out_t	*mcq;
81 
82 	/* cq handle is created even for non-cq type events */
83 	/* since cq handle is where the evd fd gets stored. */
84 	cq_handle = (ib_cq_handle_t)
85 	    dapl_os_alloc(sizeof (struct dapls_ib_cq_handle));
86 	if (cq_handle == NULL) {
87 		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
88 		    "cq_alloc: evd_ptr 0x%p, cq_handle == NULL\n",
89 		    evd_ptr);
90 		return (DAT_INSUFFICIENT_RESOURCES);
91 	}
92 
93 	(void) dapl_os_memzero(cq_handle, sizeof (*cq_handle));
94 
95 	/* get the hca information from ia_ptr */
96 	(void) dapl_os_memzero(&create_msg, sizeof (create_msg));
97 	create_msg.evd_flags = evd_ptr->evd_flags;
98 	create_msg.evd_cookie = (uintptr_t)evd_ptr;
99 	if (cno_ptr != NULL) {
100 		create_msg.evd_cno_hkey =
101 		    (uint64_t)cno_ptr->ib_cno_handle;
102 	}
103 	if (evd_ptr->evd_flags & (DAT_EVD_DTO_FLAG | DAT_EVD_RMR_BIND_FLAG)) {
104 		create_msg.evd_cq_size = (uint32_t)*cqlen;
105 	}
106 
107 	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
108 	    "cq_alloc: evd 0x%p, flags 0x%x, cookie 0x%llx, hkey 0x%llx,\n"
109 	    "          cno_hkey 0x%llx, cq_size %d\n", evd_ptr,
110 	    create_msg.evd_flags, create_msg.evd_cookie, create_msg.evd_hkey,
111 	    create_msg.evd_cno_hkey, create_msg.evd_cq_size);
112 
113 	ia_fd = ia_ptr->hca_ptr->ib_hca_handle->ia_fd;
114 	hca_fd = ia_ptr->hca_ptr->ib_hca_handle->hca_fd;
115 	mcq = (mlnx_umap_cq_data_out_t *)create_msg.evd_cq_data_out;
116 
117 	/* The next line is only needed for backward compatibility */
118 	mcq->mcq_rev = MLNX_UMAP_IF_VERSION;
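	/*
	 * The driver overwrites mcq_rev on return; if the value coming
	 * back differs from MLNX_UMAP_IF_VERSION the mapped-data layout
	 * cannot be trusted, so the result is treated as a failure below.
	 */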
119 
120 	/* call into driver to allocate cq */
121 	retval = ioctl(ia_fd, DAPL_EVD_CREATE, &create_msg);
122 	if (retval != 0 || mcq->mcq_rev != MLNX_UMAP_IF_VERSION) {
123 		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
124 		    "cq_alloc: evd_create failed, %s\n", strerror(errno));
125 		dapl_os_free(cq_handle, sizeof (struct dapls_ib_cq_handle));
126 		return (dapls_convert_error(errno, retval));
127 	}
128 	(void) dapl_os_memzero(cq_handle, sizeof (struct dapls_ib_cq_handle));
129 	dapl_os_lock_init(&cq_handle->cq_wrid_wqhdr_lock);
130 
131 	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
132 	    "cq_alloc: created, evd 0x%p, hkey 0x%016llx\n\n", evd_ptr,
133 	    create_msg.evd_hkey);
134 
135 	cq_handle->evd_hkey = create_msg.evd_hkey;
136 
137 	if (evd_ptr->evd_flags & (DAT_EVD_DTO_FLAG | DAT_EVD_RMR_BIND_FLAG)) {
138 
139 		/*
140 		 * Allocate a hash table for wrid management; the key is
141 		 * a combination of QP number and SEND/RECV type. This is
142 		 * required only for EVDs which have a CQ mapped to
143 		 * them.
144 		 */
145 		if (DAT_SUCCESS != dapls_hash_create(DAPL_MED_HASHSIZE,
146 		    DAT_FALSE, &cq_handle->cq_wrid_wqhdr_list)) {
147 			dapl_dbg_log(DAPL_DBG_TYPE_ERR,
148 			    "cq_alloc: hash_create failed\n");
149 			dapl_os_free(cq_handle,
150 			    sizeof (struct dapls_ib_cq_handle));
151 			return (DAT_INSUFFICIENT_RESOURCES |
152 			    DAT_RESOURCE_MEMORY);
153 		}
154 
155 		dapl_os_assert(create_msg.evd_cq_real_size > 0);
156 
157 		/* In the case of Arbel or Hermon */
158 		if (mcq->mcq_polldbr_mapoffset != 0 ||
159 		    mcq->mcq_polldbr_maplen != 0)
160 			cq_handle->cq_poll_dbp = dapls_ib_get_dbp(
161 			    mcq->mcq_polldbr_maplen, hca_fd,
162 			    mcq->mcq_polldbr_mapoffset,
163 			    mcq->mcq_polldbr_offset);
164 		if (mcq->mcq_armdbr_mapoffset != 0 ||
165 		    mcq->mcq_armdbr_maplen != 0)
166 			cq_handle->cq_arm_dbp = dapls_ib_get_dbp(
167 			    mcq->mcq_armdbr_maplen, hca_fd,
168 			    mcq->mcq_armdbr_mapoffset,
169 			    mcq->mcq_armdbr_offset);
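		/*
		 * On Tavor both doorbell map fields are zero, so the
		 * conditionals above are skipped and cq_poll_dbp/cq_arm_dbp
		 * stay NULL (the handle was zeroed after the ioctl);
		 * doorbells are presumably rung through the UAR page saved
		 * in cq_iauar below.
		 */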
170 
171 		cq_handle->cq_addr = (tavor_hw_cqe_t *)(void *) mmap64(
172 		    (void *)0, mcq->mcq_maplen,
173 		    (PROT_READ | PROT_WRITE), MAP_SHARED, hca_fd,
174 		    mcq->mcq_mapoffset);
175 
176 		if (cq_handle->cq_addr == MAP_FAILED ||
177 		    cq_handle->cq_poll_dbp == MAP_FAILED ||
178 		    cq_handle->cq_arm_dbp == MAP_FAILED) {
179 			free_msg.evf_hkey = cq_handle->evd_hkey;
180 			retval = ioctl(ia_fd, DAPL_EVD_FREE, &free_msg);
181 			if (retval != 0) {
182 				dapl_dbg_log(DAPL_DBG_TYPE_ERR,
183 				    "cq_alloc: EVD_FREE err:%s\n",
184 				    strerror(errno));
185 			}
186 
187 			dapl_dbg_log(DAPL_DBG_TYPE_ERR,
188 			    "cq_alloc: DAPL_CQ_ALLOC failed\n");
189 			/* free the hash table we created */
190 			(void) dapls_hash_free(cq_handle->cq_wrid_wqhdr_list);
191 			dapl_os_free(cq_handle,
192 			    sizeof (struct dapls_ib_cq_handle));
193 			return (DAT_INSUFFICIENT_RESOURCES);
194 		}
195 
196 		cq_handle->cq_map_offset = mcq->mcq_mapoffset;
197 		cq_handle->cq_map_len = mcq->mcq_maplen;
198 		cq_handle->cq_num = mcq->mcq_cqnum;
199 		/*
200 		 * cq_size is the actual depth of the CQ which is 1 more
201 		 * than what ibt_alloc_cq reports. However the application
202 		 * can only use (cq_size - 1) entries.
203 		 */
204 		cq_handle->cq_size = create_msg.evd_cq_real_size + 1;
205 		cq_handle->cq_cqesz = mcq->mcq_cqesz;
206 		cq_handle->cq_iauar = ia_ptr->hca_ptr->ib_hca_handle->ia_uar;
207 		*cqlen = create_msg.evd_cq_real_size;
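		/*
		 * Example: a request for 100 entries typically yields a
		 * 128-slot CQ; the driver reports evd_cq_real_size == 127,
		 * so cq_size becomes 128 and the caller sees *cqlen == 127.
		 */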
208 
209 		DAPL_INIT_CQ(ia_ptr)(cq_handle);
210 	}
211 
212 	evd_ptr->ib_cq_handle = cq_handle;
213 	return (DAT_SUCCESS);
214 }
215 
216 
217 /*
218  * dapl_ib_cq_resize
219  *
220  * Resize a CQ
221  *
222  * Input:
223  *	evd_ptr			pointer to EVD struct
224  *	cqlen			new length of the cq
225  * Output:
226  *	none
227  *
228  * Returns:
229  *	DAT_SUCCESS
230  *	DAT_INVALID_HANDLE
231  *	DAT_INTERNAL_ERROR
232  *	DAT_INSUFFICIENT_RESOURCES
233  *
234  */
235 DAT_RETURN
236 dapls_ib_cq_resize(
237 	IN  DAPL_EVD		*evd_ptr,
238 	IN  DAT_COUNT		cqlen)
239 {
240 	ib_cq_handle_t	cq_handle;
241 	DAT_RETURN	dat_status;
242 
243 	dat_status = dapli_ib_cq_resize_internal(evd_ptr, cqlen);
244 	if (DAT_INSUFFICIENT_RESOURCES == dat_status) {
245 		cq_handle = evd_ptr->ib_cq_handle;
246 		/* attempt to resize back to the current size */
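		/*
		 * cq_size is the full CQ depth including the unusable
		 * slot, so cq_size - 1 is the usable length to request.
		 */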
247 		dat_status = dapli_ib_cq_resize_internal(evd_ptr,
248 		    cq_handle->cq_size - 1);
249 		if (DAT_SUCCESS != dat_status) {
250 			/*
251 			 * XXX this is catastrophic need to post an event
252 			 * to the async evd
253 			 */
254 			return (DAT_INTERNAL_ERROR);
255 		}
256 	}
257 
258 	return (dat_status);
259 }
260 
261 /*
262  * dapli_ib_cq_resize_internal
263  *
264  * An internal routine to resize a CQ.
265  *
266  * Input:
267  *	evd_ptr			pointer to EVD struct
268  *	cqlen			new length of the cq
269  * Output:
270  *	none
271  *
272  * Returns:
273  *	DAT_SUCCESS
274  *	DAT_INVALID_HANDLE
275  *	DAT_INSUFFICIENT_RESOURCES
276  *
277  */
278 static DAT_RETURN
279 dapli_ib_cq_resize_internal(
280 	IN  DAPL_EVD		*evd_ptr,
281 	IN  DAT_COUNT		cqlen)
282 {
283 	ib_cq_handle_t		cq_handle;
284 	dapl_cq_resize_t	resize_msg;
285 	int			ia_fd;
286 	int			hca_fd;
287 	int			retval;
288 	mlnx_umap_cq_data_out_t	*mcq;
289 	DAPL_HCA		*hca_ptr;
290 	dapls_hw_cqe_t		cq_addr;
291 
292 	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
293 	    "dapls_ib_cq_resize: evd 0x%p cq 0x%p "
294 	    "evd_hkey 0x%016llx cqlen %d\n",
295 	    evd_ptr, (void *)evd_ptr->ib_cq_handle,
296 	    evd_ptr->ib_cq_handle->evd_hkey, cqlen);
297 
298 	cq_handle = evd_ptr->ib_cq_handle;
299 	/*
300 	 * Since CQs are created in powers of 2 with one non-usable slot,
301  * it's possible that the previously allocated CQ has sufficient
302 	 * entries. If the current cq is big enough and it is mapped in
303 	 * we are done.
304 	 */
305 	if ((cqlen < cq_handle->cq_size) && (cq_handle->cq_addr)) {
306 		return (DAT_SUCCESS);
307 	}
308 
309 	hca_ptr = evd_ptr->header.owner_ia->hca_ptr;
310 
311 	/* unmap the CQ before resizing it */
312 	if (hca_ptr->hermon_resize_cq == 0) {
313 		if ((cq_handle->cq_addr) &&
314 		    (munmap((char *)cq_handle->cq_addr,
315 		    cq_handle->cq_map_len) < 0)) {
316 			dapl_dbg_log(DAPL_DBG_TYPE_ERR,
317 			    "cq_resize: munmap(%p:0x%llx) failed(%d)\n",
318 			    cq_handle->cq_addr, cq_handle->cq_map_len, errno);
319 			return (DAT_INVALID_HANDLE);
320 		}
321 		/* cq_addr is unmapped and no longer valid */
322 		cq_handle->cq_addr = NULL;
323 	}
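	/*
	 * Note the asymmetry: on pre-Hermon HCAs the old mapping must be
	 * torn down before the resize, while for Hermon
	 * (hermon_resize_cq != 0) the old buffer stays mapped and the new
	 * mapping is staged in the cq_resize_* fields below, presumably
	 * until the hardware finishes draining the old CQ.
	 */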
324 
325 	ia_fd = hca_ptr->ib_hca_handle->ia_fd;
326 	hca_fd = hca_ptr->ib_hca_handle->hca_fd;
327 
328 	(void) dapl_os_memzero(&resize_msg, sizeof (resize_msg));
329 	mcq = (mlnx_umap_cq_data_out_t *)resize_msg.cqr_cq_data_out;
330 	resize_msg.cqr_evd_hkey = cq_handle->evd_hkey;
331 	resize_msg.cqr_cq_new_size = cqlen;
332 
333 	/* The next line is only needed for backward compatibility */
334 	mcq->mcq_rev = MLNX_UMAP_IF_VERSION;
335 	retval = ioctl(ia_fd, DAPL_CQ_RESIZE, &resize_msg);
336 	if (retval != 0 || mcq->mcq_rev != MLNX_UMAP_IF_VERSION) {
337 		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
338 		    "dapls_ib_cq_resize: evd 0x%p, err: %s\n",
339 		    evd_ptr, strerror(errno));
340 		if (errno == EINVAL) { /* Couldn't find evd for this cq */
341 			return (DAT_INVALID_HANDLE);
342 		} else { /* Need to retry resize with a smaller qlen */
343 			return (DAT_INSUFFICIENT_RESOURCES);
344 		}
345 	}
346 
347 	dapl_os_assert(cq_handle->cq_num == mcq->mcq_cqnum);
348 
349 	/* In the case of Arbel or Hermon */
350 	if (mcq->mcq_polldbr_mapoffset != 0 ||
351 	    mcq->mcq_polldbr_maplen != 0)
352 		cq_handle->cq_poll_dbp = dapls_ib_get_dbp(
353 		    mcq->mcq_polldbr_maplen, hca_fd,
354 		    mcq->mcq_polldbr_mapoffset,
355 		    mcq->mcq_polldbr_offset);
356 	if (mcq->mcq_armdbr_mapoffset != 0 ||
357 	    mcq->mcq_armdbr_maplen != 0)
358 		cq_handle->cq_arm_dbp = dapls_ib_get_dbp(
359 		    mcq->mcq_armdbr_maplen, hca_fd,
360 		    mcq->mcq_armdbr_mapoffset,
361 		    mcq->mcq_armdbr_offset);
362 
363 	cq_addr = (tavor_hw_cqe_t *)(void *)mmap64((void *)0,
364 	    mcq->mcq_maplen, (PROT_READ | PROT_WRITE),
365 	    MAP_SHARED, hca_fd, mcq->mcq_mapoffset);
366 
367 	if (cq_addr == MAP_FAILED ||
368 	    cq_handle->cq_poll_dbp == MAP_FAILED ||
369 	    cq_handle->cq_arm_dbp == MAP_FAILED) {
370 		if (hca_ptr->hermon_resize_cq == 0)
371 			cq_handle->cq_addr = NULL;
372 		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
373 		    "cq_resize: mmap failed(%d)\n", errno);
374 		/* Need to retry resize with a smaller qlen */
375 		return (DAT_INSUFFICIENT_RESOURCES);
376 	}
377 
378 	if (hca_ptr->hermon_resize_cq == 0) {
379 		cq_handle->cq_addr = cq_addr;
380 		cq_handle->cq_map_offset = mcq->mcq_mapoffset;
381 		cq_handle->cq_map_len = mcq->mcq_maplen;
382 		cq_handle->cq_size = resize_msg.cqr_cq_real_size + 1;
383 		cq_handle->cq_cqesz = mcq->mcq_cqesz;
384 		/*
385 		 * Upon resize the old events are moved to the start of the
386 		 * CQ, hence we need to reset the consumer index too.
387 		 */
388 		cq_handle->cq_consindx = 0;
389 	} else {	/* Hermon */
390 		cq_handle->cq_resize_addr = cq_addr;
391 		cq_handle->cq_resize_map_offset = mcq->mcq_mapoffset;
392 		cq_handle->cq_resize_map_len = mcq->mcq_maplen;
393 		cq_handle->cq_resize_size = resize_msg.cqr_cq_real_size + 1;
394 		cq_handle->cq_resize_cqesz = mcq->mcq_cqesz;
395 	}
396 
397 	return (DAT_SUCCESS);
398 }
399 
400 /*
401  * dapl_ib_cq_free
402  *
403  * Free a CQ
404  *
405  * Input:
406  *	ia_handle		IA handle
407  *	evd_ptr			pointer to EVD struct
408  * Output:
409  *	none
410  *
411  * Returns:
412  *	DAT_SUCCESS
413  *	DAT_INVALID_HANDLE
414  *	DAT_INSUFFICIENT_RESOURCES
415  *
416  */
417 DAT_RETURN
418 dapls_ib_cq_free(
419 	IN  DAPL_IA		*ia_ptr,
420 	IN  DAPL_EVD		*evd_ptr)
421 {
422 	dapl_evd_free_t		args;
423 	int			retval;
424 	ib_cq_handle_t		cq_handle = evd_ptr->ib_cq_handle;
425 
426 	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
427 	    "dapls_ib_cq_free: evd 0x%p cq 0x%p hkey %016llx\n", evd_ptr,
428 	    (void *)evd_ptr->ib_cq_handle, evd_ptr->ib_cq_handle->evd_hkey);
429 
430 	/* If the cq was mmap'd unmap it before freeing it */
431 	if ((cq_handle->cq_addr) &&
432 	    (munmap((char *)cq_handle->cq_addr, cq_handle->cq_map_len) < 0)) {
433 			dapl_dbg_log(DAPL_DBG_TYPE_ERR,
434 			    "cq_free: (%p:0x%llx)\n", cq_handle->cq_addr,
435 			    cq_handle->cq_map_len);
436 	}
437 
438 
439 	args.evf_hkey = cq_handle->evd_hkey;
440 
441 	retval = ioctl(ia_ptr->hca_ptr->ib_hca_handle->ia_fd,
442 	    DAPL_EVD_FREE, &args);
443 	if (retval != 0) {
444 		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
445 		    "dapls_ib_cq_free: evd 0x%p, err: %s\n",
446 		    evd_ptr, strerror(errno));
447 		return (dapls_convert_error(errno, retval));
448 	}
449 
450 	dapl_os_free(cq_handle, sizeof (struct dapls_ib_cq_handle));
451 	evd_ptr->ib_cq_handle = NULL;
452 
453 	return (DAT_SUCCESS);
454 }
455 
456 /*
457  * dapl_set_cq_notify
458  *
459  * Set up CQ completion notifications
460  *
461  * Input:
462  *	ia_handle		IA handle
463  *	evd_ptr			pointer to EVD struct
464  *
465  * Output:
466  *	none
467  *
468  * Returns:
469  *	DAT_SUCCESS
470  *	DAT_INVALID_HANDLE
471  *	DAT_INSUFFICIENT_RESOURCES
472  *
473  */
474 /* ARGSUSED */
475 DAT_RETURN
476 dapls_set_cq_notify(
477 	IN  DAPL_IA		*ia_ptr,
478 	IN  DAPL_EVD		*evd_ptr)
479 {
480 	int			retval;
481 	ib_cq_handle_t		cq_handle = evd_ptr->ib_cq_handle;
482 
483 	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
484 	    "dapls_ib_cq_notify: evd 0x%p cq 0x%p\n", evd_ptr,
485 	    (void *)cq_handle);
486 
487 	retval = DAPL_NOTIFY(evd_ptr)(cq_handle, IB_NOTIFY_ON_NEXT_COMP, 0);
488 
489 	return (retval);
490 
491 }
492 
493 /* ARGSUSED */
494 DAT_RETURN
495 dapls_set_cqN_notify(
496 	IN  DAPL_IA		*ia_ptr,
497 	IN  DAPL_EVD		*evd_ptr,
498 	IN  uint32_t		num_events)
499 {
500 	int			retval;
501 	ib_cq_handle_t		cq_handle = evd_ptr->ib_cq_handle;
502 
503 	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
504 	    "dapls_set_cqN_notify:evd %p cq %p num_events %d\n", evd_ptr,
505 	    (void *)cq_handle, num_events);
506 
507 	retval = DAPL_NOTIFY(evd_ptr)(cq_handle, IB_NOTIFY_ON_NEXT_NCOMP,
508 	    num_events);
509 
510 	return (retval);
511 
512 }
513 
514 /*
515  * dapls_ib_cqd_create
516  *
517  * Set up CQ notification event thread
518  *
519  * Input:
520  *	ia_handle		IA handle
521  *
522  * Output:
523  * 	none
524  *
525  * Returns:
526  * 	DAT_SUCCESS
527  *	DAT_INVALID_HANDLE
528  *	DAT_INSUFFICIENT_RESOURCES
529  *
530  */
531 /* ARGSUSED */
532 DAT_RETURN
533 dapls_ib_cqd_create(
534 	IN  DAPL_HCA		*hca_ptr)
535 {
536 	return (DAT_SUCCESS);
537 }
538 
539 
540 /*
541  * dapl_cqd_destroy
542  *
543  * Destroy CQ notification event thread
544  *
545  * Input:
546  *	ia_handle		IA handle
547  *
548  * Output:
549  * 	none
550  *
551  * Returns:
552  * 	DAT_SUCCESS
553  *	DAT_INVALID_HANDLE
554  *	DAT_INSUFFICIENT_RESOURCES
555  *
556  */
557 DAT_RETURN
558 dapls_ib_cqd_destroy(
559 	IN  DAPL_HCA		*hca_ptr)
560 {
561 	dapl_evd_free_t		args;
562 	ib_cq_handle_t		cq_handle;
563 	int			retval;
564 
565 	if (hca_ptr->null_ib_cq_handle != IB_INVALID_HANDLE) {
566 		/* free up the dummy cq */
567 		cq_handle = hca_ptr->null_ib_cq_handle;
568 		dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
569 		    "dapls_ib_cqd_destroy: cq %p\n", (void *)cq_handle);
570 
571 		args.evf_hkey = cq_handle->evd_hkey;
572 
573 		retval = ioctl(hca_ptr->ib_hca_handle->ia_fd,
574 		    DAPL_EVD_FREE, &args);
575 		if (retval != 0) {
576 			dapl_dbg_log(DAPL_DBG_TYPE_ERR,
577 			    "dapls_ib_cqd_destroy: EVD_FREE err:%d errno:%d\n",
578 			    retval, errno);
579 		}
580 
581 		dapl_os_free(cq_handle, sizeof (struct dapls_ib_cq_handle));
582 		hca_ptr->null_ib_cq_handle = IB_INVALID_HANDLE;
583 	}
584 
585 	return (DAT_SUCCESS);
586 }
587 
588 
589 /*
590  * dapl_ib_pd_alloc
591  *
592  * Alloc a PD
593  *
594  * Input:
595  *	ia_handle		IA handle
596  *	PZ_ptr			pointer to PZEVD struct
597  *
598  * Output:
599  * 	none
600  *
601  * Returns:
602  * 	DAT_SUCCESS
603  *	DAT_INSUFFICIENT_RESOURCES
604  *
605  */
606 DAT_RETURN
607 dapls_ib_pd_alloc(
608 	IN  DAPL_IA 		*ia,
609 	IN  DAPL_PZ 		*pz)
610 {
611 	struct dapls_ib_pd_handle *pd_p;
612 	dapl_pd_alloc_t args;
613 	int retval;
614 
615 	pd_p = (struct dapls_ib_pd_handle *)dapl_os_alloc(sizeof (*pd_p));
616 	if (pd_p == NULL) {
617 		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
618 		    "pd_alloc: ia 0x%p, pz 0x%p, cannot allocate pd\n",
619 		    ia, pz);
620 		return (DAT_INSUFFICIENT_RESOURCES);
621 	}
622 	retval = ioctl(ia->hca_ptr->ib_hca_handle->ia_fd,
623 	    DAPL_PD_ALLOC, &args);
624 	if (retval != 0) {
625 		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
626 		    "pd_alloc: ia 0x%p, pz 0x%p, cannot create pd, "
627 		    "err: %s\n", ia, pz, strerror(errno));
628 		dapl_os_free(pd_p, sizeof (*pd_p));
629 		return (dapls_convert_error(errno, retval));
630 	}
631 
632 	pd_p->pd_hkey = args.pda_hkey;
633 	pz->pd_handle = pd_p;
634 	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
635 	    "pd_alloc: successful, ia 0x%p, pz 0x%p, hkey %016llx\n",
636 	    ia, pz, args.pda_hkey);
637 
638 	return (DAT_SUCCESS);
639 }
640 
641 
642 /*
643  * dapl_ib_pd_free
644  *
645  * Free a PD
646  *
647  * Input:
648  *	ia_handle		IA handle
649  *	PZ_ptr			pointer to PZ struct
650  *
651  * Output:
652  * 	none
653  *
654  * Returns:
655  * 	DAT_SUCCESS
656  *	DAT_INSUFFICIENT_RESOURCES
657  *
658  */
659 DAT_RETURN
660 dapls_ib_pd_free(
661 	IN  DAPL_PZ 		*pz)
662 {
663 	struct dapls_ib_pd_handle *pd_p;
664 	dapl_pd_free_t args;
665 	int retval;
666 
667 	pd_p = (struct dapls_ib_pd_handle *)pz->pd_handle;
668 	args.pdf_hkey = pd_p->pd_hkey;
669 
670 	retval = ioctl(pz->header.owner_ia->hca_ptr->ib_hca_handle->ia_fd,
671 	    DAPL_PD_FREE, &args);
672 	if (retval != 0) {
673 		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
674 		    "pd_free: pz 0x%p, cannot free pd\n", pz);
675 		return (dapls_convert_error(errno, retval));
676 	}
677 	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
678 	    "pd_free: pz 0x%p, hkey %016llx, freed\n", pz, pd_p->pd_hkey);
679 	dapl_os_free((void *)pd_p, sizeof (*pd_p));
680 	pz->pd_handle = NULL;
681 	return (DAT_SUCCESS);
682 }
683 
684 
685 /*
686  * dapl_ib_mr_register
687  *
688  * Register a virtual memory region
689  *
690  * Input:
691  *	ia_handle		IA handle
692  *	lmr			pointer to dapl_lmr struct
693  *	virt_addr		virtual address of beginning of mem region
694  *	length			length of memory region
695  *
696  * Output:
697  * 	none
698  *
699  * Returns:
700  * 	DAT_SUCCESS
701  *	DAT_INSUFFICIENT_RESOURCES
702  *
703  */
704 DAT_RETURN
705 dapls_ib_mr_register(
706 	IN  DAPL_IA 		*ia,
707 	IN  DAPL_LMR		*lmr,
708 	IN  DAT_PVOID		virt_addr,
709 	IN  DAT_VLEN		length,
710 	IN  DAT_MEM_PRIV_FLAGS  privileges)
711 {
712 	dapl_mr_register_t	reg_msg;
713 	ib_mr_handle_t		mr_handle;
714 	DAPL_PZ *		pz_handle;
715 	int			ia_fd;
716 	int			retval;
717 
718 	ia_fd = ia->hca_ptr->ib_hca_handle->ia_fd;
719 	mr_handle = dapl_os_alloc(sizeof (struct dapls_ib_mr_handle));
720 	if (mr_handle == NULL) {
721 		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
722 		    "mr_register: lmr 0x%p, ia 0x%p, "
723 		    "cannot alloc mr_handle\n", lmr, ia);
724 		return (DAT_INSUFFICIENT_RESOURCES);
725 	}
726 	pz_handle = ((DAPL_PZ *)lmr->param.pz_handle);
727 	if (pz_handle == NULL) {
728 		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
729 		    "mr_register: lmr 0x%p, ia 0x%p, "
730 		    "pz_handle == NULL!\n", lmr, ia);
731 		dapl_os_free(mr_handle, sizeof (struct dapls_ib_mr_handle));
732 		return (DAT_INVALID_PARAMETER);
733 	}
734 	reg_msg.mr_pd_hkey = pz_handle->pd_handle->pd_hkey;
735 	reg_msg.mr_vaddr = (ib_vaddr_t)(uintptr_t)virt_addr;
736 	reg_msg.mr_len = (ib_memlen_t)length;
737 	reg_msg.mr_flags = (ibt_mr_flags_t)
738 	    dapl_lmr_convert_privileges(privileges);
739 	reg_msg.mr_flags |= IBT_MR_ENABLE_WINDOW_BIND;
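	/*
	 * Window binding is unconditionally enabled so that a memory
	 * window (RMR) can later be bound to this region via
	 * dapls_ib_mw_bind() without re-registering the memory.
	 */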
740 
741 	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
742 	    "mr_register: lmr 0x%p, pd_hkey 0x%016llx, vaddr 0x%016llx, "
743 	    "len %llu, flags 0x%x\n", lmr, reg_msg.mr_pd_hkey,
744 	    reg_msg.mr_vaddr, reg_msg.mr_len, reg_msg.mr_flags);
745 
746 	/* call into driver to allocate MR resource */
747 	retval = ioctl(ia_fd, DAPL_MR_REGISTER, &reg_msg);
748 	if (retval != 0) {
749 		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
750 		    "mr_register: lmr 0x%p, failed (%s)\n",
751 		    lmr, strerror(errno));
752 		dapl_os_free(mr_handle, sizeof (struct dapls_ib_mr_handle));
753 		return (dapls_convert_error(errno, retval));
754 	}
755 	mr_handle->mr_hkey = reg_msg.mr_hkey;
756 	lmr->param.lmr_context = (DAT_LMR_CONTEXT)reg_msg.mr_lkey;
757 	lmr->param.rmr_context = (DAT_RMR_CONTEXT)reg_msg.mr_rkey;
758 	lmr->param.registered_address = reg_msg.mr_vaddr;
759 	lmr->param.registered_size = reg_msg.mr_len;
760 	lmr->mr_handle = mr_handle;
761 
762 	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
763 	    "mr_register: successful, lmr 0x%p, mr_hkey 0x%016llx, "
764 	    "lmr_ctx 0x%08x\n\n", lmr, reg_msg.mr_hkey,
765 	    reg_msg.mr_lkey);
766 	return (DAT_SUCCESS);
767 }
768 
769 /*
770  * dapl_ib_mr_register_shared
771  *
772  * Register a shared virtual memory region
773  *
774  * Input:
775  *	ia_handle		IA handle
776  *	lmr			pointer to dapl_lmr struct
777  *	virt_addr		virtual address of beginning of mem region
778  *	cookie			shared memory identifier
779  *	length			length of memory region
780  *
781  * Output:
782  * 	none
783  *
784  * Returns:
785  * 	DAT_SUCCESS
786  *	DAT_INSUFFICIENT_RESOURCES
787  *
788  */
789 DAT_RETURN
790 dapls_ib_mr_register_shared(
791 	IN  DAPL_IA 		*ia,
792 	IN  DAPL_LMR		*lmr,
793 	IN  DAT_PVOID		virt_addr,
794 	IN  DAT_VLEN		length,
795 	IN  DAT_LMR_COOKIE	cookie,
796 	IN  DAT_MEM_PRIV_FLAGS  privileges)
797 {
798 	dapl_mr_register_shared_t	reg_msg;
799 	ib_mr_handle_t			mr_handle;
800 	DAPL_PZ				*pz_handle;
801 	int				ia_fd, i;
802 	int				retval;
803 
804 	ia_fd = ia->hca_ptr->ib_hca_handle->ia_fd;
805 	mr_handle = dapl_os_alloc(sizeof (struct dapls_ib_mr_handle));
806 	if (mr_handle == NULL) {
807 		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
808 		    "mr_register_shared: lmr 0x%p, ia 0x%p, "
809 		    "cannot alloc mr_handle\n", lmr, ia);
810 		return (DAT_INSUFFICIENT_RESOURCES);
811 	}
812 	pz_handle = ((DAPL_PZ *)lmr->param.pz_handle);
813 	if (pz_handle == NULL) {
814 		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
815 		    "mr_register_shared: lmr 0x%p, ia 0x%p, "
816 		    "pz_handle == NULL!\n", lmr, ia);
817 		dapl_os_free(mr_handle, sizeof (struct dapls_ib_mr_handle));
818 		return (DAT_INVALID_PARAMETER);
819 	}
820 	reg_msg.mrs_pd_hkey = pz_handle->pd_handle->pd_hkey;
821 	reg_msg.mrs_vaddr = (ib_vaddr_t)(uintptr_t)virt_addr;
822 	reg_msg.mrs_len = (ib_memlen_t)length;
823 	reg_msg.mrs_flags = (ibt_mr_flags_t)
824 	    dapl_lmr_convert_privileges(privileges);
825 	reg_msg.mrs_flags |= IBT_MR_ENABLE_WINDOW_BIND;
826 	/*CONSTCOND*/
827 	dapl_os_assert(DAT_LMR_COOKIE_SIZE == sizeof (reg_msg.mrs_shm_cookie));
828 	(void) dapl_os_memcpy((void *)&reg_msg.mrs_shm_cookie, (void *)cookie,
829 	    DAT_LMR_COOKIE_SIZE);
830 
831 	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
832 	    "mr_register_shared: lmr 0x%p, pd_hkey 0x%016llx, "
833 	    "vaddr 0x%016llx, len %llu, flags 0x%x\n",
834 	    lmr, reg_msg.mrs_pd_hkey, reg_msg.mrs_vaddr, reg_msg.mrs_len,
835 	    reg_msg.mrs_flags);
836 
837 	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
838 	    "mr_register_shared: cookie \n0x");
839 	for (i = 4; i >= 0; i--) {
840 		dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
841 		    "%016llx", reg_msg.mrs_shm_cookie.mc_uint_arr[i]);
842 	}
843 	dapl_dbg_log(DAPL_DBG_TYPE_UTIL, "\n");
844 
845 	/* call into driver to allocate MR resource */
846 	retval = ioctl(ia_fd, DAPL_MR_REGISTER_SHARED, &reg_msg);
847 	if (retval != 0) {
848 		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
849 		    "mr_register_shared: lmr 0x%p, failed (%s)\n",
850 		    lmr, strerror(errno));
851 		dapl_os_free(mr_handle, sizeof (struct dapls_ib_mr_handle));
852 		return (dapls_convert_error(errno, retval));
853 	}
854 	mr_handle->mr_hkey = reg_msg.mrs_hkey;
855 	lmr->param.lmr_context = (DAT_LMR_CONTEXT)reg_msg.mrs_lkey;
856 	lmr->param.rmr_context = (DAT_RMR_CONTEXT)reg_msg.mrs_rkey;
857 	lmr->param.registered_address = reg_msg.mrs_vaddr;
858 	lmr->param.registered_size = reg_msg.mrs_len;
859 	lmr->mr_handle = mr_handle;
860 
861 	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
862 	    "mr_register_shared: successful, lmr 0x%p, mr_hkey 0x%016llx, "
863 	    "lmr_ctx 0x%08x\n\n", lmr, reg_msg.mrs_hkey,
864 	    reg_msg.mrs_lkey);
865 	return (DAT_SUCCESS);
866 }
867 
868 /*
869  * dapl_ib_mr_deregister
870  *
871  * Free a memory region
872  *
873  * Input:
874  *	lmr			pointer to dapl_lmr struct
875  *
876  * Output:
877  * 	none
878  *
879  * Returns:
880  * 	DAT_SUCCESS
881  *	DAT_INSUFFICIENT_RESOURCES
882  *
883  */
884 DAT_RETURN
885 dapls_ib_mr_deregister(
886 	IN  DAPL_LMR		*lmr)
887 {
888 	dapl_mr_deregister_t args;
889 	int retval;
890 
891 	args.mrd_hkey = lmr->mr_handle->mr_hkey;
892 	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
893 	    "mr_deregister: lmr 0x%p, hkey 0x%016llx, lmr_ctx 0x%08x\n"
894 	    "               vaddr 0x%016llx, len %llu, flags 0x%x\n",
895 	    lmr, args.mrd_hkey, lmr->param.lmr_context,
896 	    lmr->param.registered_address, lmr->param.registered_size,
897 	    dapl_lmr_convert_privileges(lmr->param.mem_priv) |
898 	    IBT_MR_ENABLE_WINDOW_BIND);
899 
900 	/* call into driver to do MR deregister */
901 	retval = ioctl(lmr->header.owner_ia->hca_ptr->ib_hca_handle->ia_fd,
902 	    DAPL_MR_DEREGISTER, &args);
903 
904 	if (retval != 0) {
905 		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
906 		    "mr_deregister: lmr 0x%p, failed (%s)\n",
907 		    lmr, strerror(errno));
908 		return (dapls_convert_error(errno, retval));
909 	}
910 
911 	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
912 	    "mr_deregister: successful\n\n");
913 	dapl_os_free(lmr->mr_handle, sizeof (struct dapls_ib_mr_handle));
914 	lmr->mr_handle = NULL;
915 	return (DAT_SUCCESS);
916 }
917 
918 
919 /*
920  * dapl_ib_mr_register_lmr
921  *
922  * Register a memory region based on attributes of an existing one
923  *
924  * Input:
925  *	ia_handle		IA handle
926  *	lmr			pointer to dapl_lmr struct
927  *	virt_addr		virtual address of beginning of mem region
928  *	length			length of memory region
929  *
930  * Output:
931  * 	none
932  *
933  * Returns:
934  * 	DAT_SUCCESS
935  *	DAT_INSUFFICIENT_RESOURCES
936  *
937  */
938 DAT_RETURN
939 dapls_ib_mr_register_lmr(
940 	IN  DAPL_IA 			*ia,
941 	IN  DAPL_LMR			*lmr,
942 	IN  DAT_MEM_PRIV_FLAGS		privileges)
943 {
944 	dapl_mr_register_lmr_t		regl_msg;
945 	DAPL_LMR			*orig_lmr;
946 	struct dapls_ib_mr_handle	*orig_mr_handle;
947 	ib_mr_handle_t			mr_handle;
948 	int				ia_fd;
949 	int				retval;
950 
951 	ia_fd = ia->hca_ptr->ib_hca_handle->ia_fd;
952 	mr_handle = dapl_os_alloc(sizeof (struct dapls_ib_mr_handle));
953 	if (mr_handle == NULL) {
954 		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
955 		    "mr_register_lmr: lmr 0x%p, ia 0x%p, "
956 		    "cannot alloc mr_handle\n", lmr, ia);
957 		return (DAT_INSUFFICIENT_RESOURCES);
958 	}
959 
960 	orig_lmr = (DAPL_LMR *)lmr->param.region_desc.for_lmr_handle;
961 	orig_mr_handle = (struct dapls_ib_mr_handle *)orig_lmr->mr_handle;
962 	regl_msg.mrl_orig_hkey = orig_mr_handle->mr_hkey;
963 	regl_msg.mrl_flags = (ibt_mr_flags_t)
964 	    dapl_lmr_convert_privileges(privileges);
965 	regl_msg.mrl_flags |= IBT_MR_ENABLE_WINDOW_BIND;
966 	regl_msg.mrl_lkey = regl_msg.mrl_rkey = 0;
967 
968 	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
969 	    "mr_register_lmr: lmr 0x%p, hkey 0x%016llx, lmr_ctx 0x%08x\n"
970 	    "                 vaddr 0x%016llx, len %llu, flags 0x%x\n",
971 	    lmr, mr_handle->mr_hkey, lmr->param.lmr_context,
972 	    orig_lmr->param.registered_address,
973 	    orig_lmr->param.registered_size,
974 	    dapl_lmr_convert_privileges(orig_lmr->param.mem_priv) |
975 	    IBT_MR_ENABLE_WINDOW_BIND);
976 
977 
978 	/* call into driver to allocate MR resource */
979 	retval = ioctl(ia_fd, DAPL_MR_REGISTER_LMR, &regl_msg);
980 	if (retval != 0) {
981 		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
982 		    "mr_register_lmr: failed (%s), orig_hkey (%016llx)\n",
983 		    strerror(errno), orig_mr_handle->mr_hkey);
984 		dapl_os_free(mr_handle, sizeof (struct dapls_ib_mr_handle));
985 		return (dapls_convert_error(errno, retval));
986 	}
987 
988 	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
989 	    "mr_register_lmr: successful, lmr 0x%p, hkey 0x%016llx\n",
990 	    lmr, regl_msg.mrl_hkey);
991 
992 	mr_handle->mr_hkey = regl_msg.mrl_hkey;
993 	lmr->param.lmr_context = (DAT_LMR_CONTEXT)regl_msg.mrl_lkey;
994 	lmr->param.rmr_context = (DAT_RMR_CONTEXT)regl_msg.mrl_rkey;
995 	lmr->param.registered_address = orig_lmr->param.registered_address;
996 	lmr->param.registered_size = orig_lmr->param.registered_size;
997 	lmr->mr_handle = mr_handle;
998 
999 	return (DAT_SUCCESS);
1000 }
1001 
1002 
1003 /*
1004  * dapls_ib_mw_alloc
1005  *
1006  * Allocate a memory window associated with a protection domain
1007  *
1008  * Input:
1009  *	rmr			Initialized rmr to hold binding handles
1010  *
1011  * Output:
1012  * 	none
1013  *
1014  * Returns:
1015  * 	DAT_SUCCESS
1016  *	DAT_INSUFFICIENT_RESOURCES
1017  *
1018  */
1019 DAT_RETURN
1020 dapls_ib_mw_alloc(
1021 	IN  DAPL_RMR	*rmr)
1022 {
1023 	DAPL_IA		*ia_hdl = (DAPL_IA *)rmr->param.ia_handle;
1024 	DAPL_PZ		*pz_hdl = rmr->param.pz_handle;
1025 	dapl_mw_alloc_t	args;
1026 	ib_mw_handle_t	mw_handle;
1027 	int		ia_fd;
1028 	int		retval;
1029 
1030 	ia_fd = ((struct dapls_ib_hca_handle *)(ia_hdl->hca_ptr->
1031 	    ib_hca_handle))->ia_fd;
1032 
1033 	mw_handle = dapl_os_alloc(sizeof (struct dapls_ib_mw_handle));
1034 	if (mw_handle == NULL) {
1035 		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
1036 		    "mw_alloc: rmr 0x%p, cannot alloc mw_handle\n", rmr);
1037 		return (DAT_INSUFFICIENT_RESOURCES);
1038 	}
1039 	args.mw_pd_hkey = ((struct dapls_ib_pd_handle *)
1040 	    (pz_hdl->pd_handle))->pd_hkey;
1041 
1042 	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
1043 	    "mw_alloc: rmr 0x%p, pd_hkey 0x%016llx\n",
1044 	    rmr, args.mw_pd_hkey);
1045 
1046 	retval = ioctl(ia_fd, DAPL_MW_ALLOC, &args);
1047 	if (retval != 0) {
1048 		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
1049 		    "mw_alloc: rmr 0x%p, failed (%s)\n", rmr, strerror(errno));
1050 		dapl_os_free(mw_handle, sizeof (struct dapls_ib_mw_handle));
1051 		return (dapls_convert_error(errno, retval));
1052 	}
1053 
1054 	mw_handle->mw_hkey = args.mw_hkey;
1055 	rmr->mw_handle = mw_handle;
1056 	rmr->param.rmr_context = (DAT_RMR_CONTEXT) args.mw_rkey;
1057 
1058 	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
1059 	    "mw_alloc: successful, rmr 0x%p, mw_hkey 0x%llx, "
1060 	    "rmr_ctx 0x%x\n\n", rmr, (uint64_t)args.mw_hkey,
1061 	    rmr->param.rmr_context);
1062 
1063 	return (DAT_SUCCESS);
1064 }
1065 
1066 
1067 /*
1068  * dapls_ib_mw_free
1069  *
1070  * Release bindings of a protection domain to a memory window
1071  *
1072  * Input:
1073  *	rmr			Initialized rmr to hold binding handles
1074  *
1075  * Output:
1076  * 	none
1077  *
1078  * Returns:
1079  * 	DAT_SUCCESS
1080  *	DAT_INSUFFICIENT_RESOURCES
1081  *
1082  */
1083 DAT_RETURN
1084 dapls_ib_mw_free(
1085 	IN  DAPL_RMR	*rmr)
1086 {
1087 	DAPL_IA		*ia_hdl = rmr->param.ia_handle;
1088 	dapl_mw_free_t	args;
1089 	int		ia_fd;
1090 	int		retval;
1091 
1092 	ia_fd = ((struct dapls_ib_hca_handle *)(ia_hdl->hca_ptr->
1093 	    ib_hca_handle))->ia_fd;
1094 
1095 	args.mw_hkey = rmr->mw_handle->mw_hkey;
1096 
1097 	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
1098 	    "mw_free: rmr 0x%p, mw_hkey 0x%016llx\n", rmr, args.mw_hkey);
1099 
1100 	retval = ioctl(ia_fd, DAPL_MW_FREE, &args);
1101 	if (retval != 0) {
1102 		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
1103 		    "mw_free: rmr 0x%p, failed (%s)\n", rmr, strerror(errno));
1104 		return (dapls_convert_error(errno, retval));
1105 	}
1106 
1107 	dapl_dbg_log(DAPL_DBG_TYPE_UTIL, "mw_free: successful\n\n");
1108 	dapl_os_free(rmr->mw_handle, sizeof (struct dapls_ib_mw_handle));
1109 	rmr->mw_handle = NULL;
1110 
1111 	return (DAT_SUCCESS);
1112 }
1113 
1114 /*
1115  * dapls_ib_mw_bind
1116  *
1117  * Bind a memory window to a registered memory region
1118  *
1119  * Input:
1120  *	rmr			Initialized rmr to hold binding handles
1121  *
1122  * Output:
1123  * 	none
1124  *
1125  * Returns:
1126  * 	DAT_SUCCESS
1127  *	DAT_INSUFFICIENT_RESOURCES
1128  *
1129  */
1130 DAT_RETURN
1131 dapls_ib_mw_bind(
1132 	IN  DAPL_RMR		*rmr,
1133 	IN  DAT_LMR_CONTEXT	lmr_context,
1134 	IN  DAPL_EP		*ep,
1135 	IN  DAPL_COOKIE		*cookie,
1136 	IN  DAT_VADDR		virtual_address,
1137 	IN  DAT_VLEN		length,
1138 	IN  DAT_MEM_PRIV_FLAGS	mem_priv,
1139 	IN  DAT_COMPLETION_FLAGS completion_flags)
1140 {
1141 	ibt_send_wr_t	wre;
1142 	ibt_wr_bind_t	wrbind;
1143 	boolean_t	suppress_notification;
1144 	int		retval;
1145 
1146 	if (length > 0) {
1147 		wrbind.bind_flags = (ibt_bind_flags_t)
1148 		    (dapl_rmr_convert_privileges(mem_priv) |
1149 		    IBT_WR_BIND_ATOMIC);
1150 	} else {
1151 		wrbind.bind_flags = (ibt_bind_flags_t)NULL;
1152 	}
1153 	wrbind.bind_rkey = rmr->param.rmr_context;
1154 	wrbind.bind_va = virtual_address;
1155 	wrbind.bind_len = length;
1156 	wrbind.bind_lkey = lmr_context;
1157 
1158 	wre.wr_id = (ibt_wrid_t)(uintptr_t)cookie;
1159 	/*
1160 	 * wre.wr_flags = (is_signaled) ? IBT_WR_SEND_SIGNAL :
1161 	 *   IBT_WR_NO_FLAGS;
1162 	 * Until the channel alloc flags are fixed, do the following:
1163 	 */
1164 	/* Translate dapl flags */
1165 	wre.wr_flags = (DAT_COMPLETION_BARRIER_FENCE_FLAG &
1166 	    completion_flags) ? IBT_WR_SEND_FENCE : 0;
1167 	/* suppress completions */
1168 	wre.wr_flags |= (DAT_COMPLETION_SUPPRESS_FLAG &
1169 	    completion_flags) ? 0 : IBT_WR_SEND_SIGNAL;
1170 
1171 	wre.wr_trans = IBT_RC_SRV;
1172 	wre.wr_opcode = IBT_WRC_BIND;
1173 	wre.wr_nds = 0;
1174 	wre.wr_sgl = NULL;
1175 	wre.wr.rc.rcwr.bind = &wrbind;
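	/*
	 * The bind is posted as an ordinary send-queue work request
	 * (IBT_WRC_BIND) on the EP's RC channel; its completion is
	 * reported through the request EVD like any other send.
	 */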
1176 
1177 	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
1178 	    "mw_bind: rmr 0x%p, wr_flags 0x%x, rkey 0x%x, bind_flags 0x%x\n"
1179 	    "         bind_va 0x%llx, bind_len 0x%llx, mem_priv 0x%x\n",
1180 	    rmr, wre.wr_flags, wrbind.bind_rkey, wrbind.bind_flags,
1181 	    wrbind.bind_va, wrbind.bind_len, mem_priv);
1182 
1183 	if (ep->param.ep_attr.recv_completion_flags &
1184 	    DAT_COMPLETION_UNSIGNALLED_FLAG) {
1185 		/* This flag is used to control notification of completions */
1186 		suppress_notification = (completion_flags &
1187 		    DAT_COMPLETION_UNSIGNALLED_FLAG) ? B_TRUE : B_FALSE;
1188 	} else {
1189 		/*
1190 		 * The EVD waiter uses the threshold to control wakeups,
1191 		 * and event notification is done by arming the CQ, so no
1192 		 * special notification generation is needed; hence set
1193 		 * suppression to true.
1194 		 */
1195 		suppress_notification = B_TRUE;
1196 	}
1197 
1198 	retval = DAPL_SEND(ep)(ep, &wre, suppress_notification);
1199 
1200 	if (retval != 0) {
1201 		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
1202 		    "mw_bind: rmr 0x%p, failed (%s)\n", rmr, strerror(errno));
1203 		return (dapls_convert_error(errno, retval));
1204 	}
1205 
1206 	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
1207 	    "mw_bind: new_rkey = 0x%x\n", wrbind.bind_rkey_out);
1208 	rmr->param.rmr_context = (DAT_RMR_CONTEXT) wrbind.bind_rkey_out;
1209 
1210 	return (DAT_SUCCESS);
1211 }
1212 
1213 /*
1214  * dapls_ib_mw_unbind
1215  *
1216  * Unbind a memory window from its registered memory region
1217  *
1218  * Input:
1219  *	rmr			Initialized rmr to hold binding handles
1220  *
1221  * Output:
1222  *	none
1223  *
1224  * Returns:
1225  *	DAT_SUCCESS
1226  *	DAT_INSUFFICIENT_RESOURCES
1227  *
1228  */
1229 DAT_RETURN
1230 dapls_ib_mw_unbind(
1231 	IN  DAPL_RMR		*rmr,
1232 	IN  DAT_LMR_CONTEXT	lmr_context,
1233 	IN  DAPL_EP		*ep,
1234 	IN  DAPL_COOKIE		*cookie,
1235 	IN  DAT_COMPLETION_FLAGS completion_flags)
1236 {
1237 	DAT_RETURN retval;
1238 
1239 	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
1240 	    "mw_unbind: rmr 0x%p, enter\n", rmr);
1241 
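	/*
	 * An unbind is just a bind with zero VA/length and no privileges,
	 * which invalidates the window's current rkey.
	 */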
1242 	retval = dapls_ib_mw_bind(rmr, lmr_context, ep, cookie,
1243 	    (DAT_VADDR)0, (DAT_VLEN)0, (DAT_MEM_PRIV_FLAGS)NULL,
1244 	    completion_flags);
1245 
1246 	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
1247 	    "mw_unbind: rmr 0x%p, exit\n\n", rmr);
1248 
1249 	return (retval);
1250 }
1251 
1252 /*
1253  * Processes async events and calls appropriate callbacks so that events
1254  * can be posted to the async evd.
1255  */
1256 void
1257 dapls_ib_async_callback(
1258 	IN    DAPL_EVD		  *async_evd,
1259 	IN    ib_hca_handle_t	  hca_handle,
1260 	IN    ib_error_record_t	  *event_ptr,
1261 	IN    void		  *context)
1262 {
1263 	DAPL_IA			*ia_ptr;
1264 	DAPL_EP			*ep_ptr;
1265 	DAPL_EVD		*evd_ptr;
1266 	dapl_ib_async_event_t	*async_evp;
1267 
1268 	ia_ptr = (DAPL_IA *)context;
1269 
1270 	dapl_os_assert(event_ptr != NULL);
1271 	async_evp = (dapl_ib_async_event_t *)event_ptr;
1272 
1273 	switch (async_evp->ibae_type) {
1274 	case IBT_ERROR_INVALID_REQUEST_CHAN:
1275 	case IBT_ERROR_CATASTROPHIC_CHAN:
1276 		/*
1277 		 * Walk the EPs to match this EP, then invoke the
1278 		 * routine when we have the EP we need
1279 		 */
1280 		dapl_os_assert(!dapl_llist_is_empty(&ia_ptr->ep_list_head));
1281 		dapl_os_lock(&ia_ptr->header.lock);
1282 
1283 		ep_ptr = (DAPL_EP *)dapl_llist_next_entry(&ia_ptr->ep_list_head,
1284 		    NULL);
1285 		while (ep_ptr != NULL) {
1286 			if (ep_ptr ==
1287 			    (DAPL_EP *)(uintptr_t)async_evp->ibae_cookie) {
1288 				break;
1289 			}
1290 
1291 			ep_ptr = (DAPL_EP *) dapl_llist_next_entry(
1292 			    &ia_ptr->ep_list_head,
1293 			    &ep_ptr->header.ia_list_entry);
1294 		}
1295 
1296 		dapl_os_unlock(&ia_ptr->header.lock);
1297 		dapl_os_assert(ep_ptr != NULL);
1298 		dapl_evd_qp_async_error_callback(hca_handle, NULL, event_ptr,
1299 		    (void *)ep_ptr);
1300 		break;
1301 	case IBT_ERROR_CQ:
1302 		/*
1303 		 * Walk the EVDs to match this EVD, then invoke the
1304 		 * routine when we have the EVD we need
1305 		 */
1306 		dapl_os_assert(!dapl_llist_is_empty(&ia_ptr->evd_list_head));
1307 		dapl_os_lock(&ia_ptr->header.lock);
1308 
1309 		evd_ptr = (DAPL_EVD *) dapl_llist_next_entry(
1310 		    &ia_ptr->evd_list_head, NULL);
1311 		while (evd_ptr != NULL) {
1312 			if (evd_ptr ==
1313 			    (DAPL_EVD *)(uintptr_t)async_evp->ibae_cookie) {
1314 				break;
1315 			}
1316 			evd_ptr = (DAPL_EVD *)
1317 			    dapl_llist_next_entry(&ia_ptr->evd_list_head,
1318 			    &evd_ptr->header.ia_list_entry);
1319 		}
1320 		dapl_os_unlock(&ia_ptr->header.lock);
1321 		dapl_os_assert(evd_ptr != NULL);
1322 		dapl_evd_cq_async_error_callback(hca_handle, NULL, event_ptr,
1323 		    (void *)evd_ptr);
1324 		break;
1325 	case IBT_ERROR_PORT_DOWN:
1326 	case IBT_ERROR_LOCAL_CATASTROPHIC:
1327 		dapl_evd_un_async_error_callback(hca_handle, event_ptr,
1328 		    (void *)async_evd);
1329 		break;
1330 	default:
1331 		/*
1332 		 * We are not interested in the following events
1333 		 * case IBT_EVENT_PATH_MIGRATED:
1334 		 * case IBT_EVENT_COM_EST:
1335 		 * case IBT_EVENT_SQD:
1336 		 * case IBT_ERROR_PATH_MIGRATE_REQ:
1337 		 * case IBT_EVENT_PORT_UP:
1338 		 */
1339 		dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
1340 		    "dapls_ib_async_callback: unhandled async code:%x\n",
1341 		    async_evp->ibae_type);
1342 		break;
1343 	}
1344 }
1345 
1346 /*
1347  * dapls_ib_setup_async_callback
1348  * The reference implementation calls this to register callbacks,
1349  * but since our model of polling for events is based on retrieving
1350  * events by the waiting thread itself this is a NOOP for us.
1351  */
1352 /* ARGSUSED */
1353 DAT_RETURN
1354 dapls_ib_setup_async_callback(
1355 	IN  DAPL_IA			*ia_ptr,
1356 	IN  DAPL_ASYNC_HANDLER_TYPE	handler_type,
1357 	IN  unsigned int		*callback_handle,
1358 	IN  ib_async_handler_t		callback,
1359 	IN  void			*context)
1360 {
1361 	return (DAT_SUCCESS);
1362 }
1363 
1364 /*
1365  * dapls_ib_query_hca
1366  *
1367  * Query the HCA and fill in the supplied attribute structures
1368  *
1369  * Input:
1370  *	hca_ptr			HCA handle
1371  *
1372  * Output:
1373  *	ia_attr, ep_attr, srq_attr
1374  *				filled in when non-NULL
1375  *
1376  * Returns:
1377  * 	DAT_SUCCESS
1378  *	DAT_INVALID_PARAMETER
1379  */
1380 
1381 /* these are just arbitrary values for now */
1382 
1383 static DAT_RETURN
1384 dapls_ib_query_ia(
1385 	IN  dapl_hca_attr_t		*hca_attr,
1386 	IN  DAPL_HCA			*hca_ptr,
1387 	OUT DAT_IA_ATTR			*ia_attr)
1388 {
1389 	(void) dapl_os_memzero(ia_attr, sizeof (*ia_attr));
1390 
1391 	(void) dapl_os_strcpy(ia_attr->adapter_name, VN_ADAPTER_NAME);
1392 
1393 	(void) sprintf(ia_attr->vendor_name, "0x%08x:0x%08x",
1394 	    hca_attr->dhca_vendor_id, hca_attr->dhca_device_id);
1395 
1396 	ia_attr->hardware_version_major = (DAT_UINT32)hca_attr->dhca_version_id;
1397 	ia_attr->ia_address_ptr = (DAT_IA_ADDRESS_PTR)&hca_ptr->hca_address;
1398 	ia_attr->max_eps = (DAT_COUNT)hca_attr->dhca_max_chans;
1399 	ia_attr->max_dto_per_ep = (DAT_COUNT)hca_attr->dhca_max_chan_sz;
1400 	ia_attr->max_rdma_read_per_ep_in = hca_attr->dhca_max_rdma_in_chan;
1401 	ia_attr->max_rdma_read_per_ep_out = hca_attr->dhca_max_rdma_out_chan;
1402 	ia_attr->max_evds = (DAT_COUNT)hca_attr->dhca_max_cq;
1403 	ia_attr->max_evd_qlen = (DAT_COUNT)hca_attr->dhca_max_cq_sz;
1404 	/* max_iov_segments_per_dto is for non-RDMA */
1405 	ia_attr->max_iov_segments_per_dto = (DAT_COUNT)hca_attr->dhca_max_sgl;
1406 	ia_attr->max_lmrs = (DAT_COUNT)hca_attr->dhca_max_memr;
1407 	ia_attr->max_lmr_block_size = (DAT_VLEN)hca_attr->dhca_max_memr_len;
1408 	ia_attr->max_lmr_virtual_address = (DAT_VADDR)DAPL_MAX_ADDRESS;
1409 	ia_attr->max_pzs = (DAT_COUNT)hca_attr->dhca_max_pd;
1410 	ia_attr->max_mtu_size = (DAT_VLEN)DAPL_IB_MAX_MESSAGE_SIZE;
1411 	ia_attr->max_rdma_size = (DAT_VLEN)DAPL_IB_MAX_MESSAGE_SIZE;
1412 	ia_attr->max_rmrs = (DAT_COUNT)hca_attr->dhca_max_mem_win;
1413 	ia_attr->max_rmr_target_address = (DAT_VADDR)DAPL_MAX_ADDRESS;
1414 	ia_attr->max_iov_segments_per_rdma_read =
1415 	    (DAT_COUNT)hca_attr->dhca_max_sgl;
1416 	ia_attr->max_iov_segments_per_rdma_write =
1417 	    (DAT_COUNT)hca_attr->dhca_max_sgl;
1418 	/* all instances of IA */
1419 	ia_attr->max_rdma_read_in = hca_attr->dhca_max_rdma_in_chan *
1420 	    hca_attr->dhca_max_chans;
1421 	ia_attr->max_rdma_read_out = hca_attr->dhca_max_rdma_out_chan *
1422 	    hca_attr->dhca_max_chans;
1423 	ia_attr->max_rdma_read_per_ep_in_guaranteed = DAT_TRUE;
1424 	ia_attr->max_rdma_read_per_ep_out_guaranteed = DAT_TRUE;
1425 	ia_attr->max_srqs = (DAT_COUNT)hca_attr->dhca_max_srqs;
1426 	ia_attr->max_ep_per_srq = ia_attr->max_eps;
1427 	ia_attr->max_recv_per_srq = (DAT_COUNT)hca_attr->dhca_max_srqs_sz;
1428 
1429 	dapl_dbg_log(DAPL_DBG_TYPE_UTIL, "IA Attributes:\n"
1430 	    "\tadapter_name %s\n "
1431 	    "\tvendor_name %s\n "
1432 	    "\thardware_version_major 0x%08x\n"
1433 	    "\tmax_eps %d\n"
1434 	    "\tmax_dto_per_ep %d\n"
1435 	    "\tmax_rdma_read_per_ep_in %d\n"
1436 	    "\tmax_rdma_read_per_ep_out %d\n"
1437 	    "\tmax_evds %d\n"
1438 	    "\tmax_evd_qlen %d\n"
1439 	    "\tmax_iov_segments_per_dto %d\n"
1440 	    "\tmax_lmrs %d\n"
1441 	    "\tmax_lmr_block_size 0x%016llx\n"
1442 	    "\tmax_lmr_virtual_address 0x%016llx\n"
1443 	    "\tmax_pzs %d\n"
1444 	    "\tmax_mtu_size 0x%016llx\n"
1445 	    "\tmax_rdma_size 0x%016llx\n"
1446 	    "\tmax_rmrs %d\n"
1447 	    "\tmax_rmr_target_address 0x%016llx\n"
1448 	    "\tmax_iov_segments_per_rdma_read %d\n"
1449 	    "\tmax_iov_segments_per_rdma_write %d\n"
1450 	    "\tmax_rdma_read_in %d\n"
1451 	    "\tmax_rdma_read_out %d\n"
1452 	    "\tmax_srqs %d\n"
1453 	    "\tmax_ep_per_srq %d\n"
1454 	    "\tmax_recv_per_srq %d\n"
1455 	    "\n",
1456 	    ia_attr->adapter_name,
1457 	    ia_attr->vendor_name,
1458 	    ia_attr->hardware_version_major,
1459 	    ia_attr->max_eps,
1460 	    ia_attr->max_dto_per_ep,
1461 	    ia_attr->max_rdma_read_per_ep_in,
1462 	    ia_attr->max_rdma_read_per_ep_out,
1463 	    ia_attr->max_evds,
1464 	    ia_attr->max_evd_qlen,
1465 	    ia_attr->max_iov_segments_per_dto,
1466 	    ia_attr->max_lmrs,
1467 	    ia_attr->max_lmr_block_size,
1468 	    ia_attr->max_lmr_virtual_address,
1469 	    ia_attr->max_pzs,
1470 	    ia_attr->max_mtu_size,
1471 	    ia_attr->max_rdma_size,
1472 	    ia_attr->max_rmrs,
1473 	    ia_attr->max_rmr_target_address,
1474 	    ia_attr->max_iov_segments_per_rdma_read,
1475 	    ia_attr->max_iov_segments_per_rdma_write,
1476 	    ia_attr->max_rdma_read_in,
1477 	    ia_attr->max_rdma_read_out,
1478 	    ia_attr->max_srqs,
1479 	    ia_attr->max_ep_per_srq,
1480 	    ia_attr->max_recv_per_srq);
1481 
1482 	return (DAT_SUCCESS);
1483 }
1484 
1485 /* ARGSUSED */
1486 static DAT_RETURN
1487 dapls_ib_query_ep(
1488 	IN  dapl_hca_attr_t		*hca_attr,
1489 	IN  DAPL_HCA			*hca_ptr,
1490 	OUT DAT_EP_ATTR			*ep_attr)
1491 {
1492 	(void) dapl_os_memzero(ep_attr, sizeof (*ep_attr));
1493 	ep_attr->service_type = DAT_SERVICE_TYPE_RC;
1494 	ep_attr->max_mtu_size = DAPL_IB_MAX_MESSAGE_SIZE;
1495 	ep_attr->max_rdma_size = DAPL_IB_MAX_MESSAGE_SIZE;
1496 	ep_attr->qos = DAT_QOS_BEST_EFFORT;
1497 	ep_attr->max_recv_dtos = hca_attr->dhca_max_chan_sz;
1498 	ep_attr->max_request_dtos = hca_attr->dhca_max_chan_sz;
1499 	ep_attr->max_recv_iov = hca_attr->dhca_max_sgl;
1500 	ep_attr->max_request_iov = hca_attr->dhca_max_sgl;
1501 	ep_attr->request_completion_flags = DAT_COMPLETION_DEFAULT_FLAG;
1502 	ep_attr->recv_completion_flags = DAT_COMPLETION_DEFAULT_FLAG;
1503 	ep_attr->srq_soft_hw = DAT_HW_DEFAULT;
1504 	return (DAT_SUCCESS);
1505 }
1506 
1507 static void
1508 dapls_ib_query_srq(
1509 	IN  dapl_hca_attr_t		*hca_attr,
1510 	OUT DAT_SRQ_ATTR		*srq_attr)
1511 {
1512 	(void) dapl_os_memzero(srq_attr, sizeof (*srq_attr));
1513 	srq_attr->max_recv_dtos = hca_attr->dhca_max_srqs_sz;
1514 	srq_attr->max_recv_iov = hca_attr->dhca_max_srq_sgl;
1515 	srq_attr->low_watermark = DAT_SRQ_LW_DEFAULT;
1516 }
1517 
1518 /* ARGSUSED */
1519 DAT_RETURN
1520 dapls_ib_query_hca(
1521 	IN  DAPL_HCA			*hca_ptr,
1522 	OUT DAT_IA_ATTR			*ia_attr,
1523 	OUT DAT_EP_ATTR			*ep_attr,
1524 	OUT DAT_SOCK_ADDR6		*ip_addr,
1525 	OUT DAT_SRQ_ATTR		*srq_attr)
1526 {
1527 	dapl_ia_query_t args;
1528 	int ia_fd, retval;
1529 
1530 	if (hca_ptr == NULL) {
1531 		return (DAT_INVALID_PARAMETER);
1532 	}
1533 
1534 	ia_fd = hca_ptr->ib_hca_handle->ia_fd;
1535 	retval = ioctl(ia_fd, DAPL_IA_QUERY, &args);
1536 	if (retval != 0) {
1537 		return (dapls_convert_error(errno, retval));
1538 	}
1539 
1540 	if (ia_attr != NULL) {
1541 		(void) dapls_ib_query_ia(&args.hca_attr, hca_ptr, ia_attr);
1542 	}
1543 	if (ep_attr != NULL) {
1544 		(void) dapls_ib_query_ep(&args.hca_attr, hca_ptr, ep_attr);
1545 	}
1546 	if (srq_attr != NULL) {
1547 		(void) dapls_ib_query_srq(&args.hca_attr, srq_attr);
1548 	}
1549 	if (ia_attr == NULL && ep_attr == NULL && srq_attr == NULL) {
1550 		return (DAT_INVALID_PARAMETER);
1551 	}
1552 	return (DAT_SUCCESS);
1553 }
1554 
1555 void
1556 dapls_ib_store_premature_events(
1557 	IN ib_qp_handle_t	qp_ptr,
1558 	IN ib_work_completion_t	*cqe_ptr)
1559 {
1560 	ib_srq_handle_t	srqp;
1561 	int		head;
1562 
1563 	if (qp_ptr->qp_srq_enabled) {
1564 		/*
1565 		 * For QPs with SRQ attached store the premature event in the
1566 		 * SRQ's premature event list
1567 		 */
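		/*
		 * srq_freepr_events is a circular free list of slot
		 * indexes into srq_premature_events; the head advances
		 * modulo the SRQ depth (srq_wq_numwqe) as slots are
		 * consumed, and dapls_ib_free_premature_events() later
		 * pushes freed slots back at the tail.
		 */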
1568 		srqp = qp_ptr->qp_srq;
1569 		dapl_os_assert(srqp->srq_freepr_num_events > 0);
1570 		head = srqp->srq_freepr_events[srqp->srq_freepr_head];
1571 		/*
1572 		 * mark cqe as valid before storing it in the
1573 		 * premature events list
1574 		 */
1575 		DAPL_SET_CQE_VALID(cqe_ptr);
1576 		(void) dapl_os_memcpy(&(srqp->srq_premature_events[head]),
1577 		    cqe_ptr, sizeof (*cqe_ptr));
1578 		srqp->srq_freepr_head = (srqp->srq_freepr_head + 1) %
1579 		    srqp->srq_wq_numwqe;
1580 		srqp->srq_freepr_num_events--;
1581 	} else {
1582 		(void) dapl_os_memcpy(&(qp_ptr->qp_premature_events[
1583 		    qp_ptr->qp_num_premature_events]),
1584 		    cqe_ptr, sizeof (*cqe_ptr));
1585 	}
1586 	qp_ptr->qp_num_premature_events++;
1587 }
1588 
1589 void
1590 dapls_ib_poll_premature_events(
1591 	IN  DAPL_EP			*ep_ptr,
1592 	OUT ib_work_completion_t	**cqe_ptr,
1593 	OUT int				*nevents)
1594 {
1595 	ib_qp_handle_t qp = ep_ptr->qp_handle;
1596 
1597 	if (qp->qp_srq_enabled) {
1598 		*cqe_ptr = qp->qp_srq->srq_premature_events;
1599 	} else {
1600 		*cqe_ptr = qp->qp_premature_events;
1601 	}
1602 
1603 	*nevents = qp->qp_num_premature_events;
1604 	qp->qp_num_premature_events = 0;
1605 }
1606 
1607 /*
1608  * Return the premature events to the free list after processing them.
1609  * This function is called only for premature events on the SRQ
1610  */
1611 void
1612 dapls_ib_free_premature_events(
1613 	IN  DAPL_EP	*ep_ptr,
1614 	IN  int		free_index)
1615 {
1616 	ib_qp_handle_t	qp_ptr;
1617 	ib_srq_handle_t	srq_ptr;
1618 	int		tail;
1619 
1620 	qp_ptr = ep_ptr->qp_handle;
1621 	srq_ptr = qp_ptr->qp_srq;
1622 
1623 	dapl_os_assert(qp_ptr->qp_srq_enabled);
1624 
1625 	tail = srq_ptr->srq_freepr_tail;
1626 	srq_ptr->srq_freepr_events[tail] = free_index;
1627 	srq_ptr->srq_freepr_tail = (tail + 1) % srq_ptr->srq_wq_numwqe;
1628 	srq_ptr->srq_freepr_num_events++;
1629 	DAPL_SET_CQE_INVALID(&srq_ptr->srq_premature_events[free_index]);
1630 }
1631 
1632 /*
1633  * dapls_ib_get_async_event
1634  *
1635  * Translate an asynchronous event type to the DAT event.
1636  * Note that different providers have different sets of errors.
1637  *
1638  * Input:
1639  *	cause_ptr		provider event cause
1640  *
1641  * Output:
1642  *	async_event		DAT mapping of error
1643  *
1644  * Returns:
1645  *	DAT_SUCCESS
1646  *	DAT_NOT_IMPLEMENTED	Caller is not interested this event
1647  */
1648 
1649 DAT_RETURN dapls_ib_get_async_event(
1650 	IN  ib_error_record_t		*cause_ptr,
1651 	OUT DAT_EVENT_NUMBER		*async_event)
1652 {
1653 	ibt_async_code_t	code;
1654 	DAT_RETURN		dat_status;
1655 
1656 	dat_status = DAT_SUCCESS;
1657 	code = (ibt_async_code_t)((dapl_ib_async_event_t *)cause_ptr->
1658 	    ibae_type);
1659 
1660 	switch (code) {
1661 	case IBT_ERROR_CQ:
1662 	case IBT_ERROR_ACCESS_VIOLATION_CHAN:
1663 	case IBT_ERROR_INVALID_REQUEST_CHAN:
1664 		*async_event = DAT_ASYNC_ERROR_PROVIDER_INTERNAL_ERROR;
1665 		break;
1666 	/* CATASTROPHIC errors */
1667 	case IBT_ERROR_CATASTROPHIC_CHAN:
1668 	case IBT_ERROR_LOCAL_CATASTROPHIC:
1669 	case IBT_ERROR_PORT_DOWN:
1670 		*async_event = DAT_ASYNC_ERROR_IA_CATASTROPHIC;
1671 		break;
1672 	default:
1673 		/*
1674 		 * Errors we are not interested in reporting:
1675 		 * IBT_EVENT_PATH_MIGRATED
1676 		 * IBT_ERROR_PATH_MIGRATE_REQ
1677 		 * IBT_EVENT_COM_EST
1678 		 * IBT_EVENT_SQD
1679 		 * IBT_EVENT_PORT_UP
1680 		 */
1681 		dat_status = DAT_NOT_IMPLEMENTED;
1682 	}
1683 	return (dat_status);
1684 }
1685 
1686 DAT_RETURN
1687 dapls_ib_event_poll(
1688 	IN DAPL_EVD		*evd_ptr,
1689 	IN uint64_t		timeout,
1690 	IN uint_t		threshold,
1691 	OUT dapl_ib_event_t	*evp_ptr,
1692 	OUT int			*num_events)
1693 {
1694 	dapl_event_poll_t	evp_msg;
1695 	int			ia_fd;
1696 	int			retval;
1697 
1698 	*num_events = 0;
1699 	ia_fd = evd_ptr->header.owner_ia->hca_ptr->ib_hca_handle->ia_fd;
1700 
1701 	evp_msg.evp_evd_hkey = evd_ptr->ib_cq_handle->evd_hkey;
1702 	evp_msg.evp_threshold = threshold;
1703 	evp_msg.evp_timeout = timeout;
1704 	evp_msg.evp_ep = evp_ptr;
1705 	if (evp_ptr) {
1706 		evp_msg.evp_num_ev =
1707 		    DAPL_MAX(evd_ptr->threshold, NUM_EVENTS_PER_POLL);
1708 	} else {
1709 		evp_msg.evp_num_ev = 0;
1710 	}
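	/*
	 * With a NULL evp_ptr no event buffer is passed down; the ioctl
	 * then presumably just waits in the kernel for the threshold to
	 * be met rather than copying events out.
	 */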
1711 	evp_msg.evp_num_polled = 0;
1712 
1713 	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
1714 	    "event_poll: evd 0x%p, hkey 0x%llx, threshold %d,\n"
1715 	    "            timeout 0x%llx, evp_ptr 0x%p, num_ev %d\n",
1716 	    evd_ptr, evp_msg.evp_evd_hkey, evp_msg.evp_threshold,
1717 	    timeout, evp_ptr, evp_msg.evp_num_ev);
1718 
1719 	/*
1720 	 * Poll the EVD and if there are no events then we wait in
1721 	 * the kernel.
1722 	 */
1723 	retval = ioctl(ia_fd, DAPL_EVENT_POLL, &evp_msg);
1724 	if (retval != 0) {
1725 		dapl_dbg_log(DAPL_DBG_TYPE_EVD,
1726 		    "event_poll: evd 0x%p, retval %d err: %s\n",
1727 		    evd_ptr, retval, strerror(errno));
1728 		*num_events = evp_msg.evp_num_polled;
1729 		return (dapls_convert_error(errno, retval));
1730 	}
1731 
1732 	dapl_dbg_log(DAPL_DBG_TYPE_EVD,
1733 	    "dapls_ib_event_poll: evd %p nevents %d\n", evd_ptr,
1734 	    evp_msg.evp_num_polled);
1735 
1736 	*num_events = evp_msg.evp_num_polled;
1737 
1738 	return (DAT_SUCCESS);
1739 }
1740 
1741 DAT_RETURN
1742 dapls_ib_event_wakeup(
1743 	IN DAPL_EVD		*evd_ptr)
1744 {
1745 	dapl_event_wakeup_t	evw_msg;
1746 	int			ia_fd;
1747 	int			retval;
1748 
1749 	ia_fd = evd_ptr->header.owner_ia->hca_ptr->ib_hca_handle->ia_fd;
1750 
1751 	evw_msg.evw_hkey = evd_ptr->ib_cq_handle->evd_hkey;
1752 
1753 	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
1754 	    "event_wakeup: evd 0x%p, hkey 0x%llx\n",
1755 	    evd_ptr, evw_msg.evw_hkey);
1756 
1757 	/*
1758 	 * Wakeup any thread waiting in the kernel on this EVD
1759 	 */
1760 	retval = ioctl(ia_fd, DAPL_EVENT_WAKEUP, &evw_msg);
1761 	if (retval != 0) {
1762 		dapl_dbg_log(DAPL_DBG_TYPE_EVD,
1763 		    "event_wakeup: evd 0x%p, retval %d err: %s\n",
1764 		    evd_ptr, retval, strerror(errno));
1765 		return (dapls_convert_error(errno, retval));
1766 	}
1767 
1768 	return (DAT_SUCCESS);
1769 }
1770 
/*
 * dapls_ib_cq_peek is used by dapl_cno_wait().  After the CQ has been
 * inspected we arm the CQ if it was empty.
 */
void dapls_ib_cq_peek(
	IN DAPL_EVD	*evd_ptr,
	OUT int		*num_cqe)
{
	DAPL_IA		*ia_ptr;

	*num_cqe = 0;
	if (evd_ptr->evd_flags & (DAT_EVD_DTO_FLAG | DAT_EVD_RMR_BIND_FLAG)) {
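		/*
		 * DAPL_PEEK presumably dispatches to the HCA-specific
		 * CQ peek routine, which reports the number of pending
		 * completions without consuming them.
		 */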
		DAPL_PEEK(evd_ptr)(evd_ptr->ib_cq_handle, num_cqe);
		/* No events found in the CQ; arm it now */
		if (*num_cqe == 0) {
			ia_ptr = evd_ptr->header.owner_ia;
			(void) dapls_set_cq_notify(ia_ptr, evd_ptr);
			dapl_dbg_log(DAPL_DBG_TYPE_EVD,
			    "dapls_ib_cq_peek: set_cq_notify\n");
		}
	}
}

/*
 * Modifies the CNO associated with an EVD
 */
DAT_RETURN dapls_ib_modify_cno(
	IN DAPL_EVD	*evd_ptr,
	IN DAPL_CNO	*cno_ptr)
{
	dapl_evd_modify_cno_t	evmc_msg;
	int			ia_fd;
	int			retval;

	ia_fd = evd_ptr->header.owner_ia->hca_ptr->ib_hca_handle->ia_fd;

	evmc_msg.evmc_hkey = evd_ptr->ib_cq_handle->evd_hkey;

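	/*
	 * A NULL cno_ptr clears the association; an evmc_cno_hkey of 0
	 * is presumably taken by the kernel driver to mean "no CNO".
	 */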
	if (cno_ptr) {
		evmc_msg.evmc_cno_hkey = (uint64_t)cno_ptr->ib_cno_handle;
	} else {
		evmc_msg.evmc_cno_hkey = 0;
	}

	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
	    "modify_cno: evd 0x%p, hkey 0x%llx, cno 0x%p, cno_hkey 0x%llx\n",
	    evd_ptr, evmc_msg.evmc_hkey, cno_ptr, evmc_msg.evmc_cno_hkey);

	/*
	 * modify CNO associated with the EVD
	 */
	retval = ioctl(ia_fd, DAPL_EVD_MODIFY_CNO, &evmc_msg);
	if (retval != 0) {
		dapl_dbg_log(DAPL_DBG_TYPE_EVD,
		    "modify_cno: evd 0x%p, cno %p retval %d err: %s\n",
		    evd_ptr, cno_ptr, retval, strerror(errno));
		return (dapls_convert_error(errno, retval));
	}

	return (DAT_SUCCESS);
}

DAT_RETURN
dapls_ib_cno_wait(
	IN DAPL_CNO	*cno_ptr,
	IN DAT_TIMEOUT	timeout,
	IN DAPL_EVD	**evd_ptr_p)
{
	dapl_cno_wait_t		args;
	int			retval;

	args.cnw_hkey = (uint64_t)cno_ptr->ib_cno_handle;
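	/*
	 * DAT_TIMEOUT is a 32-bit value; the mask below guards against
	 * sign-extension when it is cast to 64 bits, so that only
	 * DAT_TIMEOUT_INFINITE produces an unbounded kernel wait.
	 */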
	if (timeout == DAT_TIMEOUT_INFINITE) {
		args.cnw_timeout = UINT64_MAX;
	} else {
		args.cnw_timeout = (uint64_t)timeout & 0x00000000ffffffff;
	}

	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
	    "cno_wait: cno 0x%p, hkey 0x%016llx, timeout 0x%016llx\n",
	    cno_ptr, args.cnw_hkey, args.cnw_timeout);

	retval = ioctl(cno_ptr->header.owner_ia->hca_ptr->
	    ib_hca_handle->ia_fd, DAPL_CNO_WAIT, &args);

	if (retval != 0) {
		*evd_ptr_p = (DAPL_EVD *)NULL;
		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
		    "cno_wait: cno 0x%p ioctl err: %s\n",
		    cno_ptr, strerror(errno));
		return (dapls_convert_error(errno, retval));
	}

	*evd_ptr_p = (DAPL_EVD *)(uintptr_t)args.cnw_evd_cookie;
	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
	    "cno_wait: woken up, cno 0x%p, evd 0x%p\n\n",
	    cno_ptr, *evd_ptr_p);

	return (DAT_SUCCESS);
}

DAT_RETURN
dapls_ib_cno_alloc(
	IN DAPL_IA	*ia_ptr,
	IN DAPL_CNO	*cno_ptr)
{
	dapl_cno_alloc_t	args;
	int			retval;

	if (cno_ptr->cno_wait_agent.instance_data != NULL ||
	    cno_ptr->cno_wait_agent.proxy_agent_func != NULL) {
		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
		    "cno_alloc: cno 0x%p, wait_agent != NULL\n", cno_ptr);
		return (DAT_NOT_IMPLEMENTED);
	}

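	/*
	 * args is used here purely as an output parameter: on success
	 * the kernel returns the new CNO's hkey in args.cno_hkey.
	 */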
	retval = ioctl(ia_ptr->hca_ptr->ib_hca_handle->ia_fd,
	    DAPL_CNO_ALLOC, &args);
	if (retval != 0) {
		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
		    "cno_alloc: cno 0x%p ioctl err: %s\n",
		    cno_ptr, strerror(errno));
		return (dapls_convert_error(errno, retval));
	}

	cno_ptr->ib_cno_handle = (ib_cno_handle_t)args.cno_hkey;
	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
	    "cno_alloc: cno 0x%p allocated, ia_ptr 0x%p, hkey 0x%016llx\n",
	    cno_ptr, ia_ptr, args.cno_hkey);

	return (DAT_SUCCESS);
}

DAT_RETURN
dapls_ib_cno_free(
	IN DAPL_CNO	*cno_ptr)
{
	dapl_cno_free_t		args;
	int			retval;

	args.cnf_hkey = (uint64_t)cno_ptr->ib_cno_handle;
	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
	    "cno_free: cno 0x%p, hkey 0x%016llx\n",
	    cno_ptr, args.cnf_hkey);

	retval = ioctl(cno_ptr->header.owner_ia->hca_ptr->
	    ib_hca_handle->ia_fd, DAPL_CNO_FREE, &args);

	if (retval != 0) {
		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
		    "cno_free: cno 0x%p ioctl err: %s\n",
		    cno_ptr, strerror(errno));
		return (dapls_convert_error(errno, retval));
	}

	dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
	    "cno_free: cno 0x%p freed\n", cno_ptr);

	return (DAT_SUCCESS);
}

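/*
 * dapls_convert_error
 *
 * Map an ioctl failure to a DAT_RETURN.  By the convention at the call
 * sites above, a negative retval indicates an OS-level failure and
 * errnum (the saved errno) is mapped; a positive retval is treated as
 * an IBTF status code.  A sketch of the typical call site (the command
 * name is a placeholder):
 *
 *	retval = ioctl(ia_fd, DAPL_SOME_CMD, &msg);
 *	if (retval != 0)
 *		return (dapls_convert_error(errno, retval));
 */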
DAT_RETURN
dapls_convert_error(int errnum, int retval)
{
	if (retval < 0) {
		switch (errnum) {
		case EINVAL:
			return (DAT_INVALID_PARAMETER);
		case ENOMEM:
			return (DAT_INSUFFICIENT_RESOURCES);
		case ETIME:
			return (DAT_TIMEOUT_EXPIRED);
		case EINTR:
			return (DAT_INTERRUPTED_CALL);
		case EFAULT:
			return (DAT_INTERNAL_ERROR);
		default:
			return (DAT_INTERNAL_ERROR);
		}
	} else {
		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
		    "ERROR: got IBTF error %d\n", retval);
		switch (retval) {
		case IBT_SERVICE_RECORDS_NOT_FOUND:
			/*
			 * Connecting to a non-existent conn qual gets
			 * us here
			 */
			return (DAT_ERROR(DAT_INVALID_PARAMETER,
			    DAT_INVALID_ADDRESS_UNREACHABLE));
		case IBT_INSUFF_RESOURCE:
		case IBT_INSUFF_KERNEL_RESOURCE:
			return (DAT_ERROR(DAT_INSUFFICIENT_RESOURCES, 0));
		case IBT_AR_NOT_REGISTERED:
			/*
			 * forward ipaddr lookup failed
			 */
			return (DAT_ERROR(DAT_INVALID_ADDRESS, 0));
		default:
			return (DAT_INTERNAL_ERROR);
		}
	}
}

typedef struct dapls_ib_dbp_page_s {
	uint32_t			*dbp_page_addr;
	uint64_t			dbp_mapoffset;
	struct dapls_ib_dbp_page_s	*next;
	int				fd;
} dapls_ib_dbp_page_t;

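/*
 * Process-wide cache of mapped doorbell pages, keyed by (fd, mapoffset)
 * and protected by dapls_ib_dbp_lock (defined elsewhere).  Pages are
 * mapped once and then kept for the life of the process.
 */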
dapls_ib_dbp_page_t	*dapls_ib_pagelist = NULL;

/* Function that returns a pointer to the specified doorbell entry */
uint32_t *dapls_ib_get_dbp(uint64_t maplen, int fd, uint64_t mapoffset,
    uint32_t offset)
{
	dapls_ib_dbp_page_t	*new_page;
	dapls_ib_dbp_page_t	*cur_page;

	dapl_os_lock(&dapls_ib_dbp_lock);
	/* Check to see if a page is already mapped for this entry */
	for (cur_page = dapls_ib_pagelist; cur_page != NULL;
	    cur_page = cur_page->next)
		if (cur_page->dbp_mapoffset == mapoffset &&
		    cur_page->fd == fd) {
			dapl_os_unlock(&dapls_ib_dbp_lock);
			return ((uint32_t *)
			    (offset + (uintptr_t)cur_page->dbp_page_addr));
		}

	/* If not, map a new page and prepend it to the pagelist */
	new_page = malloc(sizeof (dapls_ib_dbp_page_t));
	if (new_page == NULL) {
		dapl_os_unlock(&dapls_ib_dbp_lock);
		return (MAP_FAILED);
	}
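	/*
	 * Map the doorbell page into user space.  mmap64() is used so
	 * that large map offsets still work when this library is built
	 * as a 32-bit object.
	 */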
	new_page->dbp_page_addr = (uint32_t *)(void *)mmap64((void *)0,
	    maplen, (PROT_READ | PROT_WRITE), MAP_SHARED, fd, mapoffset);
	if (new_page->dbp_page_addr == MAP_FAILED) {
		free(new_page);
		dapl_os_unlock(&dapls_ib_dbp_lock);
		return (MAP_FAILED);
	}
	new_page->next = dapls_ib_pagelist;
	new_page->dbp_mapoffset = mapoffset;
	new_page->fd = fd;
	dapls_ib_pagelist = new_page;
	dapl_os_unlock(&dapls_ib_dbp_lock);
	return ((uint32_t *)(offset + (uintptr_t)new_page->dbp_page_addr));
}
2023