/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#include "dapl.h"
#include "dapl_adapter_util.h"
#include "dapl_evd_util.h"
#include "dapl_cr_util.h"
#include "dapl_lmr_util.h"
#include "dapl_rmr_util.h"
#include "dapl_cookie.h"
#include "dapl_ring_buffer_util.h"
#include "dapl_vendor.h"
#include "dapl_tavor_ibtf_impl.h"

/* Function prototypes */
static DAT_RETURN dapli_ib_cq_resize_internal(DAPL_EVD *, DAT_COUNT);

/*
 * The following declarations/functions are to be used by the base
 * library; placeholder for now.
 */

int g_loopback_connection = 0;

/*
 * dapl_ib_cq_alloc
 *
 * Alloc a CQ
 *
 * Input:
 *     ia_handle        IA handle
 *     evd_ptr          pointer to EVD struct
 *     cno_ptr          pointer to CNO struct
 *     cqlen            minimum CQ length requested
 *
 * Output:
 *     cqlen            actual CQ length allocated
 *
 * Returns:
 *     DAT_SUCCESS
 *     DAT_INSUFFICIENT_RESOURCES
 *
 */
DAT_RETURN
dapls_ib_cq_alloc(
    IN DAPL_IA *ia_ptr,
    IN DAPL_EVD *evd_ptr,
    IN DAPL_CNO *cno_ptr,
    IN DAT_COUNT *cqlen)
{
    dapl_evd_create_t create_msg;
    dapl_evd_free_t free_msg;
    ib_cq_handle_t cq_handle = IB_INVALID_HANDLE;
    int ia_fd;
    int hca_fd;
    int retval;
    mlnx_umap_cq_data_out_t *mcq;

    /*
     * The cq handle is created even for non-cq type events,
     * since the cq handle is where the evd fd gets stored.
     */
    cq_handle = (ib_cq_handle_t)
        dapl_os_alloc(sizeof (struct dapls_ib_cq_handle));
    if (cq_handle == NULL) {
        dapl_dbg_log(DAPL_DBG_TYPE_ERR,
            "cq_alloc: evd_ptr 0x%p, cq_handle == NULL\n",
            evd_ptr);
        return (DAT_INSUFFICIENT_RESOURCES);
    }

    (void) dapl_os_memzero(cq_handle, sizeof (*cq_handle));

    /* get the hca information from ia_ptr */
    (void) dapl_os_memzero(&create_msg, sizeof (create_msg));
    create_msg.evd_flags = evd_ptr->evd_flags;
    create_msg.evd_cookie = (uintptr_t)evd_ptr;
    if (cno_ptr != NULL) {
        create_msg.evd_cno_hkey =
            (uint64_t)cno_ptr->ib_cno_handle;
    }
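
    /*
     * Only EVDs with the DTO or RMR-bind flag are backed by a hardware
     * CQ, so a CQ size is passed to the driver only for those; other
     * EVD types still get a cq_handle, since that is where the evd fd
     * is stored (see the comment at the top of this function).
     */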
    if (evd_ptr->evd_flags & (DAT_EVD_DTO_FLAG | DAT_EVD_RMR_BIND_FLAG)) {
        create_msg.evd_cq_size = (uint32_t)*cqlen;
    }

    dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
        "cq_alloc: evd 0x%p, flags 0x%x, cookie 0x%llx, hkey 0x%llx,\n"
        "    cno_hkey 0x%llx, cq_size %d\n", evd_ptr,
        create_msg.evd_flags, create_msg.evd_cookie, create_msg.evd_hkey,
        create_msg.evd_cno_hkey, create_msg.evd_cq_size);

    ia_fd = ia_ptr->hca_ptr->ib_hca_handle->ia_fd;
    hca_fd = ia_ptr->hca_ptr->ib_hca_handle->hca_fd;
    mcq = (mlnx_umap_cq_data_out_t *)create_msg.evd_cq_data_out;

    /* The next line is only needed for backward compatibility */
    mcq->mcq_rev = MLNX_UMAP_IF_VERSION;

    /* call into driver to allocate cq */
    retval = ioctl(ia_fd, DAPL_EVD_CREATE, &create_msg);
    if (retval != 0 || mcq->mcq_rev != MLNX_UMAP_IF_VERSION) {
        dapl_dbg_log(DAPL_DBG_TYPE_ERR,
            "cq_alloc: evd_create failed, %s\n", strerror(errno));
        dapl_os_free(cq_handle, sizeof (struct dapls_ib_cq_handle));
        return (dapls_convert_error(errno, retval));
    }
    (void) dapl_os_memzero(cq_handle, sizeof (struct dapls_ib_cq_handle));
    dapl_os_lock_init(&cq_handle->cq_wrid_wqhdr_lock);

    dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
        "cq_alloc: created, evd 0x%p, hkey 0x%016llx\n\n", evd_ptr,
        create_msg.evd_hkey);

    cq_handle->evd_hkey = create_msg.evd_hkey;

    if (evd_ptr->evd_flags & (DAT_EVD_DTO_FLAG | DAT_EVD_RMR_BIND_FLAG)) {

        /*
         * Allocate a hash table for wrid management; the key is
         * a combination of QP number and SEND/RECV type. This is
         * required only for evds which have a CQ mapped to them.
         */
        if (DAT_SUCCESS != dapls_hash_create(DAPL_MED_HASHSIZE,
            DAT_FALSE, &cq_handle->cq_wrid_wqhdr_list)) {
            dapl_dbg_log(DAPL_DBG_TYPE_ERR,
                "cq_alloc: hash_create failed\n");
            dapl_os_free(cq_handle,
                sizeof (struct dapls_ib_cq_handle));
            return (DAT_INSUFFICIENT_RESOURCES |
                DAT_RESOURCE_MEMORY);
        }

        dapl_os_assert(create_msg.evd_cq_real_size > 0);

        /* In the case of Arbel or Hermon */
        if (mcq->mcq_polldbr_mapoffset != 0 ||
            mcq->mcq_polldbr_maplen != 0)
            cq_handle->cq_poll_dbp = dapls_ib_get_dbp(
                mcq->mcq_polldbr_maplen, hca_fd,
                mcq->mcq_polldbr_mapoffset,
                mcq->mcq_polldbr_offset);
        if (mcq->mcq_armdbr_mapoffset != 0 ||
            mcq->mcq_armdbr_maplen != 0)
            cq_handle->cq_arm_dbp = dapls_ib_get_dbp(
                mcq->mcq_armdbr_maplen, hca_fd,
                mcq->mcq_armdbr_mapoffset,
                mcq->mcq_armdbr_offset);

        cq_handle->cq_addr = (tavor_hw_cqe_t *)(void *)mmap64(
            (void *)0, mcq->mcq_maplen,
            (PROT_READ | PROT_WRITE), MAP_SHARED, hca_fd,
            mcq->mcq_mapoffset);

        if (cq_handle->cq_addr == MAP_FAILED ||
            cq_handle->cq_poll_dbp == MAP_FAILED ||
            cq_handle->cq_arm_dbp == MAP_FAILED) {
            free_msg.evf_hkey = cq_handle->evd_hkey;
            retval = ioctl(ia_fd, DAPL_EVD_FREE, &free_msg);
            if (retval != 0) {
                dapl_dbg_log(DAPL_DBG_TYPE_ERR,
                    "cq_alloc: EVD_FREE err:%s\n",
                    strerror(errno));
            }

            dapl_dbg_log(DAPL_DBG_TYPE_ERR,
                "cq_alloc: DAPL_CQ_ALLOC failed\n");
            /* free the hash table we created */
            (void) dapls_hash_free(cq_handle->cq_wrid_wqhdr_list);
            dapl_os_free(cq_handle,
                sizeof (struct dapls_ib_cq_handle));
            return (DAT_INSUFFICIENT_RESOURCES);
        }

        cq_handle->cq_map_offset = mcq->mcq_mapoffset;
        cq_handle->cq_map_len = mcq->mcq_maplen;
        cq_handle->cq_num = mcq->mcq_cqnum;
        /*
         * cq_size is the actual depth of the CQ, which is 1 more
         * than what ibt_alloc_cq reports; the application can only
         * use (cq_size - 1) entries.
         */
        cq_handle->cq_size = create_msg.evd_cq_real_size + 1;
        cq_handle->cq_cqesz = mcq->mcq_cqesz;
        cq_handle->cq_iauar = ia_ptr->hca_ptr->ib_hca_handle->ia_uar;
        *cqlen = create_msg.evd_cq_real_size;
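
        /*
         * Worked example (actual rounding is driver/HW dependent):
         * with power-of-2 CQ sizing and one non-usable slot (see
         * dapli_ib_cq_resize_internal below), a request of
         * *cqlen == 100 would typically yield a 128-deep CQ: the
         * driver reports evd_cq_real_size == 127, cq_size becomes
         * 128, and 127 is returned to the caller in *cqlen.
         */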

        DAPL_INIT_CQ(ia_ptr)(cq_handle);
    }

    evd_ptr->ib_cq_handle = cq_handle;
    return (DAT_SUCCESS);
}

/*
 * dapl_ib_cq_resize
 *
 * Resize a CQ
 *
 * Input:
 *     evd_ptr          pointer to EVD struct
 *     cqlen            new length of the cq
 * Output:
 *     none
 *
 * Returns:
 *     DAT_SUCCESS
 *     DAT_INVALID_HANDLE
 *     DAT_INTERNAL_ERROR
 *     DAT_INSUFFICIENT_RESOURCES
 *
 */
DAT_RETURN
dapls_ib_cq_resize(
    IN DAPL_EVD *evd_ptr,
    IN DAT_COUNT cqlen)
{
    ib_cq_handle_t cq_handle;
    DAT_RETURN dat_status;

    dat_status = dapli_ib_cq_resize_internal(evd_ptr, cqlen);
    if (DAT_INSUFFICIENT_RESOURCES == dat_status) {
        cq_handle = evd_ptr->ib_cq_handle;
        /* attempt to resize back to the current size */
        dat_status = dapli_ib_cq_resize_internal(evd_ptr,
            cq_handle->cq_size - 1);
        if (DAT_SUCCESS != dat_status) {
            /*
             * XXX this is catastrophic; we need to post an event
             * to the async evd
             */
            return (DAT_INTERNAL_ERROR);
        }
    }

    return (dat_status);
}
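
/*
 * Illustrative usage sketch (hypothetical, not part of this library):
 * a failed grow returns DAT_INSUFFICIENT_RESOURCES after the CQ has
 * been restored to its original size, so a caller that wants "as large
 * as possible" could simply retry with smaller requests.
 */
#if 0
static DAT_RETURN
try_grow_cq(DAPL_EVD *evd_ptr, DAT_COUNT want)
{
    DAT_RETURN status;

    /* halve the request until the resize stops failing on resources */
    for (; want > 0; want /= 2) {
        status = dapls_ib_cq_resize(evd_ptr, want);
        if (status != DAT_INSUFFICIENT_RESOURCES)
            return (status);
    }
    return (DAT_INSUFFICIENT_RESOURCES);
}
#endif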

/*
 * dapli_ib_cq_resize_internal
 *
 * An internal routine to resize a CQ.
 *
 * Input:
 *     evd_ptr          pointer to EVD struct
 *     cqlen            new length of the cq
 * Output:
 *     none
 *
 * Returns:
 *     DAT_SUCCESS
 *     DAT_INVALID_HANDLE
 *     DAT_INSUFFICIENT_RESOURCES
 *
 */
static DAT_RETURN
dapli_ib_cq_resize_internal(
    IN DAPL_EVD *evd_ptr,
    IN DAT_COUNT cqlen)
{
    ib_cq_handle_t cq_handle;
    dapl_cq_resize_t resize_msg;
    int ia_fd;
    int hca_fd;
    int retval;
    mlnx_umap_cq_data_out_t *mcq;
    DAPL_HCA *hca_ptr;
    dapls_hw_cqe_t cq_addr;

    dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
        "dapls_ib_cq_resize: evd 0x%p cq 0x%p "
        "evd_hkey 0x%016llx cqlen %d\n",
        evd_ptr, (void *)evd_ptr->ib_cq_handle,
        evd_ptr->ib_cq_handle->evd_hkey, cqlen);

    cq_handle = evd_ptr->ib_cq_handle;
    /*
     * Since CQs are created in powers of 2 with one non-usable slot,
     * it's possible that the previously allocated CQ has sufficient
     * entries. If the current cq is big enough and it is mapped in,
     * we are done.
     */
    if ((cqlen < cq_handle->cq_size) && (cq_handle->cq_addr)) {
        return (DAT_SUCCESS);
    }

    hca_ptr = evd_ptr->header.owner_ia->hca_ptr;

    /* unmap the CQ before resizing it */
    if (hca_ptr->hermon_resize_cq == 0) {
        if ((cq_handle->cq_addr) &&
            (munmap((char *)cq_handle->cq_addr,
            cq_handle->cq_map_len) < 0)) {
            dapl_dbg_log(DAPL_DBG_TYPE_ERR,
                "cq_resize: munmap(%p:0x%llx) failed(%d)\n",
                cq_handle->cq_addr, cq_handle->cq_map_len, errno);
            return (DAT_INVALID_HANDLE);
        }
        /* cq_addr is unmapped and no longer valid */
        cq_handle->cq_addr = NULL;
    }

    ia_fd = hca_ptr->ib_hca_handle->ia_fd;
    hca_fd = hca_ptr->ib_hca_handle->hca_fd;

    (void) dapl_os_memzero(&resize_msg, sizeof (resize_msg));
    mcq = (mlnx_umap_cq_data_out_t *)resize_msg.cqr_cq_data_out;
    resize_msg.cqr_evd_hkey = cq_handle->evd_hkey;
    resize_msg.cqr_cq_new_size = cqlen;

    /* The next line is only needed for backward compatibility */
    mcq->mcq_rev = MLNX_UMAP_IF_VERSION;
    retval = ioctl(ia_fd, DAPL_CQ_RESIZE, &resize_msg);
    if (retval != 0 || mcq->mcq_rev != MLNX_UMAP_IF_VERSION) {
        dapl_dbg_log(DAPL_DBG_TYPE_ERR,
            "dapls_ib_cq_resize: evd 0x%p, err: %s\n",
            evd_ptr, strerror(errno));
        if (errno == EINVAL) { /* Couldn't find evd for this cq */
            return (DAT_INVALID_HANDLE);
        } else { /* Need to retry resize with a smaller qlen */
            return (DAT_INSUFFICIENT_RESOURCES);
        }
    }

    dapl_os_assert(cq_handle->cq_num == mcq->mcq_cqnum);

    /* In the case of Arbel or Hermon */
    if (mcq->mcq_polldbr_mapoffset != 0 ||
        mcq->mcq_polldbr_maplen != 0)
        cq_handle->cq_poll_dbp = dapls_ib_get_dbp(
            mcq->mcq_polldbr_maplen, hca_fd,
            mcq->mcq_polldbr_mapoffset,
            mcq->mcq_polldbr_offset);
    if (mcq->mcq_armdbr_mapoffset != 0 ||
        mcq->mcq_armdbr_maplen != 0)
        cq_handle->cq_arm_dbp = dapls_ib_get_dbp(
            mcq->mcq_armdbr_maplen, hca_fd,
            mcq->mcq_armdbr_mapoffset,
            mcq->mcq_armdbr_offset);

    cq_addr = (tavor_hw_cqe_t *)(void *)mmap64((void *)0,
        mcq->mcq_maplen, (PROT_READ | PROT_WRITE),
        MAP_SHARED, hca_fd, mcq->mcq_mapoffset);

    if (cq_addr == MAP_FAILED ||
        cq_handle->cq_poll_dbp == MAP_FAILED ||
        cq_handle->cq_arm_dbp == MAP_FAILED) {
        if (hca_ptr->hermon_resize_cq == 0)
            cq_handle->cq_addr = NULL;
        dapl_dbg_log(DAPL_DBG_TYPE_ERR,
            "cq_resize: mmap failed(%d)\n", errno);
        /* Need to retry resize with a smaller qlen */
        return (DAT_INSUFFICIENT_RESOURCES);
    }

    if (hca_ptr->hermon_resize_cq == 0) {
        cq_handle->cq_addr = cq_addr;
        cq_handle->cq_map_offset = mcq->mcq_mapoffset;
        cq_handle->cq_map_len = mcq->mcq_maplen;
        cq_handle->cq_size = resize_msg.cqr_cq_real_size + 1;
        cq_handle->cq_cqesz = mcq->mcq_cqesz;
        /*
         * upon resize the old events are moved to the start of the CQ,
         * hence we need to reset the consumer index too
         */
        cq_handle->cq_consindx = 0;
    } else { /* Hermon */
        cq_handle->cq_resize_addr = cq_addr;
        cq_handle->cq_resize_map_offset = mcq->mcq_mapoffset;
        cq_handle->cq_resize_map_len = mcq->mcq_maplen;
        cq_handle->cq_resize_size = resize_msg.cqr_cq_real_size + 1;
        cq_handle->cq_resize_cqesz = mcq->mcq_cqesz;
    }

    return (DAT_SUCCESS);
}

/*
 * dapl_ib_cq_free
 *
 * Free a CQ
 *
 * Input:
 *     ia_handle        IA handle
 *     evd_ptr          pointer to EVD struct
 * Output:
 *     none
 *
 * Returns:
 *     DAT_SUCCESS
 *     DAT_INVALID_HANDLE
 *     DAT_INSUFFICIENT_RESOURCES
 *
 */
DAT_RETURN
dapls_ib_cq_free(
    IN DAPL_IA *ia_ptr,
    IN DAPL_EVD *evd_ptr)
{
    dapl_evd_free_t args;
    int retval;
    ib_cq_handle_t cq_handle = evd_ptr->ib_cq_handle;

    dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
        "dapls_ib_cq_free: evd 0x%p cq 0x%p hkey %016llx\n", evd_ptr,
        (void *)evd_ptr->ib_cq_handle, evd_ptr->ib_cq_handle->evd_hkey);

    /* If the cq was mmap'd, unmap it before freeing it */
    if ((cq_handle->cq_addr) &&
        (munmap((char *)cq_handle->cq_addr, cq_handle->cq_map_len) < 0)) {
        dapl_dbg_log(DAPL_DBG_TYPE_ERR,
            "cq_free: munmap(%p:0x%llx) failed\n", cq_handle->cq_addr,
            cq_handle->cq_map_len);
    }

    args.evf_hkey = cq_handle->evd_hkey;

    retval = ioctl(ia_ptr->hca_ptr->ib_hca_handle->ia_fd,
        DAPL_EVD_FREE, &args);
    if (retval != 0) {
        dapl_dbg_log(DAPL_DBG_TYPE_ERR,
            "dapls_ib_cq_free: evd 0x%p, err: %s\n",
            evd_ptr, strerror(errno));
        return (dapls_convert_error(errno, retval));
    }

    dapl_os_free(cq_handle, sizeof (struct dapls_ib_cq_handle));
    evd_ptr->ib_cq_handle = NULL;

    return (DAT_SUCCESS);
}

/*
 * dapl_set_cq_notify
 *
 * Set up CQ completion notifications
 *
 * Input:
 *     ia_handle        IA handle
 *     evd_ptr          pointer to EVD struct
 *
 * Output:
 *     none
 *
 * Returns:
 *     DAT_SUCCESS
 *     DAT_INVALID_HANDLE
 *     DAT_INSUFFICIENT_RESOURCES
 *
 */
/* ARGSUSED */
DAT_RETURN
dapls_set_cq_notify(
    IN DAPL_IA *ia_ptr,
    IN DAPL_EVD *evd_ptr)
{
    int retval;
    ib_cq_handle_t cq_handle = evd_ptr->ib_cq_handle;

    dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
        "dapls_ib_cq_notify: evd 0x%p cq 0x%p\n", evd_ptr,
        (void *)cq_handle);

    retval = DAPL_NOTIFY(evd_ptr)(cq_handle, IB_NOTIFY_ON_NEXT_COMP, 0);

    return (retval);
}

/* ARGSUSED */
DAT_RETURN
dapls_set_cqN_notify(
    IN DAPL_IA *ia_ptr,
    IN DAPL_EVD *evd_ptr,
    IN uint32_t num_events)
{
    int retval;
    ib_cq_handle_t cq_handle = evd_ptr->ib_cq_handle;

    dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
        "dapls_set_cqN_notify: evd %p cq %p num_events %d\n", evd_ptr,
        (void *)cq_handle, num_events);

    retval = DAPL_NOTIFY(evd_ptr)(cq_handle, IB_NOTIFY_ON_NEXT_NCOMP,
        num_events);

    return (retval);
}

/*
 * dapls_ib_cqd_create
 *
 * Set up CQ notification event thread
 *
 * Input:
 *     ia_handle        IA handle
 *
 * Output:
 *     none
 *
 * Returns:
 *     DAT_SUCCESS
 *     DAT_INVALID_HANDLE
 *     DAT_INSUFFICIENT_RESOURCES
 *
 */
/* ARGSUSED */
DAT_RETURN
dapls_ib_cqd_create(
    IN DAPL_HCA *hca_ptr)
{
    return (DAT_SUCCESS);
}

/*
 * dapl_cqd_destroy
 *
 * Destroy CQ notification event thread
 *
 * Input:
 *     ia_handle        IA handle
 *
 * Output:
 *     none
 *
 * Returns:
 *     DAT_SUCCESS
 *     DAT_INVALID_HANDLE
 *     DAT_INSUFFICIENT_RESOURCES
 *
 */
DAT_RETURN
dapls_ib_cqd_destroy(
    IN DAPL_HCA *hca_ptr)
{
    dapl_evd_free_t args;
    ib_cq_handle_t cq_handle;
    int retval;

    if (hca_ptr->null_ib_cq_handle != IB_INVALID_HANDLE) {
        /* free up the dummy cq */
        cq_handle = hca_ptr->null_ib_cq_handle;
        dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
            "dapls_ib_cqd_destroy: cq %p\n", (void *)cq_handle);

        args.evf_hkey = cq_handle->evd_hkey;

        retval = ioctl(hca_ptr->ib_hca_handle->ia_fd,
            DAPL_EVD_FREE, &args);
        if (retval != 0) {
            dapl_dbg_log(DAPL_DBG_TYPE_ERR,
                "dapls_ib_cqd_destroy: EVD_FREE err:%d errno:%d\n",
                retval, errno);
        }

        dapl_os_free(cq_handle, sizeof (struct dapls_ib_cq_handle));
        hca_ptr->null_ib_cq_handle = IB_INVALID_HANDLE;
    }

    return (DAT_SUCCESS);
}

/*
 * dapl_ib_pd_alloc
 *
 * Alloc a PD
 *
 * Input:
 *     ia_handle        IA handle
 *     PZ_ptr           pointer to PZ struct
 *
 * Output:
 *     none
 *
 * Returns:
 *     DAT_SUCCESS
 *     DAT_INSUFFICIENT_RESOURCES
 *
 */
DAT_RETURN
dapls_ib_pd_alloc(
    IN DAPL_IA *ia,
    IN DAPL_PZ *pz)
{
    struct dapls_ib_pd_handle *pd_p;
    dapl_pd_alloc_t args;
    int retval;

    pd_p = (struct dapls_ib_pd_handle *)dapl_os_alloc(sizeof (*pd_p));
    if (pd_p == NULL) {
        dapl_dbg_log(DAPL_DBG_TYPE_ERR,
            "pd_alloc: ia 0x%p, pz 0x%p, cannot allocate pd\n",
            ia, pz);
        return (DAT_INSUFFICIENT_RESOURCES);
    }
    retval = ioctl(ia->hca_ptr->ib_hca_handle->ia_fd,
        DAPL_PD_ALLOC, &args);
    if (retval != 0) {
        dapl_dbg_log(DAPL_DBG_TYPE_ERR,
            "pd_alloc: ia 0x%p, pz 0x%p, cannot create pd, "
            "err: %s\n", ia, pz, strerror(errno));
        dapl_os_free(pd_p, sizeof (*pd_p));
        return (dapls_convert_error(errno, retval));
    }

    pd_p->pd_hkey = args.pda_hkey;
    pz->pd_handle = pd_p;
    dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
        "pd_alloc: successful, ia 0x%p, pz 0x%p, hkey %016llx\n",
        ia, pz, args.pda_hkey);

    return (DAT_SUCCESS);
}

/*
 * dapl_ib_pd_free
 *
 * Free a PD
 *
 * Input:
 *     ia_handle        IA handle
 *     PZ_ptr           pointer to PZ struct
 *
 * Output:
 *     none
 *
 * Returns:
 *     DAT_SUCCESS
 *     DAT_INSUFFICIENT_RESOURCES
 *
 */
DAT_RETURN
dapls_ib_pd_free(
    IN DAPL_PZ *pz)
{
    struct dapls_ib_pd_handle *pd_p;
    dapl_pd_free_t args;
    int retval;

    pd_p = (struct dapls_ib_pd_handle *)pz->pd_handle;
    args.pdf_hkey = pd_p->pd_hkey;

    retval = ioctl(pz->header.owner_ia->hca_ptr->ib_hca_handle->ia_fd,
        DAPL_PD_FREE, &args);
    if (retval != 0) {
        dapl_dbg_log(DAPL_DBG_TYPE_ERR,
            "pd_free: pz 0x%p, cannot free pd\n", pz);
        return (dapls_convert_error(errno, retval));
    }
    dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
        "pd_free: pz 0x%p, hkey %016llx, freed\n", pz, pd_p->pd_hkey);
    dapl_os_free((void *)pd_p, sizeof (*pd_p));
    pz->pd_handle = NULL;
    return (DAT_SUCCESS);
}

/*
 * dapl_ib_mr_register
 *
 * Register a virtual memory region
 *
 * Input:
 *     ia_handle        IA handle
 *     lmr              pointer to dapl_lmr struct
 *     virt_addr        virtual address of beginning of mem region
 *     length           length of memory region
 *
 * Output:
 *     none
 *
 * Returns:
 *     DAT_SUCCESS
 *     DAT_INSUFFICIENT_RESOURCES
 *
 */
DAT_RETURN
dapls_ib_mr_register(
    IN DAPL_IA *ia,
    IN DAPL_LMR *lmr,
    IN DAT_PVOID virt_addr,
    IN DAT_VLEN length,
    IN DAT_MEM_PRIV_FLAGS privileges)
{
    dapl_mr_register_t reg_msg;
    ib_mr_handle_t mr_handle;
    DAPL_PZ *pz_handle;
    int ia_fd;
    int retval;

    ia_fd = ia->hca_ptr->ib_hca_handle->ia_fd;
    mr_handle = dapl_os_alloc(sizeof (struct dapls_ib_mr_handle));
    if (mr_handle == NULL) {
        dapl_dbg_log(DAPL_DBG_TYPE_ERR,
            "mr_register: lmr 0x%p, ia 0x%p, "
            "cannot alloc mr_handle\n", lmr, ia);
        return (DAT_INSUFFICIENT_RESOURCES);
    }
    pz_handle = ((DAPL_PZ *)lmr->param.pz_handle);
    if (pz_handle == NULL) {
        dapl_dbg_log(DAPL_DBG_TYPE_ERR,
            "mr_register: lmr 0x%p, ia 0x%p, "
            "pz_handle == NULL!\n", lmr, ia);
        dapl_os_free(mr_handle, sizeof (struct dapls_ib_mr_handle));
        return (DAT_INVALID_PARAMETER);
    }
    reg_msg.mr_pd_hkey = pz_handle->pd_handle->pd_hkey;
    reg_msg.mr_vaddr = (ib_vaddr_t)(uintptr_t)virt_addr;
    reg_msg.mr_len = (ib_memlen_t)length;
    reg_msg.mr_flags = (ibt_mr_flags_t)
        dapl_lmr_convert_privileges(privileges);
    reg_msg.mr_flags |= IBT_MR_ENABLE_WINDOW_BIND;

    dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
        "mr_register: lmr 0x%p, pd_hkey 0x%016llx, vaddr 0x%016llx, "
        "len %llu, flags 0x%x\n", lmr, reg_msg.mr_pd_hkey,
        reg_msg.mr_vaddr, reg_msg.mr_len, reg_msg.mr_flags);

    /* call into driver to allocate MR resource */
    retval = ioctl(ia_fd, DAPL_MR_REGISTER, &reg_msg);
    if (retval != 0) {
        dapl_dbg_log(DAPL_DBG_TYPE_ERR,
            "mr_register: lmr 0x%p, failed (%s)\n",
            lmr, strerror(errno));
        dapl_os_free(mr_handle, sizeof (struct dapls_ib_mr_handle));
        return (dapls_convert_error(errno, retval));
    }
    mr_handle->mr_hkey = reg_msg.mr_hkey;
    lmr->param.lmr_context = (DAT_LMR_CONTEXT)reg_msg.mr_lkey;
    lmr->param.rmr_context = (DAT_RMR_CONTEXT)reg_msg.mr_rkey;
    lmr->param.registered_address = reg_msg.mr_vaddr;
    lmr->param.registered_size = reg_msg.mr_len;
    lmr->mr_handle = mr_handle;

    dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
        "mr_register: successful, lmr 0x%p, mr_hkey 0x%016llx, "
        "lmr_ctx 0x%08x\n\n", lmr, reg_msg.mr_hkey,
        reg_msg.mr_lkey);
    return (DAT_SUCCESS);
}

/*
 * dapl_ib_mr_register_shared
 *
 * Register a shared virtual memory region
 *
 * Input:
 *     ia_handle        IA handle
 *     lmr              pointer to dapl_lmr struct
 *     virt_addr        virtual address of beginning of mem region
 *     cookie           shared memory identifier
 *     length           length of memory region
 *
 * Output:
 *     none
 *
 * Returns:
 *     DAT_SUCCESS
 *     DAT_INSUFFICIENT_RESOURCES
 *
 */
DAT_RETURN
dapls_ib_mr_register_shared(
    IN DAPL_IA *ia,
    IN DAPL_LMR *lmr,
    IN DAT_PVOID virt_addr,
    IN DAT_VLEN length,
    IN DAT_LMR_COOKIE cookie,
    IN DAT_MEM_PRIV_FLAGS privileges)
{
    dapl_mr_register_shared_t reg_msg;
    ib_mr_handle_t mr_handle;
    DAPL_PZ *pz_handle;
    int ia_fd, i;
    int retval;

    ia_fd = ia->hca_ptr->ib_hca_handle->ia_fd;
    mr_handle = dapl_os_alloc(sizeof (struct dapls_ib_mr_handle));
    if (mr_handle == NULL) {
        dapl_dbg_log(DAPL_DBG_TYPE_ERR,
            "mr_register_shared: lmr 0x%p, ia 0x%p, "
            "cannot alloc mr_handle\n", lmr, ia);
        return (DAT_INSUFFICIENT_RESOURCES);
    }
    pz_handle = ((DAPL_PZ *)lmr->param.pz_handle);
    if (pz_handle == NULL) {
        dapl_dbg_log(DAPL_DBG_TYPE_ERR,
            "mr_register_shared: lmr 0x%p, ia 0x%p, "
            "pz_handle == NULL!\n", lmr, ia);
        dapl_os_free(mr_handle, sizeof (struct dapls_ib_mr_handle));
        return (DAT_INVALID_PARAMETER);
    }
    reg_msg.mrs_pd_hkey = pz_handle->pd_handle->pd_hkey;
    reg_msg.mrs_vaddr = (ib_vaddr_t)(uintptr_t)virt_addr;
    reg_msg.mrs_len = (ib_memlen_t)length;
    reg_msg.mrs_flags = (ibt_mr_flags_t)
        dapl_lmr_convert_privileges(privileges);
    reg_msg.mrs_flags |= IBT_MR_ENABLE_WINDOW_BIND;
    /*CONSTCOND*/
    dapl_os_assert(DAT_LMR_COOKIE_SIZE == sizeof (reg_msg.mrs_shm_cookie));
    (void) dapl_os_memcpy((void *)&reg_msg.mrs_shm_cookie, (void *)cookie,
        DAT_LMR_COOKIE_SIZE);

    dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
        "mr_register_shared: lmr 0x%p, pd_hkey 0x%016llx, "
        "vaddr 0x%016llx, len %llu, flags 0x%x\n",
        lmr, reg_msg.mrs_pd_hkey, reg_msg.mrs_vaddr, reg_msg.mrs_len,
        reg_msg.mrs_flags);

    dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
        "mr_register_shared: cookie \n0x");
    for (i = 4; i >= 0; i--) {
        dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
            "%016llx", reg_msg.mrs_shm_cookie.mc_uint_arr[i]);
    }
    dapl_dbg_log(DAPL_DBG_TYPE_UTIL, "\n");

    /* call into driver to allocate MR resource */
    retval = ioctl(ia_fd, DAPL_MR_REGISTER_SHARED, &reg_msg);
    if (retval != 0) {
        dapl_dbg_log(DAPL_DBG_TYPE_ERR,
            "mr_register_shared: lmr 0x%p, failed (%s)\n",
            lmr, strerror(errno));
        dapl_os_free(mr_handle, sizeof (struct dapls_ib_mr_handle));
        return (dapls_convert_error(errno, retval));
    }
    mr_handle->mr_hkey = reg_msg.mrs_hkey;
    lmr->param.lmr_context = (DAT_LMR_CONTEXT)reg_msg.mrs_lkey;
    lmr->param.rmr_context = (DAT_RMR_CONTEXT)reg_msg.mrs_rkey;
    lmr->param.registered_address = reg_msg.mrs_vaddr;
    lmr->param.registered_size = reg_msg.mrs_len;
    lmr->mr_handle = mr_handle;

    dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
        "mr_register_shared: successful, lmr 0x%p, mr_hkey 0x%016llx, "
        "lmr_ctx 0x%08x\n\n", lmr, reg_msg.mrs_hkey,
        reg_msg.mrs_lkey);
    return (DAT_SUCCESS);
}

/*
 * dapl_ib_mr_deregister
 *
 * Free a memory region
 *
 * Input:
 *     lmr              pointer to dapl_lmr struct
 *
 * Output:
 *     none
 *
 * Returns:
 *     DAT_SUCCESS
 *     DAT_INSUFFICIENT_RESOURCES
 *
 */
DAT_RETURN
dapls_ib_mr_deregister(
    IN DAPL_LMR *lmr)
{
    dapl_mr_deregister_t args;
    int retval;

    args.mrd_hkey = lmr->mr_handle->mr_hkey;
    dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
        "mr_deregister: lmr 0x%p, hkey 0x%016llx, lmr_ctx 0x%08x\n"
        "    vaddr 0x%016llx, len %llu, flags 0x%x\n",
        lmr, args.mrd_hkey, lmr->param.lmr_context,
        lmr->param.registered_address, lmr->param.registered_size,
        dapl_lmr_convert_privileges(lmr->param.mem_priv) |
        IBT_MR_ENABLE_WINDOW_BIND);

    /* call into driver to do MR deregister */
    retval = ioctl(lmr->header.owner_ia->hca_ptr->ib_hca_handle->ia_fd,
        DAPL_MR_DEREGISTER, &args);

    if (retval != 0) {
        dapl_dbg_log(DAPL_DBG_TYPE_ERR,
            "mr_deregister: lmr 0x%p, failed (%s)\n",
            lmr, strerror(errno));
        return (dapls_convert_error(errno, retval));
    }

    dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
        "mr_deregister: successful\n\n");
    dapl_os_free(lmr->mr_handle, sizeof (struct dapls_ib_mr_handle));
    lmr->mr_handle = NULL;
    return (DAT_SUCCESS);
}

/*
 * dapl_ib_mr_register_lmr
 *
 * Register a memory region based on attributes of an existing one
 *
 * Input:
 *     ia_handle        IA handle
 *     lmr              pointer to dapl_lmr struct
 *     virt_addr        virtual address of beginning of mem region
 *     length           length of memory region
 *
 * Output:
 *     none
 *
 * Returns:
 *     DAT_SUCCESS
 *     DAT_INSUFFICIENT_RESOURCES
 *
 */
DAT_RETURN
dapls_ib_mr_register_lmr(
    IN DAPL_IA *ia,
    IN DAPL_LMR *lmr,
    IN DAT_MEM_PRIV_FLAGS privileges)
{
    dapl_mr_register_lmr_t regl_msg;
    DAPL_LMR *orig_lmr;
    struct dapls_ib_mr_handle *orig_mr_handle;
    ib_mr_handle_t mr_handle;
    int ia_fd;
    int retval;

    ia_fd = ia->hca_ptr->ib_hca_handle->ia_fd;
    mr_handle = dapl_os_alloc(sizeof (struct dapls_ib_mr_handle));
    if (mr_handle == NULL) {
        dapl_dbg_log(DAPL_DBG_TYPE_ERR,
            "mr_register_lmr: lmr 0x%p, ia 0x%p, "
            "cannot alloc mr_handle\n", lmr, ia);
        return (DAT_INSUFFICIENT_RESOURCES);
    }

    orig_lmr = (DAPL_LMR *)lmr->param.region_desc.for_lmr_handle;
    orig_mr_handle = (struct dapls_ib_mr_handle *)orig_lmr->mr_handle;
    regl_msg.mrl_orig_hkey = orig_mr_handle->mr_hkey;
    regl_msg.mrl_flags = (ibt_mr_flags_t)
        dapl_lmr_convert_privileges(privileges);
    regl_msg.mrl_flags |= IBT_MR_ENABLE_WINDOW_BIND;
    regl_msg.mrl_lkey = regl_msg.mrl_rkey = 0;

    dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
        "mr_register_lmr: lmr 0x%p, hkey 0x%016llx, lmr_ctx 0x%08x\n"
        "    vaddr 0x%016llx, len %llu, flags 0x%x\n",
        lmr, mr_handle->mr_hkey, lmr->param.lmr_context,
        orig_lmr->param.registered_address,
        orig_lmr->param.registered_size,
        dapl_lmr_convert_privileges(orig_lmr->param.mem_priv) |
        IBT_MR_ENABLE_WINDOW_BIND);

    /* call into driver to allocate MR resource */
    retval = ioctl(ia_fd, DAPL_MR_REGISTER_LMR, &regl_msg);
    if (retval != 0) {
        dapl_dbg_log(DAPL_DBG_TYPE_ERR,
            "mr_register_lmr: failed (%s), orig_hkey (%016llx)\n",
            strerror(errno), orig_mr_handle->mr_hkey);
        dapl_os_free(mr_handle, sizeof (struct dapls_ib_mr_handle));
        return (dapls_convert_error(errno, retval));
    }

    dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
        "mr_register_lmr: successful, lmr 0x%p, hkey 0x%016llx\n",
        lmr, regl_msg.mrl_hkey);

    mr_handle->mr_hkey = regl_msg.mrl_hkey;
    lmr->param.lmr_context = (DAT_LMR_CONTEXT)regl_msg.mrl_lkey;
    lmr->param.rmr_context = (DAT_RMR_CONTEXT)regl_msg.mrl_rkey;
    lmr->param.registered_address = orig_lmr->param.registered_address;
    lmr->param.registered_size = orig_lmr->param.registered_size;
    lmr->mr_handle = mr_handle;

    return (DAT_SUCCESS);
}

/*
 * dapls_ib_mw_alloc
 *
 * Bind a protection domain to a memory window
 *
 * Input:
 *     rmr              Initialized rmr to hold binding handles
 *
 * Output:
 *     none
 *
 * Returns:
 *     DAT_SUCCESS
 *     DAT_INSUFFICIENT_RESOURCES
 *
 */
DAT_RETURN
dapls_ib_mw_alloc(
    IN DAPL_RMR *rmr)
{
    DAPL_IA *ia_hdl = (DAPL_IA *)rmr->param.ia_handle;
    DAPL_PZ *pz_hdl = rmr->param.pz_handle;
    dapl_mw_alloc_t args;
    ib_mw_handle_t mw_handle;
    int ia_fd;
    int retval;

    ia_fd = ((struct dapls_ib_hca_handle *)(ia_hdl->hca_ptr->
        ib_hca_handle))->ia_fd;

    mw_handle = dapl_os_alloc(sizeof (struct dapls_ib_mw_handle));
    if (mw_handle == NULL) {
        dapl_dbg_log(DAPL_DBG_TYPE_ERR,
            "mw_alloc: rmr 0x%p, cannot alloc mw_handle\n", rmr);
        return (DAT_INSUFFICIENT_RESOURCES);
    }
    args.mw_pd_hkey = ((struct dapls_ib_pd_handle *)
        (pz_hdl->pd_handle))->pd_hkey;

    dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
        "mw_alloc: rmr 0x%p, pd_hkey 0x%016llx\n",
        rmr, args.mw_pd_hkey);

    retval = ioctl(ia_fd, DAPL_MW_ALLOC, &args);
    if (retval != 0) {
        dapl_dbg_log(DAPL_DBG_TYPE_ERR,
            "mw_alloc: rmr 0x%p, failed (%s)\n", rmr, strerror(errno));
        dapl_os_free(mw_handle, sizeof (struct dapls_ib_mw_handle));
        return (dapls_convert_error(errno, retval));
    }

    mw_handle->mw_hkey = args.mw_hkey;
    rmr->mw_handle = mw_handle;
    rmr->param.rmr_context = (DAT_RMR_CONTEXT)args.mw_rkey;

    dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
        "mw_alloc: successful, rmr 0x%p, mw_hkey 0x%llx, "
        "rmr_ctx 0x%x\n\n", rmr, (uint64_t)args.mw_hkey,
        rmr->param.rmr_context);

    return (DAT_SUCCESS);
}

/*
 * dapls_ib_mw_free
 *
 * Release bindings of a protection domain to a memory window
 *
 * Input:
 *     rmr              Initialized rmr to hold binding handles
 *
 * Output:
 *     none
 *
 * Returns:
 *     DAT_SUCCESS
 *     DAT_INSUFFICIENT_RESOURCES
 *
 */
DAT_RETURN
dapls_ib_mw_free(
    IN DAPL_RMR *rmr)
{
    DAPL_IA *ia_hdl = rmr->param.ia_handle;
    dapl_mw_free_t args;
    int ia_fd;
    int retval;

    ia_fd = ((struct dapls_ib_hca_handle *)(ia_hdl->hca_ptr->
        ib_hca_handle))->ia_fd;

    args.mw_hkey = rmr->mw_handle->mw_hkey;

    dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
        "mw_free: rmr 0x%p, mw_hkey 0x%016llx\n", rmr, args.mw_hkey);

    retval = ioctl(ia_fd, DAPL_MW_FREE, &args);
    if (retval != 0) {
        dapl_dbg_log(DAPL_DBG_TYPE_ERR,
            "mw_free: rmr 0x%p, failed (%s)\n", rmr, strerror(errno));
        return (dapls_convert_error(errno, retval));
    }

    dapl_dbg_log(DAPL_DBG_TYPE_UTIL, "mw_free: successful\n\n");
    dapl_os_free(rmr->mw_handle, sizeof (struct dapls_ib_mw_handle));
    rmr->mw_handle = NULL;

    return (DAT_SUCCESS);
}

/*
 * dapls_ib_mw_bind
 *
 * Bind a protection domain to a memory window
 *
 * Input:
 *     rmr              Initialized rmr to hold binding handles
 *
 * Output:
 *     none
 *
 * Returns:
 *     DAT_SUCCESS
 *     DAT_INSUFFICIENT_RESOURCES
 *
 */
DAT_RETURN
dapls_ib_mw_bind(
    IN DAPL_RMR *rmr,
    IN DAT_LMR_CONTEXT lmr_context,
    IN DAPL_EP *ep,
    IN DAPL_COOKIE *cookie,
    IN DAT_VADDR virtual_address,
    IN DAT_VLEN length,
    IN DAT_MEM_PRIV_FLAGS mem_priv,
    IN DAT_COMPLETION_FLAGS completion_flags)
{
    ibt_send_wr_t wre;
    ibt_wr_bind_t wrbind;
    boolean_t suppress_notification;
    int retval;

    if (length > 0) {
        wrbind.bind_flags = (ibt_bind_flags_t)
            (dapl_rmr_convert_privileges(mem_priv) |
            IBT_WR_BIND_ATOMIC);
    } else {
        wrbind.bind_flags = (ibt_bind_flags_t)NULL;
    }
    wrbind.bind_rkey = rmr->param.rmr_context;
    wrbind.bind_va = virtual_address;
    wrbind.bind_len = length;
    wrbind.bind_lkey = lmr_context;

    wre.wr_id = (ibt_wrid_t)(uintptr_t)cookie;
    /*
     * wre.wr_flags = (is_signaled) ? IBT_WR_SEND_SIGNAL :
     *     IBT_WR_NO_FLAGS;
     * Till we fix the chan alloc flags, do the following -
     */
    /* Translate dapl flags */
    wre.wr_flags = (DAT_COMPLETION_BARRIER_FENCE_FLAG &
        completion_flags) ? IBT_WR_SEND_FENCE : 0;
    /* suppress completions */
    wre.wr_flags |= (DAT_COMPLETION_SUPPRESS_FLAG &
        completion_flags) ? 0 : IBT_WR_SEND_SIGNAL;

    wre.wr_trans = IBT_RC_SRV;
    wre.wr_opcode = IBT_WRC_BIND;
    wre.wr_nds = 0;
    wre.wr_sgl = NULL;
    wre.wr.rc.rcwr.bind = &wrbind;

    dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
        "mw_bind: rmr 0x%p, wr_flags 0x%x, rkey 0x%x, bind_flags 0x%x\n"
        "    bind_va 0x%llx, bind_len 0x%llx, mem_priv 0x%x\n",
        rmr, wre.wr_flags, wrbind.bind_rkey, wrbind.bind_flags,
        wrbind.bind_va, wrbind.bind_len, mem_priv);

    if (ep->param.ep_attr.recv_completion_flags &
        DAT_COMPLETION_UNSIGNALLED_FLAG) {
        /* This flag is used to control notification of completions */
        suppress_notification = (completion_flags &
            DAT_COMPLETION_UNSIGNALLED_FLAG) ? B_TRUE : B_FALSE;
    } else {
        /*
         * The evd waiter will use the threshold to control wakeups,
         * and event notification is done by arming the CQ, so we do
         * not need special notification generation; hence set
         * suppression to true.
         */
        suppress_notification = B_TRUE;
    }

    retval = DAPL_SEND(ep)(ep, &wre, suppress_notification);

    if (retval != 0) {
        dapl_dbg_log(DAPL_DBG_TYPE_ERR,
            "mw_bind: rmr 0x%p, failed (%s)\n", rmr, strerror(errno));
        return (dapls_convert_error(errno, retval));
    }

    dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
        "mw_bind: new_rkey = 0x%x\n", wrbind.bind_rkey_out);
    rmr->param.rmr_context = (DAT_RMR_CONTEXT)wrbind.bind_rkey_out;

    return (DAT_SUCCESS);
}

/*
 * dapls_ib_mw_unbind
 *
 * Unbind a protection domain from a memory window
 *
 * Input:
 *     rmr              Initialized rmr to hold binding handles
 *
 * Output:
 *     none
 *
 * Returns:
 *     DAT_SUCCESS
 *     DAT_INSUFFICIENT_RESOURCES
 *
 */
DAT_RETURN
dapls_ib_mw_unbind(
    IN DAPL_RMR *rmr,
    IN DAT_LMR_CONTEXT lmr_context,
    IN DAPL_EP *ep,
    IN DAPL_COOKIE *cookie,
    IN DAT_COMPLETION_FLAGS completion_flags)
{
    DAT_RETURN retval;

    dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
        "mw_unbind: rmr 0x%p, enter\n", rmr);

    retval = dapls_ib_mw_bind(rmr, lmr_context, ep, cookie,
        (DAT_VADDR)0, (DAT_VLEN)0, (DAT_MEM_PRIV_FLAGS)NULL,
        completion_flags);

    dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
        "mw_unbind: rmr 0x%p, exit\n\n", rmr);

    return (retval);
}
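
/*
 * Note: unbind is implemented as a zero-length bind. In
 * dapls_ib_mw_bind() a length of 0 clears bind_flags, and posting such
 * a bind work request invalidates the window's current rkey (standard
 * InfiniBand memory-window semantics); a fresh rkey comes back in
 * bind_rkey_out and is stored in rmr->param.rmr_context.
 */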

/*
 * Processes async events and calls appropriate callbacks so that events
 * can be posted to the async evd.
 */
void
dapls_ib_async_callback(
    IN DAPL_EVD *async_evd,
    IN ib_hca_handle_t hca_handle,
    IN ib_error_record_t *event_ptr,
    IN void *context)
{
    DAPL_IA *ia_ptr;
    DAPL_EP *ep_ptr;
    DAPL_EVD *evd_ptr;
    dapl_ib_async_event_t *async_evp;

    ia_ptr = (DAPL_IA *)context;

    dapl_os_assert(event_ptr != NULL);
    async_evp = (dapl_ib_async_event_t *)event_ptr;

    switch (async_evp->ibae_type) {
    case IBT_ERROR_INVALID_REQUEST_CHAN:
    case IBT_ERROR_CATASTROPHIC_CHAN:
        /*
         * Walk the EPs to match this EP, then invoke the
         * routine when we have the EP we need
         */
        dapl_os_assert(!dapl_llist_is_empty(&ia_ptr->ep_list_head));
        dapl_os_lock(&ia_ptr->header.lock);

        ep_ptr = (DAPL_EP *)dapl_llist_next_entry(&ia_ptr->ep_list_head,
            NULL);
        while (ep_ptr != NULL) {
            if (ep_ptr ==
                (DAPL_EP *)(uintptr_t)async_evp->ibae_cookie) {
                break;
            }

            ep_ptr = (DAPL_EP *)dapl_llist_next_entry(
                &ia_ptr->ep_list_head,
                &ep_ptr->header.ia_list_entry);
        }

        dapl_os_unlock(&ia_ptr->header.lock);
        dapl_os_assert(ep_ptr != NULL);
        dapl_evd_qp_async_error_callback(hca_handle, NULL, event_ptr,
            (void *)ep_ptr);
        break;
    case IBT_ERROR_CQ:
        /*
         * Walk the EVDs to match this EVD, then invoke the
         * routine when we have the EVD we need
         */
        dapl_os_assert(!dapl_llist_is_empty(&ia_ptr->evd_list_head));
        dapl_os_lock(&ia_ptr->header.lock);

        evd_ptr = (DAPL_EVD *)dapl_llist_next_entry(
            &ia_ptr->evd_list_head, NULL);
        while (evd_ptr != NULL) {
            if (evd_ptr ==
                (DAPL_EVD *)(uintptr_t)async_evp->ibae_cookie) {
                break;
            }
            evd_ptr = (DAPL_EVD *)
                dapl_llist_next_entry(&ia_ptr->evd_list_head,
                &evd_ptr->header.ia_list_entry);
        }
        dapl_os_unlock(&ia_ptr->header.lock);
        dapl_os_assert(evd_ptr != NULL);
        dapl_evd_cq_async_error_callback(hca_handle, NULL, event_ptr,
            (void *)evd_ptr);
        break;
    case IBT_ERROR_PORT_DOWN:
    case IBT_ERROR_LOCAL_CATASTROPHIC:
        dapl_evd_un_async_error_callback(hca_handle, event_ptr,
            (void *)async_evd);
        break;
    default:
        /*
         * We are not interested in the following events
         * case IBT_EVENT_PATH_MIGRATED:
         * case IBT_EVENT_COM_EST:
         * case IBT_EVENT_SQD:
         * case IBT_ERROR_PATH_MIGRATE_REQ:
         * case IBT_EVENT_PORT_UP:
         */
        dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
            "dapls_ib_async_callback: unhandled async code:%x\n",
            async_evp->ibae_type);
        break;
    }
}

/*
 * dapls_ib_setup_async_callback
 *
 * The reference implementation calls this to register callbacks,
 * but since our model of polling for events is based on retrieving
 * events by the waiting thread itself, this is a NOOP for us.
 */
/* ARGSUSED */
DAT_RETURN
dapls_ib_setup_async_callback(
    IN DAPL_IA *ia_ptr,
    IN DAPL_ASYNC_HANDLER_TYPE handler_type,
    IN unsigned int *callback_handle,
    IN ib_async_handler_t callback,
    IN void *context)
{
    return (DAT_SUCCESS);
}

/*
 * dapls_ib_query_hca
 *
 * Query the HCA and fill in the requested attribute structures
 *
 * Input:
 *     hca_ptr          HCA handle
 *
 * Output:
 *     ia_attr          IA attributes
 *     ep_attr          EP attributes
 *     srq_attr         SRQ attributes
 *
 * Returns:
 *     DAT_SUCCESS
 *     DAT_INVALID_PARAMETER
 */

/* these are just arbitrary values for now */

static DAT_RETURN
dapls_ib_query_ia(
    IN dapl_hca_attr_t *hca_attr,
    IN DAPL_HCA *hca_ptr,
    OUT DAT_IA_ATTR *ia_attr)
{
    (void) dapl_os_memzero(ia_attr, sizeof (*ia_attr));

    (void) dapl_os_strcpy(ia_attr->adapter_name, VN_ADAPTER_NAME);

    (void) sprintf(ia_attr->vendor_name, "0x%08x:0x%08x",
        hca_attr->dhca_vendor_id, hca_attr->dhca_device_id);

    ia_attr->hardware_version_major = (DAT_UINT32)hca_attr->dhca_version_id;
    ia_attr->ia_address_ptr = (DAT_IA_ADDRESS_PTR)&hca_ptr->hca_address;
    ia_attr->max_eps = (DAT_COUNT)hca_attr->dhca_max_chans;
    ia_attr->max_dto_per_ep = (DAT_COUNT)hca_attr->dhca_max_chan_sz;
    ia_attr->max_rdma_read_per_ep_in = hca_attr->dhca_max_rdma_in_chan;
    ia_attr->max_rdma_read_per_ep_out = hca_attr->dhca_max_rdma_out_chan;
    ia_attr->max_evds = (DAT_COUNT)hca_attr->dhca_max_cq;
    ia_attr->max_evd_qlen = (DAT_COUNT)hca_attr->dhca_max_cq_sz;
    /* max_iov_segments_per_dto is for non-RDMA */
    ia_attr->max_iov_segments_per_dto = (DAT_COUNT)hca_attr->dhca_max_sgl;
    ia_attr->max_lmrs = (DAT_COUNT)hca_attr->dhca_max_memr;
    ia_attr->max_lmr_block_size = (DAT_VLEN)hca_attr->dhca_max_memr_len;
    ia_attr->max_lmr_virtual_address = (DAT_VADDR)DAPL_MAX_ADDRESS;
    ia_attr->max_pzs = (DAT_COUNT)hca_attr->dhca_max_pd;
    ia_attr->max_mtu_size = (DAT_VLEN)DAPL_IB_MAX_MESSAGE_SIZE;
    ia_attr->max_rdma_size = (DAT_VLEN)DAPL_IB_MAX_MESSAGE_SIZE;
    ia_attr->max_rmrs = (DAT_COUNT)hca_attr->dhca_max_mem_win;
    ia_attr->max_rmr_target_address = (DAT_VADDR)DAPL_MAX_ADDRESS;
    ia_attr->max_iov_segments_per_rdma_read =
        (DAT_COUNT)hca_attr->dhca_max_sgl;
    ia_attr->max_iov_segments_per_rdma_write =
        (DAT_COUNT)hca_attr->dhca_max_sgl;
    /* all instances of IA */
    ia_attr->max_rdma_read_in = hca_attr->dhca_max_rdma_in_chan *
        hca_attr->dhca_max_chans;
    ia_attr->max_rdma_read_out = hca_attr->dhca_max_rdma_out_chan *
        hca_attr->dhca_max_chans;
    ia_attr->max_rdma_read_per_ep_in_guaranteed = DAT_TRUE;
    ia_attr->max_rdma_read_per_ep_out_guaranteed = DAT_TRUE;
    ia_attr->max_srqs = (DAT_COUNT)hca_attr->dhca_max_srqs;
    ia_attr->max_ep_per_srq = ia_attr->max_eps;
    ia_attr->max_recv_per_srq = (DAT_COUNT)hca_attr->dhca_max_srqs_sz;

    dapl_dbg_log(DAPL_DBG_TYPE_UTIL, "IA Attributes:\n"
        "\tadapter_name %s\n"
        "\tvendor_name %s\n"
        "\thardware_version_major 0x%08x\n"
        "\tmax_eps %d\n"
        "\tmax_dto_per_ep %d\n"
        "\tmax_rdma_read_per_ep_in %d\n"
        "\tmax_rdma_read_per_ep_out %d\n"
        "\tmax_evds %d\n"
        "\tmax_evd_qlen %d\n"
        "\tmax_iov_segments_per_dto %d\n"
        "\tmax_lmrs %d\n"
        "\tmax_lmr_block_size 0x%016llx\n"
        "\tmax_lmr_virtual_address 0x%016llx\n"
        "\tmax_pzs %d\n"
        "\tmax_mtu_size 0x%016llx\n"
        "\tmax_rdma_size 0x%016llx\n"
        "\tmax_rmrs %d\n"
        "\tmax_rmr_target_address 0x%016llx\n"
        "\tmax_iov_segments_per_rdma_read %d\n"
        "\tmax_iov_segments_per_rdma_write %d\n"
        "\tmax_rdma_read_in %d\n"
        "\tmax_rdma_read_out %d\n"
        "\tmax_srqs %d\n"
        "\tmax_ep_per_srq %d\n"
        "\tmax_recv_per_srq %d\n"
        "\n",
        ia_attr->adapter_name,
        ia_attr->vendor_name,
        ia_attr->hardware_version_major,
        ia_attr->max_eps,
        ia_attr->max_dto_per_ep,
        ia_attr->max_rdma_read_per_ep_in,
        ia_attr->max_rdma_read_per_ep_out,
        ia_attr->max_evds,
        ia_attr->max_evd_qlen,
        ia_attr->max_iov_segments_per_dto,
        ia_attr->max_lmrs,
        ia_attr->max_lmr_block_size,
        ia_attr->max_lmr_virtual_address,
        ia_attr->max_pzs,
        ia_attr->max_mtu_size,
        ia_attr->max_rdma_size,
        ia_attr->max_rmrs,
        ia_attr->max_rmr_target_address,
        ia_attr->max_iov_segments_per_rdma_read,
        ia_attr->max_iov_segments_per_rdma_write,
        ia_attr->max_rdma_read_in,
        ia_attr->max_rdma_read_out,
        ia_attr->max_srqs,
        ia_attr->max_ep_per_srq,
        ia_attr->max_recv_per_srq);

    return (DAT_SUCCESS);
}

/* ARGSUSED */
static DAT_RETURN
dapls_ib_query_ep(
    IN dapl_hca_attr_t *hca_attr,
    IN DAPL_HCA *hca_ptr,
    OUT DAT_EP_ATTR *ep_attr)
{
    (void) dapl_os_memzero(ep_attr, sizeof (*ep_attr));
    ep_attr->service_type = DAT_SERVICE_TYPE_RC;
    ep_attr->max_mtu_size = DAPL_IB_MAX_MESSAGE_SIZE;
    ep_attr->max_rdma_size = DAPL_IB_MAX_MESSAGE_SIZE;
    ep_attr->qos = DAT_QOS_BEST_EFFORT;
    ep_attr->max_recv_dtos = hca_attr->dhca_max_chan_sz;
    ep_attr->max_request_dtos = hca_attr->dhca_max_chan_sz;
    ep_attr->max_recv_iov = hca_attr->dhca_max_sgl;
    ep_attr->max_request_iov = hca_attr->dhca_max_sgl;
    ep_attr->request_completion_flags = DAT_COMPLETION_DEFAULT_FLAG;
    ep_attr->recv_completion_flags = DAT_COMPLETION_DEFAULT_FLAG;
    ep_attr->srq_soft_hw = DAT_HW_DEFAULT;
    return (DAT_SUCCESS);
}

static void
dapls_ib_query_srq(
    IN dapl_hca_attr_t *hca_attr,
    OUT DAT_SRQ_ATTR *srq_attr)
{
    (void) dapl_os_memzero(srq_attr, sizeof (*srq_attr));
    srq_attr->max_recv_dtos = hca_attr->dhca_max_srqs_sz;
    srq_attr->max_recv_iov = hca_attr->dhca_max_srq_sgl;
    srq_attr->low_watermark = DAT_SRQ_LW_DEFAULT;
}

/* ARGSUSED */
DAT_RETURN
dapls_ib_query_hca(
    IN DAPL_HCA *hca_ptr,
    OUT DAT_IA_ATTR *ia_attr,
    OUT DAT_EP_ATTR *ep_attr,
    OUT DAT_SOCK_ADDR6 *ip_addr,
    OUT DAT_SRQ_ATTR *srq_attr)
{
    dapl_ia_query_t args;
    int ia_fd, retval;

    if (hca_ptr == NULL) {
        return (DAT_INVALID_PARAMETER);
    }

    ia_fd = hca_ptr->ib_hca_handle->ia_fd;
    retval = ioctl(ia_fd, DAPL_IA_QUERY, &args);
    if (retval != 0) {
        return (dapls_convert_error(errno, retval));
    }

    if (ia_attr != NULL) {
        (void) dapls_ib_query_ia(&args.hca_attr, hca_ptr, ia_attr);
    }
    if (ep_attr != NULL) {
        (void) dapls_ib_query_ep(&args.hca_attr, hca_ptr, ep_attr);
    }
    if (srq_attr != NULL) {
        dapls_ib_query_srq(&args.hca_attr, srq_attr);
    }
    if (ia_attr == NULL && ep_attr == NULL && srq_attr == NULL) {
        return (DAT_INVALID_PARAMETER);
    }
    return (DAT_SUCCESS);
}

void
dapls_ib_store_premature_events(
    IN ib_qp_handle_t qp_ptr,
    IN ib_work_completion_t *cqe_ptr)
{
    ib_srq_handle_t srqp;
    int head;

    if (qp_ptr->qp_srq_enabled) {
        /*
         * For QPs with an SRQ attached, store the premature event in
         * the SRQ's premature event list
         */
        srqp = qp_ptr->qp_srq;
        dapl_os_assert(srqp->srq_freepr_num_events > 0);
        head = srqp->srq_freepr_events[srqp->srq_freepr_head];
        /*
         * mark cqe as valid before storing it in the
         * premature events list
         */
        DAPL_SET_CQE_VALID(cqe_ptr);
        (void) dapl_os_memcpy(&(srqp->srq_premature_events[head]),
            cqe_ptr, sizeof (*cqe_ptr));
        srqp->srq_freepr_head = (srqp->srq_freepr_head + 1) %
            srqp->srq_wq_numwqe;
        srqp->srq_freepr_num_events--;
    } else {
        (void) dapl_os_memcpy(&(qp_ptr->qp_premature_events[
            qp_ptr->qp_num_premature_events]),
            cqe_ptr, sizeof (*cqe_ptr));
    }
    qp_ptr->qp_num_premature_events++;
}

void
dapls_ib_poll_premature_events(
    IN DAPL_EP *ep_ptr,
    OUT ib_work_completion_t **cqe_ptr,
    OUT int *nevents)
{
    ib_qp_handle_t qp = ep_ptr->qp_handle;

    if (qp->qp_srq_enabled) {
        *cqe_ptr = qp->qp_srq->srq_premature_events;
    } else {
        *cqe_ptr = qp->qp_premature_events;
    }

    *nevents = qp->qp_num_premature_events;
    qp->qp_num_premature_events = 0;
}
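
/*
 * Note for callers: the array handed back above is the live premature
 * event storage, not a copy. In the SRQ case, entries were marked with
 * DAPL_SET_CQE_VALID when stored, and each one must be handed back to
 * the free list via dapls_ib_free_premature_events() below once it has
 * been consumed.
 */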

/*
 * Return premature events to the free list after they have been
 * processed. This function is called only for premature events on
 * the SRQ.
 */
void
dapls_ib_free_premature_events(
    IN DAPL_EP *ep_ptr,
    IN int free_index)
{
    ib_qp_handle_t qp_ptr;
    ib_srq_handle_t srq_ptr;
    int tail;

    qp_ptr = ep_ptr->qp_handle;
    srq_ptr = qp_ptr->qp_srq;

    dapl_os_assert(qp_ptr->qp_srq_enabled);

    tail = srq_ptr->srq_freepr_tail;
    srq_ptr->srq_freepr_events[tail] = free_index;
    srq_ptr->srq_freepr_tail = (tail + 1) % srq_ptr->srq_wq_numwqe;
    srq_ptr->srq_freepr_num_events++;
    DAPL_SET_CQE_INVALID(&srq_ptr->srq_premature_events[free_index]);
}

/*
 * dapls_ib_get_async_event
 *
 * Translate an asynchronous event type to the DAT event.
 * Note that different providers have different sets of errors.
 *
 * Input:
 *     cause_ptr        provider event cause
 *
 * Output:
 *     async_event      DAT mapping of error
 *
 * Returns:
 *     DAT_SUCCESS
 *     DAT_NOT_IMPLEMENTED  Caller is not interested in this event
 */

DAT_RETURN dapls_ib_get_async_event(
    IN ib_error_record_t *cause_ptr,
    OUT DAT_EVENT_NUMBER *async_event)
{
    ibt_async_code_t code;
    DAT_RETURN dat_status;

    dat_status = DAT_SUCCESS;
    code = (ibt_async_code_t)
        (((dapl_ib_async_event_t *)cause_ptr)->ibae_type);

    switch (code) {
    case IBT_ERROR_CQ:
    case IBT_ERROR_ACCESS_VIOLATION_CHAN:
    case IBT_ERROR_INVALID_REQUEST_CHAN:
        *async_event = DAT_ASYNC_ERROR_PROVIDER_INTERNAL_ERROR;
        break;
    /* CATASTROPHIC errors */
    case IBT_ERROR_CATASTROPHIC_CHAN:
    case IBT_ERROR_LOCAL_CATASTROPHIC:
    case IBT_ERROR_PORT_DOWN:
        *async_event = DAT_ASYNC_ERROR_IA_CATASTROPHIC;
        break;
    default:
        /*
         * Errors we are not interested in reporting:
         * IBT_EVENT_PATH_MIGRATED
         * IBT_ERROR_PATH_MIGRATE_REQ
         * IBT_EVENT_COM_EST
         * IBT_EVENT_SQD
         * IBT_EVENT_PORT_UP
         */
        dat_status = DAT_NOT_IMPLEMENTED;
    }
    return (dat_status);
}

DAT_RETURN
dapls_ib_event_poll(
    IN DAPL_EVD *evd_ptr,
    IN uint64_t timeout,
    IN uint_t threshold,
    OUT dapl_ib_event_t *evp_ptr,
    OUT int *num_events)
{
    dapl_event_poll_t evp_msg;
    int ia_fd;
    int retval;

    *num_events = 0;
    ia_fd = evd_ptr->header.owner_ia->hca_ptr->ib_hca_handle->ia_fd;

    evp_msg.evp_evd_hkey = evd_ptr->ib_cq_handle->evd_hkey;
    evp_msg.evp_threshold = threshold;
    evp_msg.evp_timeout = timeout;
    evp_msg.evp_ep = evp_ptr;
    if (evp_ptr) {
        evp_msg.evp_num_ev =
            DAPL_MAX(evd_ptr->threshold, NUM_EVENTS_PER_POLL);
    } else {
        evp_msg.evp_num_ev = 0;
    }
    evp_msg.evp_num_polled = 0;

    dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
        "event_poll: evd 0x%p, hkey 0x%llx, threshold %d,\n"
        "    timeout 0x%llx, evp_ptr 0x%p, num_ev %d\n",
        evd_ptr, evp_msg.evp_evd_hkey, evp_msg.evp_threshold,
        timeout, evp_ptr, evp_msg.evp_num_ev);

    /*
     * Poll the EVD and if there are no events then we wait in
     * the kernel.
     */
    retval = ioctl(ia_fd, DAPL_EVENT_POLL, &evp_msg);
    if (retval != 0) {
        dapl_dbg_log(DAPL_DBG_TYPE_EVD,
            "event_poll: evd 0x%p, retval %d err: %s\n",
            evd_ptr, retval, strerror(errno));
        *num_events = evp_msg.evp_num_polled;
        return (dapls_convert_error(errno, retval));
    }

    dapl_dbg_log(DAPL_DBG_TYPE_EVD,
        "dapls_ib_event_poll: evd %p nevents %d\n", evd_ptr,
        evp_msg.evp_num_polled);

    *num_events = evp_msg.evp_num_polled;

    return (DAT_SUCCESS);
}

DAT_RETURN
dapls_ib_event_wakeup(
    IN DAPL_EVD *evd_ptr)
{
    dapl_event_wakeup_t evw_msg;
    int ia_fd;
    int retval;

    ia_fd = evd_ptr->header.owner_ia->hca_ptr->ib_hca_handle->ia_fd;

    evw_msg.evw_hkey = evd_ptr->ib_cq_handle->evd_hkey;

    dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
        "event_wakeup: evd 0x%p, hkey 0x%llx\n",
        evd_ptr, evw_msg.evw_hkey);

    /*
     * Wakeup any thread waiting in the kernel on this EVD
     */
    retval = ioctl(ia_fd, DAPL_EVENT_WAKEUP, &evw_msg);
    if (retval != 0) {
        dapl_dbg_log(DAPL_DBG_TYPE_EVD,
            "event_wakeup: evd 0x%p, retval %d err: %s\n",
            evd_ptr, retval, strerror(errno));
        return (dapls_convert_error(errno, retval));
    }

    return (DAT_SUCCESS);
}

/*
 * dapls_ib_cq_peek is used by dapl_cno_wait(). After the CQ has been
 * inspected, we arm the CQ if it was empty.
 */
void dapls_ib_cq_peek(
    IN DAPL_EVD *evd_ptr,
    OUT int *num_cqe)
{
    DAPL_IA *ia_ptr;

    *num_cqe = 0;
    if (evd_ptr->evd_flags & (DAT_EVD_DTO_FLAG | DAT_EVD_RMR_BIND_FLAG)) {
        DAPL_PEEK(evd_ptr)(evd_ptr->ib_cq_handle, num_cqe);
        /* No events found in the CQ; arm it now */
        if (*num_cqe == 0) {
            ia_ptr = evd_ptr->header.owner_ia;
            (void) dapls_set_cq_notify(ia_ptr, evd_ptr);
            dapl_dbg_log(DAPL_DBG_TYPE_EVD,
                "dapls_ib_cq_peek: set_cq_notify\n");
        }
    }
}

/*
 * Modifies the CNO associated with an EVD
 */
DAT_RETURN dapls_ib_modify_cno(
    IN DAPL_EVD *evd_ptr,
    IN DAPL_CNO *cno_ptr)
{
    dapl_evd_modify_cno_t evmc_msg;
    int ia_fd;
    int retval;

    ia_fd = evd_ptr->header.owner_ia->hca_ptr->ib_hca_handle->ia_fd;

    evmc_msg.evmc_hkey = evd_ptr->ib_cq_handle->evd_hkey;

    if (cno_ptr) {
        evmc_msg.evmc_cno_hkey = (uint64_t)cno_ptr->ib_cno_handle;
    } else {
        evmc_msg.evmc_cno_hkey = 0;
    }

    dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
        "modify_cno: evd 0x%p, hkey 0x%llx, cno 0x%p, cno_hkey 0x%llx\n",
        evd_ptr, evmc_msg.evmc_hkey, cno_ptr, evmc_msg.evmc_cno_hkey);

    /*
     * modify the CNO associated with the EVD
     */
    retval = ioctl(ia_fd, DAPL_EVD_MODIFY_CNO, &evmc_msg);
    if (retval != 0) {
        dapl_dbg_log(DAPL_DBG_TYPE_EVD,
            "modify_cno: evd 0x%p, cno %p retval %d err: %s\n",
            evd_ptr, cno_ptr, retval, strerror(errno));
        return (dapls_convert_error(errno, retval));
    }

    return (DAT_SUCCESS);
}

DAT_RETURN
dapls_ib_cno_wait(
    IN DAPL_CNO *cno_ptr,
    IN DAT_TIMEOUT timeout,
    IN DAPL_EVD **evd_ptr_p)
{
    dapl_cno_wait_t args;
    int retval;

    args.cnw_hkey = (uint64_t)cno_ptr->ib_cno_handle;
    if (timeout == DAT_TIMEOUT_INFINITE) {
        args.cnw_timeout = UINT64_MAX;
    } else {
        args.cnw_timeout = (uint64_t)timeout & 0x00000000ffffffff;
    }

    dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
        "cno_wait: cno 0x%p, hkey 0x%016llx, timeout 0x%016llx\n",
        cno_ptr, args.cnw_hkey, args.cnw_timeout);

    retval = ioctl(cno_ptr->header.owner_ia->hca_ptr->
        ib_hca_handle->ia_fd, DAPL_CNO_WAIT, &args);

    if (retval != 0) {
        *evd_ptr_p = (DAPL_EVD *)NULL;
        dapl_dbg_log(DAPL_DBG_TYPE_ERR,
            "cno_wait: cno 0x%p ioctl err: %s\n",
            cno_ptr, strerror(errno));
        return (dapls_convert_error(errno, retval));
    }

    *evd_ptr_p = (DAPL_EVD *)(uintptr_t)args.cnw_evd_cookie;
    dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
        "cno_wait: woken up, cno 0x%p, evd 0x%p\n\n",
        cno_ptr, *evd_ptr_p);

    return (DAT_SUCCESS);
}

DAT_RETURN
dapls_ib_cno_alloc(
    IN DAPL_IA *ia_ptr,
    IN DAPL_CNO *cno_ptr)
{
    dapl_cno_alloc_t args;
    int retval;

    if (cno_ptr->cno_wait_agent.instance_data != NULL ||
        cno_ptr->cno_wait_agent.proxy_agent_func != NULL) {
        dapl_dbg_log(DAPL_DBG_TYPE_ERR,
            "cno_alloc: cno 0x%p, wait_agent != NULL\n", cno_ptr);
        return (DAT_NOT_IMPLEMENTED);
    }

    retval = ioctl(ia_ptr->hca_ptr->ib_hca_handle->ia_fd,
        DAPL_CNO_ALLOC, &args);
    if (retval != 0) {
        dapl_dbg_log(DAPL_DBG_TYPE_ERR,
            "cno_alloc: cno 0x%p ioctl err: %s\n",
            cno_ptr, strerror(errno));
        return (dapls_convert_error(errno, retval));
    }

    cno_ptr->ib_cno_handle = (ib_cno_handle_t)args.cno_hkey;
    dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
        "cno_alloc: cno 0x%p allocated, ia_ptr 0x%p, hkey 0x%016llx\n",
        cno_ptr, ia_ptr, args.cno_hkey);

    return (DAT_SUCCESS);
}

DAT_RETURN
dapls_ib_cno_free(
    IN DAPL_CNO *cno_ptr)
{
    dapl_cno_free_t args;
    int retval;

    args.cnf_hkey = (uint64_t)cno_ptr->ib_cno_handle;
    dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
        "cno_free: cno 0x%p, hkey 0x%016llx\n",
        cno_ptr, args.cnf_hkey);

    retval = ioctl(cno_ptr->header.owner_ia->hca_ptr->
        ib_hca_handle->ia_fd, DAPL_CNO_FREE, &args);

    if (retval != 0) {
        dapl_dbg_log(DAPL_DBG_TYPE_ERR,
            "cno_free: cno 0x%p ioctl err: %s\n",
            cno_ptr, strerror(errno));
        return (dapls_convert_error(errno, retval));
    }

    dapl_dbg_log(DAPL_DBG_TYPE_UTIL,
        "cno_free: cno 0x%p freed\n", cno_ptr);

    return (DAT_SUCCESS);
}

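/*
 * dapls_convert_error
 *
 * Convert a failed call into a DAT_RETURN. By the convention used in
 * this file, a negative retval means the ioctl itself failed, so errnum
 * (the saved errno) is decoded; a non-negative retval is an IBTF status
 * code handed back by the driver and is decoded instead.
 */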
DAT_RETURN
dapls_convert_error(int errnum, int retval)
{
    if (retval < 0) {
        switch (errnum) {
        case EINVAL:
            return (DAT_INVALID_PARAMETER);
        case ENOMEM:
            return (DAT_INSUFFICIENT_RESOURCES);
        case ETIME:
            return (DAT_TIMEOUT_EXPIRED);
        case EINTR:
            return (DAT_INTERRUPTED_CALL);
        case EFAULT:
            return (DAT_INTERNAL_ERROR);
        default:
            return (DAT_INTERNAL_ERROR);
        }
    } else {
        dapl_dbg_log(DAPL_DBG_TYPE_ERR,
            "ERROR: got IBTF error %d\n", retval);
        switch (retval) {
        case IBT_SERVICE_RECORDS_NOT_FOUND:
            /*
             * Connecting to a non-existent conn qual gets
             * us here
             */
            return (DAT_ERROR(DAT_INVALID_PARAMETER,
                DAT_INVALID_ADDRESS_UNREACHABLE));
        case IBT_INSUFF_RESOURCE:
        case IBT_INSUFF_KERNEL_RESOURCE:
            return (DAT_ERROR(DAT_INSUFFICIENT_RESOURCES, 0));
        case IBT_AR_NOT_REGISTERED:
            /*
             * forward ipaddr lookup failed
             */
            return (DAT_ERROR(DAT_INVALID_ADDRESS, 0));
        default:
            return (DAT_INTERNAL_ERROR);
        }
    }
}

typedef struct dapls_ib_dbp_page_s {
    uint32_t *dbp_page_addr;
    uint64_t dbp_mapoffset;
    struct dapls_ib_dbp_page_s *next;
    int fd;
} dapls_ib_dbp_page_t;

dapls_ib_dbp_page_t *dapls_ib_pagelist = NULL;
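
/*
 * Doorbell pages are mmap'd once and cached on dapls_ib_pagelist,
 * keyed by (fd, mapoffset) and protected by dapls_ib_dbp_lock.
 * Nothing in this file ever unmaps a cached page, so each mapping is
 * retained for the life of the process and shared by all CQs whose
 * doorbells land in the same page.
 */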

/* Function that returns a pointer to the specified doorbell entry */
uint32_t *dapls_ib_get_dbp(uint64_t maplen, int fd, uint64_t mapoffset,
    uint32_t offset)
{
    dapls_ib_dbp_page_t *new_page;
    dapls_ib_dbp_page_t *cur_page;

    dapl_os_lock(&dapls_ib_dbp_lock);
    /* Check to see if page already mapped for entry */
    for (cur_page = dapls_ib_pagelist; cur_page != NULL;
        cur_page = cur_page->next)
        if (cur_page->dbp_mapoffset == mapoffset &&
            cur_page->fd == fd) {
            dapl_os_unlock(&dapls_ib_dbp_lock);
            return ((uint32_t *)
                (offset + (uintptr_t)cur_page->dbp_page_addr));
        }

    /* If not, map a new page and prepend to pagelist */
    new_page = malloc(sizeof (dapls_ib_dbp_page_t));
    if (new_page == NULL) {
        dapl_os_unlock(&dapls_ib_dbp_lock);
        return (MAP_FAILED);
    }
    new_page->dbp_page_addr = (uint32_t *)(void *)mmap64((void *)0,
        maplen, (PROT_READ | PROT_WRITE), MAP_SHARED, fd, mapoffset);
    if (new_page->dbp_page_addr == MAP_FAILED) {
        free(new_page);
        dapl_os_unlock(&dapls_ib_dbp_lock);
        return (MAP_FAILED);
    }
    new_page->next = dapls_ib_pagelist;
    new_page->dbp_mapoffset = mapoffset;
    new_page->fd = fd;
    dapls_ib_pagelist = new_page;
    dapl_os_unlock(&dapls_ib_dbp_lock);
    return ((uint32_t *)(offset + (uintptr_t)new_page->dbp_page_addr));
}