1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2002-2003, Network Appliance, Inc. All rights reserved.
24 */
25
26 /*
27 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
28 * Use is subject to license terms.
29 */
30
31 /*
32 *
33 * MODULE: dapl_evd_util.c
34 *
35 * PURPOSE: Manage EVD Info structure
36 *
37 * $Id: dapl_evd_util.c,v 1.41 2003/08/20 13:18:36 sjs2 Exp $
38 */
39
40 #include <sys/time.h>
41 #include <strings.h>
42 #include "dapl_evd_util.h"
43 #include "dapl_ia_util.h"
44 #include "dapl_cno_util.h"
45 #include "dapl_ring_buffer_util.h"
46 #include "dapl_adapter_util.h"
47 #include "dapl_tavor_ibtf_impl.h"
48 #include "dapl_cookie.h"
49 #include "dapl.h"
50
51
52 #ifdef DAPL_DBG /* For debugging. */
53 static void
54 dapli_evd_eh_print_cqe(
55 IN ib_work_completion_t cqe);
56 #endif
57
58 static DAT_BOOLEAN
59 dapli_evd_cqe_to_event(
60 IN DAPL_EVD *evd_ptr,
61 IN ib_work_completion_t *cqe_ptr,
62 IN DAT_BOOLEAN process_premature_events,
63 OUT DAT_EVENT *event_ptr);
64
65 static DAT_RETURN
66 dapli_evd_event_alloc(
67 IN DAPL_EVD *evd_ptr,
68 IN DAPL_CNO *cno_ptr,
69 IN DAT_COUNT qlen);
70
71
72 /*
73 * dapls_evd_internal_create
74 *
75  * Actually create the EVD. This is called after all parameter checking
76  * has been performed in dapl_evd_create. It is also called from dapl_ia_open
77  * to create the default async EVD.
78 *
79 * Input:
80 * ia_ptr
81 * cno_ptr
82 * qlen
83 * evd_flags
84 *
85 * Output:
86 * evd_ptr_ptr
87 *
88 * Returns:
89 * none
90 *
91 */
92
93 DAT_RETURN
94 dapls_evd_internal_create(
95 DAPL_IA *ia_ptr,
96 DAPL_CNO *cno_ptr,
97 DAT_COUNT min_qlen,
98 DAT_EVD_FLAGS evd_flags,
99 DAPL_EVD **evd_ptr_ptr)
100 {
101 DAPL_EVD *evd_ptr;
102 DAT_COUNT cq_len;
103 DAT_RETURN dat_status;
104
105 dat_status = DAT_SUCCESS;
106 *evd_ptr_ptr = NULL;
107 cq_len = min_qlen;
108
109 evd_ptr = dapls_evd_alloc(ia_ptr,
110 cno_ptr,
111 evd_flags,
112 min_qlen);
113 if (!evd_ptr) {
114 dat_status = DAT_ERROR(DAT_INSUFFICIENT_RESOURCES,
115 DAT_RESOURCE_MEMORY);
116 goto bail;
117 }
118
119 /*
120 * If we are dealing with event streams besides a CQ event stream,
121 * be conservative and set producer side locking. Otherwise, no.
122 */
123 evd_ptr->evd_producer_locking_needed =
124 ((evd_flags & ~ (DAT_EVD_DTO_FLAG|DAT_EVD_RMR_BIND_FLAG)) != 0);
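
	/*
	 * Worked example (illustrative, not from the original source): with
	 * evd_flags == DAT_EVD_DTO_FLAG | DAT_EVD_RMR_BIND_FLAG the masked
	 * value above is 0, so no producer locking is used; adding any other
	 * stream flag (e.g. DAT_EVD_CONNECTION_FLAG, DAT_EVD_CR_FLAG or
	 * DAT_EVD_ASYNC_FLAG) leaves a non-zero residue and producer locking
	 * is enabled.
	 */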
125
126 /* Before we setup any callbacks, transition state to OPEN. */
127 evd_ptr->evd_state = DAPL_EVD_STATE_OPEN;
128
129 /*
130 * we need to call cq_alloc even for connection/cr/async evds
131 * since all the allocation happens there.
132 */
133 dat_status = dapls_ib_cq_alloc(ia_ptr,
134 evd_ptr, cno_ptr, &cq_len);
135 if (dat_status != DAT_SUCCESS) {
136 goto bail;
137 }
138
139 #if 0
140 /*
141 * Current implementation of dapls_ib_setup_async_callback() does
142 * nothing and returns DAT_SUCCESS. However, it is declared to expect
143 	 * function pointers with different signatures. We leave this code
144 	 * block disabled until dapls_ib_setup_async_callback() is implemented.
145 */
146 dat_status = dapls_ib_setup_async_callback(
147 ia_ptr,
148 DAPL_ASYNC_CQ_COMPLETION,
149 (unsigned int *) evd_ptr->ib_cq_handle,
150 (ib_async_handler_t)dapl_evd_dto_callback,
151 evd_ptr);
152 if (dat_status != DAT_SUCCESS) {
153 goto bail;
154 }
155 #endif
156 /*
157 	 * cq_notify is not required here since we poll the CQ anyway
158 	 * whenever evd_wait is called.
159 * dat_status = dapls_set_cq_notify(ia_ptr, evd_ptr);
160 */
161
162 /*
163 * We now have an accurate count of events, so allocate them into
164 * the EVD
165 */
166 dat_status = dapli_evd_event_alloc(evd_ptr, cno_ptr, cq_len);
167 if (dat_status != DAT_SUCCESS) {
168 goto bail;
169 }
170
171 /* We're assuming success in the following. */
172 dapl_os_assert(dat_status == DAT_SUCCESS);
173 dapl_ia_link_evd(ia_ptr, evd_ptr);
174 *evd_ptr_ptr = evd_ptr;
175
176 bail:
177 if (dat_status != DAT_SUCCESS) {
178 if (evd_ptr) {
179 (void) dapls_evd_dealloc(evd_ptr);
180 }
181 }
182
183 return (dat_status);
184 }
185
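/*
 * Minimal usage sketch (illustrative only, not part of the original
 * module): this is roughly how a dat_evd_create()-style caller might
 * drive dapls_evd_internal_create() after validating its arguments.
 * The local variable names are assumptions.
 *
 *	DAPL_EVD	*evd_ptr;
 *	DAT_RETURN	dat_status;
 *
 *	dat_status = dapls_evd_internal_create(ia_ptr, cno_ptr,
 *	    min_qlen, evd_flags, &evd_ptr);
 *	if (dat_status != DAT_SUCCESS)
 *		return (dat_status);
 *	*evd_handle = (DAT_EVD_HANDLE)evd_ptr;
 */
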
186 /*
187 * dapls_evd_alloc
188 *
189 * alloc and initialize an EVD struct
190 *
191 * Input:
192 * ia
193 *
194 * Output:
195 * evd_ptr
196 *
197 * Returns:
198 * none
199 *
200 */
201 DAPL_EVD *
202 dapls_evd_alloc(
203 IN DAPL_IA *ia_ptr,
204 IN DAPL_CNO *cno_ptr,
205 IN DAT_EVD_FLAGS evd_flags,
206 IN DAT_COUNT qlen) /* ARGSUSED */
207 {
208 DAPL_EVD *evd_ptr;
209
210 evd_ptr = NULL;
211
212 /* Allocate EVD */
213 evd_ptr = (DAPL_EVD *)dapl_os_alloc(sizeof (DAPL_EVD));
214 if (!evd_ptr) {
215 goto bail;
216 }
217
218 /* zero the structure */
219 (void) dapl_os_memzero(evd_ptr, sizeof (DAPL_EVD));
220
221 /*
222 * initialize the header
223 */
224 evd_ptr->header.provider = ia_ptr->header.provider;
225 evd_ptr->header.magic = DAPL_MAGIC_EVD;
226 evd_ptr->header.handle_type = DAT_HANDLE_TYPE_EVD;
227 evd_ptr->header.owner_ia = ia_ptr;
228 evd_ptr->header.user_context.as_64 = 0;
229 evd_ptr->header.user_context.as_ptr = NULL;
230 dapl_llist_init_entry(&evd_ptr->header.ia_list_entry);
231 dapl_os_lock_init(&evd_ptr->header.lock);
232
233 /*
234 * Initialize the body
235 */
236 evd_ptr->evd_state = DAPL_EVD_STATE_INITIAL;
237 evd_ptr->evd_flags = evd_flags;
238 evd_ptr->evd_enabled = DAT_TRUE;
239 evd_ptr->evd_waitable = DAT_TRUE;
240 evd_ptr->evd_producer_locking_needed = 1; /* Conservative value. */
241 evd_ptr->ib_cq_handle = IB_INVALID_HANDLE;
242 evd_ptr->evd_ref_count = 0;
243 evd_ptr->catastrophic_overflow = DAT_FALSE;
244 evd_ptr->qlen = qlen;
245
246 dapl_llist_init_entry(&evd_ptr->cno_list_entry);
247 evd_ptr->completion_type = DAPL_EVD_STATE_THRESHOLD;
248 (void) dapl_os_wait_object_init(&evd_ptr->wait_object);
249
250 bail:
251 return (evd_ptr);
252 }
253
254
255 /*
256  * dapli_evd_event_alloc
257 *
258 * alloc events into an EVD.
259 *
260 * Input:
261 * evd_ptr
262 * qlen
263 *
264 * Output:
265 * NONE
266 *
267 * Returns:
268 * DAT_SUCCESS
269 * ERROR
270 *
271 */
272 DAT_RETURN
273 dapli_evd_event_alloc(
274 IN DAPL_EVD *evd_ptr,
275 IN DAPL_CNO *cno_ptr,
276 IN DAT_COUNT qlen)
277 {
278 DAT_EVENT *event_ptr;
279 DAT_COUNT i;
280 DAT_RETURN dat_status;
281
282 dat_status = DAT_SUCCESS;
283 event_ptr = NULL;
284
285 /* Allocate EVENTs */
286 event_ptr = (DAT_EVENT *) dapl_os_alloc(qlen * sizeof (DAT_EVENT));
287 if (!event_ptr) {
288 goto bail;
289 }
290 evd_ptr->events = event_ptr;
291 evd_ptr->qlen = qlen;
292
293 /* allocate free event queue */
294 dat_status = dapls_rbuf_alloc(&evd_ptr->free_event_queue, qlen);
295 if (dat_status != DAT_SUCCESS) {
296 goto bail;
297 }
298
299 /* allocate pending event queue */
300 dat_status = dapls_rbuf_alloc(&evd_ptr->pending_event_queue, qlen);
301 if (dat_status != DAT_SUCCESS) {
302 goto bail;
303 }
304
305 /* add events to free event queue */
306 for (i = 0; i < qlen; i++) {
307 dat_status = dapls_rbuf_add(&evd_ptr->free_event_queue,
308 (void *)event_ptr);
309 dapl_os_assert(dat_status == DAT_SUCCESS);
310 event_ptr++;
311 }
312 evd_ptr->cq_notified = DAT_FALSE;
313 evd_ptr->cq_notified_when = 0;
314 evd_ptr->cno_active_count = 0;
315 if (cno_ptr != NULL) {
316 dapl_os_lock(&cno_ptr->header.lock);
317 dapl_llist_add_head(&cno_ptr->evd_list_head,
318 &evd_ptr->cno_list_entry, evd_ptr);
319 /* Take a reference count on the CNO */
320 dapl_os_atomic_inc(&cno_ptr->cno_ref_count);
321 dapl_os_unlock(&cno_ptr->header.lock);
322 }
323 evd_ptr->cno_ptr = cno_ptr;
324 evd_ptr->threshold = 0;
325
326 bail:
327 return (dat_status);
328 }
329
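/*
 * Illustrative note (an assumption about the surrounding consumers, not
 * stated in this file): once allocated here, each DAT_EVENT cycles between
 * the two ring buffers.  A producer takes an event from free_event_queue
 * (dapli_evd_get_event), fills it in, and pushes it onto
 * pending_event_queue (dapli_evd_post_event); a consumer such as a
 * dat_evd_wait()/dat_evd_dequeue() path later removes it from
 * pending_event_queue and returns it to free_event_queue.
 */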
330
331 /*
332 * dapls_evd_dealloc
333 *
334 * Free the passed in EVD structure. If an error occurs, this function
335 * will clean up all of the internal data structures and report the
336 * error.
337 *
338 * Input:
339 * evd_ptr
340 *
341 * Output:
342 * none
343 *
344 * Returns:
345 * status
346 *
347 */
348 DAT_RETURN
349 dapls_evd_dealloc(
350 IN DAPL_EVD *evd_ptr)
351 {
352 DAT_RETURN dat_status;
353 DAPL_IA *ia_ptr;
354
355 dat_status = DAT_SUCCESS;
356
357 dapl_os_assert(evd_ptr->header.magic == DAPL_MAGIC_EVD);
358 dapl_os_assert(evd_ptr->evd_ref_count == 0);
359
360 /*
361 * Destroy the CQ first, to keep any more callbacks from coming
362 * up from it.
363 */
364 if (evd_ptr->ib_cq_handle != IB_INVALID_HANDLE) {
365 ia_ptr = evd_ptr->header.owner_ia;
366
367 dat_status = dapls_ib_cq_free(ia_ptr, evd_ptr);
368 if (dat_status != DAT_SUCCESS) {
369 goto bail;
370 }
371 }
372
373 /*
374 * We should now be safe to invalidate the EVD; reset the
375 * magic to prevent reuse.
376 */
377 evd_ptr->header.magic = DAPL_MAGIC_INVALID;
378
379 /* Release reference on the CNO if it exists */
380 if (evd_ptr->cno_ptr != NULL) {
381 dapl_os_lock(&evd_ptr->cno_ptr->header.lock);
382 (void) dapl_llist_remove_entry(&evd_ptr->cno_ptr->evd_list_head,
383 &evd_ptr->cno_list_entry);
384 dapl_os_atomic_dec(&evd_ptr->cno_ptr->cno_ref_count);
385 dapl_os_unlock(&evd_ptr->cno_ptr->header.lock);
386 }
387
388 /*
389 * If the ring buffer allocation failed, then the dapls_rbuf_destroy
390 * function will detect that the ring buffer's internal data (ex. base
391 * pointer) are invalid and will handle the situation appropriately
392 */
393 dapls_rbuf_destroy(&evd_ptr->free_event_queue);
394 dapls_rbuf_destroy(&evd_ptr->pending_event_queue);
395
396 if (evd_ptr->events) {
397 dapl_os_free(evd_ptr->events,
398 evd_ptr->qlen * sizeof (DAT_EVENT));
399 }
400
401 (void) dapl_os_wait_object_destroy(&evd_ptr->wait_object);
402 dapl_os_free(evd_ptr, sizeof (DAPL_EVD));
403
404 bail:
405 return (dat_status);
406 }
407
408
409 /*
410 * dapli_evd_eh_print_cqe
411 *
412 * Input:
413 * cqe
414 *
415 * Output:
416 * none
417 *
418 * Prints out a CQE for debug purposes
419 *
420 */
421
422 #ifdef DAPL_DBG /* For debugging. */
423 void
424 dapli_evd_eh_print_cqe(IN ib_work_completion_t cqe)
425 {
426 static char *optable[] = {
427 "",
428 "OP_SEND",
429 "OP_RDMA_READ",
430 "OP_RDMA_WRITE",
431 "OP_COMP_AND_SWAP",
432 "OP_FETCH_AND_ADD",
433 "OP_BIND_MW",
434 "OP_RECEIVE",
435 "OP_RECEIVE_RDMAWI",
436 0
437 };
438 DAPL_COOKIE *dto_cookie;
439
440 dto_cookie = (DAPL_COOKIE *) (uintptr_t)DAPL_GET_CQE_WRID(&cqe);
441
442 dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,
443 "\t >>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<\n");
444 dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,
445 "\t dapl_evd_dto_callback : CQE \n");
446 dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,
447 "\t\t work_req_id 0x%llx\n", DAPL_GET_CQE_WRID(&cqe));
448 dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,
449 "\t\t op_type: %s\n", optable[DAPL_GET_CQE_OPTYPE(&cqe)]);
450 if ((DAPL_GET_CQE_OPTYPE(&cqe) == OP_SEND) ||
451 (DAPL_GET_CQE_OPTYPE(&cqe) == OP_RDMA_WRITE)) {
452 dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,
453 "\t\t bytes_num %d\n", dto_cookie->val.dto.size);
454 } else {
455 dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,
456 "\t\t bytes_num %d\n", DAPL_GET_CQE_BYTESNUM(&cqe));
457 }
458 dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,
459 "\t\t status %d\n", DAPL_GET_CQE_STATUS(&cqe));
460 dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,
461 "\t >>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<\n");
462 }
463 #endif
464
465 /*
466 * Event posting code follows.
467 */
468
469 /*
470 * These next two functions (dapli_evd_get_event and dapli_evd_post_event)
471 * are a pair. They are always called together, from one of the functions
472 * at the end of this file (dapl_evd_post_*_event).
473 *
474 * Note that if producer side locking is enabled, the first one takes the
475 * EVD lock and the second releases it.
476 */
477
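/*
 * Sketch of the pattern described above (illustrative only; the concrete
 * posting helpers appear later in this file):
 *
 *	event_ptr = dapli_evd_get_event(evd_ptr);   // may take the EVD lock
 *	if (event_ptr == NULL)
 *		... post an overflow event instead ...
 *	event_ptr->event_number = <some DAT_EVENT_NUMBER>;
 *	... fill in event_ptr->event_data ...
 *	dapli_evd_post_event(evd_ptr, event_ptr);   // releases the EVD lock
 */
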
478 /*
479 * dapli_evd_get_event
480 *
481 * Get an event struct from the evd. The caller should fill in the event
482 * and call dapl_evd_post_event.
483 *
484 * If there are no events available, an overflow event is generated to the
485 * async EVD handler.
486 *
487  * If this EVD requires producer locking, a successful return implies
488 * that the lock is held.
489 *
490 * Input:
491 * evd_ptr
492 *
493 * Output:
494 * event
495 *
496 */
497
498 static DAT_EVENT *
499 dapli_evd_get_event(
500 DAPL_EVD *evd_ptr)
501 {
502 DAT_EVENT *event;
503
504 if (evd_ptr->evd_producer_locking_needed) {
505 dapl_os_lock(&evd_ptr->header.lock);
506 }
507
508 event = (DAT_EVENT *)dapls_rbuf_remove(&evd_ptr->free_event_queue);
509
510 /* Release the lock if it was taken and the call failed. */
511 if (!event && evd_ptr->evd_producer_locking_needed) {
512 dapl_os_unlock(&evd_ptr->header.lock);
513 }
514
515 return (event);
516 }
517
518 /*
519 * dapli_evd_post_event
520 *
521 * Post the <event> to the evd. If possible, invoke the evd's CNO.
522 * Otherwise post the event on the pending queue.
523 *
524 * If producer side locking is required, the EVD lock must be held upon
525 * entry to this function.
526 *
527 * Input:
528 * evd_ptr
529 * event
530 *
531 * Output:
532 * none
533 *
534 */
535
536 static void
537 dapli_evd_post_event(
538 IN DAPL_EVD *evd_ptr,
539 IN const DAT_EVENT *event_ptr)
540 {
541 DAT_RETURN dat_status;
542 DAPL_CNO *cno_to_trigger = NULL;
543
544 dapl_dbg_log(DAPL_DBG_TYPE_EVD,
545 "dapli_evd_post_event: Called with event # %x\n",
546 event_ptr->event_number);
547
548 dat_status = dapls_rbuf_add(&evd_ptr->pending_event_queue,
549 (void *)event_ptr);
550 dapl_os_assert(dat_status == DAT_SUCCESS);
551
552 dapl_os_assert(evd_ptr->evd_state == DAPL_EVD_STATE_WAITED ||
553 evd_ptr->evd_state == DAPL_EVD_STATE_OPEN);
554
555 if (evd_ptr->evd_state == DAPL_EVD_STATE_OPEN) {
556 /* No waiter. Arrange to trigger a CNO if it exists. */
557
558 if (evd_ptr->evd_enabled) {
559 cno_to_trigger = evd_ptr->cno_ptr;
560 }
561 if (evd_ptr->evd_producer_locking_needed) {
562 dapl_os_unlock(&evd_ptr->header.lock);
563 }
564 } else {
565 /*
566 * This routine gets called
567 * - In the context of the waiting thread when CQ, CM or ASYNC
568 * events need to be put on to the EVD ring buffer.
569 * - Due to a post of a software event.
570 *
571 * In the first case the waiting thread is pulling the events
572 * from various streams into the evd so there is no need to
573 * wake any thread. In the second case if the evd is in waited
574 		 * state then we need to wake up the waiting thread.
575 */
576 if (event_ptr->event_number == DAT_SOFTWARE_EVENT) {
577 /*
578 * We're in DAPL_EVD_STATE_WAITED. Take the lock if
579 * we don't have it, recheck, and signal.
580 */
581
582 if (!evd_ptr->evd_producer_locking_needed) {
583 dapl_os_lock(&evd_ptr->header.lock);
584 }
585
586 if (evd_ptr->evd_state == DAPL_EVD_STATE_WAITED) {
587 dapl_os_unlock(&evd_ptr->header.lock);
588 (void) dapls_ib_event_wakeup(evd_ptr);
589 } else {
590 dapl_os_unlock(&evd_ptr->header.lock);
591 }
592 } else {
593 if (evd_ptr->evd_producer_locking_needed) {
594 dapl_os_unlock(&evd_ptr->header.lock);
595 }
596 }
597 }
598
599 if (cno_to_trigger != NULL) {
600 dapl_cno_trigger(cno_to_trigger, evd_ptr);
601 }
602 }
603
604 /*
605 * dapli_evd_post_event_nosignal
606 *
607 * Post the <event> to the evd. Do not do any wakeup processing.
608 * This function should only be called if it is known that there are
609  * no waiters that it is appropriate to wake up on this EVD. An example
610 * of such a situation is during internal dat_evd_wait() processing.
611 *
612 * If producer side locking is required, the EVD lock must be held upon
613 * entry to this function.
614 *
615 * Input:
616 * evd_ptr
617 * event
618 *
619 * Output:
620 * none
621 *
622 */
623
624 static void
625 dapli_evd_post_event_nosignal(
626 IN DAPL_EVD *evd_ptr,
627 IN const DAT_EVENT *event_ptr)
628 {
629 DAT_RETURN dat_status;
630
631 dapl_dbg_log(DAPL_DBG_TYPE_EVD,
632 "dapli_evd_post_event: Called with event # %x\n",
633 event_ptr->event_number);
634
635 dat_status = dapls_rbuf_add(&evd_ptr->pending_event_queue,
636 (void *)event_ptr);
637 dapl_os_assert(dat_status == DAT_SUCCESS);
638
639 dapl_os_assert(evd_ptr->evd_state == DAPL_EVD_STATE_WAITED ||
640 evd_ptr->evd_state == DAPL_EVD_STATE_OPEN);
641
642 if (evd_ptr->evd_producer_locking_needed) {
643 dapl_os_unlock(&evd_ptr->header.lock);
644 }
645 }
646
647 /*
648 * dapli_evd_format_overflow_event
649 *
650 * format an overflow event for posting
651 *
652 * Input:
653 * evd_ptr
654 * event_ptr
655 *
656 * Output:
657 * none
658 *
659 */
660 static void
661 dapli_evd_format_overflow_event(
662 IN DAPL_EVD *evd_ptr,
663 OUT DAT_EVENT *event_ptr)
664 {
665 DAPL_IA *ia_ptr;
666
667 ia_ptr = evd_ptr->header.owner_ia;
668
669 event_ptr->evd_handle = (DAT_EVD_HANDLE)evd_ptr;
670 event_ptr->event_number = DAT_ASYNC_ERROR_EVD_OVERFLOW;
671 event_ptr->event_data.asynch_error_event_data.dat_handle =
672 (DAT_HANDLE)ia_ptr;
673 }
674
675 /*
676 * dapli_evd_post_overflow_event
677 *
678 * post an overflow event
679 *
680 * Input:
681 * async_evd_ptr
682 * evd_ptr
683 *
684 * Output:
685 * none
686 *
687 */
688 static void
689 dapli_evd_post_overflow_event(
690 IN DAPL_EVD *async_evd_ptr,
691 IN DAPL_EVD *overflow_evd_ptr)
692 {
693 DAT_EVENT *overflow_event;
694
695 /*
696 	 * The overflow_evd_ptr might be the same as the async EVD.
697 * In that case we've got a catastrophic overflow.
698 */
699 if (async_evd_ptr == overflow_evd_ptr) {
700 async_evd_ptr->catastrophic_overflow = DAT_TRUE;
701 async_evd_ptr->evd_state = DAPL_EVD_STATE_DEAD;
702 return;
703 }
704
705 overflow_event = dapli_evd_get_event(overflow_evd_ptr);
706 if (!overflow_event) {
707 /* this is not good */
708 overflow_evd_ptr->catastrophic_overflow = DAT_TRUE;
709 overflow_evd_ptr->evd_state = DAPL_EVD_STATE_DEAD;
710 return;
711 }
712 dapli_evd_format_overflow_event(overflow_evd_ptr, overflow_event);
713 dapli_evd_post_event(overflow_evd_ptr, overflow_event);
714 }
715
716 static DAT_EVENT *
717 dapli_evd_get_and_init_event(
718 IN DAPL_EVD *evd_ptr,
719 IN DAT_EVENT_NUMBER event_number)
720 {
721 DAT_EVENT *event_ptr;
722
723 event_ptr = dapli_evd_get_event(evd_ptr);
724 if (NULL == event_ptr) {
725 dapli_evd_post_overflow_event(
726 evd_ptr->header.owner_ia->async_error_evd, evd_ptr);
727 } else {
728 event_ptr->evd_handle = (DAT_EVD_HANDLE) evd_ptr;
729 event_ptr->event_number = event_number;
730 }
731
732 return (event_ptr);
733 }
734
735 DAT_RETURN
736 dapls_evd_post_cr_arrival_event(
737 IN DAPL_EVD *evd_ptr,
738 IN DAT_EVENT_NUMBER event_number,
739 IN DAT_SP_HANDLE sp_handle,
740 DAT_IA_ADDRESS_PTR ia_address_ptr,
741 DAT_CONN_QUAL conn_qual,
742 DAT_CR_HANDLE cr_handle)
743 {
744 DAT_EVENT *event_ptr;
745 event_ptr = dapli_evd_get_and_init_event(evd_ptr, event_number);
746 /*
747 	 * Note that the EVD lock may be held on successful return,
748 	 * to be released by dapli_evd_post_event(), if producer side
749 	 * locking is needed.
750 */
751
752 if (!event_ptr) {
753 return (DAT_INSUFFICIENT_RESOURCES | DAT_RESOURCE_MEMORY);
754 }
755
756 event_ptr->event_data.cr_arrival_event_data.sp_handle = sp_handle;
757 event_ptr->event_data.cr_arrival_event_data.local_ia_address_ptr
758 = ia_address_ptr;
759 event_ptr->event_data.cr_arrival_event_data.conn_qual = conn_qual;
760 event_ptr->event_data.cr_arrival_event_data.cr_handle = cr_handle;
761
762 dapli_evd_post_event(evd_ptr, event_ptr);
763 return (DAT_SUCCESS);
764 }
765
766
767 DAT_RETURN
768 dapls_evd_post_connection_event(
769 IN DAPL_EVD *evd_ptr,
770 IN DAT_EVENT_NUMBER event_number,
771 IN DAT_EP_HANDLE ep_handle,
772 IN DAT_COUNT private_data_size,
773 IN DAT_PVOID private_data)
774 {
775 DAT_EVENT *event_ptr;
776 event_ptr = dapli_evd_get_and_init_event(evd_ptr, event_number);
777 /*
778 	 * Note that the EVD lock may be held on successful return,
779 	 * to be released by dapli_evd_post_event(), if producer side
780 	 * locking is needed.
781 */
782
783 if (!event_ptr) {
784 return (DAT_INSUFFICIENT_RESOURCES | DAT_RESOURCE_MEMORY);
785 }
786
787 event_ptr->event_data.connect_event_data.ep_handle = ep_handle;
788 event_ptr->event_data.connect_event_data.private_data_size
789 = private_data_size;
790 event_ptr->event_data.connect_event_data.private_data = private_data;
791
792 dapli_evd_post_event(evd_ptr, event_ptr);
793 return (DAT_SUCCESS);
794 }
795
796
797 DAT_RETURN
798 dapls_evd_post_async_error_event(
799 IN DAPL_EVD *evd_ptr,
800 IN DAT_EVENT_NUMBER event_number,
801 IN DAT_IA_HANDLE ia_handle)
802 {
803 DAT_EVENT *event_ptr;
804 event_ptr = dapli_evd_get_and_init_event(evd_ptr, event_number);
805 /*
806 	 * Note that the EVD lock may be held on successful return,
807 	 * to be released by dapli_evd_post_event(), if producer side
808 	 * locking is needed.
809 */
810
811 if (!event_ptr) {
812 return (DAT_INSUFFICIENT_RESOURCES | DAT_RESOURCE_MEMORY);
813 }
814
815 event_ptr->event_data.asynch_error_event_data.dat_handle = ia_handle;
816
817 dapli_evd_post_event(evd_ptr, event_ptr);
818 return (DAT_SUCCESS);
819 }
820
821
822 DAT_RETURN
823 dapls_evd_post_software_event(
824 IN DAPL_EVD *evd_ptr,
825 IN DAT_EVENT_NUMBER event_number,
826 IN DAT_PVOID pointer)
827 {
828 DAT_EVENT *event_ptr;
829 event_ptr = dapli_evd_get_and_init_event(evd_ptr, event_number);
830 /*
831 	 * Note that the EVD lock may be held on successful return,
832 	 * to be released by dapli_evd_post_event(), if producer side
833 	 * locking is needed.
834 */
835
836 if (!event_ptr) {
837 return (DAT_QUEUE_FULL);
838 }
839
840 event_ptr->event_data.software_event_data.pointer = pointer;
841
842 dapli_evd_post_event(evd_ptr, event_ptr);
843 return (DAT_SUCCESS);
844 }
845
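/*
 * Usage sketch (illustrative only): dat_evd_post_se()-style consumer code
 * would be expected to call the helper above as
 *
 *	dat_status = dapls_evd_post_software_event(evd_ptr,
 *	    DAT_SOFTWARE_EVENT, user_pointer);
 *
 * where a DAT_QUEUE_FULL return means the EVD had no free event slots.
 * The variable names here are assumptions.
 */
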
846 void
847 dapls_evd_post_premature_events(IN DAPL_EP *ep_ptr)
848 {
849 DAPL_EVD *evd_ptr;
850 DAT_EVENT *event;
851 ib_work_completion_t *cqe;
852 uint32_t qpn;
853 int prm_idx;
854 int nevents;
855 int i;
856
857 dapls_ib_poll_premature_events(ep_ptr, &cqe, &nevents);
858 /* premature events are always recv events */
859 evd_ptr = ep_ptr->param.recv_evd_handle;
860 qpn = ep_ptr->qpn;
861
862 i = 0;
863 prm_idx = 0;
864 while (i < nevents) {
865 /*
866 * If srq_attached, premature events cannot exceed max_recv_dtos
867 */
868 dapl_os_assert(!ep_ptr->srq_attached ||
869 (prm_idx <= ((DAPL_SRQ *)ep_ptr->param.srq_handle)->
870 param.max_recv_dtos));
871
872 /*
873 * The SRQ premature event list could potentially have
874 		 * holes (i.e. free entries in the middle) or premature
875 * events for other QPs. These need to be skipped.
876 */
877 if (ep_ptr->srq_attached &&
878 (!DAPL_CQE_IS_VALID(&cqe[prm_idx]) ||
879 (DAPL_GET_CQE_QPN(&cqe[prm_idx]) != qpn))) {
880 prm_idx++;
881 continue;
882 }
883
884 dapl_dbg_log(DAPL_DBG_TYPE_DTO_COMP_ERR,
885 " Premature DTO processing\n");
886
887 #ifdef DAPL_DBG /* For debugging. */
888 dapli_evd_eh_print_cqe(cqe[i]);
889 #endif
890 /*
891 * Can use DAT_DTO_COMPLETION_EVENT because
892 * dapli_evd_cqe_to_event will overwrite.
893 */
894 event = dapli_evd_get_and_init_event(evd_ptr,
895 DAT_DTO_COMPLETION_EVENT);
896 if (event == NULL) {
897 /* We've already attempted the overflow post, return */
898 return;
899 }
900 (void) dapli_evd_cqe_to_event(evd_ptr, &cqe[i], DAT_TRUE,
901 event);
902 dapli_evd_post_event_nosignal(evd_ptr, event);
903 /*
904 * For SRQ attached QPs recycle the premature event
905 */
906 if (ep_ptr->srq_attached) {
907 dapls_ib_free_premature_events(ep_ptr, prm_idx);
908 prm_idx++;
909 }
910 i++;
911 }
912 }
913
914 /*
915 * dapli_evd_cqe_to_event
916 *
917 * Convert a CQE into an event structure.
918 *
919 * Input:
920 * evd_ptr
921 * cqe_ptr
922 *
923 * Output:
924 * event_ptr
925 *
926 * Returns:
927 * none
928 *
929 */
930 static DAT_BOOLEAN
931 dapli_evd_cqe_to_event(
932 IN DAPL_EVD *evd_ptr,
933 IN ib_work_completion_t *cqe_ptr,
934 IN DAT_BOOLEAN process_premature_events,
935 OUT DAT_EVENT *event_ptr)
936 {
937 DAPL_EP *ep_ptr;
938 DAPL_SRQ *srq_ptr;
939 DAPL_COOKIE *cookie;
940 DAT_EP_STATE ep_state;
941 ib_qp_handle_t qp;
942 ib_uint32_t ib_status;
943 ib_uint32_t ibtype;
944 int srq_enabled;
945 int dto_error = 0;
946
947
948 /*
949 * All that can be relied on if the status is bad is the status
950 * and WRID.
951 */
952 ib_status = DAPL_GET_CQE_STATUS(cqe_ptr);
953
954 cookie = (DAPL_COOKIE *)((uintptr_t)DAPL_GET_CQE_WRID(cqe_ptr));
955 dapl_os_assert((NULL != cookie));
956
957 if (cookie->queue_type == DAPL_COOKIE_QUEUE_EP) {
958 srq_enabled = 0;
959 ep_ptr = cookie->queue.ep;
960 } else {
961 srq_enabled = 1;
962 srq_ptr = cookie->queue.srq;
963 dapl_os_assert(NULL != srq_ptr);
964 dapl_os_assert(srq_ptr->header.magic == DAPL_MAGIC_SRQ);
965 ib_status = DAPL_GET_CQE_STATUS(cqe_ptr);
966 ep_ptr = dapls_ib_srq_lookup_ep(srq_ptr, cqe_ptr);
967 }
968
969 dapl_os_assert((NULL != ep_ptr));
970 dapl_os_assert((ep_ptr->header.magic == DAPL_MAGIC_EP) ||
971 (ep_ptr->header.magic == DAPL_MAGIC_EP_EXIT));
972
973 event_ptr->evd_handle = (DAT_EVD_HANDLE) evd_ptr;
974
975 /*
976 * Check if the DTO completion arrived before CONNECTION_ESTABLISHED
977 * event -
978 *
979 	 * Send DTOs can occur only if the ep state is CONNECTED/DISCONNECTED,
980 	 * so they cannot complete before the connection established event.
981 * Receive DTO can potentially complete before connection established
982 * event has been delivered to the client. In this case if the
983 * ep state is ACTIVE_CONNECTION_PENDING (active side) or
984 * COMPLETION_PENDING (passive side) the event is put in a special
985 * event queue in the qp_handle.
986 *
987 */
988 if (!process_premature_events &&
989 (cookie->type == DAPL_COOKIE_TYPE_DTO) &&
990 (ib_status == IB_COMP_ST_SUCCESS)) {
991 ep_state = ep_ptr->param.ep_state;
992 qp = ep_ptr->qp_handle;
993 if ((ep_state == DAT_EP_STATE_ACTIVE_CONNECTION_PENDING) ||
994 (ep_state == DAT_EP_STATE_COMPLETION_PENDING) ||
995 (qp->qp_num_premature_events > 0)) {
996 /*
997 * not yet ready to put the event in the evd ring
998 * buffer
999 */
1000 dapls_ib_store_premature_events(qp, cqe_ptr);
1001 return (DAT_FALSE);
1002 }
1003 }
1004
1005 switch (cookie->type) {
1006 case DAPL_COOKIE_TYPE_DTO:
1007 {
1008 DAPL_COOKIE_BUFFER *buffer;
1009
1010 if (DAPL_DTO_TYPE_RECV == cookie->val.dto.type) {
1011 if (srq_enabled) {
1012 dapl_os_atomic_dec(&srq_ptr->recv_count);
1013 buffer = &srq_ptr->recv_buffer;
1014 } else {
1015 dapl_os_atomic_dec(&ep_ptr->recv_count);
1016 buffer = &ep_ptr->recv_buffer;
1017 }
1018 } else {
1019 dapl_os_atomic_dec(&ep_ptr->req_count);
1020 buffer = &ep_ptr->req_buffer;
1021 }
1022
1023 event_ptr->event_number = DAT_DTO_COMPLETION_EVENT;
1024 event_ptr->event_data.dto_completion_event_data.ep_handle =
1025 ep_ptr;
1026 event_ptr->event_data.dto_completion_event_data.user_cookie =
1027 cookie->val.dto.cookie;
1028
1029 switch (ib_status) {
1030 case IB_COMP_ST_SUCCESS:
1031 {
1032 ibtype = DAPL_GET_CQE_OPTYPE(cqe_ptr);
1033
1034 event_ptr->event_data.dto_completion_event_data.status =
1035 DAT_DTO_SUCCESS;
1036 dapl_os_assert((ibtype == OP_SEND &&
1037 cookie->val.dto.type == DAPL_DTO_TYPE_SEND) ||
1038 (ibtype == OP_RECEIVE &&
1039 cookie->val.dto.type == DAPL_DTO_TYPE_RECV) ||
1040 (ibtype == OP_RDMA_WRITE &&
1041 cookie->val.dto.type ==
1042 DAPL_DTO_TYPE_RDMA_WRITE) ||
1043 (ibtype == OP_RDMA_READ &&
1044 cookie->val.dto.type ==
1045 DAPL_DTO_TYPE_RDMA_READ));
1046 break;
1047 }
1048 case IB_COMP_ST_LOCAL_LEN_ERR:
1049 {
1050 event_ptr->event_data.dto_completion_event_data.status =
1051 DAT_DTO_ERR_LOCAL_LENGTH;
1052 break;
1053 }
1054 case IB_COMP_ST_LOCAL_PROTECT_ERR:
1055 {
1056 event_ptr->event_data.dto_completion_event_data.status =
1057 DAT_DTO_ERR_LOCAL_PROTECTION;
1058 break;
1059 }
1060 case IB_COMP_ST_WR_FLUSHED_ERR:
1061 {
1062 event_ptr->event_data.dto_completion_event_data.status =
1063 DAT_DTO_ERR_FLUSHED;
1064 break;
1065 }
1066 case IB_COMP_ST_BAD_RESPONSE_ERR:
1067 {
1068 event_ptr->event_data.dto_completion_event_data.status =
1069 DAT_DTO_ERR_BAD_RESPONSE;
1070 break;
1071 }
1072 case IB_COMP_ST_REM_REQ_ERR:
1073 case IB_COMP_ST_REM_OP_ERR:
1074 {
1075 event_ptr->event_data.dto_completion_event_data.status =
1076 DAT_DTO_ERR_REMOTE_RESPONDER;
1077 break;
1078 }
1079 case IB_COMP_ST_REM_ACC_ERR:
1080 {
1081 event_ptr->event_data.dto_completion_event_data.status =
1082 DAT_DTO_ERR_REMOTE_ACCESS;
1083 break;
1084 }
1085 /*
1086 * Unsupported RD errors
1087 * case IB_COMP_ST_EE_STATE_ERR:
1088 * case IB_COMP_ST_EE_CTX_NO_ERR:
1089 */
1090 case IB_COMP_ST_TRANSP_COUNTER:
1091 {
1092 event_ptr->event_data.dto_completion_event_data.status =
1093 DAT_DTO_ERR_TRANSPORT;
1094 break;
1095 }
1096 case IB_COMP_ST_RNR_COUNTER:
1097 {
1098 event_ptr->event_data.dto_completion_event_data.status =
1099 DAT_DTO_ERR_RECEIVER_NOT_READY;
1100 break;
1101 }
1102 case IB_COMP_ST_MW_BIND_ERR:
1103 {
1104 event_ptr->event_data.dto_completion_event_data.status =
1105 DAT_RMR_OPERATION_FAILED;
1106 break;
1107 }
1108 case IB_COMP_ST_LOCAL_OP_ERR:
1109 {
1110 event_ptr->event_data.dto_completion_event_data.status =
1111 DAT_DTO_ERR_LOCAL_EP;
1112 break;
1113 }
1114 default:
1115 {
1116 dapl_dbg_log(DAPL_DBG_TYPE_DTO_COMP_ERR,
1117 " DTO completion ERROR: %d: op %#x\n",
1118 DAPL_GET_CQE_STATUS(cqe_ptr),
1119 DAPL_GET_CQE_OPTYPE(cqe_ptr));
1120 event_ptr->event_data.dto_completion_event_data.status =
1121 DAT_DTO_FAILURE;
1122 break;
1123 }
1124 }
1125
1126 /* Most error DTO ops result in disconnecting the EP */
1127 if ((event_ptr->event_data.dto_completion_event_data.status !=
1128 DAT_DTO_SUCCESS) &&
1129 (event_ptr->event_data.dto_completion_event_data.status !=
1130 DAT_RMR_OPERATION_FAILED)) {
1131 dto_error = 1;
1132 dapl_dbg_log(DAPL_DBG_TYPE_DTO_COMP_ERR,
1133 " DTO completion ERROR: %d: op %#x\n",
1134 DAPL_GET_CQE_STATUS(cqe_ptr),
1135 DAPL_GET_CQE_OPTYPE(cqe_ptr));
1136 }
1137
1138 if (cookie->val.dto.type == DAPL_DTO_TYPE_SEND ||
1139 cookie->val.dto.type == DAPL_DTO_TYPE_RDMA_WRITE) {
1140 /* Get size from DTO; CQE value may be off. */
1141 event_ptr->event_data.dto_completion_event_data.
1142 transfered_length = cookie->val.dto.size;
1143 } else {
1144 event_ptr->event_data.dto_completion_event_data.
1145 transfered_length = DAPL_GET_CQE_BYTESNUM(cqe_ptr);
1146 }
1147
1148 dapls_cookie_dealloc(buffer, cookie);
1149 break;
1150 }
1151
1152 case DAPL_COOKIE_TYPE_RMR:
1153 {
1154 dapl_os_atomic_dec(&ep_ptr->req_count);
1155
1156 event_ptr->event_number = DAT_RMR_BIND_COMPLETION_EVENT;
1157
1158 event_ptr->event_data.rmr_completion_event_data.rmr_handle =
1159 cookie->val.rmr.rmr;
1160 event_ptr->event_data.rmr_completion_event_data.user_cookie =
1161 cookie->val.rmr.cookie;
1162 if (ib_status == IB_COMP_ST_SUCCESS) {
1163 ibtype = DAPL_GET_CQE_OPTYPE(cqe_ptr);
1164
1165 event_ptr->event_data.rmr_completion_event_data.status =
1166 DAT_RMR_BIND_SUCCESS;
1167 dapl_os_assert(ibtype == OP_BIND_MW);
1168 } else {
1169 event_ptr->event_data.rmr_completion_event_data.status =
1170 DAT_RMR_BIND_FAILURE;
1171 dto_error = 1;
1172 }
1173
1174 dapls_cookie_dealloc(&ep_ptr->req_buffer, cookie);
1175 break;
1176 }
1177 default:
1178 {
1179 dapl_os_assert(!"Invalid Operation type");
1180 break;
1181 }
1182 }
1183
1184 /*
1185 	 * A DTO failure causes the connection to be broken.
1186 */
1187 if ((dto_error) && (ep_ptr->param.ep_state == DAT_EP_STATE_CONNECTED)) {
1188 ep_ptr->param.ep_state = DAT_EP_STATE_DISCONNECTED;
1189 /*
1190 * Disconnect at the IB level.
1191 */
1192 dapls_ib_disconnect_clean(ep_ptr, DAT_TRUE, IB_CME_CONNECTED);
1193 }
1194 	/* convert premature recv to error flush on disconnect */
1195 if (process_premature_events && (ep_ptr->param.ep_state ==
1196 DAT_EP_STATE_DISCONNECTED) && (ib_status == IB_COMP_ST_SUCCESS)) {
1197 dapl_os_assert(ibtype == OP_RECEIVE &&
1198 cookie->val.dto.type == DAPL_DTO_TYPE_RECV);
1199 event_ptr->event_data.dto_completion_event_data.status =
1200 DAT_DTO_ERR_FLUSHED;
1201 }
1202 return (DAT_TRUE);
1203 }
1204
1205 /*
1206 * dapls_evd_copy_cq
1207 *
1208 * Copy all entries on a CQ associated with the EVD onto that EVD
1209 * Up to caller to handle races, if any. Note that no EVD waiters will
1210 * be awoken by this copy.
1211 *
1212 * Input:
1213 * evd_ptr
1214 *
1215 * Output:
1216 * nevents
1217 *
1218 * Returns:
1219 * none
1220 *
1221 */
1222 void
1223 dapls_evd_copy_cq(
1224 DAPL_EVD *evd_ptr,
1225 int *nevents)
1226 {
1227 ib_work_completion_t cqe[MAX_CQES_PER_POLL];
1228 DAT_RETURN dat_status;
1229 ib_cq_handle_t cq_handle;
1230 DAT_EVENT *event;
1231 uint_t num_cqes_polled = 0;
1232 int cqe_events;
1233 int i;
1234
1235 cq_handle = evd_ptr->ib_cq_handle;
1236
1237 *nevents = 0;
1238
1239 if (cq_handle == IB_INVALID_HANDLE) {
1240 /* Nothing to do if no CQ. */
1241 return;
1242 }
1243 dat_status = DAPL_POLL(evd_ptr)(cq_handle,
1244 cqe, MAX_CQES_PER_POLL, &num_cqes_polled);
1245
1246 if (dat_status == DAT_SUCCESS) {
1247 dapl_dbg_log(DAPL_DBG_TYPE_EVD, "dapls_evd_copy_cq: %u\n",
1248 num_cqes_polled);
1249 cqe_events = 0;
1250 for (i = 0; i < num_cqes_polled; i++) {
1251 #ifdef DAPL_DBG /* For debugging. */
1252 dapli_evd_eh_print_cqe(cqe[i]);
1253 #endif
1254
1255 /*
1256 * Can use DAT_DTO_COMPLETION_EVENT because
1257 * dapli_evd_cqe_to_event will overwrite.
1258 */
1259
1260 event = dapli_evd_get_and_init_event(
1261 evd_ptr, DAT_DTO_COMPLETION_EVENT);
1262 if (event == NULL) {
1263 /*
1264 * We've already attempted the overflow post; return.
1265 */
1266 return;
1267 }
1268 if (dapli_evd_cqe_to_event(evd_ptr, &cqe[i], DAT_FALSE,
1269 event)) {
1270 dapli_evd_post_event_nosignal(evd_ptr, event);
1271 cqe_events++;
1272 } else {
1273 dapl_dbg_log(DAPL_DBG_TYPE_EVD,
1274 "dapls_evd_copy_cq: premature event\n");
1275 /*
1276 * We've deferred processing the CQE, so add
1277 * the event_ptr back to free queue
1278 */
1279 dat_status = dapls_rbuf_add(&evd_ptr->
1280 free_event_queue, (void *)event);
1281 dapl_os_assert(dat_status == DAT_SUCCESS);
1282 if (evd_ptr->evd_producer_locking_needed) {
1283 dapl_os_unlock(&evd_ptr->header.lock);
1284 }
1285 }
1286 }
1287 *nevents = cqe_events;
1288 } else if (DAT_GET_TYPE(dat_status) != DAT_QUEUE_EMPTY) {
1289 dapl_dbg_log(DAPL_DBG_TYPE_ERR,
1290 "dapls_evd_copy_cq: dapls_ib_completion_poll "
1291 "returned 0x%x\n", dat_status);
1292 dapl_os_assert(!"Bad return from dapls_ib_completion_poll");
1293 }
1294 }
1295
1296 /*
1297 * dapls_evd_copy_events
1298 *
1299 * Copy all events associated with the EVD onto that EVD
1300 *
1301 * Input:
1302 * evd_ptr
1303 * timeout
1304 *
1305 * Output:
1306 * return status
1307 *
1308 * Returns:
1309 * none
1310 *
1311 */
1312 DAT_RETURN
1313 dapls_evd_copy_events(DAPL_EVD *evd_ptr, DAT_TIMEOUT timeout)
1314 {
1315 dapl_ib_event_t evp_arr[NUM_EVENTS_PER_POLL];
1316 dapl_ib_event_t *evpp_start;
1317 dapl_ib_event_t *evpp;
1318 DAPL_IA *ia_ptr;
1319 DAT_RETURN dat_status;
1320 int waited;
1321 uint64_t curr_time;
1322 uint64_t final_time;
1323 uint64_t time_left;
1324 int events_needed = 0;
1325 int nevents = 0;
1326 int num_cqe = 0;
1327 int num_ke = 0; /* kernel events - CM or ASYNC events */
1328 int i;
1329
1330 /* rbuf count is zero on entry */
1331
1332 if (evd_ptr->evd_flags & (DAT_EVD_CONNECTION_FLAG |
1333 DAT_EVD_CR_FLAG | DAT_EVD_ASYNC_FLAG)) {
1334 if (evd_ptr->threshold <= NUM_EVENTS_PER_POLL) {
1335 evpp = evp_arr;
1336 } else {
1337 /* need to allocate on the heap */
1338 evpp = (dapl_ib_event_t *)dapl_os_alloc(
1339 evd_ptr->threshold * sizeof (dapl_ib_event_t));
1340 if (evpp == NULL) {
1341 return (DAT_INSUFFICIENT_RESOURCES);
1342 }
1343 }
1344 evpp_start = evpp;
1345 /* for evd_dequeue, check for ke before returning Q_EMPTY */
1346 if (evd_ptr->threshold == 0 && timeout == 0)
1347 evd_ptr->threshold = 1;
1348 } else {
1349 evpp = NULL;
1350 evpp_start = NULL;
1351 }
1352 ia_ptr = evd_ptr->header.owner_ia;
1353 waited = 0;
1354 dat_status = DAT_SUCCESS;
1355
1356 /* calculate various time wait elements */
1357 if (timeout == 0) {
1358 final_time = 0;
1359 time_left = 0;
1360 } else if (timeout == DAT_TIMEOUT_INFINITE) {
1361 /*
1362 		 * The real value of DAT_TIMEOUT_INFINITE is fairly small
1363 		 * (~71 minutes), so to prevent premature timeouts map it
1364 		 * to 1 year. NOTE: 64-bit integers are needed here
1365 * because 32 bits is not enough. Other types,
1366 * such as clock_t are not 64-bit, so are not
1367 * sufficient for this. Similarly, hrtime_t is
1368 * defined as a "nanosecond counter", which does not
1369 * match our need for time in microseconds, so we
1370 * just use the more general uint64_t here.
1371 */
1372 #define DAPL_ONE_YEAR_IN_USEC ((365 * 24 * 3600) * 1000000LL)
1373 curr_time = gethrtime();
1374 time_left = DAPL_ONE_YEAR_IN_USEC;
1375 final_time = curr_time + DAPL_ONE_YEAR_IN_USEC * 1000;
1376 } else {
1377 /*
1378 	 * Calculate the latest time by which the routine needs to return.
1379 	 * DAT_TIMEOUT_INFINITE is defined as ~0 but it is of type int,
1380 	 * so mask the MSB to avoid overflow.
1381 */
1382 curr_time = gethrtime();
1383 final_time = curr_time + (uint64_t)(timeout&0x7fffffff)*1000;
1384 time_left = (final_time - curr_time)/1000;
1385 }
1386
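	/*
	 * Worked example (illustrative): with timeout == 2000000 (2 seconds
	 * expressed in microseconds) and gethrtime() returning nanoseconds,
	 * final_time = curr_time + 2000000 * 1000 ns, and the initial
	 * time_left is (final_time - curr_time) / 1000 == 2000000 us.
	 */
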
1387 do {
1388 /*
1389 * If this evd has a CQ event stream check the CQs first
1390 */
1391 if (evd_ptr->evd_flags & (DAT_EVD_DTO_FLAG |
1392 DAT_EVD_RMR_BIND_FLAG)) {
1393 /*
1394 * Poll CQ for events, update the total number of CQEs
1395 * so far
1396 */
1397 nevents = 0;
1398 dapls_evd_copy_cq(evd_ptr, &nevents);
1399 num_cqe += nevents;
1400 dapl_dbg_log(DAPL_DBG_TYPE_EVD,
1401 "dapls_evd_copy_event: copy_cq num_cqe(%d)\n",
1402 num_cqe);
1403 }
1404
1405 /*
1406 * We use the dapls_rbuf_count since it includes
1407 * - CQ events pulled by dapls_evd_copy_cq
1408 * - events added by dat_evd_post_se()
1409 */
1410 events_needed = evd_ptr->threshold - num_ke -
1411 dapls_rbuf_count(&evd_ptr->pending_event_queue);
1412
1413 /*
1414 * check for pending events
1415 * note: threshold=0 implies dapl_evd_dequeue
1416 */
1417 if (events_needed < 0) {
1418 /* There are more than sufficient events */
1419 break;
1420 } else if (events_needed == 0) {
1421 /* report queue empty on dat_evd_dequeue */
1422 /* non CQ events are expected to be polled */
1423 /* by dat_evd_wait */
1424 if (evd_ptr->threshold == 0)
1425 dat_status = DAT_ERROR(DAT_QUEUE_EMPTY, 0);
1426 /*
1427 * when threshold > 0, we have sufficient events
1428 */
1429 break;
1430 } else {
1431 /*
1432 * when we reach here, this implies dat_evd_wait
1433 * return on any dto completion as
1434 * threshold > 1 will be taken as hint only
1435 */
1436 if (num_cqe)
1437 break;
1438 }
1439
1440 /* check we've already waited */
1441 if (waited > 0) {
1442 dapl_dbg_log(DAPL_DBG_TYPE_EVD,
1443 "dapls_evd_copy_event: waited[%d]\n", waited);
1444 if (dat_status != DAT_SUCCESS)
1445 break;
1446 curr_time = gethrtime();
1447 /* exit on time expired */
1448 if (curr_time >= final_time)
1449 break;
1450 time_left = (final_time - curr_time)/1000;
1451 }
1452
1453 /* check for DTO type evd's */
1454 if (evd_ptr->evd_flags & (DAT_EVD_DTO_FLAG |
1455 DAT_EVD_RMR_BIND_FLAG)) {
1456 if (events_needed == 1) {
1457 /*
1458 * Need only one event so enable cq
1459 * notification
1460 */
1461 /*
1462 * XXX: Things need to be modified here to
1463 * implement the NOTIFICATION suppression
1464 * correctly - relies on THRESHOLD flag
1465 * and UNSIGNALLED flag to be stored
1466 * in the evd.
1467 */
1468 dat_status = dapls_set_cq_notify(ia_ptr,
1469 evd_ptr);
1470 if (dat_status != DAT_SUCCESS) {
1471 dapl_dbg_log(DAPL_DBG_TYPE_EVD,
1472 "dapls_evd_copy_event:"
1473 " set_cq_notify(%d)\n", dat_status);
1474 return (dat_status);
1475 }
1476 } else if (events_needed > 1) {
1477 /*
1478 * We need multiple events so lets enable CQ for
1479 * notification on N events.
1480 * dat_status = dapls_set_cqN_notify(ia_ptr,
1481 * evd_ptr, (uint32_t)events_needed);
1482 */
1483 dat_status = dapls_set_cq_notify(ia_ptr,
1484 evd_ptr);
1485 if (dat_status != DAT_SUCCESS) {
1486 dapl_dbg_log(DAPL_DBG_TYPE_EVD,
1487 "dapls_evd_copy_event:"
1488 " set_cqN_notify:%d\n", dat_status);
1489 return (dat_status);
1490 }
1491 }
1492
1493 /*
1494 		 * Per the Tavor PRM, if completions occur after polling
1495 		 * the CQ and before arming it, the CQ handler fires
1496 		 * immediately upon arming. Hence the PRM recommends
1497 		 * that a re-poll of the CQ can be skipped as an
1498 		 * optimization.
1499 */
1500 }
1501
1502 nevents = 0;
1503
1504 /*
1505 * non-NULL evpp_start denotes either
1506 * DAT_EVD_CONNECTION_FLAG, DAT_EVD_CR_FLAG, DAT_EVD_ASYNC_FLAG
1507 * is set and thus needs to check events from kernel
1508 */
1509 if (evpp_start) {
1510 /*
1511 			 * Even if dat_status is not DAT_SUCCESS, nevents
1512 			 * could be non-zero.
1513 */
1514 dat_status = dapls_ib_event_poll(evd_ptr, time_left,
1515 (evd_ptr->threshold - (num_cqe + num_ke)), evpp,
1516 &nevents);
1517 dapl_dbg_log(DAPL_DBG_TYPE_EVD,
1518 "dapls_evd_copy_event: poll returned 0x%x(%d)\n",
1519 dat_status, nevents);
1520
1521 num_ke += nevents;
1522 evpp += nevents;
1523 } else {
1524 /* perform a timewait */
1525 dat_status = dapls_ib_event_poll(evd_ptr, time_left,
1526 0, NULL, &nevents);
1527 dapl_dbg_log(DAPL_DBG_TYPE_EVD,
1528 "dapls_evd_copy_event: poll(cq_notification) "
1529 "returned 0x%x\n", dat_status);
1530 if (DAT_GET_TYPE(dat_status) == DAT_INTERRUPTED_CALL)
1531 return (dat_status);
1532 }
1533
1534 waited++;
1535 } while (dapls_rbuf_count(&evd_ptr->pending_event_queue) + num_ke <
1536 evd_ptr->threshold);
1537
1538 /* process the cm events now */
1539 for (i = 0; i < num_ke; i++) {
1540 switch (evpp_start[i].ibe_ev_family) {
1541 case DAPL_CR_EVENTS: /* PASSIVE side events */
1542 case DAPL_PASSIVE_CONNECTION_EVENTS:
1543 dapl_dbg_log(DAPL_DBG_TYPE_EVD,
1544 "dapls_evd_copy_event: Passive side Event %d\n",
1545 evpp_start[i].ibe_ce.ibce_event);
1546 dapls_cr_callback((ib_cm_handle_t)
1547 evpp_start[i].ibe_ce.ibce_psep_cookie,
1548 evpp_start[i].ibe_ce.ibce_event,
1549 evpp_start[i].ibe_ce.ibce_priv_data_ptr, (void *)
1550 (uintptr_t)evpp_start[i].ibe_ce.ibce_cookie);
1551 break;
1552 case DAPL_ACTIVE_CONNECTION_EVENTS: /* ACTIVE side events */
1553 dapl_dbg_log(DAPL_DBG_TYPE_EVD,
1554 "dapls_evd_copy_event: Active Conn Event %d\n",
1555 evpp_start[i].ibe_ce.ibce_event);
1556 dapl_evd_connection_callback((ib_cm_handle_t)
1557 IB_INVALID_HANDLE,
1558 evpp_start[i].ibe_ce.ibce_event,
1559 evpp_start[i].ibe_ce.ibce_priv_data_ptr, (void *)
1560 (uintptr_t)evpp_start[i].ibe_ce.ibce_cookie);
1561 break;
1562 case DAPL_ASYNC_EVENTS:
1563 dapl_dbg_log(DAPL_DBG_TYPE_EVD,
1564 "dapls_evd_copy_event: Async Event %d\n",
1565 evpp_start[i].ibe_async.ibae_type);
1566 dapls_ib_async_callback(evd_ptr,
1567 ia_ptr->hca_ptr->ib_hca_handle,
1568 &(evpp_start[i].ibe_async), ia_ptr);
1569 break;
1570 default:
1571 dapl_dbg_log(DAPL_DBG_TYPE_ERR,
1572 "dapls_evd_copy_event: dapls_ib_event_poll %d "
1573 "returned 0x%x\n", i, evpp_start[i].ibe_ev_family);
1574 dapl_os_assert(!"Bad return from dapls_ib_event_poll");
1575 break;
1576 }
1577 }
1578
1579 return (dat_status);
1580 }
1581
1582 /*
1583 * dapls_evd_cq_poll_to_event
1584 *
1585 * Attempt to dequeue a single CQE from a CQ and turn it into
1586 * an event.
1587 *
1588 * Input:
1589 * evd_ptr
1590 *
1591 * Output:
1592 * event
1593 *
1594 * Returns:
1595 * Status of operation
1596 *
1597 */
1598 DAT_RETURN
1599 dapls_evd_cq_poll_to_event(IN DAPL_EVD *evd_ptr, OUT DAT_EVENT *event)
1600 {
1601 DAT_RETURN dat_status;
1602 ib_work_completion_t cur_cqe;
1603
1604 /* skip one layer of do-nothing function */
1605 dat_status = DAPL_POLL1(evd_ptr)(evd_ptr->ib_cq_handle, &cur_cqe);
1606
1607 if (dat_status == DAT_SUCCESS) {
1608 #ifdef DAPL_DBG /* For debugging. */
1609 dapli_evd_eh_print_cqe(cur_cqe);
1610 #endif
1611 (void) dapli_evd_cqe_to_event(evd_ptr, &cur_cqe, DAT_FALSE,
1612 event);
1613 }
1614
1615 return (dat_status);
1616 }
1617
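/*
 * Usage sketch for the routine above (illustrative only; the caller
 * context is an assumption):
 *
 *	DAT_EVENT	event;
 *
 *	dat_status = dapls_evd_cq_poll_to_event(evd_ptr, &event);
 *	if (dat_status == DAT_SUCCESS)
 *		... consume the filled-in event ...
 *	else if (DAT_GET_TYPE(dat_status) == DAT_QUEUE_EMPTY)
 *		... no completions were pending on the CQ ...
 */
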
1618 /*
1619 * Local variables:
1620 * c-indent-level: 4
1621 * c-basic-offset: 4
1622 * tab-width: 8
1623 * End:
1624 */
1625