1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2002-2003, Network Appliance, Inc. All rights reserved.
24 */
25
26 /*
27 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
28 * Use is subject to license terms.
29 */
30
31 /*
32 *
33 * MODULE: dapl_evd_util.c
34 *
35 * PURPOSE: Manage EVD Info structure
36 *
37 * $Id: dapl_evd_util.c,v 1.41 2003/08/20 13:18:36 sjs2 Exp $
38 */
39
40 #include <sys/time.h>
41 #include <strings.h>
42 #include "dapl_evd_util.h"
43 #include "dapl_ia_util.h"
44 #include "dapl_cno_util.h"
45 #include "dapl_ring_buffer_util.h"
46 #include "dapl_adapter_util.h"
47 #include "dapl_tavor_ibtf_impl.h"
48 #include "dapl_cookie.h"
49 #include "dapl.h"
50
51
52 #ifdef DAPL_DBG /* For debugging. */
53 static void
54 dapli_evd_eh_print_cqe(
55 IN ib_work_completion_t cqe);
56 #endif
57
58 static DAT_BOOLEAN
59 dapli_evd_cqe_to_event(
60 IN DAPL_EVD *evd_ptr,
61 IN ib_work_completion_t *cqe_ptr,
62 IN DAT_BOOLEAN process_premature_events,
63 OUT DAT_EVENT *event_ptr);
64
65 static DAT_RETURN
66 dapli_evd_event_alloc(
67 IN DAPL_EVD *evd_ptr,
68 IN DAPL_CNO *cno_ptr,
69 IN DAT_COUNT qlen);
70
71
72 /*
73 * dapls_evd_internal_create
74 *
75 * actually create the evd. this is called after all parameter checking
76 * has been performed in dapl_ep_create. it is also called from dapl_ia_open
77 * to create the default async evd.
78 *
79 * Input:
80 * ia_ptr
81 * cno_ptr
82 * qlen
83 * evd_flags
84 *
85 * Output:
86 * evd_ptr_ptr
87 *
88 * Returns:
89 * none
90 *
91 */
92
93 DAT_RETURN
dapls_evd_internal_create(DAPL_IA * ia_ptr,DAPL_CNO * cno_ptr,DAT_COUNT min_qlen,DAT_EVD_FLAGS evd_flags,DAPL_EVD ** evd_ptr_ptr)94 dapls_evd_internal_create(
95 DAPL_IA *ia_ptr,
96 DAPL_CNO *cno_ptr,
97 DAT_COUNT min_qlen,
98 DAT_EVD_FLAGS evd_flags,
99 DAPL_EVD **evd_ptr_ptr)
100 {
101 DAPL_EVD *evd_ptr;
102 DAT_COUNT cq_len;
103 DAT_RETURN dat_status;
104
105 dat_status = DAT_SUCCESS;
106 *evd_ptr_ptr = NULL;
107 cq_len = min_qlen;
108
109 evd_ptr = dapls_evd_alloc(ia_ptr,
110 cno_ptr,
111 evd_flags,
112 min_qlen);
113 if (!evd_ptr) {
114 dat_status = DAT_ERROR(DAT_INSUFFICIENT_RESOURCES,
115 DAT_RESOURCE_MEMORY);
116 goto bail;
117 }
118
119 /*
120 * If we are dealing with event streams besides a CQ event stream,
121 * be conservative and set producer side locking. Otherwise, no.
122 */
123 evd_ptr->evd_producer_locking_needed =
124 ((evd_flags & ~ (DAT_EVD_DTO_FLAG|DAT_EVD_RMR_BIND_FLAG)) != 0);
125
126 /* Before we setup any callbacks, transition state to OPEN. */
127 evd_ptr->evd_state = DAPL_EVD_STATE_OPEN;
128
129 /*
130 * we need to call cq_alloc even for connection/cr/async evds
131 * since all the allocation happens there.
132 */
133 dat_status = dapls_ib_cq_alloc(ia_ptr,
134 evd_ptr, cno_ptr, &cq_len);
135 if (dat_status != DAT_SUCCESS) {
136 goto bail;
137 }
138
139 dat_status = dapls_ib_setup_async_callback(
140 ia_ptr,
141 DAPL_ASYNC_CQ_COMPLETION,
142 (unsigned int *) evd_ptr->ib_cq_handle,
143 (ib_async_handler_t)dapl_evd_dto_callback,
144 evd_ptr);
145 if (dat_status != DAT_SUCCESS) {
146 goto bail;
147 }
148 /*
149 * cq_notify is not required since when evd_wait is called
150 * time we go and poll cq anyways.
151 * dat_status = dapls_set_cq_notify(ia_ptr, evd_ptr);
152 */
153
154 /*
155 * We now have an accurate count of events, so allocate them into
156 * the EVD
157 */
158 dat_status = dapli_evd_event_alloc(evd_ptr, cno_ptr, cq_len);
159 if (dat_status != DAT_SUCCESS) {
160 goto bail;
161 }
162
163 /* We're assuming success in the following. */
164 dapl_os_assert(dat_status == DAT_SUCCESS);
165 dapl_ia_link_evd(ia_ptr, evd_ptr);
166 *evd_ptr_ptr = evd_ptr;
167
168 bail:
169 if (dat_status != DAT_SUCCESS) {
170 if (evd_ptr) {
171 (void) dapls_evd_dealloc(evd_ptr);
172 }
173 }
174
175 return (dat_status);
176 }
177
178 /*
179 * dapls_evd_alloc
180 *
181 * alloc and initialize an EVD struct
182 *
183 * Input:
184 * ia
185 *
186 * Output:
187 * evd_ptr
188 *
189 * Returns:
190 * none
191 *
192 */
193 DAPL_EVD *
dapls_evd_alloc(IN DAPL_IA * ia_ptr,IN DAPL_CNO * cno_ptr,IN DAT_EVD_FLAGS evd_flags,IN DAT_COUNT qlen)194 dapls_evd_alloc(
195 IN DAPL_IA *ia_ptr,
196 IN DAPL_CNO *cno_ptr,
197 IN DAT_EVD_FLAGS evd_flags,
198 IN DAT_COUNT qlen) /* ARGSUSED */
199 {
200 DAPL_EVD *evd_ptr;
201
202 evd_ptr = NULL;
203
204 /* Allocate EVD */
205 evd_ptr = (DAPL_EVD *)dapl_os_alloc(sizeof (DAPL_EVD));
206 if (!evd_ptr) {
207 goto bail;
208 }
209
210 /* zero the structure */
211 (void) dapl_os_memzero(evd_ptr, sizeof (DAPL_EVD));
212
213 /*
214 * initialize the header
215 */
216 evd_ptr->header.provider = ia_ptr->header.provider;
217 evd_ptr->header.magic = DAPL_MAGIC_EVD;
218 evd_ptr->header.handle_type = DAT_HANDLE_TYPE_EVD;
219 evd_ptr->header.owner_ia = ia_ptr;
220 evd_ptr->header.user_context.as_64 = 0;
221 evd_ptr->header.user_context.as_ptr = NULL;
222 dapl_llist_init_entry(&evd_ptr->header.ia_list_entry);
223 dapl_os_lock_init(&evd_ptr->header.lock);
224
225 /*
226 * Initialize the body
227 */
228 evd_ptr->evd_state = DAPL_EVD_STATE_INITIAL;
229 evd_ptr->evd_flags = evd_flags;
230 evd_ptr->evd_enabled = DAT_TRUE;
231 evd_ptr->evd_waitable = DAT_TRUE;
232 evd_ptr->evd_producer_locking_needed = 1; /* Conservative value. */
233 evd_ptr->ib_cq_handle = IB_INVALID_HANDLE;
234 evd_ptr->evd_ref_count = 0;
235 evd_ptr->catastrophic_overflow = DAT_FALSE;
236 evd_ptr->qlen = qlen;
237
238 dapl_llist_init_entry(&evd_ptr->cno_list_entry);
239 evd_ptr->completion_type = DAPL_EVD_STATE_THRESHOLD;
240 (void) dapl_os_wait_object_init(&evd_ptr->wait_object);
241
242 bail:
243 return (evd_ptr);
244 }
245
246
247 /*
248 * dapls_evd_event_alloc
249 *
250 * alloc events into an EVD.
251 *
252 * Input:
253 * evd_ptr
254 * qlen
255 *
256 * Output:
257 * NONE
258 *
259 * Returns:
260 * DAT_SUCCESS
261 * ERROR
262 *
263 */
264 DAT_RETURN
dapli_evd_event_alloc(IN DAPL_EVD * evd_ptr,IN DAPL_CNO * cno_ptr,IN DAT_COUNT qlen)265 dapli_evd_event_alloc(
266 IN DAPL_EVD *evd_ptr,
267 IN DAPL_CNO *cno_ptr,
268 IN DAT_COUNT qlen)
269 {
270 DAT_EVENT *event_ptr;
271 DAT_COUNT i;
272 DAT_RETURN dat_status;
273
274 dat_status = DAT_SUCCESS;
275 event_ptr = NULL;
276
277 /* Allocate EVENTs */
278 event_ptr = (DAT_EVENT *) dapl_os_alloc(qlen * sizeof (DAT_EVENT));
279 if (!event_ptr) {
280 goto bail;
281 }
282 evd_ptr->events = event_ptr;
283 evd_ptr->qlen = qlen;
284
285 /* allocate free event queue */
286 dat_status = dapls_rbuf_alloc(&evd_ptr->free_event_queue, qlen);
287 if (dat_status != DAT_SUCCESS) {
288 goto bail;
289 }
290
291 /* allocate pending event queue */
292 dat_status = dapls_rbuf_alloc(&evd_ptr->pending_event_queue, qlen);
293 if (dat_status != DAT_SUCCESS) {
294 goto bail;
295 }
296
297 /* add events to free event queue */
298 for (i = 0; i < qlen; i++) {
299 dat_status = dapls_rbuf_add(&evd_ptr->free_event_queue,
300 (void *)event_ptr);
301 dapl_os_assert(dat_status == DAT_SUCCESS);
302 event_ptr++;
303 }
304 evd_ptr->cq_notified = DAT_FALSE;
305 evd_ptr->cq_notified_when = 0;
306 evd_ptr->cno_active_count = 0;
307 if (cno_ptr != NULL) {
308 dapl_os_lock(&cno_ptr->header.lock);
309 dapl_llist_add_head(&cno_ptr->evd_list_head,
310 &evd_ptr->cno_list_entry, evd_ptr);
311 /* Take a reference count on the CNO */
312 dapl_os_atomic_inc(&cno_ptr->cno_ref_count);
313 dapl_os_unlock(&cno_ptr->header.lock);
314 }
315 evd_ptr->cno_ptr = cno_ptr;
316 evd_ptr->threshold = 0;
317
318 bail:
319 return (dat_status);
320 }
321
322
323 /*
324 * dapls_evd_dealloc
325 *
326 * Free the passed in EVD structure. If an error occurs, this function
327 * will clean up all of the internal data structures and report the
328 * error.
329 *
330 * Input:
331 * evd_ptr
332 *
333 * Output:
334 * none
335 *
336 * Returns:
337 * status
338 *
339 */
340 DAT_RETURN
dapls_evd_dealloc(IN DAPL_EVD * evd_ptr)341 dapls_evd_dealloc(
342 IN DAPL_EVD *evd_ptr)
343 {
344 DAT_RETURN dat_status;
345 DAPL_IA *ia_ptr;
346
347 dat_status = DAT_SUCCESS;
348
349 dapl_os_assert(evd_ptr->header.magic == DAPL_MAGIC_EVD);
350 dapl_os_assert(evd_ptr->evd_ref_count == 0);
351
352 /*
353 * Destroy the CQ first, to keep any more callbacks from coming
354 * up from it.
355 */
356 if (evd_ptr->ib_cq_handle != IB_INVALID_HANDLE) {
357 ia_ptr = evd_ptr->header.owner_ia;
358
359 dat_status = dapls_ib_cq_free(ia_ptr, evd_ptr);
360 if (dat_status != DAT_SUCCESS) {
361 goto bail;
362 }
363 }
364
365 /*
366 * We should now be safe to invalidate the EVD; reset the
367 * magic to prevent reuse.
368 */
369 evd_ptr->header.magic = DAPL_MAGIC_INVALID;
370
371 /* Release reference on the CNO if it exists */
372 if (evd_ptr->cno_ptr != NULL) {
373 dapl_os_lock(&evd_ptr->cno_ptr->header.lock);
374 (void) dapl_llist_remove_entry(&evd_ptr->cno_ptr->evd_list_head,
375 &evd_ptr->cno_list_entry);
376 dapl_os_atomic_dec(&evd_ptr->cno_ptr->cno_ref_count);
377 dapl_os_unlock(&evd_ptr->cno_ptr->header.lock);
378 }
379
380 /*
381 * If the ring buffer allocation failed, then the dapls_rbuf_destroy
382 * function will detect that the ring buffer's internal data (ex. base
383 * pointer) are invalid and will handle the situation appropriately
384 */
385 dapls_rbuf_destroy(&evd_ptr->free_event_queue);
386 dapls_rbuf_destroy(&evd_ptr->pending_event_queue);
387
388 if (evd_ptr->events) {
389 dapl_os_free(evd_ptr->events,
390 evd_ptr->qlen * sizeof (DAT_EVENT));
391 }
392
393 (void) dapl_os_wait_object_destroy(&evd_ptr->wait_object);
394 dapl_os_free(evd_ptr, sizeof (DAPL_EVD));
395
396 bail:
397 return (dat_status);
398 }
399
400
401 /*
402 * dapli_evd_eh_print_cqe
403 *
404 * Input:
405 * cqe
406 *
407 * Output:
408 * none
409 *
410 * Prints out a CQE for debug purposes
411 *
412 */
413
414 #ifdef DAPL_DBG /* For debugging. */
415 void
dapli_evd_eh_print_cqe(IN ib_work_completion_t cqe)416 dapli_evd_eh_print_cqe(
417 IN ib_work_completion_t cqe)
418 {
419 static char *optable[] = {
420 "",
421 "OP_SEND",
422 "OP_RDMA_READ",
423 "OP_RDMA_WRITE",
424 "OP_COMP_AND_SWAP",
425 "OP_FETCH_AND_ADD",
426 "OP_BIND_MW",
427 "OP_RECEIVE",
428 "OP_RECEIVE_RDMAWI",
429 0
430 };
431 DAPL_COOKIE *dto_cookie;
432
433 dto_cookie = (DAPL_COOKIE *) (uintptr_t)DAPL_GET_CQE_WRID(&cqe);
434
435 dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,
436 "\t >>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<\n");
437 dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,
438 "\t dapl_evd_dto_callback : CQE \n");
439 dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,
440 "\t\t work_req_id 0x%llx\n", DAPL_GET_CQE_WRID(&cqe));
441 dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,
442 "\t\t op_type: %s\n", optable[DAPL_GET_CQE_OPTYPE(&cqe)]);
443 if ((DAPL_GET_CQE_OPTYPE(&cqe) == OP_SEND) ||
444 (DAPL_GET_CQE_OPTYPE(&cqe) == OP_RDMA_WRITE)) {
445 dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,
446 "\t\t bytes_num %d\n", dto_cookie->val.dto.size);
447 } else {
448 dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,
449 "\t\t bytes_num %d\n", DAPL_GET_CQE_BYTESNUM(&cqe));
450 }
451 dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,
452 "\t\t status %d\n", DAPL_GET_CQE_STATUS(&cqe));
453 dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,
454 "\t >>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<\n");
455 }
456 #endif
457
458 /*
459 * Event posting code follows.
460 */
461
462 /*
463 * These next two functions (dapli_evd_get_event and dapli_evd_post_event)
464 * are a pair. They are always called together, from one of the functions
465 * at the end of this file (dapl_evd_post_*_event).
466 *
467 * Note that if producer side locking is enabled, the first one takes the
468 * EVD lock and the second releases it.
469 */
470
471 /*
472 * dapli_evd_get_event
473 *
474 * Get an event struct from the evd. The caller should fill in the event
475 * and call dapl_evd_post_event.
476 *
477 * If there are no events available, an overflow event is generated to the
478 * async EVD handler.
479 *
480 * If this EVD required producer locking, a successful return implies
481 * that the lock is held.
482 *
483 * Input:
484 * evd_ptr
485 *
486 * Output:
487 * event
488 *
489 */
490
491 static DAT_EVENT *
dapli_evd_get_event(DAPL_EVD * evd_ptr)492 dapli_evd_get_event(
493 DAPL_EVD *evd_ptr)
494 {
495 DAT_EVENT *event;
496
497 if (evd_ptr->evd_producer_locking_needed) {
498 dapl_os_lock(&evd_ptr->header.lock);
499 }
500
501 event = (DAT_EVENT *)dapls_rbuf_remove(&evd_ptr->free_event_queue);
502
503 /* Release the lock if it was taken and the call failed. */
504 if (!event && evd_ptr->evd_producer_locking_needed) {
505 dapl_os_unlock(&evd_ptr->header.lock);
506 }
507
508 return (event);
509 }
510
511 /*
512 * dapli_evd_post_event
513 *
514 * Post the <event> to the evd. If possible, invoke the evd's CNO.
515 * Otherwise post the event on the pending queue.
516 *
517 * If producer side locking is required, the EVD lock must be held upon
518 * entry to this function.
519 *
520 * Input:
521 * evd_ptr
522 * event
523 *
524 * Output:
525 * none
526 *
527 */
528
529 static void
dapli_evd_post_event(IN DAPL_EVD * evd_ptr,IN const DAT_EVENT * event_ptr)530 dapli_evd_post_event(
531 IN DAPL_EVD *evd_ptr,
532 IN const DAT_EVENT *event_ptr)
533 {
534 DAT_RETURN dat_status;
535 DAPL_CNO *cno_to_trigger = NULL;
536
537 dapl_dbg_log(DAPL_DBG_TYPE_EVD,
538 "dapli_evd_post_event: Called with event # %x\n",
539 event_ptr->event_number);
540
541 dat_status = dapls_rbuf_add(&evd_ptr->pending_event_queue,
542 (void *)event_ptr);
543 dapl_os_assert(dat_status == DAT_SUCCESS);
544
545 dapl_os_assert(evd_ptr->evd_state == DAPL_EVD_STATE_WAITED ||
546 evd_ptr->evd_state == DAPL_EVD_STATE_OPEN);
547
548 if (evd_ptr->evd_state == DAPL_EVD_STATE_OPEN) {
549 /* No waiter. Arrange to trigger a CNO if it exists. */
550
551 if (evd_ptr->evd_enabled) {
552 cno_to_trigger = evd_ptr->cno_ptr;
553 }
554 if (evd_ptr->evd_producer_locking_needed) {
555 dapl_os_unlock(&evd_ptr->header.lock);
556 }
557 } else {
558 /*
559 * This routine gets called
560 * - In the context of the waiting thread when CQ, CM or ASYNC
561 * events need to be put on to the EVD ring buffer.
562 * - Due to a post of a software event.
563 *
564 * In the first case the waiting thread is pulling the events
565 * from various streams into the evd so there is no need to
566 * wake any thread. In the second case if the evd is in waited
567 * state then we need to wakeup the waiting thread.
568 */
569 if (event_ptr->event_number == DAT_SOFTWARE_EVENT) {
570 /*
571 * We're in DAPL_EVD_STATE_WAITED. Take the lock if
572 * we don't have it, recheck, and signal.
573 */
574
575 if (!evd_ptr->evd_producer_locking_needed) {
576 dapl_os_lock(&evd_ptr->header.lock);
577 }
578
579 if (evd_ptr->evd_state == DAPL_EVD_STATE_WAITED) {
580 dapl_os_unlock(&evd_ptr->header.lock);
581 (void) dapls_ib_event_wakeup(evd_ptr);
582 } else {
583 dapl_os_unlock(&evd_ptr->header.lock);
584 }
585 } else {
586 if (evd_ptr->evd_producer_locking_needed) {
587 dapl_os_unlock(&evd_ptr->header.lock);
588 }
589 }
590 }
591
592 if (cno_to_trigger != NULL) {
593 dapl_cno_trigger(cno_to_trigger, evd_ptr);
594 }
595 }
596
597 /*
598 * dapli_evd_post_event_nosignal
599 *
600 * Post the <event> to the evd. Do not do any wakeup processing.
601 * This function should only be called if it is known that there are
602 * no waiters that it is appropriate to wakeup on this EVD. An example
603 * of such a situation is during internal dat_evd_wait() processing.
604 *
605 * If producer side locking is required, the EVD lock must be held upon
606 * entry to this function.
607 *
608 * Input:
609 * evd_ptr
610 * event
611 *
612 * Output:
613 * none
614 *
615 */
616
617 static void
dapli_evd_post_event_nosignal(IN DAPL_EVD * evd_ptr,IN const DAT_EVENT * event_ptr)618 dapli_evd_post_event_nosignal(
619 IN DAPL_EVD *evd_ptr,
620 IN const DAT_EVENT *event_ptr)
621 {
622 DAT_RETURN dat_status;
623
624 dapl_dbg_log(DAPL_DBG_TYPE_EVD,
625 "dapli_evd_post_event: Called with event # %x\n",
626 event_ptr->event_number);
627
628 dat_status = dapls_rbuf_add(&evd_ptr->pending_event_queue,
629 (void *)event_ptr);
630 dapl_os_assert(dat_status == DAT_SUCCESS);
631
632 dapl_os_assert(evd_ptr->evd_state == DAPL_EVD_STATE_WAITED ||
633 evd_ptr->evd_state == DAPL_EVD_STATE_OPEN);
634
635 if (evd_ptr->evd_producer_locking_needed) {
636 dapl_os_unlock(&evd_ptr->header.lock);
637 }
638 }
639
640 /*
641 * dapli_evd_format_overflow_event
642 *
643 * format an overflow event for posting
644 *
645 * Input:
646 * evd_ptr
647 * event_ptr
648 *
649 * Output:
650 * none
651 *
652 */
653 static void
dapli_evd_format_overflow_event(IN DAPL_EVD * evd_ptr,OUT DAT_EVENT * event_ptr)654 dapli_evd_format_overflow_event(
655 IN DAPL_EVD *evd_ptr,
656 OUT DAT_EVENT *event_ptr)
657 {
658 DAPL_IA *ia_ptr;
659
660 ia_ptr = evd_ptr->header.owner_ia;
661
662 event_ptr->evd_handle = (DAT_EVD_HANDLE)evd_ptr;
663 event_ptr->event_number = DAT_ASYNC_ERROR_EVD_OVERFLOW;
664 event_ptr->event_data.asynch_error_event_data.dat_handle =
665 (DAT_HANDLE)ia_ptr;
666 }
667
668 /*
669 * dapli_evd_post_overflow_event
670 *
671 * post an overflow event
672 *
673 * Input:
674 * async_evd_ptr
675 * evd_ptr
676 *
677 * Output:
678 * none
679 *
680 */
681 static void
dapli_evd_post_overflow_event(IN DAPL_EVD * async_evd_ptr,IN DAPL_EVD * overflow_evd_ptr)682 dapli_evd_post_overflow_event(
683 IN DAPL_EVD *async_evd_ptr,
684 IN DAPL_EVD *overflow_evd_ptr)
685 {
686 DAT_EVENT *overflow_event;
687
688 /*
689 * The overflow_evd_ptr mght be the same as evd.
690 * In that case we've got a catastrophic overflow.
691 */
692 if (async_evd_ptr == overflow_evd_ptr) {
693 async_evd_ptr->catastrophic_overflow = DAT_TRUE;
694 async_evd_ptr->evd_state = DAPL_EVD_STATE_DEAD;
695 return;
696 }
697
698 overflow_event = dapli_evd_get_event(overflow_evd_ptr);
699 if (!overflow_event) {
700 /* this is not good */
701 overflow_evd_ptr->catastrophic_overflow = DAT_TRUE;
702 overflow_evd_ptr->evd_state = DAPL_EVD_STATE_DEAD;
703 return;
704 }
705 dapli_evd_format_overflow_event(overflow_evd_ptr, overflow_event);
706 dapli_evd_post_event(overflow_evd_ptr, overflow_event);
707 }
708
709 static DAT_EVENT *
dapli_evd_get_and_init_event(IN DAPL_EVD * evd_ptr,IN DAT_EVENT_NUMBER event_number)710 dapli_evd_get_and_init_event(
711 IN DAPL_EVD *evd_ptr,
712 IN DAT_EVENT_NUMBER event_number)
713 {
714 DAT_EVENT *event_ptr;
715
716 event_ptr = dapli_evd_get_event(evd_ptr);
717 if (NULL == event_ptr) {
718 dapli_evd_post_overflow_event(
719 evd_ptr->header.owner_ia->async_error_evd, evd_ptr);
720 } else {
721 event_ptr->evd_handle = (DAT_EVD_HANDLE) evd_ptr;
722 event_ptr->event_number = event_number;
723 }
724
725 return (event_ptr);
726 }
727
728 DAT_RETURN
dapls_evd_post_cr_arrival_event(IN DAPL_EVD * evd_ptr,IN DAT_EVENT_NUMBER event_number,IN DAT_SP_HANDLE sp_handle,DAT_IA_ADDRESS_PTR ia_address_ptr,DAT_CONN_QUAL conn_qual,DAT_CR_HANDLE cr_handle)729 dapls_evd_post_cr_arrival_event(
730 IN DAPL_EVD *evd_ptr,
731 IN DAT_EVENT_NUMBER event_number,
732 IN DAT_SP_HANDLE sp_handle,
733 DAT_IA_ADDRESS_PTR ia_address_ptr,
734 DAT_CONN_QUAL conn_qual,
735 DAT_CR_HANDLE cr_handle)
736 {
737 DAT_EVENT *event_ptr;
738 event_ptr = dapli_evd_get_and_init_event(evd_ptr, event_number);
739 /*
740 * Note event lock may be held on successful return
741 * to be released by dapli_evd_post_event(), if provider side locking
742 * is needed.
743 */
744
745 if (!event_ptr) {
746 return (DAT_INSUFFICIENT_RESOURCES | DAT_RESOURCE_MEMORY);
747 }
748
749 event_ptr->event_data.cr_arrival_event_data.sp_handle = sp_handle;
750 event_ptr->event_data.cr_arrival_event_data.local_ia_address_ptr
751 = ia_address_ptr;
752 event_ptr->event_data.cr_arrival_event_data.conn_qual = conn_qual;
753 event_ptr->event_data.cr_arrival_event_data.cr_handle = cr_handle;
754
755 dapli_evd_post_event(evd_ptr, event_ptr);
756 return (DAT_SUCCESS);
757 }
758
759
760 DAT_RETURN
dapls_evd_post_connection_event(IN DAPL_EVD * evd_ptr,IN DAT_EVENT_NUMBER event_number,IN DAT_EP_HANDLE ep_handle,IN DAT_COUNT private_data_size,IN DAT_PVOID private_data)761 dapls_evd_post_connection_event(
762 IN DAPL_EVD *evd_ptr,
763 IN DAT_EVENT_NUMBER event_number,
764 IN DAT_EP_HANDLE ep_handle,
765 IN DAT_COUNT private_data_size,
766 IN DAT_PVOID private_data)
767 {
768 DAT_EVENT *event_ptr;
769 event_ptr = dapli_evd_get_and_init_event(evd_ptr, event_number);
770 /*
771 * Note event lock may be held on successful return
772 * to be released by dapli_evd_post_event(), if provider side locking
773 * is needed.
774 */
775
776 if (!event_ptr) {
777 return (DAT_INSUFFICIENT_RESOURCES | DAT_RESOURCE_MEMORY);
778 }
779
780 event_ptr->event_data.connect_event_data.ep_handle = ep_handle;
781 event_ptr->event_data.connect_event_data.private_data_size
782 = private_data_size;
783 event_ptr->event_data.connect_event_data.private_data = private_data;
784
785 dapli_evd_post_event(evd_ptr, event_ptr);
786 return (DAT_SUCCESS);
787 }
788
789
790 DAT_RETURN
dapls_evd_post_async_error_event(IN DAPL_EVD * evd_ptr,IN DAT_EVENT_NUMBER event_number,IN DAT_IA_HANDLE ia_handle)791 dapls_evd_post_async_error_event(
792 IN DAPL_EVD *evd_ptr,
793 IN DAT_EVENT_NUMBER event_number,
794 IN DAT_IA_HANDLE ia_handle)
795 {
796 DAT_EVENT *event_ptr;
797 event_ptr = dapli_evd_get_and_init_event(evd_ptr, event_number);
798 /*
799 * Note event lock may be held on successful return
800 * to be released by dapli_evd_post_event(), if provider side locking
801 * is needed.
802 */
803
804 if (!event_ptr) {
805 return (DAT_INSUFFICIENT_RESOURCES | DAT_RESOURCE_MEMORY);
806 }
807
808 event_ptr->event_data.asynch_error_event_data.dat_handle = ia_handle;
809
810 dapli_evd_post_event(evd_ptr, event_ptr);
811 return (DAT_SUCCESS);
812 }
813
814
815 DAT_RETURN
dapls_evd_post_software_event(IN DAPL_EVD * evd_ptr,IN DAT_EVENT_NUMBER event_number,IN DAT_PVOID pointer)816 dapls_evd_post_software_event(
817 IN DAPL_EVD *evd_ptr,
818 IN DAT_EVENT_NUMBER event_number,
819 IN DAT_PVOID pointer)
820 {
821 DAT_EVENT *event_ptr;
822 event_ptr = dapli_evd_get_and_init_event(evd_ptr, event_number);
823 /*
824 * Note event lock may be held on successful return
825 * to be released by dapli_evd_post_event(), if provider side locking
826 * is needed.
827 */
828
829 if (!event_ptr) {
830 return (DAT_QUEUE_FULL);
831 }
832
833 event_ptr->event_data.software_event_data.pointer = pointer;
834
835 dapli_evd_post_event(evd_ptr, event_ptr);
836 return (DAT_SUCCESS);
837 }
838
839 void
dapls_evd_post_premature_events(IN DAPL_EP * ep_ptr)840 dapls_evd_post_premature_events(IN DAPL_EP *ep_ptr)
841 {
842 DAPL_EVD *evd_ptr;
843 DAT_EVENT *event;
844 ib_work_completion_t *cqe;
845 uint32_t qpn;
846 int prm_idx;
847 int nevents;
848 int i;
849
850 dapls_ib_poll_premature_events(ep_ptr, &cqe, &nevents);
851 /* premature events are always recv events */
852 evd_ptr = ep_ptr->param.recv_evd_handle;
853 qpn = ep_ptr->qpn;
854
855 i = 0;
856 prm_idx = 0;
857 while (i < nevents) {
858 /*
859 * If srq_attached, premature events cannot exceed max_recv_dtos
860 */
861 dapl_os_assert(!ep_ptr->srq_attached ||
862 (prm_idx <= ((DAPL_SRQ *)ep_ptr->param.srq_handle)->
863 param.max_recv_dtos));
864
865 /*
866 * The SRQ premature event list could potentially have
867 * holes (ie. free entries in the middle) or premature
868 * events for other QPs. These need to be skipped.
869 */
870 if (ep_ptr->srq_attached &&
871 (!DAPL_CQE_IS_VALID(&cqe[prm_idx]) ||
872 (DAPL_GET_CQE_QPN(&cqe[prm_idx]) != qpn))) {
873 prm_idx++;
874 continue;
875 }
876
877 dapl_dbg_log(DAPL_DBG_TYPE_DTO_COMP_ERR,
878 " Premature DTO processing\n");
879
880 #ifdef DAPL_DBG /* For debugging. */
881 dapli_evd_eh_print_cqe(cqe[i]);
882 #endif
883 /*
884 * Can use DAT_DTO_COMPLETION_EVENT because
885 * dapli_evd_cqe_to_event will overwrite.
886 */
887 event = dapli_evd_get_and_init_event(evd_ptr,
888 DAT_DTO_COMPLETION_EVENT);
889 if (event == NULL) {
890 /* We've already attempted the overflow post, return */
891 return;
892 }
893 (void) dapli_evd_cqe_to_event(evd_ptr, &cqe[i], DAT_TRUE,
894 event);
895 dapli_evd_post_event_nosignal(evd_ptr, event);
896 /*
897 * For SRQ attached QPs recycle the premature event
898 */
899 if (ep_ptr->srq_attached) {
900 dapls_ib_free_premature_events(ep_ptr, prm_idx);
901 prm_idx++;
902 }
903 i++;
904 }
905 }
906
907 /*
908 * dapli_evd_cqe_to_event
909 *
910 * Convert a CQE into an event structure.
911 *
912 * Input:
913 * evd_ptr
914 * cqe_ptr
915 *
916 * Output:
917 * event_ptr
918 *
919 * Returns:
920 * none
921 *
922 */
923 static DAT_BOOLEAN
dapli_evd_cqe_to_event(IN DAPL_EVD * evd_ptr,IN ib_work_completion_t * cqe_ptr,IN DAT_BOOLEAN process_premature_events,OUT DAT_EVENT * event_ptr)924 dapli_evd_cqe_to_event(
925 IN DAPL_EVD *evd_ptr,
926 IN ib_work_completion_t *cqe_ptr,
927 IN DAT_BOOLEAN process_premature_events,
928 OUT DAT_EVENT *event_ptr)
929 {
930 DAPL_EP *ep_ptr;
931 DAPL_SRQ *srq_ptr;
932 DAPL_COOKIE *cookie;
933 DAT_EP_STATE ep_state;
934 ib_qp_handle_t qp;
935 ib_uint32_t ib_status;
936 ib_uint32_t ibtype;
937 int srq_enabled;
938 int dto_error = 0;
939
940
941 /*
942 * All that can be relied on if the status is bad is the status
943 * and WRID.
944 */
945 ib_status = DAPL_GET_CQE_STATUS(cqe_ptr);
946
947 cookie = (DAPL_COOKIE *)((uintptr_t)DAPL_GET_CQE_WRID(cqe_ptr));
948 dapl_os_assert((NULL != cookie));
949
950 if (cookie->queue_type == DAPL_COOKIE_QUEUE_EP) {
951 srq_enabled = 0;
952 ep_ptr = cookie->queue.ep;
953 } else {
954 srq_enabled = 1;
955 srq_ptr = cookie->queue.srq;
956 dapl_os_assert(NULL != srq_ptr);
957 dapl_os_assert(srq_ptr->header.magic == DAPL_MAGIC_SRQ);
958 ib_status = DAPL_GET_CQE_STATUS(cqe_ptr);
959 ep_ptr = dapls_ib_srq_lookup_ep(srq_ptr, cqe_ptr);
960 }
961
962 dapl_os_assert((NULL != ep_ptr));
963 dapl_os_assert((ep_ptr->header.magic == DAPL_MAGIC_EP) ||
964 (ep_ptr->header.magic == DAPL_MAGIC_EP_EXIT));
965
966 event_ptr->evd_handle = (DAT_EVD_HANDLE) evd_ptr;
967
968 /*
969 * Check if the DTO completion arrived before CONNECTION_ESTABLISHED
970 * event -
971 *
972 * Send DTOs can occur only if ep state is CONNECTED/DISCONNECTED
973 * therefore it cannot occur before connection established event.
974 * Receive DTO can potentially complete before connection established
975 * event has been delivered to the client. In this case if the
976 * ep state is ACTIVE_CONNECTION_PENDING (active side) or
977 * COMPLETION_PENDING (passive side) the event is put in a special
978 * event queue in the qp_handle.
979 *
980 */
981 if (!process_premature_events &&
982 (cookie->type == DAPL_COOKIE_TYPE_DTO) &&
983 (ib_status == IB_COMP_ST_SUCCESS)) {
984 ep_state = ep_ptr->param.ep_state;
985 qp = ep_ptr->qp_handle;
986 if ((ep_state == DAT_EP_STATE_ACTIVE_CONNECTION_PENDING) ||
987 (ep_state == DAT_EP_STATE_COMPLETION_PENDING) ||
988 (qp->qp_num_premature_events > 0)) {
989 /*
990 * not yet ready to put the event in the evd ring
991 * buffer
992 */
993 dapls_ib_store_premature_events(qp, cqe_ptr);
994 return (DAT_FALSE);
995 }
996 }
997
998 switch (cookie->type) {
999 case DAPL_COOKIE_TYPE_DTO:
1000 {
1001 DAPL_COOKIE_BUFFER *buffer;
1002
1003 if (DAPL_DTO_TYPE_RECV == cookie->val.dto.type) {
1004 if (srq_enabled) {
1005 dapl_os_atomic_dec(&srq_ptr->recv_count);
1006 buffer = &srq_ptr->recv_buffer;
1007 } else {
1008 dapl_os_atomic_dec(&ep_ptr->recv_count);
1009 buffer = &ep_ptr->recv_buffer;
1010 }
1011 } else {
1012 dapl_os_atomic_dec(&ep_ptr->req_count);
1013 buffer = &ep_ptr->req_buffer;
1014 }
1015
1016 event_ptr->event_number = DAT_DTO_COMPLETION_EVENT;
1017 event_ptr->event_data.dto_completion_event_data.ep_handle =
1018 ep_ptr;
1019 event_ptr->event_data.dto_completion_event_data.user_cookie =
1020 cookie->val.dto.cookie;
1021
1022 switch (ib_status) {
1023 case IB_COMP_ST_SUCCESS:
1024 {
1025 ibtype = DAPL_GET_CQE_OPTYPE(cqe_ptr);
1026
1027 event_ptr->event_data.dto_completion_event_data.status =
1028 DAT_DTO_SUCCESS;
1029 dapl_os_assert((ibtype == OP_SEND &&
1030 cookie->val.dto.type == DAPL_DTO_TYPE_SEND) ||
1031 (ibtype == OP_RECEIVE &&
1032 cookie->val.dto.type == DAPL_DTO_TYPE_RECV) ||
1033 (ibtype == OP_RDMA_WRITE &&
1034 cookie->val.dto.type ==
1035 DAPL_DTO_TYPE_RDMA_WRITE) ||
1036 (ibtype == OP_RDMA_READ &&
1037 cookie->val.dto.type ==
1038 DAPL_DTO_TYPE_RDMA_READ));
1039 break;
1040 }
1041 case IB_COMP_ST_LOCAL_LEN_ERR:
1042 {
1043 event_ptr->event_data.dto_completion_event_data.status =
1044 DAT_DTO_ERR_LOCAL_LENGTH;
1045 break;
1046 }
1047 case IB_COMP_ST_LOCAL_PROTECT_ERR:
1048 {
1049 event_ptr->event_data.dto_completion_event_data.status =
1050 DAT_DTO_ERR_LOCAL_PROTECTION;
1051 break;
1052 }
1053 case IB_COMP_ST_WR_FLUSHED_ERR:
1054 {
1055 event_ptr->event_data.dto_completion_event_data.status =
1056 DAT_DTO_ERR_FLUSHED;
1057 break;
1058 }
1059 case IB_COMP_ST_BAD_RESPONSE_ERR:
1060 {
1061 event_ptr->event_data.dto_completion_event_data.status =
1062 DAT_DTO_ERR_BAD_RESPONSE;
1063 break;
1064 }
1065 case IB_COMP_ST_REM_REQ_ERR:
1066 case IB_COMP_ST_REM_OP_ERR:
1067 {
1068 event_ptr->event_data.dto_completion_event_data.status =
1069 DAT_DTO_ERR_REMOTE_RESPONDER;
1070 break;
1071 }
1072 case IB_COMP_ST_REM_ACC_ERR:
1073 {
1074 event_ptr->event_data.dto_completion_event_data.status =
1075 DAT_DTO_ERR_REMOTE_ACCESS;
1076 break;
1077 }
1078 /*
1079 * Unsupported RD errors
1080 * case IB_COMP_ST_EE_STATE_ERR:
1081 * case IB_COMP_ST_EE_CTX_NO_ERR:
1082 */
1083 case IB_COMP_ST_TRANSP_COUNTER:
1084 {
1085 event_ptr->event_data.dto_completion_event_data.status =
1086 DAT_DTO_ERR_TRANSPORT;
1087 break;
1088 }
1089 case IB_COMP_ST_RNR_COUNTER:
1090 {
1091 event_ptr->event_data.dto_completion_event_data.status =
1092 DAT_DTO_ERR_RECEIVER_NOT_READY;
1093 break;
1094 }
1095 case IB_COMP_ST_MW_BIND_ERR:
1096 {
1097 event_ptr->event_data.dto_completion_event_data.status =
1098 DAT_RMR_OPERATION_FAILED;
1099 break;
1100 }
1101 case IB_COMP_ST_LOCAL_OP_ERR:
1102 {
1103 event_ptr->event_data.dto_completion_event_data.status =
1104 DAT_DTO_ERR_LOCAL_EP;
1105 break;
1106 }
1107 default:
1108 {
1109 dapl_dbg_log(DAPL_DBG_TYPE_DTO_COMP_ERR,
1110 " DTO completion ERROR: %d: op %#x\n",
1111 DAPL_GET_CQE_STATUS(cqe_ptr),
1112 DAPL_GET_CQE_OPTYPE(cqe_ptr));
1113 event_ptr->event_data.dto_completion_event_data.status =
1114 DAT_DTO_FAILURE;
1115 break;
1116 }
1117 }
1118
1119 /* Most error DTO ops result in disconnecting the EP */
1120 if ((event_ptr->event_data.dto_completion_event_data.status !=
1121 DAT_DTO_SUCCESS) &&
1122 (event_ptr->event_data.dto_completion_event_data.status !=
1123 DAT_RMR_OPERATION_FAILED)) {
1124 dto_error = 1;
1125 dapl_dbg_log(DAPL_DBG_TYPE_DTO_COMP_ERR,
1126 " DTO completion ERROR: %d: op %#x\n",
1127 DAPL_GET_CQE_STATUS(cqe_ptr),
1128 DAPL_GET_CQE_OPTYPE(cqe_ptr));
1129 }
1130
1131 if (cookie->val.dto.type == DAPL_DTO_TYPE_SEND ||
1132 cookie->val.dto.type == DAPL_DTO_TYPE_RDMA_WRITE) {
1133 /* Get size from DTO; CQE value may be off. */
1134 event_ptr->event_data.dto_completion_event_data.
1135 transfered_length = cookie->val.dto.size;
1136 } else {
1137 event_ptr->event_data.dto_completion_event_data.
1138 transfered_length = DAPL_GET_CQE_BYTESNUM(cqe_ptr);
1139 }
1140
1141 dapls_cookie_dealloc(buffer, cookie);
1142 break;
1143 }
1144
1145 case DAPL_COOKIE_TYPE_RMR:
1146 {
1147 dapl_os_atomic_dec(&ep_ptr->req_count);
1148
1149 event_ptr->event_number = DAT_RMR_BIND_COMPLETION_EVENT;
1150
1151 event_ptr->event_data.rmr_completion_event_data.rmr_handle =
1152 cookie->val.rmr.rmr;
1153 event_ptr->event_data.rmr_completion_event_data.user_cookie =
1154 cookie->val.rmr.cookie;
1155 if (ib_status == IB_COMP_ST_SUCCESS) {
1156 ibtype = DAPL_GET_CQE_OPTYPE(cqe_ptr);
1157
1158 event_ptr->event_data.rmr_completion_event_data.status =
1159 DAT_RMR_BIND_SUCCESS;
1160 dapl_os_assert(ibtype == OP_BIND_MW);
1161 } else {
1162 event_ptr->event_data.rmr_completion_event_data.status =
1163 DAT_RMR_BIND_FAILURE;
1164 dto_error = 1;
1165 }
1166
1167 dapls_cookie_dealloc(&ep_ptr->req_buffer, cookie);
1168 break;
1169 }
1170 default:
1171 {
1172 dapl_os_assert(!"Invalid Operation type");
1173 break;
1174 }
1175 }
1176
1177 /*
1178 * A DTO failed this will cause the connection to be broken
1179 */
1180 if ((dto_error) && (ep_ptr->param.ep_state == DAT_EP_STATE_CONNECTED)) {
1181 ep_ptr->param.ep_state = DAT_EP_STATE_DISCONNECTED;
1182 /*
1183 * Disconnect at the IB level.
1184 */
1185 dapls_ib_disconnect_clean(ep_ptr, DAT_TRUE, IB_CME_CONNECTED);
1186 }
1187 /* convert premature rec to error flush on disconnect */
1188 if (process_premature_events && (ep_ptr->param.ep_state ==
1189 DAT_EP_STATE_DISCONNECTED) && (ib_status == IB_COMP_ST_SUCCESS)) {
1190 dapl_os_assert(ibtype == OP_RECEIVE &&
1191 cookie->val.dto.type == DAPL_DTO_TYPE_RECV);
1192 event_ptr->event_data.dto_completion_event_data.status =
1193 DAT_DTO_ERR_FLUSHED;
1194 }
1195 return (DAT_TRUE);
1196 }
1197
1198 /*
1199 * dapls_evd_copy_cq
1200 *
1201 * Copy all entries on a CQ associated with the EVD onto that EVD
1202 * Up to caller to handle races, if any. Note that no EVD waiters will
1203 * be awoken by this copy.
1204 *
1205 * Input:
1206 * evd_ptr
1207 *
1208 * Output:
1209 * nevents
1210 *
1211 * Returns:
1212 * none
1213 *
1214 */
1215 void
dapls_evd_copy_cq(DAPL_EVD * evd_ptr,int * nevents)1216 dapls_evd_copy_cq(
1217 DAPL_EVD *evd_ptr,
1218 int *nevents)
1219 {
1220 ib_work_completion_t cqe[MAX_CQES_PER_POLL];
1221 DAT_RETURN dat_status;
1222 ib_cq_handle_t cq_handle;
1223 DAT_EVENT *event;
1224 uint_t num_cqes_polled = 0;
1225 int cqe_events;
1226 int i;
1227
1228 cq_handle = evd_ptr->ib_cq_handle;
1229
1230 *nevents = 0;
1231
1232 if (cq_handle == IB_INVALID_HANDLE) {
1233 /* Nothing to do if no CQ. */
1234 return;
1235 }
1236 dat_status = DAPL_POLL(evd_ptr)(cq_handle,
1237 cqe, MAX_CQES_PER_POLL, &num_cqes_polled);
1238
1239 if (dat_status == DAT_SUCCESS) {
1240 dapl_dbg_log(DAPL_DBG_TYPE_EVD, "dapls_evd_copy_cq: %u\n",
1241 num_cqes_polled);
1242 cqe_events = 0;
1243 for (i = 0; i < num_cqes_polled; i++) {
1244 #ifdef DAPL_DBG /* For debugging. */
1245 dapli_evd_eh_print_cqe(cqe[i]);
1246 #endif
1247
1248 /*
1249 * Can use DAT_DTO_COMPLETION_EVENT because
1250 * dapli_evd_cqe_to_event will overwrite.
1251 */
1252
1253 event = dapli_evd_get_and_init_event(
1254 evd_ptr, DAT_DTO_COMPLETION_EVENT);
1255 if (event == NULL) {
1256 /*
1257 * We've already attempted the overflow post; return.
1258 */
1259 return;
1260 }
1261 if (dapli_evd_cqe_to_event(evd_ptr, &cqe[i], DAT_FALSE,
1262 event)) {
1263 dapli_evd_post_event_nosignal(evd_ptr, event);
1264 cqe_events++;
1265 } else {
1266 dapl_dbg_log(DAPL_DBG_TYPE_EVD,
1267 "dapls_evd_copy_cq: premature event\n");
1268 /*
1269 * We've deferred processing the CQE, so add
1270 * the event_ptr back to free queue
1271 */
1272 dat_status = dapls_rbuf_add(&evd_ptr->
1273 free_event_queue, (void *)event);
1274 dapl_os_assert(dat_status == DAT_SUCCESS);
1275 if (evd_ptr->evd_producer_locking_needed) {
1276 dapl_os_unlock(&evd_ptr->header.lock);
1277 }
1278 }
1279 }
1280 *nevents = cqe_events;
1281 } else if (DAT_GET_TYPE(dat_status) != DAT_QUEUE_EMPTY) {
1282 dapl_dbg_log(DAPL_DBG_TYPE_ERR,
1283 "dapls_evd_copy_cq: dapls_ib_completion_poll "
1284 "returned 0x%x\n", dat_status);
1285 dapl_os_assert(!"Bad return from dapls_ib_completion_poll");
1286 }
1287 }
1288
1289 /*
1290 * dapls_evd_copy_events
1291 *
1292 * Copy all events associated with the EVD onto that EVD
1293 *
1294 * Input:
1295 * evd_ptr
1296 * timeout
1297 *
1298 * Output:
1299 * return status
1300 *
1301 * Returns:
1302 * none
1303 *
1304 */
1305 DAT_RETURN
dapls_evd_copy_events(DAPL_EVD * evd_ptr,DAT_TIMEOUT timeout)1306 dapls_evd_copy_events(
1307 DAPL_EVD *evd_ptr,
1308 DAT_TIMEOUT timeout)
1309 {
1310 dapl_ib_event_t evp_arr[NUM_EVENTS_PER_POLL];
1311 dapl_ib_event_t *evpp_start;
1312 dapl_ib_event_t *evpp;
1313 DAPL_IA *ia_ptr;
1314 DAT_RETURN dat_status;
1315 int waited;
1316 uint64_t curr_time;
1317 uint64_t final_time;
1318 uint64_t time_left;
1319 int events_needed = 0;
1320 int nevents = 0;
1321 int num_cqe = 0;
1322 int num_ke = 0; /* kernel events - CM or ASYNC events */
1323 int i;
1324
1325 /* rbuf count is zero on entry */
1326
1327 if (evd_ptr->evd_flags & (DAT_EVD_CONNECTION_FLAG |
1328 DAT_EVD_CR_FLAG | DAT_EVD_ASYNC_FLAG)) {
1329 if (evd_ptr->threshold <= NUM_EVENTS_PER_POLL) {
1330 evpp = evp_arr;
1331 } else {
1332 /* need to allocate on the heap */
1333 evpp = (dapl_ib_event_t *)dapl_os_alloc(
1334 evd_ptr->threshold * sizeof (dapl_ib_event_t));
1335 if (evpp == NULL) {
1336 return (DAT_INSUFFICIENT_RESOURCES);
1337 }
1338 }
1339 evpp_start = evpp;
1340 /* for evd_dequeue, check for ke before returning Q_EMPTY */
1341 if (evd_ptr->threshold == 0 && timeout == 0)
1342 evd_ptr->threshold = 1;
1343 } else {
1344 evpp = NULL;
1345 evpp_start = NULL;
1346 }
1347 ia_ptr = evd_ptr->header.owner_ia;
1348 waited = 0;
1349 dat_status = DAT_SUCCESS;
1350
1351 /* calculate various time wait elements */
1352 if (timeout == 0) {
1353 final_time = 0;
1354 time_left = 0;
1355 } else if (timeout == DAT_TIMEOUT_INFINITE) {
1356 /*
1357 * The real value of DAT_TIMEOUT_INFINITE is fairly small
1358 * ~71 mins, to prevent premature timeouts map it to
1359 * 1 year. NOTE: 64-bit integers are needed here
1360 * because 32 bits is not enough. Other types,
1361 * such as clock_t are not 64-bit, so are not
1362 * sufficient for this. Similarly, hrtime_t is
1363 * defined as a "nanosecond counter", which does not
1364 * match our need for time in microseconds, so we
1365 * just use the more general uint64_t here.
1366 */
1367 #define DAPL_ONE_YEAR_IN_USEC ((365 * 24 * 3600) * 1000000LL)
1368 curr_time = gethrtime();
1369 time_left = DAPL_ONE_YEAR_IN_USEC;
1370 final_time = curr_time + DAPL_ONE_YEAR_IN_USEC * 1000;
1371 } else {
1372 /*
1373 * maximum time by which the routine needs to return
1374 * DAT_TIMEOUT_INFINITE is defined as ~0 but its of type int
1375 * so mask the MSB to avoid overflow
1376 */
1377 curr_time = gethrtime();
1378 final_time = curr_time + (uint64_t)(timeout&0x7fffffff)*1000;
1379 time_left = (final_time - curr_time)/1000;
1380 }
1381
1382 do {
1383 /*
1384 * If this evd has a CQ event stream check the CQs first
1385 */
1386 if (evd_ptr->evd_flags & (DAT_EVD_DTO_FLAG |
1387 DAT_EVD_RMR_BIND_FLAG)) {
1388 /*
1389 * Poll CQ for events, update the total number of CQEs
1390 * so far
1391 */
1392 nevents = 0;
1393 dapls_evd_copy_cq(evd_ptr, &nevents);
1394 num_cqe += nevents;
1395 dapl_dbg_log(DAPL_DBG_TYPE_EVD,
1396 "dapls_evd_copy_event: copy_cq num_cqe(%d)\n",
1397 num_cqe);
1398 }
1399
1400 /*
1401 * We use the dapls_rbuf_count since it includes
1402 * - CQ events pulled by dapls_evd_copy_cq
1403 * - events added by dat_evd_post_se()
1404 */
1405 events_needed = evd_ptr->threshold - num_ke -
1406 dapls_rbuf_count(&evd_ptr->pending_event_queue);
1407
1408 /*
1409 * check for pending events
1410 * note: threshold=0 implies dapl_evd_dequeue
1411 */
1412 if (events_needed < 0) {
1413 /* There are more than sufficient events */
1414 break;
1415 } else if (events_needed == 0) {
1416 /* report queue empty on dat_evd_dequeue */
1417 /* non CQ events are expected to be polled */
1418 /* by dat_evd_wait */
1419 if (evd_ptr->threshold == 0)
1420 dat_status = DAT_ERROR(DAT_QUEUE_EMPTY, 0);
1421 /*
1422 * when threshold > 0, we have sufficient events
1423 */
1424 break;
1425 } else {
1426 /*
1427 * when we reach here, this implies dat_evd_wait
1428 * return on any dto completion as
1429 * threshold > 1 will be taken as hint only
1430 */
1431 if (num_cqe)
1432 break;
1433 }
1434
1435 /* check we've already waited */
1436 if (waited > 0) {
1437 dapl_dbg_log(DAPL_DBG_TYPE_EVD,
1438 "dapls_evd_copy_event: waited[%d]\n", waited);
1439 if (dat_status != DAT_SUCCESS)
1440 break;
1441 curr_time = gethrtime();
1442 /* exit on time expired */
1443 if (curr_time >= final_time)
1444 break;
1445 time_left = (final_time - curr_time)/1000;
1446 }
1447
1448 /* check for DTO type evd's */
1449 if (evd_ptr->evd_flags & (DAT_EVD_DTO_FLAG |
1450 DAT_EVD_RMR_BIND_FLAG)) {
1451 if (events_needed == 1) {
1452 /*
1453 * Need only one event so enable cq
1454 * notification
1455 */
1456 /*
1457 * XXX: Things need to be modified here to
1458 * implement the NOTIFICATION suppression
1459 * correctly - relies on THRESHOLD flag
1460 * and UNSIGNALLED flag to be stored
1461 * in the evd.
1462 */
1463 dat_status = dapls_set_cq_notify(ia_ptr,
1464 evd_ptr);
1465 if (dat_status != DAT_SUCCESS) {
1466 dapl_dbg_log(DAPL_DBG_TYPE_EVD,
1467 "dapls_evd_copy_event:"
1468 " set_cq_notify(%d)\n", dat_status);
1469 return (dat_status);
1470 }
1471 } else if (events_needed > 1) {
1472 /*
1473 * We need multiple events so lets enable CQ for
1474 * notification on N events.
1475 * dat_status = dapls_set_cqN_notify(ia_ptr,
1476 * evd_ptr, (uint32_t)events_needed);
1477 */
1478 dat_status = dapls_set_cq_notify(ia_ptr,
1479 evd_ptr);
1480 if (dat_status != DAT_SUCCESS) {
1481 dapl_dbg_log(DAPL_DBG_TYPE_EVD,
1482 "dapls_evd_copy_event:"
1483 " set_cqN_notify:%d\n", dat_status);
1484 return (dat_status);
1485 }
1486 }
1487
1488 /*
1489 * Per Tavor PRM if completions occur after polling
1490 * the CQ and before arming it, upon arming the CQ
1491 * handler will be immediately fired. Hence it
1492 * recommends that a re-poll of the CQ can be skipped
1493 * as an optimization.
1494 */
1495 }
1496
1497 nevents = 0;
1498
1499 /*
1500 * non-NULL evpp_start denotes either
1501 * DAT_EVD_CONNECTION_FLAG, DAT_EVD_CR_FLAG, DAT_EVD_ASYNC_FLAG
1502 * is set and thus needs to check events from kernel
1503 */
1504 if (evpp_start) {
1505 /*
1506 * Even if dat_status is not DAT_SUCCESS, num_events
1507 * could be non-zero.
1508 */
1509 dat_status = dapls_ib_event_poll(evd_ptr, time_left,
1510 (evd_ptr->threshold - (num_cqe + num_ke)), evpp,
1511 &nevents);
1512 dapl_dbg_log(DAPL_DBG_TYPE_EVD,
1513 "dapls_evd_copy_event: poll returned 0x%x(%d)\n",
1514 dat_status, nevents);
1515
1516 num_ke += nevents;
1517 evpp += nevents;
1518 } else {
1519 /* perform a timewait */
1520 dat_status = dapls_ib_event_poll(evd_ptr, time_left,
1521 0, NULL, &nevents);
1522 dapl_dbg_log(DAPL_DBG_TYPE_EVD,
1523 "dapls_evd_copy_event: poll(cq_notification) "
1524 "returned 0x%x\n", dat_status);
1525 if (DAT_GET_TYPE(dat_status) == DAT_INTERRUPTED_CALL)
1526 return (dat_status);
1527 }
1528
1529 waited++;
1530 } while (dapls_rbuf_count(&evd_ptr->pending_event_queue) + num_ke <
1531 evd_ptr->threshold);
1532
1533 /* process the cm events now */
1534 for (i = 0; i < num_ke; i++) {
1535 switch (evpp_start[i].ibe_ev_family) {
1536 case DAPL_CR_EVENTS: /* PASSIVE side events */
1537 case DAPL_PASSIVE_CONNECTION_EVENTS:
1538 dapl_dbg_log(DAPL_DBG_TYPE_EVD,
1539 "dapls_evd_copy_event: Passive side Event %d\n",
1540 evpp_start[i].ibe_ce.ibce_event);
1541 dapls_cr_callback((ib_cm_handle_t)
1542 evpp_start[i].ibe_ce.ibce_psep_cookie,
1543 evpp_start[i].ibe_ce.ibce_event,
1544 evpp_start[i].ibe_ce.ibce_priv_data_ptr, (void *)
1545 (uintptr_t)evpp_start[i].ibe_ce.ibce_cookie);
1546 break;
1547 case DAPL_ACTIVE_CONNECTION_EVENTS: /* ACTIVE side events */
1548 dapl_dbg_log(DAPL_DBG_TYPE_EVD,
1549 "dapls_evd_copy_event: Active Conn Event %d\n",
1550 evpp_start[i].ibe_ce.ibce_event);
1551 dapl_evd_connection_callback((ib_cm_handle_t)
1552 IB_INVALID_HANDLE,
1553 evpp_start[i].ibe_ce.ibce_event,
1554 evpp_start[i].ibe_ce.ibce_priv_data_ptr, (void *)
1555 (uintptr_t)evpp_start[i].ibe_ce.ibce_cookie);
1556 break;
1557 case DAPL_ASYNC_EVENTS:
1558 dapl_dbg_log(DAPL_DBG_TYPE_EVD,
1559 "dapls_evd_copy_event: Async Event %d\n",
1560 evpp_start[i].ibe_async.ibae_type);
1561 dapls_ib_async_callback(evd_ptr,
1562 ia_ptr->hca_ptr->ib_hca_handle,
1563 &(evpp_start[i].ibe_async), ia_ptr);
1564 break;
1565 default:
1566 dapl_dbg_log(DAPL_DBG_TYPE_ERR,
1567 "dapls_evd_copy_event: dapls_ib_event_poll %d "
1568 "returned 0x%x\n", i, evpp_start[i].ibe_ev_family);
1569 dapl_os_assert(!"Bad return from dapls_ib_event_poll");
1570 break;
1571 }
1572 }
1573
1574 return (dat_status);
1575 }
1576
1577 /*
1578 * dapls_evd_cq_poll_to_event
1579 *
1580 * Attempt to dequeue a single CQE from a CQ and turn it into
1581 * an event.
1582 *
1583 * Input:
1584 * evd_ptr
1585 *
1586 * Output:
1587 * event
1588 *
1589 * Returns:
1590 * Status of operation
1591 *
1592 */
1593 DAT_RETURN
dapls_evd_cq_poll_to_event(IN DAPL_EVD * evd_ptr,OUT DAT_EVENT * event)1594 dapls_evd_cq_poll_to_event(
1595 IN DAPL_EVD *evd_ptr,
1596 OUT DAT_EVENT *event)
1597 {
1598 DAT_RETURN dat_status;
1599 ib_work_completion_t cur_cqe;
1600
1601 /* skip one layer of do-nothing function */
1602 dat_status = DAPL_POLL1(evd_ptr)(evd_ptr->ib_cq_handle, &cur_cqe);
1603
1604 if (dat_status == DAT_SUCCESS) {
1605 #ifdef DAPL_DBG /* For debugging. */
1606 dapli_evd_eh_print_cqe(cur_cqe);
1607 #endif
1608 (void) dapli_evd_cqe_to_event(evd_ptr, &cur_cqe, DAT_FALSE,
1609 event);
1610 }
1611
1612 return (dat_status);
1613 }
1614
1615 /*
1616 * Local variables:
1617 * c-indent-level: 4
1618 * c-basic-offset: 4
1619 * tab-width: 8
1620 * End:
1621 */
1622