/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2002-2003, Network Appliance, Inc. All rights reserved.
 */

/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 *
 * MODULE: dapl_evd_util.c
 *
 * PURPOSE: Manage EVD Info structure
 *
 * $Id: dapl_evd_util.c,v 1.41 2003/08/20 13:18:36 sjs2 Exp $
 */

#include <sys/time.h>
#include <strings.h>
#include "dapl_evd_util.h"
#include "dapl_ia_util.h"
#include "dapl_cno_util.h"
#include "dapl_ring_buffer_util.h"
#include "dapl_adapter_util.h"
#include "dapl_tavor_ibtf_impl.h"
#include "dapl_cookie.h"
#include "dapl.h"


#ifdef DAPL_DBG /* For debugging. */
static void
dapli_evd_eh_print_cqe(
    IN ib_work_completion_t cqe);
#endif

static DAT_BOOLEAN
dapli_evd_cqe_to_event(
    IN DAPL_EVD *evd_ptr,
    IN ib_work_completion_t *cqe_ptr,
    IN DAT_BOOLEAN process_premature_events,
    OUT DAT_EVENT *event_ptr);

static DAT_RETURN
dapli_evd_event_alloc(
    IN DAPL_EVD *evd_ptr,
    IN DAPL_CNO *cno_ptr,
    IN DAT_COUNT qlen);
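/*
 * Illustrative only: a minimal sketch (not part of this module) of how a
 * dat_evd_create()-style wrapper might drive dapls_evd_internal_create()
 * below.  The wrapper name and the placement of its parameter checking
 * are assumptions; only dapls_evd_internal_create() itself is real.
 */
#if 0
static DAT_RETURN
example_evd_create(DAPL_IA *ia_ptr, DAT_COUNT min_qlen,
    DAPL_CNO *cno_ptr, DAT_EVD_FLAGS evd_flags, DAT_EVD_HANDLE *evd_handle)
{
    DAPL_EVD   *evd_ptr;
    DAT_RETURN dat_status;

    /* parameter checking would go here, before the internal create */
    dat_status = dapls_evd_internal_create(ia_ptr, cno_ptr,
        min_qlen, evd_flags, &evd_ptr);
    if (dat_status != DAT_SUCCESS) {
        return (dat_status);
    }
    *evd_handle = (DAT_EVD_HANDLE)evd_ptr;
    return (DAT_SUCCESS);
}
#endif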
/*
 * dapls_evd_internal_create
 *
 * Actually create the EVD.  This is called after all parameter checking
 * has been performed in dapl_evd_create.  It is also called from
 * dapl_ia_open to create the default async EVD.
 *
 * Input:
 *	ia_ptr
 *	cno_ptr
 *	min_qlen
 *	evd_flags
 *
 * Output:
 *	evd_ptr_ptr
 *
 * Returns:
 *	DAT_SUCCESS
 *	DAT_INSUFFICIENT_RESOURCES
 *
 */
DAT_RETURN
dapls_evd_internal_create(
    DAPL_IA *ia_ptr,
    DAPL_CNO *cno_ptr,
    DAT_COUNT min_qlen,
    DAT_EVD_FLAGS evd_flags,
    DAPL_EVD **evd_ptr_ptr)
{
    DAPL_EVD   *evd_ptr;
    DAT_COUNT  cq_len;
    DAT_RETURN dat_status;

    dat_status = DAT_SUCCESS;
    *evd_ptr_ptr = NULL;
    cq_len = min_qlen;

    evd_ptr = dapls_evd_alloc(ia_ptr,
        cno_ptr,
        evd_flags,
        min_qlen);
    if (!evd_ptr) {
        dat_status = DAT_ERROR(DAT_INSUFFICIENT_RESOURCES,
            DAT_RESOURCE_MEMORY);
        goto bail;
    }

    /*
     * If we are dealing with event streams besides a CQ event stream,
     * be conservative and set producer side locking.  Otherwise, no.
     */
    evd_ptr->evd_producer_locking_needed =
        ((evd_flags & ~(DAT_EVD_DTO_FLAG|DAT_EVD_RMR_BIND_FLAG)) != 0);

    /* Before we setup any callbacks, transition state to OPEN. */
    evd_ptr->evd_state = DAPL_EVD_STATE_OPEN;

    /*
     * We need to call cq_alloc even for connection/cr/async EVDs
     * since all the allocation happens there.
     */
    dat_status = dapls_ib_cq_alloc(ia_ptr,
        evd_ptr, cno_ptr, &cq_len);
    if (dat_status != DAT_SUCCESS) {
        goto bail;
    }

#if 0
    /*
     * The current implementation of dapls_ib_setup_async_callback() does
     * nothing and returns DAT_SUCCESS.  However, it is declared to expect
     * function pointers with different signatures.  We leave this code
     * block out until dapls_ib_setup_async_callback() is implemented.
     */
    dat_status = dapls_ib_setup_async_callback(
        ia_ptr,
        DAPL_ASYNC_CQ_COMPLETION,
        (unsigned int *) evd_ptr->ib_cq_handle,
        (ib_async_handler_t)dapl_evd_dto_callback,
        evd_ptr);
    if (dat_status != DAT_SUCCESS) {
        goto bail;
    }
#endif
    /*
     * cq_notify is not required here since we poll the CQ anyway
     * whenever evd_wait is called.
     * dat_status = dapls_set_cq_notify(ia_ptr, evd_ptr);
     */

    /*
     * We now have an accurate count of events, so allocate them into
     * the EVD.
     */
    dat_status = dapli_evd_event_alloc(evd_ptr, cno_ptr, cq_len);
    if (dat_status != DAT_SUCCESS) {
        goto bail;
    }

    /* We're assuming success in the following. */
    dapl_os_assert(dat_status == DAT_SUCCESS);
    dapl_ia_link_evd(ia_ptr, evd_ptr);
    *evd_ptr_ptr = evd_ptr;

bail:
    if (dat_status != DAT_SUCCESS) {
        if (evd_ptr) {
            (void) dapls_evd_dealloc(evd_ptr);
        }
    }

    return (dat_status);
}

/*
 * dapls_evd_alloc
 *
 * Allocate and initialize an EVD struct.
 *
 * Input:
 *	ia_ptr
 *	cno_ptr
 *	evd_flags
 *	qlen
 *
 * Output:
 *	none
 *
 * Returns:
 *	pointer to the EVD, or NULL on failure
 *
 */
DAPL_EVD *
dapls_evd_alloc(
    IN DAPL_IA *ia_ptr,
    IN DAPL_CNO *cno_ptr,
    IN DAT_EVD_FLAGS evd_flags,
    IN DAT_COUNT qlen) /* ARGSUSED */
{
    DAPL_EVD *evd_ptr;

    evd_ptr = NULL;

    /* Allocate EVD */
    evd_ptr = (DAPL_EVD *)dapl_os_alloc(sizeof (DAPL_EVD));
    if (!evd_ptr) {
        goto bail;
    }

    /* zero the structure */
    (void) dapl_os_memzero(evd_ptr, sizeof (DAPL_EVD));

    /*
     * initialize the header
     */
    evd_ptr->header.provider = ia_ptr->header.provider;
    evd_ptr->header.magic = DAPL_MAGIC_EVD;
    evd_ptr->header.handle_type = DAT_HANDLE_TYPE_EVD;
    evd_ptr->header.owner_ia = ia_ptr;
    evd_ptr->header.user_context.as_64 = 0;
    evd_ptr->header.user_context.as_ptr = NULL;
    dapl_llist_init_entry(&evd_ptr->header.ia_list_entry);
    dapl_os_lock_init(&evd_ptr->header.lock);

    /*
     * Initialize the body
     */
    evd_ptr->evd_state = DAPL_EVD_STATE_INITIAL;
    evd_ptr->evd_flags = evd_flags;
    evd_ptr->evd_enabled = DAT_TRUE;
    evd_ptr->evd_waitable = DAT_TRUE;
    evd_ptr->evd_producer_locking_needed = 1; /* Conservative value. */
    evd_ptr->ib_cq_handle = IB_INVALID_HANDLE;
    evd_ptr->evd_ref_count = 0;
    evd_ptr->catastrophic_overflow = DAT_FALSE;
    evd_ptr->qlen = qlen;

    dapl_llist_init_entry(&evd_ptr->cno_list_entry);
    evd_ptr->completion_type = DAPL_EVD_STATE_THRESHOLD;
    (void) dapl_os_wait_object_init(&evd_ptr->wait_object);

bail:
    return (evd_ptr);
}
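/*
 * Illustrative only: the allocator below carves out one DAT_EVENT array
 * and tracks it with two ring buffers of pointers.  A minimal sketch of
 * the resulting invariant, which holds only at quiescence (no event
 * between a get and a post); the checking function is hypothetical and
 * assumes the dapls_rbuf_count() semantics used elsewhere in this file.
 */
#if 0
static void
example_check_event_pool(DAPL_EVD *evd_ptr)
{
    /* every event is either free or pending, never both */
    dapl_os_assert(dapls_rbuf_count(&evd_ptr->free_event_queue) +
        dapls_rbuf_count(&evd_ptr->pending_event_queue) ==
        evd_ptr->qlen);
}
#endif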
/*
 * dapli_evd_event_alloc
 *
 * Allocate events into an EVD.
 *
 * Input:
 *	evd_ptr
 *	cno_ptr
 *	qlen
 *
 * Output:
 *	none
 *
 * Returns:
 *	DAT_SUCCESS
 *	ERROR
 *
 */
DAT_RETURN
dapli_evd_event_alloc(
    IN DAPL_EVD *evd_ptr,
    IN DAPL_CNO *cno_ptr,
    IN DAT_COUNT qlen)
{
    DAT_EVENT  *event_ptr;
    DAT_COUNT  i;
    DAT_RETURN dat_status;

    dat_status = DAT_SUCCESS;
    event_ptr = NULL;

    /* Allocate EVENTs */
    event_ptr = (DAT_EVENT *)dapl_os_alloc(qlen * sizeof (DAT_EVENT));
    if (!event_ptr) {
        /* was silently returning DAT_SUCCESS on failure */
        dat_status = DAT_ERROR(DAT_INSUFFICIENT_RESOURCES,
            DAT_RESOURCE_MEMORY);
        goto bail;
    }
    evd_ptr->events = event_ptr;
    evd_ptr->qlen = qlen;

    /* allocate free event queue */
    dat_status = dapls_rbuf_alloc(&evd_ptr->free_event_queue, qlen);
    if (dat_status != DAT_SUCCESS) {
        goto bail;
    }

    /* allocate pending event queue */
    dat_status = dapls_rbuf_alloc(&evd_ptr->pending_event_queue, qlen);
    if (dat_status != DAT_SUCCESS) {
        goto bail;
    }

    /* add events to free event queue */
    for (i = 0; i < qlen; i++) {
        dat_status = dapls_rbuf_add(&evd_ptr->free_event_queue,
            (void *)event_ptr);
        dapl_os_assert(dat_status == DAT_SUCCESS);
        event_ptr++;
    }
    evd_ptr->cq_notified = DAT_FALSE;
    evd_ptr->cq_notified_when = 0;
    evd_ptr->cno_active_count = 0;
    if (cno_ptr != NULL) {
        dapl_os_lock(&cno_ptr->header.lock);
        dapl_llist_add_head(&cno_ptr->evd_list_head,
            &evd_ptr->cno_list_entry, evd_ptr);
        /* Take a reference count on the CNO */
        dapl_os_atomic_inc(&cno_ptr->cno_ref_count);
        dapl_os_unlock(&cno_ptr->header.lock);
    }
    evd_ptr->cno_ptr = cno_ptr;
    evd_ptr->threshold = 0;

bail:
    return (dat_status);
}
/*
 * dapls_evd_dealloc
 *
 * Free the passed in EVD structure.  If an error occurs, this function
 * will clean up all of the internal data structures and report the
 * error.
 *
 * Input:
 *	evd_ptr
 *
 * Output:
 *	none
 *
 * Returns:
 *	status
 *
 */
DAT_RETURN
dapls_evd_dealloc(
    IN DAPL_EVD *evd_ptr)
{
    DAT_RETURN dat_status;
    DAPL_IA    *ia_ptr;

    dat_status = DAT_SUCCESS;

    dapl_os_assert(evd_ptr->header.magic == DAPL_MAGIC_EVD);
    dapl_os_assert(evd_ptr->evd_ref_count == 0);

    /*
     * Destroy the CQ first, to keep any more callbacks from coming
     * up from it.
     */
    if (evd_ptr->ib_cq_handle != IB_INVALID_HANDLE) {
        ia_ptr = evd_ptr->header.owner_ia;

        dat_status = dapls_ib_cq_free(ia_ptr, evd_ptr);
        if (dat_status != DAT_SUCCESS) {
            goto bail;
        }
    }

    /*
     * We should now be safe to invalidate the EVD; reset the
     * magic to prevent reuse.
     */
    evd_ptr->header.magic = DAPL_MAGIC_INVALID;

    /* Release reference on the CNO if it exists */
    if (evd_ptr->cno_ptr != NULL) {
        dapl_os_lock(&evd_ptr->cno_ptr->header.lock);
        (void) dapl_llist_remove_entry(&evd_ptr->cno_ptr->evd_list_head,
            &evd_ptr->cno_list_entry);
        dapl_os_atomic_dec(&evd_ptr->cno_ptr->cno_ref_count);
        dapl_os_unlock(&evd_ptr->cno_ptr->header.lock);
    }

    /*
     * If the ring buffer allocation failed, then the dapls_rbuf_destroy
     * function will detect that the ring buffer's internal data (e.g. the
     * base pointer) is invalid and will handle the situation appropriately.
     */
    dapls_rbuf_destroy(&evd_ptr->free_event_queue);
    dapls_rbuf_destroy(&evd_ptr->pending_event_queue);

    if (evd_ptr->events) {
        dapl_os_free(evd_ptr->events,
            evd_ptr->qlen * sizeof (DAT_EVENT));
    }

    (void) dapl_os_wait_object_destroy(&evd_ptr->wait_object);
    dapl_os_free(evd_ptr, sizeof (DAPL_EVD));

bail:
    return (dat_status);
}

/*
 * dapli_evd_eh_print_cqe
 *
 * Input:
 *	cqe
 *
 * Output:
 *	none
 *
 * Prints out a CQE for debug purposes.
 *
 */
#ifdef DAPL_DBG /* For debugging. */
void
dapli_evd_eh_print_cqe(IN ib_work_completion_t cqe)
{
    static char *optable[] = {
        "",
        "OP_SEND",
        "OP_RDMA_READ",
        "OP_RDMA_WRITE",
        "OP_COMP_AND_SWAP",
        "OP_FETCH_AND_ADD",
        "OP_BIND_MW",
        "OP_RECEIVE",
        "OP_RECEIVE_RDMAWI",
        0
    };
    DAPL_COOKIE *dto_cookie;

    dto_cookie = (DAPL_COOKIE *)(uintptr_t)DAPL_GET_CQE_WRID(&cqe);

    dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,
        "\t >>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<\n");
    dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,
        "\t dapl_evd_dto_callback : CQE \n");
    dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,
        "\t\t work_req_id 0x%llx\n", DAPL_GET_CQE_WRID(&cqe));
    dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,
        "\t\t op_type: %s\n", optable[DAPL_GET_CQE_OPTYPE(&cqe)]);
    if ((DAPL_GET_CQE_OPTYPE(&cqe) == OP_SEND) ||
        (DAPL_GET_CQE_OPTYPE(&cqe) == OP_RDMA_WRITE)) {
        dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,
            "\t\t bytes_num %d\n", dto_cookie->val.dto.size);
    } else {
        dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,
            "\t\t bytes_num %d\n", DAPL_GET_CQE_BYTESNUM(&cqe));
    }
    dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,
        "\t\t status %d\n", DAPL_GET_CQE_STATUS(&cqe));
    dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,
        "\t >>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<\n");
}
#endif

/*
 * Event posting code follows.
 */

/*
 * These next two functions (dapli_evd_get_event and dapli_evd_post_event)
 * are a pair.  They are always called together, from one of the functions
 * at the end of this file (dapls_evd_post_*_event).
 *
 * Note that if producer side locking is enabled, the first one takes the
 * EVD lock and the second releases it.
 */

/*
 * dapli_evd_get_event
 *
 * Get an event struct from the EVD.  The caller should fill in the event
 * and call dapli_evd_post_event.
 *
 * If there are no events available, an overflow event is generated to the
 * async EVD handler.
 *
 * If this EVD requires producer locking, a successful return implies
 * that the lock is held.
 *
 * Input:
 *	evd_ptr
 *
 * Output:
 *	event
 *
 */
static DAT_EVENT *
dapli_evd_get_event(
    DAPL_EVD *evd_ptr)
{
    DAT_EVENT *event;

    if (evd_ptr->evd_producer_locking_needed) {
        dapl_os_lock(&evd_ptr->header.lock);
    }

    event = (DAT_EVENT *)dapls_rbuf_remove(&evd_ptr->free_event_queue);

    /* Release the lock if it was taken and the call failed. */
    if (!event && evd_ptr->evd_producer_locking_needed) {
        dapl_os_unlock(&evd_ptr->header.lock);
    }

    return (event);
}
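/*
 * Illustrative only: a minimal sketch of the get/post pairing described
 * above, in the shape of the dapls_evd_post_*_event functions later in
 * this file.  The wrapper name is hypothetical; the event number and
 * payload are placeholders.
 */
#if 0
static DAT_RETURN
example_post_event(DAPL_EVD *evd_ptr, DAT_PVOID pointer)
{
    DAT_EVENT *event_ptr;

    /* takes the EVD lock if producer locking is needed */
    event_ptr = dapli_evd_get_event(evd_ptr);
    if (!event_ptr) {
        return (DAT_QUEUE_FULL);
    }
    event_ptr->evd_handle = (DAT_EVD_HANDLE)evd_ptr;
    event_ptr->event_number = DAT_SOFTWARE_EVENT;
    event_ptr->event_data.software_event_data.pointer = pointer;

    /* releases the EVD lock taken by dapli_evd_get_event */
    dapli_evd_post_event(evd_ptr, event_ptr);
    return (DAT_SUCCESS);
}
#endif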
/*
 * dapli_evd_post_event
 *
 * Post the <event> to the EVD.  If possible, invoke the EVD's CNO.
 * Otherwise post the event on the pending queue.
 *
 * If producer side locking is required, the EVD lock must be held upon
 * entry to this function.
 *
 * Input:
 *	evd_ptr
 *	event
 *
 * Output:
 *	none
 *
 */
static void
dapli_evd_post_event(
    IN DAPL_EVD       *evd_ptr,
    IN const DAT_EVENT *event_ptr)
{
    DAT_RETURN dat_status;
    DAPL_CNO   *cno_to_trigger = NULL;

    dapl_dbg_log(DAPL_DBG_TYPE_EVD,
        "dapli_evd_post_event: Called with event # %x\n",
        event_ptr->event_number);

    dat_status = dapls_rbuf_add(&evd_ptr->pending_event_queue,
        (void *)event_ptr);
    dapl_os_assert(dat_status == DAT_SUCCESS);

    dapl_os_assert(evd_ptr->evd_state == DAPL_EVD_STATE_WAITED ||
        evd_ptr->evd_state == DAPL_EVD_STATE_OPEN);

    if (evd_ptr->evd_state == DAPL_EVD_STATE_OPEN) {
        /* No waiter.  Arrange to trigger a CNO if it exists. */
        if (evd_ptr->evd_enabled) {
            cno_to_trigger = evd_ptr->cno_ptr;
        }
        if (evd_ptr->evd_producer_locking_needed) {
            dapl_os_unlock(&evd_ptr->header.lock);
        }
    } else {
        /*
         * This routine gets called
         * - in the context of the waiting thread when CQ, CM or ASYNC
         *   events need to be put on to the EVD ring buffer, and
         * - due to a post of a software event.
         *
         * In the first case the waiting thread is pulling the events
         * from various streams into the EVD, so there is no need to
         * wake any thread.  In the second case, if the EVD is in the
         * waited state, then we need to wake up the waiting thread.
         */
        if (event_ptr->event_number == DAT_SOFTWARE_EVENT) {
            /*
             * We're in DAPL_EVD_STATE_WAITED.  Take the lock if
             * we don't have it, recheck, and signal.
             */
            if (!evd_ptr->evd_producer_locking_needed) {
                dapl_os_lock(&evd_ptr->header.lock);
            }

            if (evd_ptr->evd_state == DAPL_EVD_STATE_WAITED) {
                dapl_os_unlock(&evd_ptr->header.lock);
                (void) dapls_ib_event_wakeup(evd_ptr);
            } else {
                dapl_os_unlock(&evd_ptr->header.lock);
            }
        } else {
            if (evd_ptr->evd_producer_locking_needed) {
                dapl_os_unlock(&evd_ptr->header.lock);
            }
        }
    }

    if (cno_to_trigger != NULL) {
        dapl_cno_trigger(cno_to_trigger, evd_ptr);
    }
}
/*
 * dapli_evd_post_event_nosignal
 *
 * Post the <event> to the EVD.  Do not do any wakeup processing.
 * This function should only be called if it is known that there are
 * no waiters that it is appropriate to wake up on this EVD.  An example
 * of such a situation is during internal dat_evd_wait() processing.
 *
 * If producer side locking is required, the EVD lock must be held upon
 * entry to this function.
 *
 * Input:
 *	evd_ptr
 *	event
 *
 * Output:
 *	none
 *
 */
static void
dapli_evd_post_event_nosignal(
    IN DAPL_EVD        *evd_ptr,
    IN const DAT_EVENT *event_ptr)
{
    DAT_RETURN dat_status;

    dapl_dbg_log(DAPL_DBG_TYPE_EVD,
        "dapli_evd_post_event_nosignal: Called with event # %x\n",
        event_ptr->event_number);

    dat_status = dapls_rbuf_add(&evd_ptr->pending_event_queue,
        (void *)event_ptr);
    dapl_os_assert(dat_status == DAT_SUCCESS);

    dapl_os_assert(evd_ptr->evd_state == DAPL_EVD_STATE_WAITED ||
        evd_ptr->evd_state == DAPL_EVD_STATE_OPEN);

    if (evd_ptr->evd_producer_locking_needed) {
        dapl_os_unlock(&evd_ptr->header.lock);
    }
}

/*
 * dapli_evd_format_overflow_event
 *
 * Format an overflow event for posting.
 *
 * Input:
 *	evd_ptr
 *	event_ptr
 *
 * Output:
 *	none
 *
 */
static void
dapli_evd_format_overflow_event(
    IN  DAPL_EVD  *evd_ptr,
    OUT DAT_EVENT *event_ptr)
{
    DAPL_IA *ia_ptr;

    ia_ptr = evd_ptr->header.owner_ia;

    event_ptr->evd_handle = (DAT_EVD_HANDLE)evd_ptr;
    event_ptr->event_number = DAT_ASYNC_ERROR_EVD_OVERFLOW;
    event_ptr->event_data.asynch_error_event_data.dat_handle =
        (DAT_HANDLE)ia_ptr;
}

/*
 * dapli_evd_post_overflow_event
 *
 * Post an overflow event.
 *
 * Input:
 *	async_evd_ptr
 *	overflow_evd_ptr
 *
 * Output:
 *	none
 *
 */
static void
dapli_evd_post_overflow_event(
    IN DAPL_EVD *async_evd_ptr,
    IN DAPL_EVD *overflow_evd_ptr)
{
    DAT_EVENT *overflow_event;

    /*
     * The overflow_evd_ptr might be the same as the async EVD.
     * In that case we've got a catastrophic overflow.
     */
    if (async_evd_ptr == overflow_evd_ptr) {
        async_evd_ptr->catastrophic_overflow = DAT_TRUE;
        async_evd_ptr->evd_state = DAPL_EVD_STATE_DEAD;
        return;
    }

    overflow_event = dapli_evd_get_event(overflow_evd_ptr);
    if (!overflow_event) {
        /* this is not good */
        overflow_evd_ptr->catastrophic_overflow = DAT_TRUE;
        overflow_evd_ptr->evd_state = DAPL_EVD_STATE_DEAD;
        return;
    }
    dapli_evd_format_overflow_event(overflow_evd_ptr, overflow_event);
    dapli_evd_post_event(overflow_evd_ptr, overflow_event);
}

static DAT_EVENT *
dapli_evd_get_and_init_event(
    IN DAPL_EVD         *evd_ptr,
    IN DAT_EVENT_NUMBER event_number)
{
    DAT_EVENT *event_ptr;

    event_ptr = dapli_evd_get_event(evd_ptr);
    if (NULL == event_ptr) {
        dapli_evd_post_overflow_event(
            evd_ptr->header.owner_ia->async_error_evd, evd_ptr);
    } else {
        event_ptr->evd_handle = (DAT_EVD_HANDLE)evd_ptr;
        event_ptr->event_number = event_number;
    }

    return (event_ptr);
}
DAT_RETURN
dapls_evd_post_cr_arrival_event(
    IN DAPL_EVD         *evd_ptr,
    IN DAT_EVENT_NUMBER event_number,
    IN DAT_SP_HANDLE    sp_handle,
    DAT_IA_ADDRESS_PTR  ia_address_ptr,
    DAT_CONN_QUAL       conn_qual,
    DAT_CR_HANDLE       cr_handle)
{
    DAT_EVENT *event_ptr;
    event_ptr = dapli_evd_get_and_init_event(evd_ptr, event_number);
    /*
     * Note the event lock may be held on successful return,
     * to be released by dapli_evd_post_event(), if producer side
     * locking is needed.
     */

    if (!event_ptr) {
        return (DAT_INSUFFICIENT_RESOURCES | DAT_RESOURCE_MEMORY);
    }

    event_ptr->event_data.cr_arrival_event_data.sp_handle = sp_handle;
    event_ptr->event_data.cr_arrival_event_data.local_ia_address_ptr
        = ia_address_ptr;
    event_ptr->event_data.cr_arrival_event_data.conn_qual = conn_qual;
    event_ptr->event_data.cr_arrival_event_data.cr_handle = cr_handle;

    dapli_evd_post_event(evd_ptr, event_ptr);
    return (DAT_SUCCESS);
}


DAT_RETURN
dapls_evd_post_connection_event(
    IN DAPL_EVD         *evd_ptr,
    IN DAT_EVENT_NUMBER event_number,
    IN DAT_EP_HANDLE    ep_handle,
    IN DAT_COUNT        private_data_size,
    IN DAT_PVOID        private_data)
{
    DAT_EVENT *event_ptr;
    event_ptr = dapli_evd_get_and_init_event(evd_ptr, event_number);
    /*
     * Note the event lock may be held on successful return,
     * to be released by dapli_evd_post_event(), if producer side
     * locking is needed.
     */

    if (!event_ptr) {
        return (DAT_INSUFFICIENT_RESOURCES | DAT_RESOURCE_MEMORY);
    }

    event_ptr->event_data.connect_event_data.ep_handle = ep_handle;
    event_ptr->event_data.connect_event_data.private_data_size
        = private_data_size;
    event_ptr->event_data.connect_event_data.private_data = private_data;

    dapli_evd_post_event(evd_ptr, event_ptr);
    return (DAT_SUCCESS);
}


DAT_RETURN
dapls_evd_post_async_error_event(
    IN DAPL_EVD         *evd_ptr,
    IN DAT_EVENT_NUMBER event_number,
    IN DAT_IA_HANDLE    ia_handle)
{
    DAT_EVENT *event_ptr;
    event_ptr = dapli_evd_get_and_init_event(evd_ptr, event_number);
    /*
     * Note the event lock may be held on successful return,
     * to be released by dapli_evd_post_event(), if producer side
     * locking is needed.
     */

    if (!event_ptr) {
        return (DAT_INSUFFICIENT_RESOURCES | DAT_RESOURCE_MEMORY);
    }

    event_ptr->event_data.asynch_error_event_data.dat_handle = ia_handle;

    dapli_evd_post_event(evd_ptr, event_ptr);
    return (DAT_SUCCESS);
}


DAT_RETURN
dapls_evd_post_software_event(
    IN DAPL_EVD         *evd_ptr,
    IN DAT_EVENT_NUMBER event_number,
    IN DAT_PVOID        pointer)
{
    DAT_EVENT *event_ptr;
    event_ptr = dapli_evd_get_and_init_event(evd_ptr, event_number);
    /*
     * Note the event lock may be held on successful return,
     * to be released by dapli_evd_post_event(), if producer side
     * locking is needed.
     */

    if (!event_ptr) {
        return (DAT_QUEUE_FULL);
    }

    event_ptr->event_data.software_event_data.pointer = pointer;

    dapli_evd_post_event(evd_ptr, event_ptr);
    return (DAT_SUCCESS);
}
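/*
 * Illustrative only: dapls_evd_post_software_event() above is the
 * provider-side path behind a consumer-level dat_evd_post_se() call
 * (referenced in the comments in dapls_evd_copy_events below).  A
 * minimal usage sketch; the wrapper and its back-off policy are
 * hypothetical.
 */
#if 0
static void
example_post_user_event(DAPL_EVD *evd_ptr, void *user_context)
{
    if (dapls_evd_post_software_event(evd_ptr, DAT_SOFTWARE_EVENT,
        user_context) != DAT_SUCCESS) {
        /* the pending queue is full; the caller must back off */
    }
}
#endif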
void
dapls_evd_post_premature_events(IN DAPL_EP *ep_ptr)
{
    DAPL_EVD             *evd_ptr;
    DAT_EVENT            *event;
    ib_work_completion_t *cqe;
    uint32_t             qpn;
    int                  prm_idx;
    int                  nevents;
    int                  i;

    dapls_ib_poll_premature_events(ep_ptr, &cqe, &nevents);
    /* premature events are always recv events */
    evd_ptr = ep_ptr->param.recv_evd_handle;
    qpn = ep_ptr->qpn;

    i = 0;
    prm_idx = 0;
    while (i < nevents) {
        /*
         * If srq_attached, premature events cannot exceed
         * max_recv_dtos.
         */
        dapl_os_assert(!ep_ptr->srq_attached ||
            (prm_idx <= ((DAPL_SRQ *)ep_ptr->param.srq_handle)->
            param.max_recv_dtos));

        /*
         * The SRQ premature event list could potentially have
         * holes (i.e. free entries in the middle) or premature
         * events for other QPs.  These need to be skipped.
         */
        if (ep_ptr->srq_attached &&
            (!DAPL_CQE_IS_VALID(&cqe[prm_idx]) ||
            (DAPL_GET_CQE_QPN(&cqe[prm_idx]) != qpn))) {
            prm_idx++;
            continue;
        }

        dapl_dbg_log(DAPL_DBG_TYPE_DTO_COMP_ERR,
            " Premature DTO processing\n");

#ifdef DAPL_DBG /* For debugging. */
        dapli_evd_eh_print_cqe(cqe[i]);
#endif
        /*
         * Can use DAT_DTO_COMPLETION_EVENT because
         * dapli_evd_cqe_to_event will overwrite.
         */
        event = dapli_evd_get_and_init_event(evd_ptr,
            DAT_DTO_COMPLETION_EVENT);
        if (event == NULL) {
            /* We've already attempted the overflow post; return */
            return;
        }
        (void) dapli_evd_cqe_to_event(evd_ptr, &cqe[i], DAT_TRUE,
            event);
        dapli_evd_post_event_nosignal(evd_ptr, event);
        /*
         * For SRQ attached QPs, recycle the premature event.
         */
        if (ep_ptr->srq_attached) {
            dapls_ib_free_premature_events(ep_ptr, prm_idx);
            prm_idx++;
        }
        i++;
    }
}
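/*
 * Illustrative only: a "premature" receive completion is one that
 * arrives before the CONNECTION_ESTABLISHED event has been delivered.
 * A minimal sketch of the test that dapli_evd_cqe_to_event() below
 * applies before stashing such a CQE; the helper is hypothetical and
 * assumes the qp_handle field names used in that function.
 */
#if 0
static DAT_BOOLEAN
example_is_premature_recv(DAPL_EP *ep_ptr)
{
    DAT_EP_STATE ep_state = ep_ptr->param.ep_state;

    return (ep_state == DAT_EP_STATE_ACTIVE_CONNECTION_PENDING ||
        ep_state == DAT_EP_STATE_COMPLETION_PENDING ||
        ep_ptr->qp_handle->qp_num_premature_events > 0);
}
#endif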
/*
 * dapli_evd_cqe_to_event
 *
 * Convert a CQE into an event structure.
 *
 * Input:
 *	evd_ptr
 *	cqe_ptr
 *	process_premature_events
 *
 * Output:
 *	event_ptr
 *
 * Returns:
 *	DAT_TRUE if the CQE was converted, DAT_FALSE if it was stored
 *	away as a premature event
 *
 */
static DAT_BOOLEAN
dapli_evd_cqe_to_event(
    IN DAPL_EVD             *evd_ptr,
    IN ib_work_completion_t *cqe_ptr,
    IN DAT_BOOLEAN          process_premature_events,
    OUT DAT_EVENT           *event_ptr)
{
    DAPL_EP      *ep_ptr;
    DAPL_SRQ     *srq_ptr;
    DAPL_COOKIE  *cookie;
    DAT_EP_STATE ep_state;
    ib_qp_handle_t qp;
    ib_uint32_t  ib_status;
    ib_uint32_t  ibtype;
    int          srq_enabled;
    int          dto_error = 0;

    /*
     * All that can be relied on if the status is bad is the status
     * and WRID.
     */
    ib_status = DAPL_GET_CQE_STATUS(cqe_ptr);

    cookie = (DAPL_COOKIE *)((uintptr_t)DAPL_GET_CQE_WRID(cqe_ptr));
    dapl_os_assert((NULL != cookie));

    if (cookie->queue_type == DAPL_COOKIE_QUEUE_EP) {
        srq_enabled = 0;
        ep_ptr = cookie->queue.ep;
    } else {
        srq_enabled = 1;
        srq_ptr = cookie->queue.srq;
        dapl_os_assert(NULL != srq_ptr);
        dapl_os_assert(srq_ptr->header.magic == DAPL_MAGIC_SRQ);
        ep_ptr = dapls_ib_srq_lookup_ep(srq_ptr, cqe_ptr);
    }

    dapl_os_assert((NULL != ep_ptr));
    dapl_os_assert((ep_ptr->header.magic == DAPL_MAGIC_EP) ||
        (ep_ptr->header.magic == DAPL_MAGIC_EP_EXIT));

    event_ptr->evd_handle = (DAT_EVD_HANDLE)evd_ptr;

    /*
     * Check if the DTO completion arrived before the
     * CONNECTION_ESTABLISHED event.
     *
     * Send DTOs can occur only if the EP state is
     * CONNECTED/DISCONNECTED, so they cannot occur before the
     * connection established event.  A receive DTO can potentially
     * complete before the connection established event has been
     * delivered to the client.  In that case, if the EP state is
     * ACTIVE_CONNECTION_PENDING (active side) or COMPLETION_PENDING
     * (passive side), the event is put in a special event queue in
     * the qp_handle.
     */
    if (!process_premature_events &&
        (cookie->type == DAPL_COOKIE_TYPE_DTO) &&
        (ib_status == IB_COMP_ST_SUCCESS)) {
        ep_state = ep_ptr->param.ep_state;
        qp = ep_ptr->qp_handle;
        if ((ep_state == DAT_EP_STATE_ACTIVE_CONNECTION_PENDING) ||
            (ep_state == DAT_EP_STATE_COMPLETION_PENDING) ||
            (qp->qp_num_premature_events > 0)) {
            /*
             * Not yet ready to put the event in the EVD ring
             * buffer.
             */
            dapls_ib_store_premature_events(qp, cqe_ptr);
            return (DAT_FALSE);
        }
    }

    switch (cookie->type) {
    case DAPL_COOKIE_TYPE_DTO:
    {
        DAPL_COOKIE_BUFFER *buffer;

        if (DAPL_DTO_TYPE_RECV == cookie->val.dto.type) {
            if (srq_enabled) {
                dapl_os_atomic_dec(&srq_ptr->recv_count);
                buffer = &srq_ptr->recv_buffer;
            } else {
                dapl_os_atomic_dec(&ep_ptr->recv_count);
                buffer = &ep_ptr->recv_buffer;
            }
        } else {
            dapl_os_atomic_dec(&ep_ptr->req_count);
            buffer = &ep_ptr->req_buffer;
        }

        event_ptr->event_number = DAT_DTO_COMPLETION_EVENT;
        event_ptr->event_data.dto_completion_event_data.ep_handle =
            ep_ptr;
        event_ptr->event_data.dto_completion_event_data.user_cookie =
            cookie->val.dto.cookie;

        switch (ib_status) {
        case IB_COMP_ST_SUCCESS:
        {
            ibtype = DAPL_GET_CQE_OPTYPE(cqe_ptr);

            event_ptr->event_data.dto_completion_event_data.status =
                DAT_DTO_SUCCESS;
            dapl_os_assert((ibtype == OP_SEND &&
                cookie->val.dto.type == DAPL_DTO_TYPE_SEND) ||
                (ibtype == OP_RECEIVE &&
                cookie->val.dto.type == DAPL_DTO_TYPE_RECV) ||
                (ibtype == OP_RDMA_WRITE &&
                cookie->val.dto.type ==
                DAPL_DTO_TYPE_RDMA_WRITE) ||
                (ibtype == OP_RDMA_READ &&
                cookie->val.dto.type ==
                DAPL_DTO_TYPE_RDMA_READ));
            break;
        }
        case IB_COMP_ST_LOCAL_LEN_ERR:
        {
            event_ptr->event_data.dto_completion_event_data.status =
                DAT_DTO_ERR_LOCAL_LENGTH;
            break;
        }
        case IB_COMP_ST_LOCAL_PROTECT_ERR:
        {
            event_ptr->event_data.dto_completion_event_data.status =
                DAT_DTO_ERR_LOCAL_PROTECTION;
            break;
        }
        case IB_COMP_ST_WR_FLUSHED_ERR:
        {
            event_ptr->event_data.dto_completion_event_data.status =
                DAT_DTO_ERR_FLUSHED;
            break;
        }
        case IB_COMP_ST_BAD_RESPONSE_ERR:
        {
            event_ptr->event_data.dto_completion_event_data.status =
                DAT_DTO_ERR_BAD_RESPONSE;
            break;
        }
        case IB_COMP_ST_REM_REQ_ERR:
        case IB_COMP_ST_REM_OP_ERR:
        {
            event_ptr->event_data.dto_completion_event_data.status =
                DAT_DTO_ERR_REMOTE_RESPONDER;
            break;
        }
        case IB_COMP_ST_REM_ACC_ERR:
        {
            event_ptr->event_data.dto_completion_event_data.status =
                DAT_DTO_ERR_REMOTE_ACCESS;
            break;
        }
        /*
         * Unsupported RD errors
         * case IB_COMP_ST_EE_STATE_ERR:
         * case IB_COMP_ST_EE_CTX_NO_ERR:
         */
        case IB_COMP_ST_TRANSP_COUNTER:
        {
            event_ptr->event_data.dto_completion_event_data.status =
                DAT_DTO_ERR_TRANSPORT;
            break;
        }
        case IB_COMP_ST_RNR_COUNTER:
        {
            event_ptr->event_data.dto_completion_event_data.status =
                DAT_DTO_ERR_RECEIVER_NOT_READY;
            break;
        }
        case IB_COMP_ST_MW_BIND_ERR:
        {
            event_ptr->event_data.dto_completion_event_data.status =
                DAT_RMR_OPERATION_FAILED;
            break;
        }
        case IB_COMP_ST_LOCAL_OP_ERR:
        {
            event_ptr->event_data.dto_completion_event_data.status =
                DAT_DTO_ERR_LOCAL_EP;
            break;
        }
        default:
        {
            dapl_dbg_log(DAPL_DBG_TYPE_DTO_COMP_ERR,
                " DTO completion ERROR: %d: op %#x\n",
                DAPL_GET_CQE_STATUS(cqe_ptr),
                DAPL_GET_CQE_OPTYPE(cqe_ptr));
            event_ptr->event_data.dto_completion_event_data.status =
                DAT_DTO_FAILURE;
            break;
        }
        }

        /* Most error DTO ops result in disconnecting the EP */
        if ((event_ptr->event_data.dto_completion_event_data.status !=
            DAT_DTO_SUCCESS) &&
            (event_ptr->event_data.dto_completion_event_data.status !=
            DAT_RMR_OPERATION_FAILED)) {
            dto_error = 1;
            dapl_dbg_log(DAPL_DBG_TYPE_DTO_COMP_ERR,
                " DTO completion ERROR: %d: op %#x\n",
                DAPL_GET_CQE_STATUS(cqe_ptr),
                DAPL_GET_CQE_OPTYPE(cqe_ptr));
        }

        if (cookie->val.dto.type == DAPL_DTO_TYPE_SEND ||
            cookie->val.dto.type == DAPL_DTO_TYPE_RDMA_WRITE) {
            /* Get the size from the DTO; the CQE value may be off. */
            event_ptr->event_data.dto_completion_event_data.
                transfered_length = cookie->val.dto.size;
        } else {
            event_ptr->event_data.dto_completion_event_data.
                transfered_length = DAPL_GET_CQE_BYTESNUM(cqe_ptr);
        }

        dapls_cookie_dealloc(buffer, cookie);
        break;
    }

    case DAPL_COOKIE_TYPE_RMR:
    {
        dapl_os_atomic_dec(&ep_ptr->req_count);

        event_ptr->event_number = DAT_RMR_BIND_COMPLETION_EVENT;

        event_ptr->event_data.rmr_completion_event_data.rmr_handle =
            cookie->val.rmr.rmr;
        event_ptr->event_data.rmr_completion_event_data.user_cookie =
            cookie->val.rmr.cookie;
        if (ib_status == IB_COMP_ST_SUCCESS) {
            ibtype = DAPL_GET_CQE_OPTYPE(cqe_ptr);

            event_ptr->event_data.rmr_completion_event_data.status =
                DAT_RMR_BIND_SUCCESS;
            dapl_os_assert(ibtype == OP_BIND_MW);
        } else {
            event_ptr->event_data.rmr_completion_event_data.status =
                DAT_RMR_BIND_FAILURE;
            dto_error = 1;
        }

        dapls_cookie_dealloc(&ep_ptr->req_buffer, cookie);
        break;
    }
    default:
    {
        dapl_os_assert(!"Invalid Operation type");
        break;
    }
    }

    /*
     * A DTO failed; this will cause the connection to be broken.
     */
    if ((dto_error) && (ep_ptr->param.ep_state == DAT_EP_STATE_CONNECTED)) {
        ep_ptr->param.ep_state = DAT_EP_STATE_DISCONNECTED;
        /*
         * Disconnect at the IB level.
         */
        dapls_ib_disconnect_clean(ep_ptr, DAT_TRUE, IB_CME_CONNECTED);
    }
    /* convert a premature recv to an error flush on disconnect */
    if (process_premature_events && (ep_ptr->param.ep_state ==
        DAT_EP_STATE_DISCONNECTED) && (ib_status == IB_COMP_ST_SUCCESS)) {
        dapl_os_assert(ibtype == OP_RECEIVE &&
            cookie->val.dto.type == DAPL_DTO_TYPE_RECV);
        event_ptr->event_data.dto_completion_event_data.status =
            DAT_DTO_ERR_FLUSHED;
    }
    return (DAT_TRUE);
}
/*
 * dapls_evd_copy_cq
 *
 * Copy all entries on a CQ associated with the EVD onto that EVD.
 * Up to the caller to handle races, if any.  Note that no EVD waiters
 * will be awoken by this copy.
 *
 * Input:
 *	evd_ptr
 *
 * Output:
 *	nevents
 *
 * Returns:
 *	none
 *
 */
void
dapls_evd_copy_cq(
    DAPL_EVD *evd_ptr,
    int      *nevents)
{
    ib_work_completion_t cqe[MAX_CQES_PER_POLL];
    DAT_RETURN           dat_status;
    ib_cq_handle_t       cq_handle;
    DAT_EVENT            *event;
    uint_t               num_cqes_polled = 0;
    int                  cqe_events;
    int                  i;

    cq_handle = evd_ptr->ib_cq_handle;

    *nevents = 0;

    if (cq_handle == IB_INVALID_HANDLE) {
        /* Nothing to do if no CQ. */
        return;
    }
    dat_status = DAPL_POLL(evd_ptr)(cq_handle,
        cqe, MAX_CQES_PER_POLL, &num_cqes_polled);

    if (dat_status == DAT_SUCCESS) {
        dapl_dbg_log(DAPL_DBG_TYPE_EVD, "dapls_evd_copy_cq: %u\n",
            num_cqes_polled);
        cqe_events = 0;
        for (i = 0; i < num_cqes_polled; i++) {
#ifdef DAPL_DBG /* For debugging. */
            dapli_evd_eh_print_cqe(cqe[i]);
#endif
            /*
             * Can use DAT_DTO_COMPLETION_EVENT because
             * dapli_evd_cqe_to_event will overwrite.
             */
            event = dapli_evd_get_and_init_event(
                evd_ptr, DAT_DTO_COMPLETION_EVENT);
            if (event == NULL) {
                /*
                 * We've already attempted the overflow post;
                 * return.
                 */
                return;
            }
            if (dapli_evd_cqe_to_event(evd_ptr, &cqe[i], DAT_FALSE,
                event)) {
                dapli_evd_post_event_nosignal(evd_ptr, event);
                cqe_events++;
            } else {
                dapl_dbg_log(DAPL_DBG_TYPE_EVD,
                    "dapls_evd_copy_cq: premature event\n");
                /*
                 * We've deferred processing the CQE, so add
                 * the event_ptr back to the free queue.
                 */
                dat_status = dapls_rbuf_add(&evd_ptr->
                    free_event_queue, (void *)event);
                dapl_os_assert(dat_status == DAT_SUCCESS);
                if (evd_ptr->evd_producer_locking_needed) {
                    dapl_os_unlock(&evd_ptr->header.lock);
                }
            }
        }
        *nevents = cqe_events;
    } else if (DAT_GET_TYPE(dat_status) != DAT_QUEUE_EMPTY) {
        dapl_dbg_log(DAPL_DBG_TYPE_ERR,
            "dapls_evd_copy_cq: dapls_ib_completion_poll "
            "returned 0x%x\n", dat_status);
        dapl_os_assert(!"Bad return from dapls_ib_completion_poll");
    }
}
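/*
 * Illustrative only: dapls_evd_copy_events() below keeps all deadline
 * arithmetic in nanoseconds around gethrtime(), while timeout is in
 * microseconds.  A minimal sketch of the conversions it performs; the
 * helper name is hypothetical, and DAPL_ONE_YEAR_IN_USEC is the constant
 * defined inside that function.
 */
#if 0
static uint64_t
example_final_time(DAT_TIMEOUT timeout)
{
    uint64_t curr_time = gethrtime();	/* nanoseconds */

    if (timeout == DAT_TIMEOUT_INFINITE) {
        /* ~71 minutes of usec range is mapped to one year */
        return (curr_time + DAPL_ONE_YEAR_IN_USEC * 1000);
    }
    /* mask the sign bit, then convert usec to nsec */
    return (curr_time + (uint64_t)(timeout & 0x7fffffff) * 1000);
}
#endif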
/*
 * dapls_evd_copy_events
 *
 * Copy all events associated with the EVD onto that EVD.
 *
 * Input:
 *	evd_ptr
 *	timeout
 *
 * Output:
 *	none
 *
 * Returns:
 *	status
 *
 */
DAT_RETURN
dapls_evd_copy_events(DAPL_EVD *evd_ptr, DAT_TIMEOUT timeout)
{
    dapl_ib_event_t evp_arr[NUM_EVENTS_PER_POLL];
    dapl_ib_event_t *evpp_start;
    dapl_ib_event_t *evpp;
    DAPL_IA         *ia_ptr;
    DAT_RETURN      dat_status;
    int             waited;
    uint64_t        curr_time;
    uint64_t        final_time;
    uint64_t        time_left;
    int             events_needed = 0;
    int             nevents = 0;
    int             num_cqe = 0;
    int             num_ke = 0; /* kernel events - CM or ASYNC events */
    int             i;

    /* rbuf count is zero on entry */

    if (evd_ptr->evd_flags & (DAT_EVD_CONNECTION_FLAG |
        DAT_EVD_CR_FLAG | DAT_EVD_ASYNC_FLAG)) {
        if (evd_ptr->threshold <= NUM_EVENTS_PER_POLL) {
            evpp = evp_arr;
        } else {
            /* need to allocate on the heap */
            evpp = (dapl_ib_event_t *)dapl_os_alloc(
                evd_ptr->threshold * sizeof (dapl_ib_event_t));
            if (evpp == NULL) {
                return (DAT_INSUFFICIENT_RESOURCES);
            }
        }
        evpp_start = evpp;
        /* for evd_dequeue, check for ke before returning Q_EMPTY */
        if (evd_ptr->threshold == 0 && timeout == 0)
            evd_ptr->threshold = 1;
    } else {
        evpp = NULL;
        evpp_start = NULL;
    }
    ia_ptr = evd_ptr->header.owner_ia;
    waited = 0;
    dat_status = DAT_SUCCESS;

    /* calculate various time wait elements */
    if (timeout == 0) {
        final_time = 0;
        time_left = 0;
    } else if (timeout == DAT_TIMEOUT_INFINITE) {
        /*
         * The real value of DAT_TIMEOUT_INFINITE is fairly small
         * (~71 minutes); to prevent premature timeouts, map it to
         * 1 year.  NOTE: 64-bit integers are needed here because
         * 32 bits is not enough.  Other types, such as clock_t,
         * are not 64-bit, so they are not sufficient for this.
         * Similarly, hrtime_t is defined as a "nanosecond counter",
         * which does not match our need for time in microseconds,
         * so we just use the more general uint64_t here.
         */
#define	DAPL_ONE_YEAR_IN_USEC	((365 * 24 * 3600) * 1000000LL)
        curr_time = gethrtime();
        time_left = DAPL_ONE_YEAR_IN_USEC;
        final_time = curr_time + DAPL_ONE_YEAR_IN_USEC * 1000;
    } else {
        /*
         * Maximum time by which the routine needs to return.
         * DAT_TIMEOUT_INFINITE is defined as ~0, but it is of type
         * int, so mask the MSB to avoid overflow.
         */
        curr_time = gethrtime();
        final_time = curr_time + (uint64_t)(timeout & 0x7fffffff) * 1000;
        time_left = (final_time - curr_time) / 1000;
    }

    do {
        /*
         * If this EVD has a CQ event stream, check the CQs first.
         */
        if (evd_ptr->evd_flags & (DAT_EVD_DTO_FLAG |
            DAT_EVD_RMR_BIND_FLAG)) {
            /*
             * Poll the CQ for events, and update the total number
             * of CQEs so far.
             */
            nevents = 0;
            dapls_evd_copy_cq(evd_ptr, &nevents);
            num_cqe += nevents;
            dapl_dbg_log(DAPL_DBG_TYPE_EVD,
                "dapls_evd_copy_event: copy_cq num_cqe(%d)\n",
                num_cqe);
        }

        /*
         * We use the dapls_rbuf_count since it includes
         * - CQ events pulled by dapls_evd_copy_cq
         * - events added by dat_evd_post_se()
         */
        events_needed = evd_ptr->threshold - num_ke -
            dapls_rbuf_count(&evd_ptr->pending_event_queue);

        /*
         * Check for pending events.
         * Note: threshold == 0 implies dapl_evd_dequeue.
         */
        if (events_needed < 0) {
            /* There are more than sufficient events. */
            break;
        } else if (events_needed == 0) {
            /*
             * Report queue empty on dat_evd_dequeue; non-CQ
             * events are expected to be polled by dat_evd_wait.
             */
            if (evd_ptr->threshold == 0)
                dat_status = DAT_ERROR(DAT_QUEUE_EMPTY, 0);
            /*
             * When threshold > 0, we have sufficient events.
             */
            break;
        } else {
            /*
             * When we reach here, this implies dat_evd_wait.
             * Return on any DTO completion, as threshold > 1
             * will be taken as a hint only.
             */
            if (num_cqe)
                break;
        }

        /* check whether we've already waited */
        if (waited > 0) {
            dapl_dbg_log(DAPL_DBG_TYPE_EVD,
                "dapls_evd_copy_event: waited[%d]\n", waited);
            if (dat_status != DAT_SUCCESS)
                break;
            curr_time = gethrtime();
            /* exit on time expired */
            if (curr_time >= final_time)
                break;
            time_left = (final_time - curr_time) / 1000;
        }

        /* check for DTO type EVDs */
        if (evd_ptr->evd_flags & (DAT_EVD_DTO_FLAG |
            DAT_EVD_RMR_BIND_FLAG)) {
            if (events_needed == 1) {
                /*
                 * Need only one event, so enable CQ
                 * notification.
                 */
                /*
                 * XXX: Things need to be modified here to
                 * implement the NOTIFICATION suppression
                 * correctly - relies on the THRESHOLD flag
                 * and UNSIGNALLED flag to be stored
                 * in the evd.
                 */
                dat_status = dapls_set_cq_notify(ia_ptr,
                    evd_ptr);
                if (dat_status != DAT_SUCCESS) {
                    dapl_dbg_log(DAPL_DBG_TYPE_EVD,
                        "dapls_evd_copy_event:"
                        " set_cq_notify(%d)\n", dat_status);
                    return (dat_status);
                }
            } else if (events_needed > 1) {
                /*
                 * We need multiple events, so enable the CQ
                 * for notification on N events.
                 * dat_status = dapls_set_cqN_notify(ia_ptr,
                 *    evd_ptr, (uint32_t)events_needed);
                 */
                dat_status = dapls_set_cq_notify(ia_ptr,
                    evd_ptr);
                if (dat_status != DAT_SUCCESS) {
                    dapl_dbg_log(DAPL_DBG_TYPE_EVD,
                        "dapls_evd_copy_event:"
                        " set_cqN_notify:%d\n", dat_status);
                    return (dat_status);
                }
            }

            /*
             * Per the Tavor PRM, if completions occur after
             * polling the CQ and before arming it, the CQ
             * handler will be fired immediately upon arming.
             * Hence it recommends that a re-poll of the CQ can
             * be skipped as an optimization.
             */
        }

        nevents = 0;

        /*
         * A non-NULL evpp_start denotes that at least one of
         * DAT_EVD_CONNECTION_FLAG, DAT_EVD_CR_FLAG and
         * DAT_EVD_ASYNC_FLAG is set, so we need to check for
         * events from the kernel.
         */
        if (evpp_start) {
            /*
             * Even if dat_status is not DAT_SUCCESS, num_events
             * could be non-zero.
             */
            dat_status = dapls_ib_event_poll(evd_ptr, time_left,
                (evd_ptr->threshold - (num_cqe + num_ke)), evpp,
                &nevents);
            dapl_dbg_log(DAPL_DBG_TYPE_EVD,
                "dapls_evd_copy_event: poll returned 0x%x(%d)\n",
                dat_status, nevents);

            num_ke += nevents;
            evpp += nevents;
        } else {
            /* perform a timewait */
            dat_status = dapls_ib_event_poll(evd_ptr, time_left,
                0, NULL, &nevents);
            dapl_dbg_log(DAPL_DBG_TYPE_EVD,
                "dapls_evd_copy_event: poll(cq_notification) "
                "returned 0x%x\n", dat_status);
            if (DAT_GET_TYPE(dat_status) == DAT_INTERRUPTED_CALL)
                return (dat_status);
        }

        waited++;
    } while (dapls_rbuf_count(&evd_ptr->pending_event_queue) + num_ke <
        evd_ptr->threshold);

    /* process the cm events now */
    for (i = 0; i < num_ke; i++) {
        switch (evpp_start[i].ibe_ev_family) {
        case DAPL_CR_EVENTS: /* PASSIVE side events */
        case DAPL_PASSIVE_CONNECTION_EVENTS:
            dapl_dbg_log(DAPL_DBG_TYPE_EVD,
                "dapls_evd_copy_event: Passive side Event %d\n",
                evpp_start[i].ibe_ce.ibce_event);
            dapls_cr_callback((ib_cm_handle_t)
                evpp_start[i].ibe_ce.ibce_psep_cookie,
                evpp_start[i].ibe_ce.ibce_event,
                evpp_start[i].ibe_ce.ibce_priv_data_ptr, (void *)
                (uintptr_t)evpp_start[i].ibe_ce.ibce_cookie);
            break;
        case DAPL_ACTIVE_CONNECTION_EVENTS: /* ACTIVE side events */
            dapl_dbg_log(DAPL_DBG_TYPE_EVD,
                "dapls_evd_copy_event: Active Conn Event %d\n",
                evpp_start[i].ibe_ce.ibce_event);
            dapl_evd_connection_callback((ib_cm_handle_t)
                IB_INVALID_HANDLE,
                evpp_start[i].ibe_ce.ibce_event,
                evpp_start[i].ibe_ce.ibce_priv_data_ptr, (void *)
                (uintptr_t)evpp_start[i].ibe_ce.ibce_cookie);
            break;
        case DAPL_ASYNC_EVENTS:
            dapl_dbg_log(DAPL_DBG_TYPE_EVD,
                "dapls_evd_copy_event: Async Event %d\n",
                evpp_start[i].ibe_async.ibae_type);
            dapls_ib_async_callback(evd_ptr,
                ia_ptr->hca_ptr->ib_hca_handle,
                &(evpp_start[i].ibe_async), ia_ptr);
            break;
        default:
            dapl_dbg_log(DAPL_DBG_TYPE_ERR,
                "dapls_evd_copy_event: dapls_ib_event_poll %d "
                "returned 0x%x\n", i, evpp_start[i].ibe_ev_family);
            dapl_os_assert(!"Bad return from dapls_ib_event_poll");
            break;
        }
    }

    return (dat_status);
}
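/*
 * Illustrative only: a minimal sketch of how a dat_evd_wait()-style
 * consumer might drive dapls_evd_copy_events() above and then drain one
 * event from the pending queue.  The wrapper and its threshold handling
 * are assumptions; the called functions are the real ones from this
 * file.
 */
#if 0
static DAT_RETURN
example_wait_one(DAPL_EVD *evd_ptr, DAT_TIMEOUT timeout, DAT_EVENT *event)
{
    DAT_RETURN dat_status;
    DAT_EVENT  *pending;

    evd_ptr->threshold = 1;
    dat_status = dapls_evd_copy_events(evd_ptr, timeout);
    if (dat_status != DAT_SUCCESS) {
        return (dat_status);
    }
    pending = (DAT_EVENT *)
        dapls_rbuf_remove(&evd_ptr->pending_event_queue);
    if (pending == NULL) {
        return (DAT_ERROR(DAT_QUEUE_EMPTY, 0));
    }
    *event = *pending;
    /* recycle the event storage */
    (void) dapls_rbuf_add(&evd_ptr->free_event_queue, (void *)pending);
    return (DAT_SUCCESS);
}
#endif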
%d " 1573 "returned 0x%x\n", i, evpp_start[i].ibe_ev_family); 1574 dapl_os_assert(!"Bad return from dapls_ib_event_poll"); 1575 break; 1576 } 1577 } 1578 1579 return (dat_status); 1580 } 1581 1582 /* 1583 * dapls_evd_cq_poll_to_event 1584 * 1585 * Attempt to dequeue a single CQE from a CQ and turn it into 1586 * an event. 1587 * 1588 * Input: 1589 * evd_ptr 1590 * 1591 * Output: 1592 * event 1593 * 1594 * Returns: 1595 * Status of operation 1596 * 1597 */ 1598 DAT_RETURN 1599 dapls_evd_cq_poll_to_event(IN DAPL_EVD *evd_ptr, OUT DAT_EVENT *event) 1600 { 1601 DAT_RETURN dat_status; 1602 ib_work_completion_t cur_cqe; 1603 1604 /* skip one layer of do-nothing function */ 1605 dat_status = DAPL_POLL1(evd_ptr)(evd_ptr->ib_cq_handle, &cur_cqe); 1606 1607 if (dat_status == DAT_SUCCESS) { 1608 #ifdef DAPL_DBG /* For debugging. */ 1609 dapli_evd_eh_print_cqe(cur_cqe); 1610 #endif 1611 (void) dapli_evd_cqe_to_event(evd_ptr, &cur_cqe, DAT_FALSE, 1612 event); 1613 } 1614 1615 return (dat_status); 1616 } 1617 1618 /* 1619 * Local variables: 1620 * c-indent-level: 4 1621 * c-basic-offset: 4 1622 * tab-width: 8 1623 * End: 1624 */ 1625