1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. */ 26 27 /* 28 * An implementation of the IPoIB-CM standard based on PSARC 2009/593. 
29 */ 30 #include <sys/types.h> 31 #include <sys/conf.h> 32 #include <sys/ddi.h> 33 #include <sys/sunddi.h> 34 #include <sys/modctl.h> 35 #include <sys/stropts.h> 36 #include <sys/stream.h> 37 #include <sys/strsun.h> 38 #include <sys/strsubr.h> 39 #include <sys/dlpi.h> 40 #include <sys/mac_provider.h> 41 42 #include <sys/pattr.h> /* for HCK_FULLCKSUM */ 43 #include <sys/atomic.h> /* for atomic_add*() */ 44 #include <sys/ethernet.h> /* for ETHERTYPE_IP */ 45 #include <netinet/in.h> /* for netinet/ip.h below */ 46 #include <netinet/ip.h> /* for struct ip */ 47 #include <inet/common.h> /* for inet/ip.h below */ 48 #include <inet/ip.h> /* for ipha_t */ 49 #include <inet/ip_if.h> /* for ETHERTYPE_IPV6 */ 50 #include <inet/ip6.h> /* for ip6_t */ 51 #include <netinet/icmp6.h> /* for icmp6_t */ 52 53 #include <sys/ib/clients/ibd/ibd.h> 54 55 extern ibd_global_state_t ibd_gstate; 56 extern int ibd_rc_conn_timeout; 57 uint_t ibd_rc_tx_softintr = 1; 58 /* 59 * If the number of WRs in receive queue of each RC connection less than 60 * IBD_RC_RX_WR_THRESHOLD, we will post more receive WRs into it. 61 */ 62 #define IBD_RC_RX_WR_THRESHOLD 0x20 63 64 /* 65 * If the number of free SWQEs (or large Tx buf) is larger than or equal to 66 * IBD_RC_TX_FREE_THRESH, we will call mac_tx_update to notify GLD to continue 67 * transmitting packets. 68 */ 69 #define IBD_RC_TX_FREE_THRESH 8 70 71 #define IBD_RC_QPN_TO_SID(qpn) \ 72 ((uint64_t)(IBD_RC_SERVICE_ID | ((qpn) & 0xffffff))) 73 74 /* For interop with legacy OFED */ 75 #define IBD_RC_QPN_TO_SID_OFED_INTEROP(qpn) \ 76 ((uint64_t)(IBD_RC_SERVICE_ID_OFED_INTEROP | ((qpn) & 0xffffff))) 77 78 /* Internet Header + 64 bits of Data Datagram. 
Refer to RFC 792 */
#define	IBD_RC_IP_ICMP_RETURN_DATA_BYTES	64


/* Functions for Reliable Connected Mode */
/* Connection Setup/Close Functions */
static ibt_cm_status_t ibd_rc_dispatch_pass_mad(void *,
    ibt_cm_event_t *, ibt_cm_return_args_t *, void *, ibt_priv_data_len_t);
static ibt_cm_status_t ibd_rc_dispatch_actv_mad(void *,
    ibt_cm_event_t *, ibt_cm_return_args_t *, void *, ibt_priv_data_len_t);
static void ibd_rc_act_close(ibd_rc_chan_t *, boolean_t);

static inline void ibd_rc_add_to_chan_list(ibd_rc_chan_list_t *,
    ibd_rc_chan_t *);
static inline ibd_rc_chan_t *ibd_rc_rm_header_chan_list(
    ibd_rc_chan_list_t *);
static inline ibd_rc_chan_t *ibd_rc_rm_from_chan_list(ibd_rc_chan_list_t *,
    ibd_rc_chan_t *);

/* CQ handlers */
static void ibd_rc_rcq_handler(ibt_cq_hdl_t, void *);
static void ibd_rc_scq_handler(ibt_cq_hdl_t, void *);
static void ibd_rc_poll_rcq(ibd_rc_chan_t *, ibt_cq_hdl_t);

/* Receive Functions */
static int ibd_rc_post_srq(ibd_state_t *, ibd_rwqe_t *);
static void ibd_rc_srq_freemsg_cb(char *);
static void ibd_rc_srq_free_rwqe(ibd_state_t *, ibd_rwqe_t *);

static int ibd_rc_post_rwqe(ibd_rc_chan_t *, ibd_rwqe_t *);
static void ibd_rc_freemsg_cb(char *);
static void ibd_rc_process_rx(ibd_rc_chan_t *, ibd_rwqe_t *, ibt_wc_t *);
static void ibd_rc_free_rwqe(ibd_rc_chan_t *, ibd_rwqe_t *);
static void ibd_rc_fini_rxlist(ibd_rc_chan_t *);


/* Send Functions */
static void ibd_rc_release_swqe(ibd_rc_chan_t *, ibd_swqe_t *);
static int ibd_rc_init_txlist(ibd_rc_chan_t *);
static void ibd_rc_fini_txlist(ibd_rc_chan_t *);
static uint_t ibd_rc_tx_recycle(caddr_t);


/*
 * Async work: close the active-side RC channel carried in req->rq_ptr, then
 * drain and close every channel queued on state->rc_obs_act_chan_list.
 * For each closed channel the owning ace's reference count is dropped under
 * id_ac_mutex; the ace is returned to the free list when no references
 * remain, otherwise it is marked with CYCLEVAL for delayed recycling.
 */
void
ibd_async_rc_close_act_chan(ibd_state_t *state, ibd_req_t *req)
{
    ibd_rc_chan_t *rc_chan = req->rq_ptr;
    ibd_ace_t *ace;

    while (rc_chan != NULL) {
        ace = rc_chan->ace;
        ASSERT(ace != NULL);
        /* Close old RC channel */
        ibd_rc_act_close(rc_chan, B_TRUE);
        mutex_enter(&state->id_ac_mutex);
        ASSERT(ace->ac_ref != 0);
        atomic_dec_32(&ace->ac_ref);
        ace->ac_chan = NULL;
        if ((ace->ac_ref == 0) || (ace->ac_ref == CYCLEVAL)) {
            /* No outstanding references: recycle the ace now */
            IBD_ACACHE_INSERT_FREE(state, ace);
            ace->ac_ref = 0;
        } else {
            /* Still referenced: flag for delayed recycle */
            ace->ac_ref |= CYCLEVAL;
            state->rc_delay_ace_recycle++;
        }
        mutex_exit(&state->id_ac_mutex);
        rc_chan = ibd_rc_rm_header_chan_list(
            &state->rc_obs_act_chan_list);
    }
}

/*
 * Async work: recycle the ace carried in req->rq_ptr.  Closes the ace's
 * active RC channel and releases the ace reference exactly as
 * ibd_async_rc_close_act_chan() does for a single channel, then clears
 * state->rc_ace_recycle so a new recycle request may be queued.
 */
void
ibd_async_rc_recycle_ace(ibd_state_t *state, ibd_req_t *req)
{
    ibd_ace_t *ace = req->rq_ptr;
    ibd_rc_chan_t *rc_chan;

    ASSERT(ace != NULL);
    rc_chan = ace->ac_chan;
    ASSERT(rc_chan != NULL);
    /* Close old RC channel */
    ibd_rc_act_close(rc_chan, B_TRUE);
    mutex_enter(&state->id_ac_mutex);
    ASSERT(ace->ac_ref != 0);
    atomic_dec_32(&ace->ac_ref);
    ace->ac_chan = NULL;
    if ((ace->ac_ref == 0) || (ace->ac_ref == CYCLEVAL)) {
        IBD_ACACHE_INSERT_FREE(state, ace);
        ace->ac_ref = 0;
    } else {
        ace->ac_ref |= CYCLEVAL;
        state->rc_delay_ace_recycle++;
    }
    mutex_exit(&state->id_ac_mutex);
    mutex_enter(&state->rc_ace_recycle_lock);
    state->rc_ace_recycle = NULL;
    mutex_exit(&state->rc_ace_recycle_lock);
}

/* Simple ICMP IP Header Template */
static const ipha_t icmp_ipha = {
    IP_SIMPLE_HDR_VERSION, 0, 0, 0, 0, 0, IPPROTO_ICMP
};

/* Packet is too big.
Send ICMP packet to GLD to request a smaller MTU */
/*
 * Async work: the packet in req->rq_ptr was too large for the RC channel's
 * MTU.  Build an ICMP "fragmentation needed" (Destination Unreachable,
 * code 4) reply containing the leading portion of the offending packet and
 * loop it back up through mac_rx() so the IP stack lowers its path MTU.
 * On any failure the original packet is simply dropped.  In both cases the
 * ace's tx_too_big_ongoing flag is cleared before returning.
 */
void
ibd_async_rc_process_too_big(ibd_state_t *state, ibd_req_t *req)
{
    mblk_t *mp = req->rq_ptr;
    ibd_ace_t *ace = req->rq_ptr2;
    uint16_t mtu = state->id_mtu - IPOIB_HDRSIZE;
    uint_t len_needed;
    size_t msg_len;
    mblk_t *pmtu_mp;
    ushort_t sap;
    ib_header_info_t *ibha;	/* ib header for pmtu_pkt */
    /*
     * ipha: IP header for pmtu_pkt
     * old_ipha: IP header for old packet
     */
    ipha_t *ipha, *old_ipha;
    icmph_t *icmph;

    sap = ntohs(((ipoib_hdr_t *)mp->b_rptr)->ipoib_type);

    /* Flatten the chain so header fields can be read contiguously */
    if (!pullupmsg(mp, -1)) {
        DPRINT(40, "ibd_async_rc_process_too_big: pullupmsg fail");
        goto too_big_fail;
    }
    /* move to IP header. */
    mp->b_rptr += IPOIB_HDRSIZE;
    old_ipha = (ipha_t *)mp->b_rptr;

    /*
     * Per RFC 792, the ICMP error carries the offending IP header(s)
     * plus the first 64 bits (here: 64 bytes) of the datagram.
     */
    len_needed = IPH_HDR_LENGTH(old_ipha);
    if (old_ipha->ipha_protocol == IPPROTO_ENCAP) {
        /* IP-in-IP: include the inner IPv4 header too */
        len_needed += IPH_HDR_LENGTH(((uchar_t *)old_ipha +
            len_needed));
    } else if (old_ipha->ipha_protocol == IPPROTO_IPV6) {
        /* IPv6-in-IPv4: include the inner IPv6 header chain */
        ip6_t *ip6h = (ip6_t *)((uchar_t *)old_ipha
            + len_needed);
        len_needed += ip_hdr_length_v6(mp, ip6h);
    }
    len_needed += IBD_RC_IP_ICMP_RETURN_DATA_BYTES;
    msg_len = msgdsize(mp);
    if (msg_len > len_needed) {
        /* Trim the quoted packet down to the required prefix */
        (void) adjmsg(mp, len_needed - msg_len);
        msg_len = len_needed;
    }

    if ((pmtu_mp = allocb(sizeof (ib_header_info_t) + sizeof (ipha_t)
        + sizeof (icmph_t), BPRI_MED)) == NULL) {
        DPRINT(40, "ibd_async_rc_process_too_big: allocb fail");
        goto too_big_fail;
    }
    pmtu_mp->b_cont = mp;
    pmtu_mp->b_wptr = pmtu_mp->b_rptr + sizeof (ib_header_info_t)
        + sizeof (ipha_t) + sizeof (icmph_t);

    ibha = (ib_header_info_t *)pmtu_mp->b_rptr;

    /* Fill IB header */
    bcopy(&state->id_macaddr, &ibha->ib_dst, IPOIB_ADDRL);
    /*
     * If the GRH is not valid, indicate to GLDv3 by setting
     * the VerTcFlow field to 0.
     */
    ibha->ib_grh.ipoib_vertcflow = 0;
    ibha->ipib_rhdr.ipoib_type = htons(sap);
    ibha->ipib_rhdr.ipoib_mbz = 0;

    /* Fill IP header */
    ipha = (ipha_t *)&ibha[1];
    *ipha = icmp_ipha;
    /* Reply flows in the reverse direction of the original packet */
    ipha->ipha_src = old_ipha->ipha_dst;
    ipha->ipha_dst = old_ipha->ipha_src;
    ipha->ipha_ttl = old_ipha->ipha_ttl;
    msg_len += sizeof (icmp_ipha) + sizeof (icmph_t);
    if (msg_len > IP_MAXPACKET) {
        ibd_print_warn(state, "ibd_rc_process_too_big_pkt: msg_len(%d) "
            "> IP_MAXPACKET", (uint32_t)msg_len);
        (void) adjmsg(mp, IP_MAXPACKET - msg_len);
        msg_len = IP_MAXPACKET;
    }
    ipha->ipha_length = htons((uint16_t)msg_len);
    ipha->ipha_hdr_checksum = 0;
    ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha);

    /* Fill ICMP body */
    icmph = (icmph_t *)&ipha[1];
    bzero(icmph, sizeof (icmph_t));
    icmph->icmph_type = ICMP_DEST_UNREACHABLE;
    icmph->icmph_code = ICMP_FRAGMENTATION_NEEDED;
    icmph->icmph_du_mtu = htons(mtu);
    icmph->icmph_checksum = 0;
    icmph->icmph_checksum = IP_CSUM(pmtu_mp,
        (int32_t)sizeof (ib_header_info_t) + (int32_t)sizeof (ipha_t), 0);

    /* Mark checksum already complete so the stack does not recompute */
    (void) hcksum_assoc(pmtu_mp, NULL, NULL, 0, 0, 0, 0,
        HCK_FULLCKSUM | HCK_FULLCKSUM_OK, 0);

    DPRINT(30, "ibd_async_rc_process_too_big: sap=0x%x, ip_src=0x%x, "
        "ip_dst=0x%x, ttl=%d, len_needed=%d, msg_len=%d",
        sap, ipha->ipha_src, ipha->ipha_dst, ipha->ipha_ttl,
        len_needed, (uint32_t)msg_len);

    /* Loop the ICMP error back up into the local IP stack */
    mac_rx(state->id_mh, state->id_rh, pmtu_mp);

    mutex_enter(&ace->tx_too_big_mutex);
    ace->tx_too_big_ongoing = B_FALSE;
    mutex_exit(&ace->tx_too_big_mutex);
    return;

too_big_fail:
    /* Drop packet */
    freemsg(mp);
    mutex_enter(&ace->tx_too_big_mutex);
    ace->tx_too_big_ongoing = B_FALSE;
    mutex_exit(&ace->tx_too_big_mutex);
}

/*
 * Check all active/passive channels. If any active/passive
 * channel has not been used for a long time, close it.
 */
/*
 * Periodic connection reaper, re-armed via timeout() at the bottom while
 * state->rc_timeout_start remains B_TRUE.  A channel that was idle across
 * one full interval (is_used still B_FALSE) is scheduled for close; a busy
 * channel merely has its is_used flag reset for the next interval.
 */
void
ibd_rc_conn_timeout_call(void *carg)
{
    ibd_state_t *state = carg;
    ibd_ace_t *ace, *pre_ace;
    ibd_rc_chan_t *chan, *pre_chan, *next_chan;
    ibd_req_t *req;

    /* Check all active channels. If chan->is_used == B_FALSE, close it */
    mutex_enter(&state->id_ac_mutex);
    ace = list_head(&state->id_ah_active);
    while ((pre_ace = ace) != NULL) {
        ace = list_next(&state->id_ah_active, ace);
        if (pre_ace->ac_chan != NULL) {
            chan = pre_ace->ac_chan;
            ASSERT(state->id_enable_rc == B_TRUE);
            if (chan->chan_state == IBD_RC_STATE_ACT_ESTAB) {
                if (chan->is_used == B_FALSE) {
                    /* Idle a full interval: close it */
                    state->rc_timeout_act++;
                    INC_REF(pre_ace, 1);
                    IBD_ACACHE_PULLOUT_ACTIVE(state,
                        pre_ace);
                    chan->chan_state =
                        IBD_RC_STATE_ACT_CLOSING;
                    ibd_rc_signal_act_close(state, pre_ace);
                } else {
                    /* Re-arm idle detection */
                    chan->is_used = B_FALSE;
                }
            }
        }
    }
    mutex_exit(&state->id_ac_mutex);

    /* Check all passive channels. If chan->is_used == B_FALSE, close it */
    mutex_enter(&state->rc_pass_chan_list.chan_list_mutex);
    next_chan = state->rc_pass_chan_list.chan_list;
    pre_chan = NULL;
    /*
     * Rebuild the list in place: channels handed to the async close
     * worker are dropped from it; kept channels are re-linked via
     * pre_chan and the list is terminated after the loop.
     */
    while ((chan = next_chan) != NULL) {
        next_chan = chan->next;
        if (chan->is_used == B_FALSE) {
            req = kmem_cache_alloc(state->id_req_kmc, KM_NOSLEEP);
            if (req != NULL) {
                /* remove it */
                state->rc_timeout_pas++;
                req->rq_ptr = chan;
                ibd_queue_work_slot(state, req,
                    IBD_ASYNC_RC_CLOSE_PAS_CHAN);
            } else {
                /* No req memory: keep the channel for now */
                ibd_print_warn(state, "ibd_rc_conn_timeout: "
                    "alloc ibd_req_t fail");
                if (pre_chan == NULL) {
                    state->rc_pass_chan_list.chan_list =
                        chan;
                } else {
                    pre_chan->next = chan;
                }
                pre_chan = chan;
            }
        } else {
            if (pre_chan == NULL) {
                state->rc_pass_chan_list.chan_list = chan;
            } else {
                pre_chan->next = chan;
            }
            pre_chan = chan;
            chan->is_used = B_FALSE;
        }
    }
    if (pre_chan != NULL) {
        pre_chan->next = NULL;
    } else {
        state->rc_pass_chan_list.chan_list = NULL;
    }
    mutex_exit(&state->rc_pass_chan_list.chan_list_mutex);

    /* Re-arm the reaper unless shutdown has cleared rc_timeout_start */
    mutex_enter(&state->rc_timeout_lock);
    if (state->rc_timeout_start == B_TRUE) {
        state->rc_timeout = timeout(ibd_rc_conn_timeout_call, state,
            SEC_TO_TICK(ibd_rc_conn_timeout));
    }
    mutex_exit(&state->rc_timeout_lock);
}

#ifdef DEBUG
/*
 * ibd_rc_update_stats - update driver private kstat counters
 *
 * This routine will dump the internal statistics counters for ibd's
 * Reliable Connected Mode. The current stats dump values will
 * be sent to the kernel status area.
 */
static int
ibd_rc_update_stats(kstat_t *ksp, int rw)
{
    ibd_state_t *state;
    ibd_rc_stat_t *ibd_rc_ksp;

    /* These counters are read-only from userland */
    if (rw == KSTAT_WRITE)
        return (EACCES);

    state = (ibd_state_t *)ksp->ks_private;
    ASSERT(state != NULL);
    ibd_rc_ksp = (ibd_rc_stat_t *)ksp->ks_data;

    ibd_rc_ksp->rc_rcv_trans_byte.value.ul = state->rc_rcv_trans_byte;
    ibd_rc_ksp->rc_rcv_trans_pkt.value.ul = state->rc_rcv_trans_pkt;
    ibd_rc_ksp->rc_rcv_copy_byte.value.ul = state->rc_rcv_copy_byte;
    ibd_rc_ksp->rc_rcv_copy_pkt.value.ul = state->rc_rcv_copy_pkt;
    ibd_rc_ksp->rc_rcv_alloc_fail.value.ul = state->rc_rcv_alloc_fail;

    ibd_rc_ksp->rc_rcq_err.value.ul = state->rc_rcq_err;

    ibd_rc_ksp->rc_rwqe_short.value.ul = state->rc_rwqe_short;

    ibd_rc_ksp->rc_xmt_bytes.value.ul = state->rc_xmt_bytes;
    ibd_rc_ksp->rc_xmt_small_pkt.value.ul = state->rc_xmt_small_pkt;
    ibd_rc_ksp->rc_xmt_fragmented_pkt.value.ul =
        state->rc_xmt_fragmented_pkt;
    ibd_rc_ksp->rc_xmt_map_fail_pkt.value.ul = state->rc_xmt_map_fail_pkt;
    ibd_rc_ksp->rc_xmt_map_succ_pkt.value.ul = state->rc_xmt_map_succ_pkt;
    ibd_rc_ksp->rc_ace_not_found.value.ul = state->rc_ace_not_found;

    ibd_rc_ksp->rc_scq_no_swqe.value.ul = state->rc_scq_no_swqe;
    ibd_rc_ksp->rc_scq_no_largebuf.value.ul = state->rc_scq_no_largebuf;
    ibd_rc_ksp->rc_swqe_short.value.ul = state->rc_swqe_short;
    ibd_rc_ksp->rc_swqe_mac_update.value.ul = state->rc_swqe_mac_update;
    ibd_rc_ksp->rc_xmt_buf_short.value.ul = state->rc_xmt_buf_short;
    ibd_rc_ksp->rc_xmt_buf_mac_update.value.ul =
        state->rc_xmt_buf_mac_update;

    ibd_rc_ksp->rc_conn_succ.value.ul = state->rc_conn_succ;
    ibd_rc_ksp->rc_conn_fail.value.ul = state->rc_conn_fail;
    ibd_rc_ksp->rc_null_conn.value.ul = state->rc_null_conn;
    ibd_rc_ksp->rc_no_estab_conn.value.ul = state->rc_no_estab_conn;

    ibd_rc_ksp->rc_act_close.value.ul = state->rc_act_close;
    ibd_rc_ksp->rc_pas_close.value.ul = state->rc_pas_close;
    ibd_rc_ksp->rc_delay_ace_recycle.value.ul = state->rc_delay_ace_recycle;
    ibd_rc_ksp->rc_act_close_simultaneous.value.ul =
        state->rc_act_close_simultaneous;
    ibd_rc_ksp->rc_reset_cnt.value.ul = state->rc_reset_cnt;
    ibd_rc_ksp->rc_timeout_act.value.ul = state->rc_timeout_act;
    ibd_rc_ksp->rc_timeout_pas.value.ul = state->rc_timeout_pas;

    return (0);
}


/*
 * ibd_rc_init_stats - initialize kstat data structures
 *
 * This routine will create and initialize the driver private
 * statistics counters.
 */
/*
 * Create the per-instance "RC mode" named kstat and initialize the first
 * group of its counters.  Returns DDI_SUCCESS, or DDI_FAILURE if
 * kstat_create() fails.  The created ksp is stored in state->rc_ksp.
 */
int
ibd_rc_init_stats(ibd_state_t *state)
{
    kstat_t *ksp;
    ibd_rc_stat_t *ibd_rc_ksp;
    char stat_name[KSTAT_STRLEN];
    int inst;

    /*
     * Create and init kstat
     */
    inst = ddi_get_instance(state->id_dip);
    /* Name encodes instance, pkey and plinkid to stay unique */
    (void) snprintf(stat_name, KSTAT_STRLEN, "statistics%d_%x_%u", inst,
        state->id_pkey, state->id_plinkid);
    ksp = kstat_create("ibd", 0, stat_name, "net", KSTAT_TYPE_NAMED,
        sizeof (ibd_rc_stat_t) / sizeof (kstat_named_t), 0);

    if (ksp == NULL) {
        ibd_print_warn(state, "ibd_rc_init_stats: Could not create "
            "kernel statistics");
        return (DDI_FAILURE);
    }

    state->rc_ksp = ksp;	/* Fill in the ksp of ibd over RC mode */

    ibd_rc_ksp = (ibd_rc_stat_t *)ksp->ks_data;

    /*
     * Initialize all the statistics
     */
    kstat_named_init(&ibd_rc_ksp->rc_rcv_trans_byte, "RC: Rx Bytes, "
        "transfer mode", KSTAT_DATA_ULONG);
    kstat_named_init(&ibd_rc_ksp->rc_rcv_trans_pkt, "RC: Rx Pkts, "
        "transfer mode", KSTAT_DATA_ULONG);
    kstat_named_init(&ibd_rc_ksp->rc_rcv_copy_byte, "RC: Rx Bytes, "
        "copy mode", KSTAT_DATA_ULONG);
    kstat_named_init(&ibd_rc_ksp->rc_rcv_copy_pkt, "RC: Rx Pkts, "
        "copy mode", KSTAT_DATA_ULONG);
    kstat_named_init(&ibd_rc_ksp->rc_rcv_alloc_fail, "RC: Rx alloc fail",
        KSTAT_DATA_ULONG);

    kstat_named_init(&ibd_rc_ksp->rc_rcq_err, "RC: fail in Recv CQ handler",
        KSTAT_DATA_ULONG);

    kstat_named_init(&ibd_rc_ksp->rc_rwqe_short, "RC: Short rwqe",
        KSTAT_DATA_ULONG);

    kstat_named_init(&ibd_rc_ksp->rc_xmt_bytes, "RC: Sent Bytes",
        KSTAT_DATA_ULONG);
    kstat_named_init(&ibd_rc_ksp->rc_xmt_small_pkt,
        "RC: Tx pkt small size", KSTAT_DATA_ULONG);
    kstat_named_init(&ibd_rc_ksp->rc_xmt_fragmented_pkt,
        "RC: Tx pkt fragmentary", KSTAT_DATA_ULONG);
    kstat_named_init(&ibd_rc_ksp->rc_xmt_map_fail_pkt,
        "RC: Tx pkt fail ibt_map_mem_iov()", KSTAT_DATA_ULONG);
kstat_named_init(&ibd_rc_ksp->rc_xmt_map_succ_pkt, 512 "RC: Tx pkt succ ibt_map_mem_iov()", KSTAT_DATA_ULONG); 513 kstat_named_init(&ibd_rc_ksp->rc_ace_not_found, "RC: ace not found", 514 KSTAT_DATA_ULONG); 515 516 kstat_named_init(&ibd_rc_ksp->rc_scq_no_swqe, "RC: No swqe after " 517 "recycle", KSTAT_DATA_ULONG); 518 kstat_named_init(&ibd_rc_ksp->rc_scq_no_largebuf, "RC: No large tx buf " 519 "after recycle", KSTAT_DATA_ULONG); 520 kstat_named_init(&ibd_rc_ksp->rc_swqe_short, "RC: No swqe in ibd_send", 521 KSTAT_DATA_ULONG); 522 kstat_named_init(&ibd_rc_ksp->rc_swqe_mac_update, "RC: mac_tx_update " 523 "#, swqe available", KSTAT_DATA_ULONG); 524 kstat_named_init(&ibd_rc_ksp->rc_xmt_buf_short, "RC: No buf in " 525 "ibd_send", KSTAT_DATA_ULONG); 526 kstat_named_init(&ibd_rc_ksp->rc_xmt_buf_mac_update, "RC: " 527 "mac_tx_update #, buf available", KSTAT_DATA_ULONG); 528 529 kstat_named_init(&ibd_rc_ksp->rc_conn_succ, "RC: succ connected", 530 KSTAT_DATA_ULONG); 531 kstat_named_init(&ibd_rc_ksp->rc_conn_fail, "RC: fail connect", 532 KSTAT_DATA_ULONG); 533 kstat_named_init(&ibd_rc_ksp->rc_null_conn, "RC: null conn for unicast " 534 "pkt", KSTAT_DATA_ULONG); 535 kstat_named_init(&ibd_rc_ksp->rc_no_estab_conn, "RC: not in act estab " 536 "state", KSTAT_DATA_ULONG); 537 538 kstat_named_init(&ibd_rc_ksp->rc_act_close, "RC: call ibd_rc_act_close", 539 KSTAT_DATA_ULONG); 540 kstat_named_init(&ibd_rc_ksp->rc_pas_close, "RC: call ibd_rc_pas_close", 541 KSTAT_DATA_ULONG); 542 kstat_named_init(&ibd_rc_ksp->rc_delay_ace_recycle, "RC: delay ace " 543 "recycle", KSTAT_DATA_ULONG); 544 kstat_named_init(&ibd_rc_ksp->rc_act_close_simultaneous, "RC: " 545 "simultaneous ibd_rc_act_close", KSTAT_DATA_ULONG); 546 kstat_named_init(&ibd_rc_ksp->rc_reset_cnt, "RC: Reset RC channel", 547 KSTAT_DATA_ULONG); 548 kstat_named_init(&ibd_rc_ksp->rc_act_close, "RC: timeout act side", 549 KSTAT_DATA_ULONG); 550 kstat_named_init(&ibd_rc_ksp->rc_pas_close, "RC: timeout pas side", 551 KSTAT_DATA_ULONG); 

    /*
     * Function to provide kernel stat update on demand
     */
    ksp->ks_update = ibd_rc_update_stats;

    /*
     * Pointer into provider's raw statistics
     */
    ksp->ks_private = (void *)state;

    /*
     * Add kstat to systems kstat chain
     */
    kstat_install(ksp);

    return (DDI_SUCCESS);
}
#endif

/*
 * Allocate an RC channel (Tx or Rx side, per is_tx_chan) together with its
 * send/receive CQs, optional Tx softintr and Tx buffer list.  On success
 * *ret_chan holds the new channel and IBT_SUCCESS is returned; on failure
 * all partially-allocated resources are unwound via the goto-cleanup chain
 * at the bottom and the ibt_* error status is returned.
 */
static ibt_status_t
ibd_rc_alloc_chan(ibd_rc_chan_t **ret_chan, ibd_state_t *state,
    boolean_t is_tx_chan)
{
    ibt_status_t result;
    ibd_rc_chan_t *chan;
    ibt_rc_chan_alloc_args_t alloc_args;
    ibt_chan_alloc_flags_t alloc_flags;
    ibt_chan_sizes_t sizes;
    ibt_cq_attr_t cq_atts;
    int rv;

    chan = kmem_zalloc(sizeof (ibd_rc_chan_t), KM_SLEEP);

    chan->state = state;
    mutex_init(&chan->rx_wqe_list.dl_mutex, NULL, MUTEX_DRIVER, NULL);
    mutex_init(&chan->rx_free_list.dl_mutex, NULL, MUTEX_DRIVER, NULL);
    mutex_init(&chan->tx_wqe_list.dl_mutex, NULL, MUTEX_DRIVER, NULL);
    mutex_init(&chan->tx_rel_list.dl_mutex, NULL, MUTEX_DRIVER, NULL);
    mutex_init(&chan->tx_post_lock, NULL, MUTEX_DRIVER, NULL);
    mutex_init(&chan->tx_poll_lock, NULL, MUTEX_DRIVER, NULL);

    /*
     * Allocate IB structures for a new RC channel.
     * A Tx channel gets a full-size send CQ and a minimal receive CQ;
     * an Rx channel is the reverse.
     */
    if (is_tx_chan) {
        chan->scq_size = state->id_rc_num_swqe;
        chan->rcq_size = IBD_RC_MIN_CQ_SIZE;
    } else {
        chan->scq_size = IBD_RC_MIN_CQ_SIZE;
        chan->rcq_size = state->id_rc_num_rwqe;
    }
    cq_atts.cq_size = chan->scq_size;
    cq_atts.cq_sched = NULL;
    cq_atts.cq_flags = IBT_CQ_NO_FLAGS;
    result = ibt_alloc_cq(state->id_hca_hdl, &cq_atts, &chan->scq_hdl,
        &chan->scq_size);
    if (result != IBT_SUCCESS) {
        DPRINT(40, "ibd_rc_alloc_chan: error <%d>"
            "create scq completion queue (size <%d>)",
            result, chan->scq_size);
        goto alloc_scq_err;
    }	/* if failure to alloc cq */

    /* Interrupt moderation is best-effort; failure is non-fatal */
    if (ibt_modify_cq(chan->scq_hdl, state->id_rc_tx_comp_count,
        state->id_rc_tx_comp_usec, 0) != IBT_SUCCESS) {
        DPRINT(30, "ibd_rc_alloc_chan: Send CQ "
            "interrupt moderation failed");
    }

    ibt_set_cq_private(chan->scq_hdl, (void *) (uintptr_t)chan);
    ibt_set_cq_handler(chan->scq_hdl, ibd_rc_scq_handler,
        (void *) (uintptr_t)chan);

    cq_atts.cq_size = chan->rcq_size;
    cq_atts.cq_sched = NULL;
    cq_atts.cq_flags = IBT_CQ_NO_FLAGS;
    result = ibt_alloc_cq(state->id_hca_hdl, &cq_atts, &chan->rcq_hdl,
        &chan->rcq_size);
    if (result != IBT_SUCCESS) {
        ibd_print_warn(state, "ibd_rc_alloc_chan: error <%d> creating "
            "rx completion queue (size <%d>)", result, chan->rcq_size);
        goto alloc_rcq_err;
    }	/* if failure to alloc cq */

    if (ibt_modify_cq(chan->rcq_hdl, state->id_rc_rx_comp_count,
        state->id_rc_rx_comp_usec, 0) != IBT_SUCCESS) {
        DPRINT(30, "ibd_rc_alloc_chan: Receive CQ "
            "interrupt moderation failed");
    }

    ibt_set_cq_private(chan->rcq_hdl, (void *) (uintptr_t)chan);
    ibt_set_cq_handler(chan->rcq_hdl, ibd_rc_rcq_handler,
        (void *)(uintptr_t)chan);

    if (is_tx_chan) {
        chan->is_tx_chan = B_TRUE;
        if (ibd_rc_init_txlist(chan) != DDI_SUCCESS) {
            ibd_print_warn(state, "ibd_rc_alloc_chan: "
                "ibd_rc_init_txlist failed");
            goto init_txlist_err;
        }
        /* Optional softintr used to recycle completed Tx wqes */
        if (ibd_rc_tx_softintr == 1) {
            if ((rv = ddi_add_softintr(state->id_dip,
                DDI_SOFTINT_LOW, &chan->scq_softintr, NULL, NULL,
                ibd_rc_tx_recycle, (caddr_t)chan)) !=
                DDI_SUCCESS) {
                DPRINT(10, "ibd_rc_alloc_chan: failed in "
                    "ddi_add_softintr(scq_softintr), ret=%d",
                    rv);
                goto alloc_softintr_err;
            }
        }
    } else {
        chan->is_tx_chan = B_FALSE;
    }

    /*
     * enable completions
     */
    result = ibt_enable_cq_notify(chan->scq_hdl, IBT_NEXT_COMPLETION);
    if (result != IBT_SUCCESS) {
        ibd_print_warn(state, "ibd_rc_alloc_chan: ibt_enable_cq_notify"
            "(scq) failed: status %d\n", result);
        goto alloc_scq_enable_err;
    }

    /* We will enable chan->rcq_hdl later. */

    /* alloc a RC channel */
    bzero(&alloc_args, sizeof (ibt_rc_chan_alloc_args_t));
    bzero(&sizes, sizeof (ibt_chan_sizes_t));

    alloc_args.rc_flags = IBT_WR_SIGNALED;
    alloc_args.rc_control = IBT_CEP_NO_FLAGS;

    alloc_args.rc_scq = chan->scq_hdl;
    alloc_args.rc_rcq = chan->rcq_hdl;
    alloc_args.rc_pd = state->id_pd_hdl;

    alloc_args.rc_hca_port_num = state->id_port;
    alloc_args.rc_clone_chan = NULL;

    /* scatter/gather */
    alloc_args.rc_sizes.cs_sq_sgl = state->rc_tx_max_sqseg;

    /*
     * For the number of SGL elements in receive side, I think it
     * should be 1. Because ibd driver allocates a whole block memory
     * for each ibt_post_recv().
     */
    alloc_args.rc_sizes.cs_rq_sgl = 1;

    /* The send queue size and the receive queue size */
    alloc_args.rc_sizes.cs_sq = chan->scq_size;
    alloc_args.rc_sizes.cs_rq = chan->rcq_size;

    if (state->id_hca_res_lkey_capab) {
        alloc_args.rc_flags = IBT_FAST_REG_RES_LKEY;
    } else {
        DPRINT(40, "ibd_rc_alloc_chan: not support reserved lkey");
    }

    if (state->rc_enable_srq) {
        alloc_flags = IBT_ACHAN_USES_SRQ;
        alloc_args.rc_srq = state->rc_srq_hdl;
    } else {
        alloc_flags = IBT_ACHAN_NO_FLAGS;
    }

    result = ibt_alloc_rc_channel(state->id_hca_hdl,
        alloc_flags, &alloc_args, &chan->chan_hdl, &sizes);
    if (result != IBT_SUCCESS) {
        ibd_print_warn(state, "ibd_rc_alloc_chan: ibd_rc_open_channel"
            " fail:<%d>", result);
        goto alloc_scq_enable_err;
    }

    if (is_tx_chan)
        atomic_inc_32(&state->rc_num_tx_chan);
    else
        atomic_inc_32(&state->rc_num_rx_chan);

    /* For the connection reaper routine ibd_rc_conn_timeout_call() */
    chan->is_used = B_TRUE;

    *ret_chan = chan;
    return (IBT_SUCCESS);

alloc_scq_enable_err:
    if (is_tx_chan) {
        if (ibd_rc_tx_softintr == 1) {
            ddi_remove_softintr(chan->scq_softintr);
        }
    }
alloc_softintr_err:
    if (is_tx_chan) {
        ibd_rc_fini_txlist(chan);
    }
init_txlist_err:
    (void) ibt_free_cq(chan->rcq_hdl);
alloc_rcq_err:
    (void) ibt_free_cq(chan->scq_hdl);
alloc_scq_err:
    mutex_destroy(&chan->tx_poll_lock);
    mutex_destroy(&chan->tx_post_lock);
    mutex_destroy(&chan->tx_rel_list.dl_mutex);
    mutex_destroy(&chan->tx_wqe_list.dl_mutex);
    mutex_destroy(&chan->rx_free_list.dl_mutex);
    mutex_destroy(&chan->rx_wqe_list.dl_mutex);
    kmem_free(chan, sizeof (ibd_rc_chan_t));
    return (result);
}

/*
 * Tear down an RC channel: free the IBT channel and both CQs, release
 * Tx/Rx buffer lists and the Tx softintr, then destroy the mutexes and
 * free the structure.  Each ibt_free_* failure aborts the teardown early
 * (resources are intentionally leaked rather than freed twice).
 */
static void
ibd_rc_free_chan(ibd_rc_chan_t *chan)
{
    ibt_status_t ret;

    /* DPRINT(30, "ibd_rc_free_chan: chan=%p", chan); */

    if (chan->chan_hdl != NULL) {
        ret = ibt_free_channel(chan->chan_hdl);
        if (ret != IBT_SUCCESS) {
            DPRINT(40, "ib_rc_free_chan: ibt_free_channel failed, "
                "chan=%p, returned: %d", chan, ret);
            return;
        }
        chan->chan_hdl = NULL;
    }

    if (chan->rcq_hdl != NULL) {
        ret = ibt_free_cq(chan->rcq_hdl);
        if (ret != IBT_SUCCESS) {
            DPRINT(40, "ib_rc_free_chan: ibt_free_cq(rcq) failed, "
                "chan=%p, returned: %d", chan, ret);
            return;
        }
        chan->rcq_hdl = NULL;
    }

    if (chan->scq_hdl != NULL) {
        ret = ibt_free_cq(chan->scq_hdl);
        if (ret != IBT_SUCCESS) {
            DPRINT(40, "ib_rc_free_chan: ibt_free_cq(scq) failed, "
                "chan=%p, returned: %d", chan, ret);
            return;
        }
        chan->scq_hdl = NULL;
    }

    /* Free buffers */
    if (chan->is_tx_chan) {
        ibd_rc_fini_txlist(chan);
        if (ibd_rc_tx_softintr == 1) {
            ddi_remove_softintr(chan->scq_softintr);
        }
        atomic_dec_32(&chan->state->rc_num_tx_chan);
    } else {
        /* With SRQ enabled the rx list is owned by the SRQ, not us */
        if (!chan->state->rc_enable_srq) {
            ibd_rc_fini_rxlist(chan);
        }
        atomic_dec_32(&chan->state->rc_num_rx_chan);
    }

    mutex_destroy(&chan->tx_poll_lock);
    mutex_destroy(&chan->tx_post_lock);
    mutex_destroy(&chan->tx_rel_list.dl_mutex);
    mutex_destroy(&chan->tx_wqe_list.dl_mutex);
    mutex_destroy(&chan->rx_free_list.dl_mutex);
    mutex_destroy(&chan->rx_wqe_list.dl_mutex);

    /*
     * If it is a passive channel, must make sure it has been removed
     * from chan->state->rc_pass_chan_list
     */
    kmem_free(chan, sizeof (ibd_rc_chan_t));
}

/* Add a RC channel (push onto the head of the list, under its mutex) */
static inline void
ibd_rc_add_to_chan_list(ibd_rc_chan_list_t *list, ibd_rc_chan_t *chan)
{
    mutex_enter(&list->chan_list_mutex);
    if (list->chan_list == NULL) {
        list->chan_list = chan;
        chan->next = NULL;
    } else {
        chan->next = list->chan_list;
        list->chan_list = chan;
    }
    mutex_exit(&list->chan_list_mutex);
}

static boolean_t
ibd_rc_re_add_to_pas_chan_list(ibd_rc_chan_t *chan)
{
    /*
     * Re-insert a passive channel at the head of the passive channel
     * list, but only while the driver is still started; returns B_FALSE
     * (caller keeps ownership) once IBD_DRV_STARTED has been cleared.
     */
    ibd_state_t *state = chan->state;

    mutex_enter(&state->rc_pass_chan_list.chan_list_mutex);
    if ((state->id_mac_state & IBD_DRV_STARTED) == 0) {
        mutex_exit(&state->rc_pass_chan_list.chan_list_mutex);
        return (B_FALSE);
    } else {
        if (state->rc_pass_chan_list.chan_list == NULL) {
            state->rc_pass_chan_list.chan_list = chan;
            chan->next = NULL;
        } else {
            chan->next = state->rc_pass_chan_list.chan_list;
            state->rc_pass_chan_list.chan_list = chan;
        }
        mutex_exit(&state->rc_pass_chan_list.chan_list_mutex);
        return (B_TRUE);
    }
}

/*
 * Remove a RC channel.  Returns chan if it was found and unlinked from
 * the list, or NULL if it was not on the list.
 */
static inline ibd_rc_chan_t *
ibd_rc_rm_from_chan_list(ibd_rc_chan_list_t *list, ibd_rc_chan_t *chan)
{
    ibd_rc_chan_t *pre_chan;

    mutex_enter(&list->chan_list_mutex);
    if (list->chan_list == chan) {
        DPRINT(30, "ibd_rc_rm_from_chan_list(first): found chan(%p)"
            " in chan_list", chan);
        list->chan_list = chan->next;
    } else {
        pre_chan = list->chan_list;
        while (pre_chan != NULL) {
            if (pre_chan->next == chan) {
                DPRINT(30, "ibd_rc_rm_from_chan_list"
                    "(middle): found chan(%p)", chan);
                pre_chan->next = chan->next;
                break;
            }
            pre_chan = pre_chan->next;
        }
        if (pre_chan == NULL)
            chan = NULL;
    }
    mutex_exit(&list->chan_list_mutex);
    return (chan);
}

/* Pop the head channel from the list; NULL when the list is empty */
static inline ibd_rc_chan_t *
ibd_rc_rm_header_chan_list(ibd_rc_chan_list_t *list)
{
    ibd_rc_chan_t *rc_chan;

    mutex_enter(&list->chan_list_mutex);
    rc_chan = list->chan_list;
    if (rc_chan != NULL) {
        list->chan_list = rc_chan->next;
    }
    mutex_exit(&list->chan_list_mutex);
    return (rc_chan);
}

/*
 * Allocate the SRQ Rx copy-buffer arena and rwqe array, and register the
 * buffer arena as one memory region.  On registration failure both
 * allocations are freed and DDI_FAILURE is returned.
 */
static int
ibd_rc_alloc_srq_copybufs(ibd_state_t *state)
{
    ibt_mr_attr_t mem_attr;
    uint_t rc_rx_bufs_sz;

    /*
     * Allocate one big chunk for all regular rx copy bufs
     */
    rc_rx_bufs_sz = (state->rc_mtu + IPOIB_GRH_SIZE) * state->rc_srq_size;

    state->rc_srq_rx_bufs = kmem_zalloc(rc_rx_bufs_sz, KM_SLEEP);

    state->rc_srq_rwqes = kmem_zalloc(state->rc_srq_size *
        sizeof (ibd_rwqe_t), KM_SLEEP);

    /*
     * Do one memory registration on the entire rxbuf area
     */
    mem_attr.mr_vaddr = (uint64_t)(uintptr_t)state->rc_srq_rx_bufs;
    mem_attr.mr_len = rc_rx_bufs_sz;
    mem_attr.mr_as = NULL;
    mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE;
    if (ibt_register_mr(state->id_hca_hdl, state->id_pd_hdl, &mem_attr,
        &state->rc_srq_rx_mr_hdl, &state->rc_srq_rx_mr_desc)
        != IBT_SUCCESS) {
        DPRINT(40, "ibd_rc_alloc_srq_copybufs: ibt_register_mr() "
            "failed");
        kmem_free(state->rc_srq_rwqes,
            state->rc_srq_size * sizeof (ibd_rwqe_t));
        kmem_free(state->rc_srq_rx_bufs, rc_rx_bufs_sz);
        state->rc_srq_rx_bufs = NULL;
        state->rc_srq_rwqes = NULL;
        return (DDI_FAILURE);
    }

    return (DDI_SUCCESS);
}

/*
 * Undo ibd_rc_alloc_srq_copybufs(): deregister the Rx buffer memory
 * region and free the buffer arena and rwqe array.
 */
static void
ibd_rc_free_srq_copybufs(ibd_state_t *state)
{
    uint_t rc_rx_buf_sz;

    /*
     * Don't change the value of state->rc_mtu at the period from call
     * ibd_rc_alloc_srq_copybufs() to call ibd_rc_free_srq_copybufs().
     */
    rc_rx_buf_sz = state->rc_mtu + IPOIB_GRH_SIZE;

    /*
     * Unregister rxbuf mr
     */
    if (ibt_deregister_mr(state->id_hca_hdl,
        state->rc_srq_rx_mr_hdl) != IBT_SUCCESS) {
        DPRINT(40, "ibd_rc_free_srq_copybufs: ibt_deregister_mr()"
            " failed");
    }
    state->rc_srq_rx_mr_hdl = NULL;

    /*
     * Free rxbuf memory
     */
    kmem_free(state->rc_srq_rwqes,
        state->rc_srq_size * sizeof (ibd_rwqe_t));
    kmem_free(state->rc_srq_rx_bufs, state->rc_srq_size * rc_rx_buf_sz);
    state->rc_srq_rwqes = NULL;
    state->rc_srq_rx_bufs = NULL;
}

/*
 * Allocate and post a certain number of SRQ receive buffers and WRs.
 */
int
ibd_rc_init_srq_list(ibd_state_t *state)
{
    ibd_rwqe_t *rwqe;
    ibt_lkey_t lkey;
    int i;
    uint_t len;
    uint8_t *bufaddr;
    ibt_srq_sizes_t srq_sizes;
    ibt_srq_sizes_t srq_real_sizes;
    ibt_status_t ret;

    srq_sizes.srq_sgl_sz = 1;
    srq_sizes.srq_wr_sz = state->id_rc_num_srq;
    ret = ibt_alloc_srq(state->id_hca_hdl, IBT_SRQ_NO_FLAGS,
        state->id_pd_hdl, &srq_sizes, &state->rc_srq_hdl, &srq_real_sizes);
    if (ret != IBT_SUCCESS) {
        /*
         * The following code is for CR 6932460 (can't configure ibd
         * interface on 32 bits x86 systems). 32 bits x86 system has
         * less memory resource than 64 bits x86 system. If current
         * resource request can't be satisfied, we request less
         * resource here.
         */
        len = state->id_rc_num_srq;
        while ((ret == IBT_HCA_WR_EXCEEDED) &&
            (len >= 2 * IBD_RC_MIN_CQ_SIZE)) {
            /* Halve the request until the HCA accepts it */
            len = len/2;
            srq_sizes.srq_sgl_sz = 1;
            srq_sizes.srq_wr_sz = len;
            ret = ibt_alloc_srq(state->id_hca_hdl,
                IBT_SRQ_NO_FLAGS, state->id_pd_hdl, &srq_sizes,
                &state->rc_srq_hdl, &srq_real_sizes);
        }
        if (ret != IBT_SUCCESS) {
            DPRINT(10, "ibd_rc_init_srq_list: ibt_alloc_srq failed."
                "req_sgl_sz=%d, req_wr_sz=0x%x, final_req_wr_sz="
                "0x%x, ret=%d", srq_sizes.srq_sgl_sz,
                srq_sizes.srq_wr_sz, len, ret);
            return (DDI_FAILURE);
        }
        state->id_rc_num_srq = len;
        state->id_rc_num_rwqe = state->id_rc_num_srq + 1;
    }

    state->rc_srq_size = srq_real_sizes.srq_wr_sz;
    if (ibd_rc_alloc_srq_copybufs(state) != DDI_SUCCESS) {
        ret = ibt_free_srq(state->rc_srq_hdl);
        if (ret != IBT_SUCCESS) {
            ibd_print_warn(state, "ibd_rc_init_srq_list: "
                "ibt_free_srq fail, ret=%d", ret);
        }
        return (DDI_FAILURE);
    }

    /*
     * Allocate and setup the rwqe list
     */
    lkey = state->rc_srq_rx_mr_desc.md_lkey;
    rwqe = state->rc_srq_rwqes;
    bufaddr = state->rc_srq_rx_bufs;
    len = state->rc_mtu + IPOIB_GRH_SIZE;
    state->rc_srq_rwqe_list.dl_cnt = 0;
    state->rc_srq_rwqe_list.dl_bufs_outstanding = 0;
    for (i = 0; i < state->rc_srq_size; i++, rwqe++, bufaddr += len) {
        rwqe->w_state = state;
        rwqe->w_freeing_wqe = B_FALSE;
        rwqe->w_freemsg_cb.free_func = ibd_rc_srq_freemsg_cb;
        rwqe->w_freemsg_cb.free_arg = (char *)rwqe;
        rwqe->rwqe_copybuf.ic_bufaddr = bufaddr;

        if ((rwqe->rwqe_im_mblk = desballoc(bufaddr, len, 0,
            &rwqe->w_freemsg_cb)) == NULL) {
            DPRINT(40, "ibd_rc_init_srq_list : desballoc() failed");
            rwqe->rwqe_copybuf.ic_bufaddr = NULL;
            /*
             * ibd_rc_fini_srq_list() asserts id_running == 0;
             * drop it around the call and restore afterwards.
             */
            if (atomic_dec_32_nv(&state->id_running) != 0) {
                cmn_err(CE_WARN, "ibd_rc_init_srq_list: "
                    "id_running was not 1\n");
            }
            ibd_rc_fini_srq_list(state);
            atomic_inc_32(&state->id_running);
            return (DDI_FAILURE);
        }

        rwqe->rwqe_copybuf.ic_sgl.ds_key = lkey;
        /* Leave IPOIB_GRH_SIZE space */
        rwqe->rwqe_copybuf.ic_sgl.ds_va =
            (ib_vaddr_t)(uintptr_t)(bufaddr + IPOIB_GRH_SIZE);
        rwqe->rwqe_copybuf.ic_sgl.ds_len = state->rc_mtu;
        rwqe->w_rwr.wr_id = (ibt_wrid_t)(uintptr_t)rwqe;
        rwqe->w_rwr.wr_nds = 1;
        rwqe->w_rwr.wr_sgl = &rwqe->rwqe_copybuf.ic_sgl;
        (void) ibd_rc_post_srq(state, rwqe);
    }

    mutex_enter(&state->rc_srq_free_list.dl_mutex);
    state->rc_srq_free_list.dl_head = NULL;
    state->rc_srq_free_list.dl_cnt = 0;
    mutex_exit(&state->rc_srq_free_list.dl_mutex);

    return (DDI_SUCCESS);
}

/*
 * Free the statically allocated Rx buffer list for SRQ.
 */
void
ibd_rc_fini_srq_list(ibd_state_t *state)
{
    ibd_rwqe_t *rwqe;
    int i;
    ibt_status_t ret;

    ASSERT(state->id_running == 0);
    ret = ibt_free_srq(state->rc_srq_hdl);
    if (ret != IBT_SUCCESS) {
        ibd_print_warn(state, "ibd_rc_fini_srq_list: "
            "ibt_free_srq fail, ret=%d", ret);
    }

    mutex_enter(&state->rc_srq_rwqe_list.dl_mutex);
    rwqe = state->rc_srq_rwqes;
    for (i = 0; i < state->rc_srq_size; i++, rwqe++) {
        if (rwqe->rwqe_im_mblk != NULL) {
            /* Tell the freemsg callback not to repost this wqe */
            rwqe->w_freeing_wqe = B_TRUE;
            freemsg(rwqe->rwqe_im_mblk);
        }
    }
    mutex_exit(&state->rc_srq_rwqe_list.dl_mutex);

    ibd_rc_free_srq_copybufs(state);
}

/* Repost the elements in state->ib_rc_free_list */
int
ibd_rc_repost_srq_free_list(ibd_state_t *state)
{
    ibd_rwqe_t *rwqe;
    ibd_wqe_t *list;
    uint_t len;

    mutex_enter(&state->rc_srq_free_list.dl_mutex);
    if (state->rc_srq_free_list.dl_head != NULL) {
        /* repost them */
        len = state->rc_mtu + IPOIB_GRH_SIZE;
        list = state->rc_srq_free_list.dl_head;
        state->rc_srq_free_list.dl_head = NULL;
        state->rc_srq_free_list.dl_cnt = 0;
        mutex_exit(&state->rc_srq_free_list.dl_mutex);
        while (list != NULL) {
            rwqe = WQE_TO_RWQE(list);
            if ((rwqe->rwqe_im_mblk == NULL) &&
                ((rwqe->rwqe_im_mblk = desballoc(
                rwqe->rwqe_copybuf.ic_bufaddr, len, 0,
                &rwqe->w_freemsg_cb)) == NULL)) {
                DPRINT(40, "ibd_rc_repost_srq_free_list: "
                    "failed in desballoc()");
                do {
                    ibd_rc_srq_free_rwqe(state,
rwqe); 1141 list = list->w_next; 1142 rwqe = WQE_TO_RWQE(list); 1143 } while (list != NULL); 1144 return (DDI_FAILURE); 1145 } 1146 if (ibd_rc_post_srq(state, rwqe) == DDI_FAILURE) { 1147 ibd_rc_srq_free_rwqe(state, rwqe); 1148 } 1149 list = list->w_next; 1150 } 1151 return (DDI_SUCCESS); 1152 } 1153 mutex_exit(&state->rc_srq_free_list.dl_mutex); 1154 return (DDI_SUCCESS); 1155 } 1156 1157 /* 1158 * Free an allocated recv wqe. 1159 */ 1160 static void 1161 ibd_rc_srq_free_rwqe(ibd_state_t *state, ibd_rwqe_t *rwqe) 1162 { 1163 /* 1164 * desballoc() failed (no memory) or the posting of rwqe failed. 1165 * 1166 * This rwqe is placed on a free list so that it 1167 * can be reinstated in future. 1168 * 1169 * NOTE: no code currently exists to reinstate 1170 * these "lost" rwqes. 1171 */ 1172 mutex_enter(&state->rc_srq_free_list.dl_mutex); 1173 state->rc_srq_free_list.dl_cnt++; 1174 rwqe->rwqe_next = state->rc_srq_free_list.dl_head; 1175 state->rc_srq_free_list.dl_head = RWQE_TO_WQE(rwqe); 1176 mutex_exit(&state->rc_srq_free_list.dl_mutex); 1177 } 1178 1179 static void 1180 ibd_rc_srq_freemsg_cb(char *arg) 1181 { 1182 ibd_rwqe_t *rwqe = (ibd_rwqe_t *)arg; 1183 ibd_state_t *state = rwqe->w_state; 1184 1185 ASSERT(state->rc_enable_srq); 1186 1187 /* 1188 * If the driver is stopped, just free the rwqe. 1189 */ 1190 if (atomic_add_32_nv(&state->id_running, 0) == 0) { 1191 if (!rwqe->w_freeing_wqe) { 1192 atomic_dec_32( 1193 &state->rc_srq_rwqe_list.dl_bufs_outstanding); 1194 DPRINT(6, "ibd_rc_srq_freemsg_cb: wqe being freed"); 1195 rwqe->rwqe_im_mblk = NULL; 1196 ibd_rc_srq_free_rwqe(state, rwqe); 1197 } 1198 return; 1199 } 1200 1201 atomic_dec_32(&state->rc_srq_rwqe_list.dl_bufs_outstanding); 1202 1203 ASSERT(state->rc_srq_rwqe_list.dl_cnt < state->rc_srq_size); 1204 ASSERT(!rwqe->w_freeing_wqe); 1205 1206 /* 1207 * Upper layer has released held mblk, so we have 1208 * no more use for keeping the old pointer in 1209 * our rwqe. 
1210 */ 1211 rwqe->rwqe_im_mblk = desballoc(rwqe->rwqe_copybuf.ic_bufaddr, 1212 state->rc_mtu + IPOIB_GRH_SIZE, 0, &rwqe->w_freemsg_cb); 1213 if (rwqe->rwqe_im_mblk == NULL) { 1214 DPRINT(40, "ibd_rc_srq_freemsg_cb: desballoc failed"); 1215 ibd_rc_srq_free_rwqe(state, rwqe); 1216 return; 1217 } 1218 1219 if (ibd_rc_post_srq(state, rwqe) == DDI_FAILURE) { 1220 ibd_print_warn(state, "ibd_rc_srq_freemsg_cb: ibd_rc_post_srq" 1221 " failed"); 1222 ibd_rc_srq_free_rwqe(state, rwqe); 1223 return; 1224 } 1225 } 1226 1227 /* 1228 * Post a rwqe to the hardware and add it to the Rx list. 1229 */ 1230 static int 1231 ibd_rc_post_srq(ibd_state_t *state, ibd_rwqe_t *rwqe) 1232 { 1233 /* 1234 * Here we should add dl_cnt before post recv, because 1235 * we would have to make sure dl_cnt is updated before 1236 * the corresponding ibd_rc_process_rx() is called. 1237 */ 1238 ASSERT(state->rc_srq_rwqe_list.dl_cnt < state->rc_srq_size); 1239 atomic_add_32(&state->rc_srq_rwqe_list.dl_cnt, 1); 1240 if (ibt_post_srq(state->rc_srq_hdl, &rwqe->w_rwr, 1, NULL) != 1241 IBT_SUCCESS) { 1242 atomic_dec_32(&state->rc_srq_rwqe_list.dl_cnt); 1243 DPRINT(40, "ibd_rc_post_srq : ibt_post_srq() failed"); 1244 return (DDI_FAILURE); 1245 } 1246 1247 return (DDI_SUCCESS); 1248 } 1249 1250 /* 1251 * Post a rwqe to the hardware and add it to the Rx list. 1252 */ 1253 static int 1254 ibd_rc_post_rwqe(ibd_rc_chan_t *chan, ibd_rwqe_t *rwqe) 1255 { 1256 /* 1257 * Here we should add dl_cnt before post recv, because we would 1258 * have to make sure dl_cnt has already updated before 1259 * corresponding ibd_rc_process_rx() is called. 
1260 */ 1261 atomic_add_32(&chan->rx_wqe_list.dl_cnt, 1); 1262 if (ibt_post_recv(chan->chan_hdl, &rwqe->w_rwr, 1, NULL) != 1263 IBT_SUCCESS) { 1264 atomic_dec_32(&chan->rx_wqe_list.dl_cnt); 1265 DPRINT(40, "ibd_rc_post_rwqe : failed in ibt_post_recv()"); 1266 return (DDI_FAILURE); 1267 } 1268 return (DDI_SUCCESS); 1269 } 1270 1271 static int 1272 ibd_rc_alloc_rx_copybufs(ibd_rc_chan_t *chan) 1273 { 1274 ibd_state_t *state = chan->state; 1275 ibt_mr_attr_t mem_attr; 1276 uint_t rc_rx_bufs_sz; 1277 1278 /* 1279 * Allocate one big chunk for all regular rx copy bufs 1280 */ 1281 rc_rx_bufs_sz = (state->rc_mtu + IPOIB_GRH_SIZE) * chan->rcq_size; 1282 1283 chan->rx_bufs = kmem_zalloc(rc_rx_bufs_sz, KM_SLEEP); 1284 1285 chan->rx_rwqes = kmem_zalloc(chan->rcq_size * 1286 sizeof (ibd_rwqe_t), KM_SLEEP); 1287 1288 /* 1289 * Do one memory registration on the entire rxbuf area 1290 */ 1291 mem_attr.mr_vaddr = (uint64_t)(uintptr_t)chan->rx_bufs; 1292 mem_attr.mr_len = rc_rx_bufs_sz; 1293 mem_attr.mr_as = NULL; 1294 mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE; 1295 if (ibt_register_mr(state->id_hca_hdl, state->id_pd_hdl, &mem_attr, 1296 &chan->rx_mr_hdl, &chan->rx_mr_desc) != IBT_SUCCESS) { 1297 DPRINT(40, "ibd_rc_alloc_srq_copybufs: ibt_register_mr failed"); 1298 kmem_free(chan->rx_rwqes, chan->rcq_size * sizeof (ibd_rwqe_t)); 1299 kmem_free(chan->rx_bufs, rc_rx_bufs_sz); 1300 chan->rx_bufs = NULL; 1301 chan->rx_rwqes = NULL; 1302 return (DDI_FAILURE); 1303 } 1304 1305 return (DDI_SUCCESS); 1306 } 1307 1308 static void 1309 ibd_rc_free_rx_copybufs(ibd_rc_chan_t *chan) 1310 { 1311 ibd_state_t *state = chan->state; 1312 uint_t rc_rx_buf_sz; 1313 1314 ASSERT(!state->rc_enable_srq); 1315 ASSERT(chan->rx_rwqes != NULL); 1316 ASSERT(chan->rx_bufs != NULL); 1317 1318 /* 1319 * Don't change the value of state->rc_mtu at the period from call 1320 * ibd_rc_alloc_rx_copybufs() to call ibd_rc_free_rx_copybufs(). 
1321 */ 1322 rc_rx_buf_sz = state->rc_mtu + IPOIB_GRH_SIZE; 1323 1324 /* 1325 * Unregister rxbuf mr 1326 */ 1327 if (ibt_deregister_mr(state->id_hca_hdl, 1328 chan->rx_mr_hdl) != IBT_SUCCESS) { 1329 DPRINT(40, "ibd_rc_free_rx_copybufs: ibt_deregister_mr failed"); 1330 } 1331 chan->rx_mr_hdl = NULL; 1332 1333 /* 1334 * Free rxbuf memory 1335 */ 1336 kmem_free(chan->rx_rwqes, chan->rcq_size * sizeof (ibd_rwqe_t)); 1337 chan->rx_rwqes = NULL; 1338 1339 kmem_free(chan->rx_bufs, chan->rcq_size * rc_rx_buf_sz); 1340 chan->rx_bufs = NULL; 1341 } 1342 1343 /* 1344 * Post a certain number of receive buffers and WRs on a RC channel. 1345 */ 1346 static int 1347 ibd_rc_init_rxlist(ibd_rc_chan_t *chan) 1348 { 1349 ibd_state_t *state = chan->state; 1350 ibd_rwqe_t *rwqe; 1351 ibt_lkey_t lkey; 1352 int i; 1353 uint_t len; 1354 uint8_t *bufaddr; 1355 1356 ASSERT(!state->rc_enable_srq); 1357 if (ibd_rc_alloc_rx_copybufs(chan) != DDI_SUCCESS) 1358 return (DDI_FAILURE); 1359 1360 /* 1361 * Allocate and setup the rwqe list 1362 */ 1363 lkey = chan->rx_mr_desc.md_lkey; 1364 rwqe = chan->rx_rwqes; 1365 bufaddr = chan->rx_bufs; 1366 len = state->rc_mtu + IPOIB_GRH_SIZE; 1367 for (i = 0; i < chan->rcq_size; i++, rwqe++, bufaddr += len) { 1368 rwqe->w_state = state; 1369 rwqe->w_chan = chan; 1370 rwqe->w_freeing_wqe = B_FALSE; 1371 rwqe->w_freemsg_cb.free_func = ibd_rc_freemsg_cb; 1372 rwqe->w_freemsg_cb.free_arg = (char *)rwqe; 1373 rwqe->rwqe_copybuf.ic_bufaddr = bufaddr; 1374 1375 if ((rwqe->rwqe_im_mblk = desballoc(bufaddr, len, 0, 1376 &rwqe->w_freemsg_cb)) == NULL) { 1377 DPRINT(40, "ibd_rc_init_srq_list: desballoc() failed"); 1378 rwqe->rwqe_copybuf.ic_bufaddr = NULL; 1379 ibd_rc_fini_rxlist(chan); 1380 return (DDI_FAILURE); 1381 } 1382 1383 rwqe->rwqe_copybuf.ic_sgl.ds_key = lkey; 1384 rwqe->rwqe_copybuf.ic_sgl.ds_va = 1385 (ib_vaddr_t)(uintptr_t)(bufaddr + IPOIB_GRH_SIZE); 1386 rwqe->rwqe_copybuf.ic_sgl.ds_len = state->rc_mtu; 1387 rwqe->w_rwr.wr_id = (ibt_wrid_t)(uintptr_t)rwqe; 
1388 rwqe->w_rwr.wr_nds = 1; 1389 rwqe->w_rwr.wr_sgl = &rwqe->rwqe_copybuf.ic_sgl; 1390 (void) ibd_rc_post_rwqe(chan, rwqe); 1391 } 1392 1393 return (DDI_SUCCESS); 1394 } 1395 1396 /* 1397 * Free the statically allocated Rx buffer list for SRQ. 1398 */ 1399 static void 1400 ibd_rc_fini_rxlist(ibd_rc_chan_t *chan) 1401 { 1402 ibd_rwqe_t *rwqe; 1403 int i; 1404 1405 if (chan->rx_bufs == NULL) { 1406 DPRINT(40, "ibd_rc_fini_rxlist: empty chan->rx_bufs, quit"); 1407 return; 1408 } 1409 1410 /* bufs_outstanding must be 0 */ 1411 ASSERT((chan->rx_wqe_list.dl_head == NULL) || 1412 (chan->rx_wqe_list.dl_bufs_outstanding == 0)); 1413 1414 mutex_enter(&chan->rx_wqe_list.dl_mutex); 1415 rwqe = chan->rx_rwqes; 1416 for (i = 0; i < chan->rcq_size; i++, rwqe++) { 1417 if (rwqe->rwqe_im_mblk != NULL) { 1418 rwqe->w_freeing_wqe = B_TRUE; 1419 freemsg(rwqe->rwqe_im_mblk); 1420 } 1421 } 1422 mutex_exit(&chan->rx_wqe_list.dl_mutex); 1423 1424 ibd_rc_free_rx_copybufs(chan); 1425 } 1426 1427 /* 1428 * Free an allocated recv wqe. 1429 */ 1430 static void 1431 ibd_rc_free_rwqe(ibd_rc_chan_t *chan, ibd_rwqe_t *rwqe) 1432 { 1433 /* 1434 * desballoc() failed (no memory) or the posting of rwqe failed. 1435 * 1436 * This rwqe is placed on a free list so that it 1437 * can be reinstated in future. 1438 * 1439 * NOTE: no code currently exists to reinstate 1440 * these "lost" rwqes. 1441 */ 1442 mutex_enter(&chan->rx_free_list.dl_mutex); 1443 chan->rx_free_list.dl_cnt++; 1444 rwqe->rwqe_next = chan->rx_free_list.dl_head; 1445 chan->rx_free_list.dl_head = RWQE_TO_WQE(rwqe); 1446 mutex_exit(&chan->rx_free_list.dl_mutex); 1447 } 1448 1449 /* 1450 * Processing to be done after receipt of a packet; hand off to GLD 1451 * in the format expected by GLD. 
1452 */ 1453 static void 1454 ibd_rc_process_rx(ibd_rc_chan_t *chan, ibd_rwqe_t *rwqe, ibt_wc_t *wc) 1455 { 1456 ibd_state_t *state = chan->state; 1457 ib_header_info_t *phdr; 1458 ipoib_hdr_t *ipibp; 1459 mblk_t *mp; 1460 mblk_t *mpc; 1461 int rxcnt; 1462 ip6_t *ip6h; 1463 int len; 1464 1465 /* 1466 * Track number handed to upper layer, and number still 1467 * available to receive packets. 1468 */ 1469 if (state->rc_enable_srq) { 1470 rxcnt = atomic_dec_32_nv(&state->rc_srq_rwqe_list.dl_cnt); 1471 } else { 1472 rxcnt = atomic_dec_32_nv(&chan->rx_wqe_list.dl_cnt); 1473 } 1474 1475 /* 1476 * It can not be a IBA multicast packet. 1477 */ 1478 ASSERT(!wc->wc_flags & IBT_WC_GRH_PRESENT); 1479 1480 /* For the connection reaper routine ibd_rc_conn_timeout_call() */ 1481 chan->is_used = B_TRUE; 1482 1483 #ifdef DEBUG 1484 if (rxcnt < state->id_rc_rx_rwqe_thresh) { 1485 state->rc_rwqe_short++; 1486 } 1487 #endif 1488 1489 /* 1490 * Possibly replenish the Rx pool if needed. 1491 */ 1492 if ((rxcnt >= state->id_rc_rx_rwqe_thresh) && 1493 (wc->wc_bytes_xfer > state->id_rc_rx_copy_thresh)) { 1494 atomic_add_64(&state->rc_rcv_trans_byte, wc->wc_bytes_xfer); 1495 atomic_inc_64(&state->rc_rcv_trans_pkt); 1496 1497 /* 1498 * Record how many rwqe has been occupied by upper 1499 * network layer 1500 */ 1501 if (state->rc_enable_srq) { 1502 atomic_add_32(&state->rc_srq_rwqe_list. 1503 dl_bufs_outstanding, 1); 1504 } else { 1505 atomic_add_32(&chan->rx_wqe_list. 
1506 dl_bufs_outstanding, 1); 1507 } 1508 mp = rwqe->rwqe_im_mblk; 1509 } else { 1510 atomic_add_64(&state->rc_rcv_copy_byte, wc->wc_bytes_xfer); 1511 atomic_inc_64(&state->rc_rcv_copy_pkt); 1512 1513 if ((mp = allocb(wc->wc_bytes_xfer + IPOIB_GRH_SIZE, 1514 BPRI_HI)) == NULL) { /* no memory */ 1515 DPRINT(40, "ibd_rc_process_rx: allocb() failed"); 1516 state->rc_rcv_alloc_fail++; 1517 if (state->rc_enable_srq) { 1518 if (ibd_rc_post_srq(state, rwqe) == 1519 DDI_FAILURE) { 1520 ibd_rc_srq_free_rwqe(state, rwqe); 1521 } 1522 } else { 1523 if (ibd_rc_post_rwqe(chan, rwqe) == 1524 DDI_FAILURE) { 1525 ibd_rc_free_rwqe(chan, rwqe); 1526 } 1527 } 1528 return; 1529 } 1530 1531 bcopy(rwqe->rwqe_im_mblk->b_rptr + IPOIB_GRH_SIZE, 1532 mp->b_wptr + IPOIB_GRH_SIZE, wc->wc_bytes_xfer); 1533 1534 if (state->rc_enable_srq) { 1535 if (ibd_rc_post_srq(state, rwqe) == DDI_FAILURE) { 1536 ibd_rc_srq_free_rwqe(state, rwqe); 1537 } 1538 } else { 1539 if (ibd_rc_post_rwqe(chan, rwqe) == DDI_FAILURE) { 1540 ibd_rc_free_rwqe(chan, rwqe); 1541 } 1542 } 1543 } 1544 1545 ipibp = (ipoib_hdr_t *)((uchar_t *)mp->b_rptr + IPOIB_GRH_SIZE); 1546 if (ntohs(ipibp->ipoib_type) == ETHERTYPE_IPV6) { 1547 ip6h = (ip6_t *)((uchar_t *)ipibp + sizeof (ipoib_hdr_t)); 1548 len = ntohs(ip6h->ip6_plen); 1549 if (ip6h->ip6_nxt == IPPROTO_ICMPV6) { 1550 /* LINTED: E_CONSTANT_CONDITION */ 1551 IBD_PAD_NSNA(ip6h, len, IBD_RECV); 1552 } 1553 } 1554 1555 phdr = (ib_header_info_t *)mp->b_rptr; 1556 phdr->ib_grh.ipoib_vertcflow = 0; 1557 ovbcopy(&state->id_macaddr, &phdr->ib_dst, 1558 sizeof (ipoib_mac_t)); 1559 mp->b_wptr = mp->b_rptr + wc->wc_bytes_xfer+ IPOIB_GRH_SIZE; 1560 1561 /* 1562 * Can RC mode in IB guarantee its checksum correctness? 1563 * 1564 * (void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, 0, 1565 * HCK_FULLCKSUM | HCK_FULLCKSUM_OK, 0); 1566 */ 1567 1568 /* 1569 * Make sure this is NULL or we're in trouble. 
1570 */ 1571 if (mp->b_next != NULL) { 1572 ibd_print_warn(state, 1573 "ibd_rc_process_rx: got duplicate mp from rcq?"); 1574 mp->b_next = NULL; 1575 } 1576 1577 /* 1578 * Add this mp to the list of processed mp's to send to 1579 * the nw layer 1580 */ 1581 if (state->rc_enable_srq) { 1582 mutex_enter(&state->rc_rx_lock); 1583 if (state->rc_rx_mp) { 1584 ASSERT(state->rc_rx_mp_tail != NULL); 1585 state->rc_rx_mp_tail->b_next = mp; 1586 } else { 1587 ASSERT(state->rc_rx_mp_tail == NULL); 1588 state->rc_rx_mp = mp; 1589 } 1590 1591 state->rc_rx_mp_tail = mp; 1592 state->rc_rx_mp_len++; 1593 1594 if (state->rc_rx_mp_len >= IBD_MAX_RX_MP_LEN) { 1595 mpc = state->rc_rx_mp; 1596 1597 state->rc_rx_mp = NULL; 1598 state->rc_rx_mp_tail = NULL; 1599 state->rc_rx_mp_len = 0; 1600 mutex_exit(&state->rc_rx_lock); 1601 mac_rx(state->id_mh, NULL, mpc); 1602 } else { 1603 mutex_exit(&state->rc_rx_lock); 1604 } 1605 } else { 1606 mutex_enter(&chan->rx_lock); 1607 if (chan->rx_mp) { 1608 ASSERT(chan->rx_mp_tail != NULL); 1609 chan->rx_mp_tail->b_next = mp; 1610 } else { 1611 ASSERT(chan->rx_mp_tail == NULL); 1612 chan->rx_mp = mp; 1613 } 1614 1615 chan->rx_mp_tail = mp; 1616 chan->rx_mp_len++; 1617 1618 if (chan->rx_mp_len >= IBD_MAX_RX_MP_LEN) { 1619 mpc = chan->rx_mp; 1620 1621 chan->rx_mp = NULL; 1622 chan->rx_mp_tail = NULL; 1623 chan->rx_mp_len = 0; 1624 mutex_exit(&chan->rx_lock); 1625 mac_rx(state->id_mh, NULL, mpc); 1626 } else { 1627 mutex_exit(&chan->rx_lock); 1628 } 1629 } 1630 } 1631 1632 /* 1633 * Callback code invoked from STREAMs when the recv data buffer is free 1634 * for recycling. 1635 */ 1636 static void 1637 ibd_rc_freemsg_cb(char *arg) 1638 { 1639 ibd_rwqe_t *rwqe = (ibd_rwqe_t *)arg; 1640 ibd_rc_chan_t *chan = rwqe->w_chan; 1641 ibd_state_t *state = rwqe->w_state; 1642 1643 /* 1644 * If the wqe is being destructed, do not attempt recycling. 
1645 */ 1646 if (rwqe->w_freeing_wqe == B_TRUE) { 1647 return; 1648 } 1649 1650 ASSERT(!state->rc_enable_srq); 1651 ASSERT(chan->rx_wqe_list.dl_cnt < chan->rcq_size); 1652 1653 rwqe->rwqe_im_mblk = desballoc(rwqe->rwqe_copybuf.ic_bufaddr, 1654 state->rc_mtu + IPOIB_GRH_SIZE, 0, &rwqe->w_freemsg_cb); 1655 if (rwqe->rwqe_im_mblk == NULL) { 1656 DPRINT(40, "ibd_rc_freemsg_cb: desballoc() failed"); 1657 ibd_rc_free_rwqe(chan, rwqe); 1658 return; 1659 } 1660 1661 /* 1662 * Post back to h/w. We could actually have more than 1663 * id_num_rwqe WQEs on the list if there were multiple 1664 * ibd_freemsg_cb() calls outstanding (since the lock is 1665 * not held the entire time). This will start getting 1666 * corrected over subsequent ibd_freemsg_cb() calls. 1667 */ 1668 if (ibd_rc_post_rwqe(chan, rwqe) == DDI_FAILURE) { 1669 ibd_rc_free_rwqe(chan, rwqe); 1670 return; 1671 } 1672 atomic_add_32(&chan->rx_wqe_list.dl_bufs_outstanding, -1); 1673 } 1674 1675 /* 1676 * Common code for interrupt handling as well as for polling 1677 * for all completed wqe's while detaching. 1678 */ 1679 static void 1680 ibd_rc_poll_rcq(ibd_rc_chan_t *chan, ibt_cq_hdl_t cq_hdl) 1681 { 1682 ibd_wqe_t *wqe; 1683 ibt_wc_t *wc, *wcs; 1684 uint_t numwcs, real_numwcs; 1685 int i; 1686 1687 wcs = chan->rx_wc; 1688 numwcs = IBD_RC_MAX_CQ_WC; 1689 1690 while (ibt_poll_cq(cq_hdl, wcs, numwcs, &real_numwcs) == IBT_SUCCESS) { 1691 for (i = 0, wc = wcs; i < real_numwcs; i++, wc++) { 1692 wqe = (ibd_wqe_t *)(uintptr_t)wc->wc_id; 1693 if (wc->wc_status != IBT_WC_SUCCESS) { 1694 chan->state->rc_rcq_err++; 1695 /* 1696 * Channel being torn down. 1697 */ 1698 DPRINT(40, "ibd_rc_poll_rcq: wc_status(%d) != " 1699 "SUCC, chan=%p", wc->wc_status, chan); 1700 if (wc->wc_status == IBT_WC_WR_FLUSHED_ERR) { 1701 /* 1702 * Do not invoke Rx handler because 1703 * it might add buffers to the Rx pool 1704 * when we are trying to deinitialize. 
1705 */ 1706 continue; 1707 } 1708 } 1709 ibd_rc_process_rx(chan, WQE_TO_RWQE(wqe), wc); 1710 } 1711 } 1712 } 1713 1714 /* Receive CQ handler */ 1715 /* ARGSUSED */ 1716 static void 1717 ibd_rc_rcq_handler(ibt_cq_hdl_t cq_hdl, void *arg) 1718 { 1719 ibd_rc_chan_t *chan = (ibd_rc_chan_t *)arg; 1720 ibd_state_t *state = chan->state; 1721 1722 atomic_inc_32(&chan->rcq_invoking); 1723 ASSERT(chan->chan_state == IBD_RC_STATE_PAS_ESTAB); 1724 1725 /* 1726 * Poll for completed entries; the CQ will not interrupt any 1727 * more for incoming (or transmitted) packets. 1728 */ 1729 ibd_rc_poll_rcq(chan, chan->rcq_hdl); 1730 1731 /* 1732 * Now enable CQ notifications; all packets that arrive now 1733 * (or complete transmission) will cause new interrupts. 1734 */ 1735 if (ibt_enable_cq_notify(chan->rcq_hdl, IBT_NEXT_COMPLETION) != 1736 IBT_SUCCESS) { 1737 /* 1738 * We do not expect a failure here. 1739 */ 1740 DPRINT(40, "ibd_rc_rcq_handler: ibt_enable_cq_notify() failed"); 1741 } 1742 1743 /* 1744 * Repoll to catch all packets that might have arrived after 1745 * we finished the first poll loop and before interrupts got 1746 * armed. 
1747 */ 1748 ibd_rc_poll_rcq(chan, chan->rcq_hdl); 1749 1750 if (state->rc_enable_srq) { 1751 mutex_enter(&state->rc_rx_lock); 1752 1753 if (state->rc_rx_mp != NULL) { 1754 mblk_t *mpc; 1755 mpc = state->rc_rx_mp; 1756 1757 state->rc_rx_mp = NULL; 1758 state->rc_rx_mp_tail = NULL; 1759 state->rc_rx_mp_len = 0; 1760 1761 mutex_exit(&state->rc_rx_lock); 1762 mac_rx(state->id_mh, NULL, mpc); 1763 } else { 1764 mutex_exit(&state->rc_rx_lock); 1765 } 1766 } else { 1767 mutex_enter(&chan->rx_lock); 1768 1769 if (chan->rx_mp != NULL) { 1770 mblk_t *mpc; 1771 mpc = chan->rx_mp; 1772 1773 chan->rx_mp = NULL; 1774 chan->rx_mp_tail = NULL; 1775 chan->rx_mp_len = 0; 1776 1777 mutex_exit(&chan->rx_lock); 1778 mac_rx(state->id_mh, NULL, mpc); 1779 } else { 1780 mutex_exit(&chan->rx_lock); 1781 } 1782 } 1783 atomic_dec_32(&chan->rcq_invoking); 1784 } 1785 1786 /* 1787 * Allocate the statically allocated Tx buffer list. 1788 */ 1789 int 1790 ibd_rc_init_tx_largebuf_list(ibd_state_t *state) 1791 { 1792 ibd_rc_tx_largebuf_t *lbufp; 1793 ibd_rc_tx_largebuf_t *tail; 1794 uint8_t *memp; 1795 ibt_mr_attr_t mem_attr; 1796 uint32_t num_swqe; 1797 size_t mem_size; 1798 int i; 1799 1800 num_swqe = state->id_rc_num_swqe - 1; 1801 1802 /* 1803 * Allocate one big chunk for all Tx large copy bufs 1804 */ 1805 /* Don't transfer IPOIB_GRH_SIZE bytes (40 bytes) */ 1806 mem_size = num_swqe * state->rc_mtu; 1807 state->rc_tx_mr_bufs = kmem_zalloc(mem_size, KM_SLEEP); 1808 1809 mem_attr.mr_len = mem_size; 1810 mem_attr.mr_vaddr = (uint64_t)(uintptr_t)state->rc_tx_mr_bufs; 1811 mem_attr.mr_as = NULL; 1812 mem_attr.mr_flags = IBT_MR_SLEEP; 1813 if (ibt_register_mr(state->id_hca_hdl, state->id_pd_hdl, &mem_attr, 1814 &state->rc_tx_mr_hdl, &state->rc_tx_mr_desc) != IBT_SUCCESS) { 1815 DPRINT(40, "ibd_rc_init_tx_largebuf_list: ibt_register_mr " 1816 "failed"); 1817 kmem_free(state->rc_tx_mr_bufs, mem_size); 1818 state->rc_tx_mr_bufs = NULL; 1819 return (DDI_FAILURE); 1820 } 1821 1822 
state->rc_tx_largebuf_desc_base = kmem_zalloc(num_swqe * 1823 sizeof (ibd_rc_tx_largebuf_t), KM_SLEEP); 1824 1825 /* 1826 * Set up the buf chain 1827 */ 1828 memp = state->rc_tx_mr_bufs; 1829 mutex_enter(&state->rc_tx_large_bufs_lock); 1830 lbufp = state->rc_tx_largebuf_desc_base; 1831 for (i = 0; i < num_swqe; i++) { 1832 lbufp->lb_buf = memp; 1833 lbufp->lb_next = lbufp + 1; 1834 1835 tail = lbufp; 1836 1837 memp += state->rc_mtu; 1838 lbufp++; 1839 } 1840 tail->lb_next = NULL; 1841 1842 /* 1843 * Set up the buffer information in ibd state 1844 */ 1845 state->rc_tx_largebuf_free_head = state->rc_tx_largebuf_desc_base; 1846 state->rc_tx_largebuf_nfree = num_swqe; 1847 mutex_exit(&state->rc_tx_large_bufs_lock); 1848 return (DDI_SUCCESS); 1849 } 1850 1851 void 1852 ibd_rc_fini_tx_largebuf_list(ibd_state_t *state) 1853 { 1854 uint32_t num_swqe; 1855 1856 num_swqe = state->id_rc_num_swqe - 1; 1857 1858 if (ibt_deregister_mr(state->id_hca_hdl, 1859 state->rc_tx_mr_hdl) != IBT_SUCCESS) { 1860 DPRINT(40, "ibd_rc_fini_tx_largebuf_list: ibt_deregister_mr() " 1861 "failed"); 1862 } 1863 state->rc_tx_mr_hdl = NULL; 1864 1865 kmem_free(state->rc_tx_mr_bufs, num_swqe * state->rc_mtu); 1866 state->rc_tx_mr_bufs = NULL; 1867 1868 kmem_free(state->rc_tx_largebuf_desc_base, 1869 num_swqe * sizeof (ibd_rc_tx_largebuf_t)); 1870 state->rc_tx_largebuf_desc_base = NULL; 1871 } 1872 1873 static int 1874 ibd_rc_alloc_tx_copybufs(ibd_rc_chan_t *chan) 1875 { 1876 ibt_mr_attr_t mem_attr; 1877 ibd_state_t *state; 1878 1879 state = chan->state; 1880 ASSERT(state != NULL); 1881 1882 /* 1883 * Allocate one big chunk for all regular tx copy bufs 1884 */ 1885 mem_attr.mr_len = chan->scq_size * state->id_rc_tx_copy_thresh; 1886 1887 chan->tx_mr_bufs = kmem_zalloc(mem_attr.mr_len, KM_SLEEP); 1888 1889 /* 1890 * Do one memory registration on the entire txbuf area 1891 */ 1892 mem_attr.mr_vaddr = (uint64_t)(uintptr_t)chan->tx_mr_bufs; 1893 mem_attr.mr_as = NULL; 1894 mem_attr.mr_flags = IBT_MR_SLEEP; 
1895 if (ibt_register_mr(state->id_hca_hdl, state->id_pd_hdl, &mem_attr, 1896 &chan->tx_mr_hdl, &chan->tx_mr_desc) != IBT_SUCCESS) { 1897 DPRINT(40, "ibd_rc_alloc_tx_copybufs: ibt_register_mr failed"); 1898 ASSERT(mem_attr.mr_len == 1899 chan->scq_size * state->id_rc_tx_copy_thresh); 1900 kmem_free(chan->tx_mr_bufs, mem_attr.mr_len); 1901 chan->tx_mr_bufs = NULL; 1902 return (DDI_FAILURE); 1903 } 1904 1905 return (DDI_SUCCESS); 1906 } 1907 1908 /* 1909 * Allocate the statically allocated Tx buffer list. 1910 */ 1911 static int 1912 ibd_rc_init_txlist(ibd_rc_chan_t *chan) 1913 { 1914 ibd_swqe_t *swqe; 1915 int i; 1916 ibt_lkey_t lkey; 1917 ibd_state_t *state = chan->state; 1918 1919 if (ibd_rc_alloc_tx_copybufs(chan) != DDI_SUCCESS) 1920 return (DDI_FAILURE); 1921 1922 /* 1923 * Allocate and setup the swqe list 1924 */ 1925 lkey = chan->tx_mr_desc.md_lkey; 1926 chan->tx_wqes = kmem_zalloc(chan->scq_size * 1927 sizeof (ibd_swqe_t), KM_SLEEP); 1928 swqe = chan->tx_wqes; 1929 for (i = 0; i < chan->scq_size; i++, swqe++) { 1930 swqe->swqe_next = NULL; 1931 swqe->swqe_im_mblk = NULL; 1932 1933 swqe->swqe_copybuf.ic_sgl.ds_key = lkey; 1934 swqe->swqe_copybuf.ic_sgl.ds_len = 0; /* set in send */ 1935 1936 swqe->w_swr.wr_id = (ibt_wrid_t)(uintptr_t)swqe; 1937 swqe->w_swr.wr_flags = IBT_WR_SEND_SIGNAL; 1938 swqe->swqe_copybuf.ic_sgl.ds_va = (ib_vaddr_t)(uintptr_t) 1939 (chan->tx_mr_bufs + i * state->id_rc_tx_copy_thresh); 1940 swqe->w_swr.wr_trans = IBT_RC_SRV; 1941 1942 /* Add to list */ 1943 mutex_enter(&chan->tx_wqe_list.dl_mutex); 1944 chan->tx_wqe_list.dl_cnt++; 1945 swqe->swqe_next = chan->tx_wqe_list.dl_head; 1946 chan->tx_wqe_list.dl_head = SWQE_TO_WQE(swqe); 1947 mutex_exit(&chan->tx_wqe_list.dl_mutex); 1948 } 1949 1950 return (DDI_SUCCESS); 1951 } 1952 1953 /* 1954 * Free the statically allocated Tx buffer list. 
1955 */ 1956 static void 1957 ibd_rc_fini_txlist(ibd_rc_chan_t *chan) 1958 { 1959 ibd_state_t *state = chan->state; 1960 if (chan->tx_mr_hdl != NULL) { 1961 if (ibt_deregister_mr(chan->state->id_hca_hdl, 1962 chan->tx_mr_hdl) != IBT_SUCCESS) { 1963 DPRINT(40, "ibd_rc_fini_txlist: ibt_deregister_mr " 1964 "failed"); 1965 } 1966 chan->tx_mr_hdl = NULL; 1967 } 1968 1969 if (chan->tx_mr_bufs != NULL) { 1970 kmem_free(chan->tx_mr_bufs, chan->scq_size * 1971 state->id_rc_tx_copy_thresh); 1972 chan->tx_mr_bufs = NULL; 1973 } 1974 1975 if (chan->tx_wqes != NULL) { 1976 kmem_free(chan->tx_wqes, chan->scq_size * 1977 sizeof (ibd_swqe_t)); 1978 chan->tx_wqes = NULL; 1979 } 1980 } 1981 1982 /* 1983 * Acquire send wqe from free list. 1984 * Returns error number and send wqe pointer. 1985 */ 1986 ibd_swqe_t * 1987 ibd_rc_acquire_swqes(ibd_rc_chan_t *chan) 1988 { 1989 ibd_swqe_t *wqe; 1990 1991 mutex_enter(&chan->tx_rel_list.dl_mutex); 1992 if (chan->tx_rel_list.dl_head != NULL) { 1993 /* transfer id_tx_rel_list to id_tx_list */ 1994 chan->tx_wqe_list.dl_head = 1995 chan->tx_rel_list.dl_head; 1996 chan->tx_wqe_list.dl_cnt = 1997 chan->tx_rel_list.dl_cnt; 1998 chan->tx_wqe_list.dl_pending_sends = B_FALSE; 1999 2000 /* clear id_tx_rel_list */ 2001 chan->tx_rel_list.dl_head = NULL; 2002 chan->tx_rel_list.dl_cnt = 0; 2003 mutex_exit(&chan->tx_rel_list.dl_mutex); 2004 2005 wqe = WQE_TO_SWQE(chan->tx_wqe_list.dl_head); 2006 chan->tx_wqe_list.dl_cnt -= 1; 2007 chan->tx_wqe_list.dl_head = wqe->swqe_next; 2008 } else { /* no free swqe */ 2009 mutex_exit(&chan->tx_rel_list.dl_mutex); 2010 chan->tx_wqe_list.dl_pending_sends = B_TRUE; 2011 wqe = NULL; 2012 } 2013 return (wqe); 2014 } 2015 2016 /* 2017 * Release send wqe back into free list. 2018 */ 2019 static void 2020 ibd_rc_release_swqe(ibd_rc_chan_t *chan, ibd_swqe_t *swqe) 2021 { 2022 /* 2023 * Add back on Tx list for reuse. 
 */
	swqe->swqe_next = NULL;
	mutex_enter(&chan->tx_rel_list.dl_mutex);
	chan->tx_rel_list.dl_pending_sends = B_FALSE;
	swqe->swqe_next = chan->tx_rel_list.dl_head;
	chan->tx_rel_list.dl_head = SWQE_TO_WQE(swqe);
	chan->tx_rel_list.dl_cnt++;
	mutex_exit(&chan->tx_rel_list.dl_mutex);
}

/*
 * Post one send work request on an RC channel, then keep draining and
 * posting any further requests that other senders queued on chan->tx_head
 * while this thread owned the Tx post path, in batches of up to
 * IBD_MAX_TX_POST_MULTIPLE WRs.  chan->tx_busy is cleared (under
 * tx_post_lock) once the queue is found empty, at which point we return.
 */
void
ibd_rc_post_send(ibd_rc_chan_t *chan, ibd_swqe_t *node)
{
	uint_t i;
	uint_t num_posted;
	uint_t n_wrs;
	ibt_status_t ibt_status;
	ibt_send_wr_t wrs[IBD_MAX_TX_POST_MULTIPLE];
	ibd_swqe_t *tx_head, *elem;
	ibd_swqe_t *nodes[IBD_MAX_TX_POST_MULTIPLE];

	/* post the one request, then check for more */
	ibt_status = ibt_post_send(chan->chan_hdl,
	    &node->w_swr, 1, NULL);
	if (ibt_status != IBT_SUCCESS) {
		ibd_print_warn(chan->state, "ibd_post_send: "
		    "posting one wr failed: ret=%d", ibt_status);
		/* no completion will arrive for a failed post; reclaim now */
		ibd_rc_tx_cleanup(node);
	}

	tx_head = NULL;
	for (;;) {
		if (tx_head == NULL) {
			mutex_enter(&chan->tx_post_lock);
			tx_head = chan->tx_head;
			if (tx_head == NULL) {
				/* queue drained; release Tx-path ownership */
				chan->tx_busy = 0;
				mutex_exit(&chan->tx_post_lock);
				return;
			}
			chan->tx_head = NULL;
			mutex_exit(&chan->tx_post_lock);
		}

		/*
		 * Collect pending requests, IBD_MAX_TX_POST_MULTIPLE wrs
		 * at a time if possible, and keep posting them.
		 */
		for (n_wrs = 0, elem = tx_head;
		    (elem) && (n_wrs < IBD_MAX_TX_POST_MULTIPLE);
		    elem = WQE_TO_SWQE(elem->swqe_next), n_wrs++) {
			nodes[n_wrs] = elem;
			wrs[n_wrs] = elem->w_swr;
		}
		tx_head = elem;

		ASSERT(n_wrs != 0);

		/*
		 * If posting fails for some reason, we'll never receive
		 * completion intimation, so we'll need to cleanup. But
		 * we need to make sure we don't clean up nodes whose
		 * wrs have been successfully posted. We assume that the
		 * hca driver returns on the first failure to post and
		 * therefore the first 'num_posted' entries don't need
		 * cleanup here.
		 */
		num_posted = 0;
		ibt_status = ibt_post_send(chan->chan_hdl,
		    wrs, n_wrs, &num_posted);
		if (ibt_status != IBT_SUCCESS) {
			ibd_print_warn(chan->state, "ibd_post_send: "
			    "posting multiple wrs failed: "
			    "requested=%d, done=%d, ret=%d",
			    n_wrs, num_posted, ibt_status);

			for (i = num_posted; i < n_wrs; i++)
				ibd_rc_tx_cleanup(nodes[i]);
		}
	}
}

/*
 * Common code that deals with clean ups after a successful or
 * erroneous transmission attempt.
 */
void
ibd_rc_tx_cleanup(ibd_swqe_t *swqe)
{
	ibd_ace_t *ace = swqe->w_ahandle;
	ibd_state_t *state;

	ASSERT(ace != NULL);
	ASSERT(ace->ac_chan != NULL);

	state = ace->ac_chan->state;

	/*
	 * If this was a dynamic registration in ibd_send(),
	 * deregister now.
	 */
	if (swqe->swqe_im_mblk != NULL) {
		ASSERT(swqe->w_buftype == IBD_WQE_MAPPED);
		if (swqe->w_buftype == IBD_WQE_MAPPED) {
			ibd_unmap_mem(state, swqe);
		}
		freemsg(swqe->swqe_im_mblk);
		swqe->swqe_im_mblk = NULL;
	} else {
		ASSERT(swqe->w_buftype != IBD_WQE_MAPPED);
	}

	/* Return a copy-mode large Tx buffer to the free list. */
	if (swqe->w_buftype == IBD_WQE_RC_COPYBUF) {
		ibd_rc_tx_largebuf_t *lbufp;

		lbufp = swqe->w_rc_tx_largebuf;
		ASSERT(lbufp != NULL);

		mutex_enter(&state->rc_tx_large_bufs_lock);
		lbufp->lb_next = state->rc_tx_largebuf_free_head;
		state->rc_tx_largebuf_free_head = lbufp;
		state->rc_tx_largebuf_nfree ++;
		mutex_exit(&state->rc_tx_large_bufs_lock);
		swqe->w_rc_tx_largebuf = NULL;
	}


	/*
	 * Release the send wqe for reuse.
	 */
	ibd_rc_release_swqe(ace->ac_chan, swqe);

	/*
	 * Drop the reference count on the AH; it can be reused
	 * now for a different destination if there are no more
	 * posted sends that will use it. This can be eliminated
	 * if we can always associate each Tx buffer with an AH.
	 * NOTE(review): the ASSERT above requires ace != NULL, so the
	 * historical claim that ace can be null on the ibd_send()
	 * error path appears stale -- confirm against ibd_send().
	 */
	ibd_dec_ref_ace(state, ace);
}

/*
 * Drain the send CQ of an RC channel: reclaim completed swqes and, on a
 * completion error, initiate teardown of the (now broken) channel.  Also
 * kicks mac_tx_update() when enough Tx resources have been freed.
 */
void
ibd_rc_drain_scq(ibd_rc_chan_t *chan, ibt_cq_hdl_t cq_hdl)
{
	ibd_state_t *state = chan->state;
	ibd_wqe_t *wqe;
	ibt_wc_t *wc, *wcs;
	ibd_ace_t *ace;
	uint_t numwcs, real_numwcs;
	int i;
	boolean_t encount_error;

	wcs = chan->tx_wc;
	numwcs = IBD_RC_MAX_CQ_WC;
	encount_error = B_FALSE;

	while (ibt_poll_cq(cq_hdl, wcs, numwcs, &real_numwcs) == IBT_SUCCESS) {
		for (i = 0, wc = wcs; i < real_numwcs; i++, wc++) {
			wqe = (ibd_wqe_t *)(uintptr_t)wc->wc_id;
			if (wc->wc_status != IBT_WC_SUCCESS) {
				if (encount_error == B_FALSE) {
					/*
					 * This RC channel is in error status,
					 * remove it.
					 */
					encount_error = B_TRUE;
					mutex_enter(&state->id_ac_mutex);
					/*
					 * Only tear the channel down if it is
					 * still the established channel for
					 * this destination and no other
					 * thread has begun closing it.
					 */
					if ((chan->chan_state ==
					    IBD_RC_STATE_ACT_ESTAB) &&
					    (chan->state->id_link_state ==
					    LINK_STATE_UP) &&
					    ((ace = ibd_acache_find(state,
					    &chan->ace->ac_mac, B_FALSE, 0))
					    != NULL) && (ace == chan->ace)) {
						ASSERT(ace->ac_mce == NULL);
						INC_REF(ace, 1);
						IBD_ACACHE_PULLOUT_ACTIVE(
						    state, ace);
						chan->chan_state =
						    IBD_RC_STATE_ACT_CLOSING;
						mutex_exit(&state->id_ac_mutex);
						state->rc_reset_cnt++;
						DPRINT(30, "ibd_rc_drain_scq: "
						    "wc_status(%d) != SUCC, "
						    "chan=%p, ace=%p, "
						    "link_state=%d"
						    "reset RC channel",
						    wc->wc_status, chan,
						    chan->ace, chan->state->
						    id_link_state);
						ibd_rc_signal_act_close(
						    state, ace);
					} else {
						mutex_exit(&state->id_ac_mutex);
						state->
						    rc_act_close_simultaneous++;
						DPRINT(40, "ibd_rc_drain_scq: "
						    "wc_status(%d) != SUCC, "
						    "chan=%p, chan_state=%d,"
						    "ace=%p, link_state=%d."
						    "other thread is closing "
						    "it", wc->wc_status, chan,
						    chan->chan_state, chan->ace,
						    chan->state->id_link_state);
					}
				}
			}
			ibd_rc_tx_cleanup(WQE_TO_SWQE(wqe));
		}

		/*
		 * If GLDv3 Tx was flow-controlled waiting for a resource,
		 * and the corresponding free count has recovered past its
		 * threshold, clear the need flag and restart the MAC layer.
		 * Note the strict lock order: id_sched_lock, then the
		 * resource-list mutexes; all are dropped before calling
		 * mac_tx_update().
		 */
		mutex_enter(&state->id_sched_lock);
		if (state->id_sched_needed == 0) {
			mutex_exit(&state->id_sched_lock);
		} else if (state->id_sched_needed & IBD_RSRC_RC_SWQE) {
			mutex_enter(&chan->tx_wqe_list.dl_mutex);
			mutex_enter(&chan->tx_rel_list.dl_mutex);
			if ((chan->tx_rel_list.dl_cnt +
			    chan->tx_wqe_list.dl_cnt) > IBD_RC_TX_FREE_THRESH) {
				state->id_sched_needed &= ~IBD_RSRC_RC_SWQE;
				mutex_exit(&chan->tx_rel_list.dl_mutex);
				mutex_exit(&chan->tx_wqe_list.dl_mutex);
				mutex_exit(&state->id_sched_lock);
				state->rc_swqe_mac_update++;
				mac_tx_update(state->id_mh);
			} else {
				state->rc_scq_no_swqe++;
				mutex_exit(&chan->tx_rel_list.dl_mutex);
				mutex_exit(&chan->tx_wqe_list.dl_mutex);
				mutex_exit(&state->id_sched_lock);
			}
		} else if (state->id_sched_needed & IBD_RSRC_RC_TX_LARGEBUF) {
			mutex_enter(&state->rc_tx_large_bufs_lock);
			if (state->rc_tx_largebuf_nfree >
			    IBD_RC_TX_FREE_THRESH) {
				ASSERT(state->rc_tx_largebuf_free_head != NULL);
				state->id_sched_needed &=
				    ~IBD_RSRC_RC_TX_LARGEBUF;
				mutex_exit(&state->rc_tx_large_bufs_lock);
				mutex_exit(&state->id_sched_lock);
				state->rc_xmt_buf_mac_update++;
				mac_tx_update(state->id_mh);
			} else {
				state->rc_scq_no_largebuf++;
				mutex_exit(&state->rc_tx_large_bufs_lock);
				mutex_exit(&state->id_sched_lock);
			}
		} else if (state->id_sched_needed & IBD_RSRC_SWQE) {
			mutex_enter(&state->id_tx_list.dl_mutex);
			mutex_enter(&state->id_tx_rel_list.dl_mutex);
			if ((state->id_tx_list.dl_cnt +
			    state->id_tx_rel_list.dl_cnt)
			    > IBD_FREE_SWQES_THRESH) {
				state->id_sched_needed &= ~IBD_RSRC_SWQE;
				state->id_sched_cnt++;
				mutex_exit(&state->id_tx_rel_list.dl_mutex);
				mutex_exit(&state->id_tx_list.dl_mutex);
				mutex_exit(&state->id_sched_lock);
				mac_tx_update(state->id_mh);
			} else {
				mutex_exit(&state->id_tx_rel_list.dl_mutex);
				mutex_exit(&state->id_tx_list.dl_mutex);
				mutex_exit(&state->id_sched_lock);
			}
		} else {
			mutex_exit(&state->id_sched_lock);
		}
	}
}

/* Send CQ handler; calls ibd_rc_tx_recycle to recycle Tx buffers */
/* ARGSUSED */
static void
ibd_rc_scq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
{
	ibd_rc_chan_t *chan = (ibd_rc_chan_t *)arg;

	if (ibd_rc_tx_softintr == 1) {
		mutex_enter(&chan->tx_poll_lock);
		if (chan->tx_poll_busy & IBD_CQ_POLLING) {
			/* a poller is active; ask it to go around again */
			chan->tx_poll_busy |= IBD_REDO_CQ_POLLING;
			mutex_exit(&chan->tx_poll_lock);
			return;
		} else {
			mutex_exit(&chan->tx_poll_lock);
			ddi_trigger_softintr(chan->scq_softintr);
		}
	} else
		(void) ibd_rc_tx_recycle(arg);
}

/*
 * Softintr (or direct) handler that drains the send CQ, re-arms CQ
 * notification, and re-drains until no IBD_REDO_CQ_POLLING request is
 * pending.  Single-poller discipline is enforced via tx_poll_busy.
 */
static uint_t
ibd_rc_tx_recycle(caddr_t arg)
{
	ibd_rc_chan_t *chan = (ibd_rc_chan_t *)arg;
	ibd_state_t *state = chan->state;
	int flag, redo_flag;
	int redo = 1;

	flag = IBD_CQ_POLLING;
	redo_flag = IBD_REDO_CQ_POLLING;

	mutex_enter(&chan->tx_poll_lock);
	if (chan->tx_poll_busy & flag) {
		ibd_print_warn(state, "ibd_rc_tx_recycle: multiple polling "
		    "threads");
		chan->tx_poll_busy |= redo_flag;
		mutex_exit(&chan->tx_poll_lock);
		return (DDI_INTR_CLAIMED);
	}
	chan->tx_poll_busy |= flag;
	mutex_exit(&chan->tx_poll_lock);

	/*
	 * Poll for completed entries; the CQ will not interrupt any
	 * more for completed packets.
	 */
	ibd_rc_drain_scq(chan, chan->scq_hdl);

	/*
	 * Now enable CQ notifications; all completions originating now
	 * will cause new interrupts.
	 */
	do {
		if (ibt_enable_cq_notify(chan->scq_hdl, IBT_NEXT_COMPLETION) !=
		    IBT_SUCCESS) {
			/*
			 * We do not expect a failure here.
			 */
			DPRINT(40, "ibd_rc_scq_handler: ibt_enable_cq_notify()"
			    " failed");
		}

		/* catch completions that raced the re-arm above */
		ibd_rc_drain_scq(chan, chan->scq_hdl);

		mutex_enter(&chan->tx_poll_lock);
		if (chan->tx_poll_busy & redo_flag)
			chan->tx_poll_busy &= ~redo_flag;
		else {
			chan->tx_poll_busy &= ~flag;
			redo = 0;
		}
		mutex_exit(&chan->tx_poll_lock);

	} while (redo);

	return (DDI_INTR_CLAIMED);
}

/*
 * Register (or reference an already-registered) CM service with the
 * given service id.  Services are refcounted on the driver-global
 * ibd_gstate.ig_service_list so multiple ports can share one sid.
 */
static ibt_status_t
ibd_register_service(ibt_srv_desc_t *srv, ib_svc_id_t sid,
    int num_sids, ibt_srv_hdl_t *srv_hdl, ib_svc_id_t *ret_sid)
{
	ibd_service_t *p;
	ibt_status_t status;

	mutex_enter(&ibd_gstate.ig_mutex);
	for (p = ibd_gstate.ig_service_list; p != NULL; p = p->is_link) {
		if (p->is_sid == sid) {
			/* already registered; just bump the refcount */
			p->is_ref_cnt++;
			*srv_hdl = p->is_srv_hdl;
			*ret_sid = sid;
			mutex_exit(&ibd_gstate.ig_mutex);
			return (IBT_SUCCESS);
		}
	}
	status = ibt_register_service(ibd_gstate.ig_ibt_hdl, srv, sid,
	    num_sids, srv_hdl, ret_sid);
	if (status == IBT_SUCCESS) {
		p = kmem_alloc(sizeof (*p), KM_SLEEP);
		p->is_srv_hdl = *srv_hdl;
		p->is_sid = sid;
		p->is_ref_cnt = 1;
		p->is_link = ibd_gstate.ig_service_list;
		ibd_gstate.ig_service_list = p;
	}
	mutex_exit(&ibd_gstate.ig_mutex);
	return (status);
}

/*
 * Drop a reference on a registered CM service; deregister and free the
 * tracking node when the last reference goes away.
 */
static ibt_status_t
ibd_deregister_service(ibt_srv_hdl_t srv_hdl)
{
	ibd_service_t *p, **pp;
	ibt_status_t status;

	mutex_enter(&ibd_gstate.ig_mutex);
	for (pp = &ibd_gstate.ig_service_list; *pp != NULL;
	    pp = &((*pp)->is_link)) {
		p = *pp;
		if (p->is_srv_hdl == srv_hdl) {	/* Found it */
			if (--p->is_ref_cnt == 0) {
				status = ibt_deregister_service(
				    ibd_gstate.ig_ibt_hdl, srv_hdl);
				*pp = p->is_link;
				/* link prev to next */
				kmem_free(p, sizeof (*p));
			} else {
				status = IBT_SUCCESS;
			}
			mutex_exit(&ibd_gstate.ig_mutex);
			return (status);
		}
	}
	/* Should not ever get here */
	mutex_exit(&ibd_gstate.ig_mutex);
	return (IBT_FAILURE);
}

/* Listen with corresponding service ID */
ibt_status_t
ibd_rc_listen(ibd_state_t *state)
{
	ibt_srv_desc_t srvdesc;
	ib_svc_id_t ret_sid;
	ibt_status_t status;
	ib_gid_t gid;

	if (state->rc_listen_hdl != NULL) {
		DPRINT(40, "ibd_rc_listen: rc_listen_hdl should be NULL");
		return (IBT_FAILURE);
	}

	bzero(&srvdesc, sizeof (ibt_srv_desc_t));
	srvdesc.sd_handler = ibd_rc_dispatch_pass_mad;
	srvdesc.sd_flags = IBT_SRV_NO_FLAGS;

	/*
	 * Register the service with service id
	 * Incoming connection requests should arrive on this service id.
	 */
	status = ibd_register_service(&srvdesc,
	    IBD_RC_QPN_TO_SID(state->id_qpnum),
	    1, &state->rc_listen_hdl, &ret_sid);
	if (status != IBT_SUCCESS) {
		DPRINT(40, "ibd_rc_listen: Service Registration Failed, "
		    "ret=%d", status);
		return (status);
	}

	gid = state->id_sgid;

	/* pass state as cm_private */
	status = ibt_bind_service(state->rc_listen_hdl,
	    gid, NULL, state, &state->rc_listen_bind);
	if (status != IBT_SUCCESS) {
		DPRINT(40, "ibd_rc_listen:"
		    " fail to bind port: <%d>", status);
		(void) ibd_deregister_service(state->rc_listen_hdl);
		return (status);
	}

	/*
	 * Legacy OFED had used a wrong service ID (one additional zero digit)
	 * for many years. To interop with legacy OFED, we support this wrong
	 * service ID here.
	 */
	ASSERT(state->rc_listen_hdl_OFED_interop == NULL);

	bzero(&srvdesc, sizeof (ibt_srv_desc_t));
	srvdesc.sd_handler = ibd_rc_dispatch_pass_mad;
	srvdesc.sd_flags = IBT_SRV_NO_FLAGS;

	/*
	 * Register the service with service id
	 * Incoming connection requests should arrive on this service id.
	 */
	status = ibd_register_service(&srvdesc,
	    IBD_RC_QPN_TO_SID_OFED_INTEROP(state->id_qpnum),
	    1, &state->rc_listen_hdl_OFED_interop, &ret_sid);
	if (status != IBT_SUCCESS) {
		DPRINT(40,
		    "ibd_rc_listen: Service Registration for Legacy OFED "
		    "Failed %d", status);
		/* unwind the primary listener registered above */
		(void) ibt_unbind_service(state->rc_listen_hdl,
		    state->rc_listen_bind);
		(void) ibd_deregister_service(state->rc_listen_hdl);
		return (status);
	}

	gid = state->id_sgid;

	/* pass state as cm_private */
	status = ibt_bind_service(state->rc_listen_hdl_OFED_interop,
	    gid, NULL, state, &state->rc_listen_bind_OFED_interop);
	if (status != IBT_SUCCESS) {
		DPRINT(40, "ibd_rc_listen: fail to bind port: <%d> for "
		    "Legacy OFED listener", status);
		(void) ibd_deregister_service(
		    state->rc_listen_hdl_OFED_interop);
		(void) ibt_unbind_service(state->rc_listen_hdl,
		    state->rc_listen_bind);
		(void) ibd_deregister_service(state->rc_listen_hdl);
		return (status);
	}

	return (IBT_SUCCESS);
}

/*
 * Tear down both listeners set up by ibd_rc_listen().  Handles are only
 * cleared when deregistration succeeds, so a failed teardown can be
 * retried.
 */
void
ibd_rc_stop_listen(ibd_state_t *state)
{
	int ret;

	/* Disable incoming connection requests */
	if (state->rc_listen_hdl != NULL) {
		ret = ibt_unbind_all_services(state->rc_listen_hdl);
		if (ret != 0) {
			DPRINT(40, "ibd_rc_stop_listen:"
			    "ibt_unbind_all_services() failed, ret=%d", ret);
		}
		ret = ibd_deregister_service(state->rc_listen_hdl);
		if (ret != 0) {
			DPRINT(40, "ibd_rc_stop_listen:"
			    "ibd_deregister_service() failed, ret=%d", ret);
		} else {
			state->rc_listen_hdl = NULL;
		}
	}

	/* Disable incoming connection requests */
	if (state->rc_listen_hdl_OFED_interop != NULL) {
		ret = ibt_unbind_all_services(
		    state->rc_listen_hdl_OFED_interop);
		if (ret != 0) {
			DPRINT(40, "ibd_rc_stop_listen:"
			    "ibt_unbind_all_services() failed: %d", ret);
		}
		ret = ibd_deregister_service(state->rc_listen_hdl_OFED_interop);
		if (ret != 0) {
			DPRINT(40, "ibd_rc_stop_listen:"
			    "ibd_deregister_service() failed: %d", ret);
		} else {
			state->rc_listen_hdl_OFED_interop = NULL;
		}
	}
}

/*
 * Close every passive and active RC channel owned by this instance;
 * called on driver stop/teardown.
 */
void
ibd_rc_close_all_chan(ibd_state_t *state)
{
	ibd_rc_chan_t *rc_chan;
	ibd_ace_t *ace, *pre_ace;
	uint_t attempts;

	/* Disable all Rx routines */
	mutex_enter(&state->rc_pass_chan_list.chan_list_mutex);
	rc_chan = state->rc_pass_chan_list.chan_list;
	while (rc_chan != NULL) {
		ibt_set_cq_handler(rc_chan->rcq_hdl, 0, 0);
		rc_chan = rc_chan->next;
	}
	mutex_exit(&state->rc_pass_chan_list.chan_list_mutex);

	if (state->rc_enable_srq) {
		attempts = 10;
		while (state->rc_srq_rwqe_list.dl_bufs_outstanding > 0) {
			DPRINT(30, "ibd_rc_close_all_chan: outstanding > 0");
			delay(drv_usectohz(100000));
			if (--attempts == 0) {
				/*
				 * There are pending bufs with the network
				 * layer and we have no choice but to wait
				 * for them to be done with. Reap all the
				 * Tx/Rx completions that were posted since
				 * we turned off the notification and
				 * return failure.
				 */
				break;
			}
		}
	}

	/* Close all passive RC channels */
	rc_chan = ibd_rc_rm_header_chan_list(&state->rc_pass_chan_list);
	while (rc_chan != NULL) {
		(void) ibd_rc_pas_close(rc_chan, B_TRUE, B_FALSE);
		rc_chan = ibd_rc_rm_header_chan_list(&state->rc_pass_chan_list);
	}

	/* Close all active RC channels */
	mutex_enter(&state->id_ac_mutex);
	state->id_ac_hot_ace = NULL;
	ace = list_head(&state->id_ah_active);
	while ((pre_ace = ace) != NULL) {
		ace = list_next(&state->id_ah_active, ace);
		if (pre_ace->ac_chan != NULL) {
			INC_REF(pre_ace, 1);
			IBD_ACACHE_PULLOUT_ACTIVE(state, pre_ace);
			pre_ace->ac_chan->chan_state = IBD_RC_STATE_ACT_CLOSING;
			ibd_rc_add_to_chan_list(&state->rc_obs_act_chan_list,
			    pre_ace->ac_chan);
		}
	}
	mutex_exit(&state->id_ac_mutex);

	rc_chan = ibd_rc_rm_header_chan_list(&state->rc_obs_act_chan_list);
	while (rc_chan != NULL) {
		ace = rc_chan->ace;
		ibd_rc_act_close(rc_chan, B_TRUE);
		if (ace != NULL) {
			mutex_enter(&state->id_ac_mutex);
			ASSERT(ace->ac_ref != 0);
			/* drop the reference taken above */
			atomic_dec_32(&ace->ac_ref);
			ace->ac_chan = NULL;
			if ((ace->ac_ref == 0) || (ace->ac_ref == CYCLEVAL)) {
				IBD_ACACHE_INSERT_FREE(state, ace);
				ace->ac_ref = 0;
			} else {
				/* still referenced; recycle it later */
				ace->ac_ref |= CYCLEVAL;
				state->rc_delay_ace_recycle++;
			}
			mutex_exit(&state->id_ac_mutex);
		}
		rc_chan = ibd_rc_rm_header_chan_list(
		    &state->rc_obs_act_chan_list);
	}

	/* wait up to 40s for concurrent channel closes to finish */
	attempts = 400;
	while (((state->rc_num_tx_chan != 0) ||
	    (state->rc_num_rx_chan != 0)) && (attempts > 0)) {
		/* Other thread is closing CM channel; wait for it */
		delay(drv_usectohz(100000));
		attempts--;
	}
}

/*
 * Attempt to establish an RC connection to the destination described by
 * 'ace'/'path', retrying with the legacy-OFED SID first and falling back
 * to the standard SID.  Bails out if the driver is no longer started.
 */
void
ibd_rc_try_connect(ibd_state_t *state, ibd_ace_t *ace, ibt_path_info_t *path)
{
	ibt_status_t status;

	if ((state->id_mac_state & IBD_DRV_STARTED) == 0)
		return;

	status = ibd_rc_connect(state, ace, path,
	    IBD_RC_SERVICE_ID_OFED_INTEROP);

	if (status != IBT_SUCCESS) {
		/* wait for peer side to remove the stale channel */
		delay(drv_usectohz(10000));
		if ((state->id_mac_state & IBD_DRV_STARTED) == 0)
			return;
		status = ibd_rc_connect(state, ace, path,
		    IBD_RC_SERVICE_ID_OFED_INTEROP);
	}

	if (status != IBT_SUCCESS) {
		/* wait for peer side to remove the stale channel */
		delay(drv_usectohz(10000));
		if ((state->id_mac_state & IBD_DRV_STARTED) == 0)
			return;
		(void) ibd_rc_connect(state, ace, path,
		    IBD_RC_SERVICE_ID);
	}
}

/*
 * Allocates channel and sets the ace->ac_chan to it.
 * Opens the channel.
 */
ibt_status_t
ibd_rc_connect(ibd_state_t *state, ibd_ace_t *ace, ibt_path_info_t *path,
    uint64_t ietf_cm_service_id)
{
	ibt_status_t status = 0;
	ibt_rc_returns_t open_returns;
	ibt_chan_open_args_t open_args;
	ibd_rc_msg_hello_t hello_req_msg;
	ibd_rc_msg_hello_t *hello_ack_msg;
	ibd_rc_chan_t *chan;
	ibt_ud_dest_query_attr_t dest_attrs;

	ASSERT(ace != NULL);
	ASSERT(ace->ac_mce == NULL);
	ASSERT(ace->ac_chan == NULL);

	if ((status = ibd_rc_alloc_chan(&chan, state, B_TRUE)) != IBT_SUCCESS) {
		DPRINT(10, "ibd_rc_connect: ibd_rc_alloc_chan() failed");
		return (status);
	}

	ace->ac_chan = chan;
	chan->state = state;
	chan->ace = ace;

	ibt_set_chan_private(chan->chan_hdl, (void *)(uintptr_t)ace);

	hello_ack_msg = kmem_zalloc(sizeof (ibd_rc_msg_hello_t), KM_SLEEP);

	/*
	 * open the channels
	 */
	bzero(&open_args, sizeof (ibt_chan_open_args_t));
	bzero(&open_returns, sizeof (ibt_rc_returns_t));

	open_args.oc_cm_handler = ibd_rc_dispatch_actv_mad;
	open_args.oc_cm_clnt_private = (void *)(uintptr_t)ace;

	/*
	 * update path record with the SID
	 */
	if ((status = ibt_query_ud_dest(ace->ac_dest, &dest_attrs))
	    != IBT_SUCCESS) {
		/*
		 * NOTE(review): this early return appears to leak 'chan'
		 * and 'hello_ack_msg', and leaves ace->ac_chan pointing
		 * at the unopened channel (the open-failure path below
		 * frees both and clears ac_chan) -- confirm and fix.
		 */
		DPRINT(40, "ibd_rc_connect: ibt_query_ud_dest() failed, "
		    "ret=%d", status);
		return (status);
	}

	path->pi_sid =
	    ietf_cm_service_id | ((dest_attrs.ud_dst_qpn) & 0xffffff);


	/* pre-allocate memory for hello ack message */
	open_returns.rc_priv_data_len = sizeof (ibd_rc_msg_hello_t);
	open_returns.rc_priv_data = hello_ack_msg;

	open_args.oc_path = path;

	open_args.oc_path_rnr_retry_cnt = 1;
	open_args.oc_path_retry_cnt = 1;

	/* We don't do RDMA */
	open_args.oc_rdma_ra_out = 0;
	open_args.oc_rdma_ra_in = 0;

	/* hello REQ carries our UD QPN and RC MTU, network byte order */
	hello_req_msg.reserved_qpn = htonl(state->id_qpnum);
	hello_req_msg.rx_mtu = htonl(state->rc_mtu);
	open_args.oc_priv_data_len = sizeof (ibd_rc_msg_hello_t);
	open_args.oc_priv_data = (void *)(&hello_req_msg);

	ASSERT(open_args.oc_priv_data_len <= IBT_REQ_PRIV_DATA_SZ);
	ASSERT(open_returns.rc_priv_data_len <= IBT_REP_PRIV_DATA_SZ);
	ASSERT(open_args.oc_cm_handler != NULL);

	status = ibt_open_rc_channel(chan->chan_hdl, IBT_OCHAN_NO_FLAGS,
	    IBT_BLOCKING, &open_args, &open_returns);

	if (status == IBT_SUCCESS) {
		/* Success! */
		DPRINT(2, "ibd_rc_connect: call ibt_open_rc_channel succ!");
		state->rc_conn_succ++;
		kmem_free(hello_ack_msg, sizeof (ibd_rc_msg_hello_t));
		return (IBT_SUCCESS);
	}

	/* failure */
	(void) ibt_flush_channel(chan->chan_hdl);
	ibd_rc_free_chan(chan);
	ace->ac_chan = NULL;

	/* check open_returns report error and exit */
	DPRINT(30, "ibd_rc_connect: call ibt_open_rc_chan fail."
	    "ret status = %d, reason=%d, ace=%p, mtu=0x%x, qpn=0x%x,"
	    " peer qpn=0x%x", status, (int)open_returns.rc_status, ace,
	    hello_req_msg.rx_mtu, hello_req_msg.reserved_qpn,
	    dest_attrs.ud_dst_qpn);
	kmem_free(hello_ack_msg, sizeof (ibd_rc_msg_hello_t));
	return (status);
}

/*
 * Request asynchronous close of an active channel; if no request can be
 * allocated, park the channel on the obsolete-active list so the async
 * thread reaps it later.
 */
void
ibd_rc_signal_act_close(ibd_state_t *state, ibd_ace_t *ace)
{
	ibd_req_t *req;

	req = kmem_cache_alloc(state->id_req_kmc, KM_NOSLEEP);
	if (req == NULL) {
		ibd_print_warn(state, "ibd_rc_signal_act_close: alloc "
		    "ibd_req_t fail");
		mutex_enter(&state->rc_obs_act_chan_list.chan_list_mutex);
		ace->ac_chan->next = state->rc_obs_act_chan_list.chan_list;
		state->rc_obs_act_chan_list.chan_list = ace->ac_chan;
		mutex_exit(&state->rc_obs_act_chan_list.chan_list_mutex);
	} else {
		req->rq_ptr = ace->ac_chan;
		ibd_queue_work_slot(state, req, IBD_ASYNC_RC_CLOSE_ACT_CHAN);
	}
}

/*
 * Request asynchronous recycling of an ace.  At most one recycle may be
 * in flight (tracked by state->rc_ace_recycle); best-effort, silently
 * returns if one is pending or no request can be allocated.
 */
void
ibd_rc_signal_ace_recycle(ibd_state_t *state, ibd_ace_t *ace)
{
	ibd_req_t *req;

	mutex_enter(&state->rc_ace_recycle_lock);
	if (state->rc_ace_recycle != NULL) {
		mutex_exit(&state->rc_ace_recycle_lock);
		return;
	}

	req = kmem_cache_alloc(state->id_req_kmc, KM_NOSLEEP);
	if (req == NULL) {
		mutex_exit(&state->rc_ace_recycle_lock);
		return;
	}

	state->rc_ace_recycle = ace;
	mutex_exit(&state->rc_ace_recycle_lock);
	ASSERT(ace->ac_mce == NULL);
	INC_REF(ace, 1);
	IBD_ACACHE_PULLOUT_ACTIVE(state, ace);
	req->rq_ptr = ace;
	ibd_queue_work_slot(state, req, IBD_ASYNC_RC_RECYCLE_ACE);
}

/*
 * Close an active channel
 *
 * is_close_rc_chan: if B_TRUE, we will call ibt_close_rc_channel()
 */
static void
ibd_rc_act_close(ibd_rc_chan_t *chan, boolean_t is_close_rc_chan)
{
	ibd_state_t *state;
	ibd_ace_t *ace;
	uint_t times;
	ibt_status_t ret;

	ASSERT(chan != NULL);

	chan->state->rc_act_close++;
	switch (chan->chan_state) {
	case IBD_RC_STATE_ACT_CLOSING:	/* stale, close it */
	case IBD_RC_STATE_ACT_ESTAB:
		DPRINT(30, "ibd_rc_act_close-1: close and free chan, "
		    "act_state=%d, chan=%p", chan->chan_state, chan);
		chan->chan_state = IBD_RC_STATE_ACT_CLOSED;
		ibt_set_cq_handler(chan->rcq_hdl, 0, 0);
		/*
		 * Wait send queue empty. Its old value is 50 (5 seconds). But
		 * in my experiment, 5 seconds is not enough time to let IBTL
		 * return all buffers and ace->ac_ref. I tried 25 seconds, it
		 * works well. As another evidence, I saw IBTL takes about 17
		 * seconds every time it cleans a stale RC channel.
		 */
		times = 250;
		ace = chan->ace;
		ASSERT(ace != NULL);
		state = chan->state;
		ASSERT(state != NULL);
		/*
		 * Loop invariant: all three locks are held on entry to the
		 * while-test and are dropped while sleeping/draining; on
		 * timeout we break out with the locks already dropped.
		 */
		mutex_enter(&state->id_ac_mutex);
		mutex_enter(&chan->tx_wqe_list.dl_mutex);
		mutex_enter(&chan->tx_rel_list.dl_mutex);
		while (((chan->tx_wqe_list.dl_cnt + chan->tx_rel_list.dl_cnt)
		    != chan->scq_size) || ((ace->ac_ref != 1) &&
		    (ace->ac_ref != (CYCLEVAL+1)))) {
			mutex_exit(&chan->tx_rel_list.dl_mutex);
			mutex_exit(&chan->tx_wqe_list.dl_mutex);
			mutex_exit(&state->id_ac_mutex);
			times--;
			if (times == 0) {
				state->rc_act_close_not_clean++;
				DPRINT(40, "ibd_rc_act_close: dl_cnt(tx_wqe_"
				    "list=%d, tx_rel_list=%d) != chan->"
				    "scq_size=%d, OR ac_ref(=%d) not clean",
				    chan->tx_wqe_list.dl_cnt,
				    chan->tx_rel_list.dl_cnt,
				    chan->scq_size, ace->ac_ref);
				break;
			}
			/* drain the send CQ ourselves if no poller is active */
			mutex_enter(&chan->tx_poll_lock);
			if (chan->tx_poll_busy & IBD_CQ_POLLING) {
				DPRINT(40, "ibd_rc_act_close: multiple "
				    "polling threads");
				mutex_exit(&chan->tx_poll_lock);
			} else {
				chan->tx_poll_busy = IBD_CQ_POLLING;
				mutex_exit(&chan->tx_poll_lock);
				ibd_rc_drain_scq(chan, chan->scq_hdl);
				mutex_enter(&chan->tx_poll_lock);
				chan->tx_poll_busy = 0;
				mutex_exit(&chan->tx_poll_lock);
			}
			delay(drv_usectohz(100000));
			mutex_enter(&state->id_ac_mutex);
			mutex_enter(&chan->tx_wqe_list.dl_mutex);
			mutex_enter(&chan->tx_rel_list.dl_mutex);
		}
		if (times != 0) {
			mutex_exit(&chan->tx_rel_list.dl_mutex);
			mutex_exit(&chan->tx_wqe_list.dl_mutex);
			mutex_exit(&state->id_ac_mutex);
		}

		ibt_set_cq_handler(chan->scq_hdl, 0, 0);
		if (is_close_rc_chan) {
			ret = ibt_close_rc_channel(chan->chan_hdl,
			    IBT_BLOCKING|IBT_NOCALLBACKS, NULL, 0, NULL, NULL,
			    0);
			if (ret != IBT_SUCCESS) {
				DPRINT(40, "ibd_rc_act_close: ibt_close_rc_"
				    "channel fail, chan=%p, ret=%d",
				    chan, ret);
			} else {
				DPRINT(30, "ibd_rc_act_close: ibt_close_rc_"
				    "channel succ, chan=%p", chan);
			}
		}

		ibd_rc_free_chan(chan);
		break;
	case IBD_RC_STATE_ACT_REP_RECV:
		chan->chan_state = IBD_RC_STATE_ACT_CLOSED;
		(void) ibt_flush_channel(chan->chan_hdl);
		ibd_rc_free_chan(chan);
		break;
	case IBD_RC_STATE_ACT_ERROR:
		DPRINT(40, "ibd_rc_act_close: IBD_RC_STATE_ERROR branch");
		break;
	default:
		DPRINT(40, "ibd_rc_act_close: default branch, act_state=%d, "
		    "chan=%p", chan->chan_state, chan);
	}
}

/*
 * Close a passive channel
 *
 * is_close_rc_chan: if B_TRUE, we will call ibt_close_rc_channel()
 *
 * is_timeout_close: if B_TRUE, this function is called by the connection
 * reaper (refer to function ibd_rc_conn_timeout_call). When the connection
 * reaper calls ibd_rc_pas_close(), and if it finds that dl_bufs_outstanding
 * or chan->rcq_invoking is non-zero, then it can simply put that channel back
 * on the passive channels list and move on, since it might be an indication
 * that the channel became active again by the time we started its cleanup.
 * It is costlier to do the cleanup and then reinitiate the channel
 * establishment and hence it will help to be conservative when we do the
 * cleanup.
 */
int
ibd_rc_pas_close(ibd_rc_chan_t *chan, boolean_t is_close_rc_chan,
    boolean_t is_timeout_close)
{
	uint_t times;
	ibt_status_t ret;

	ASSERT(chan != NULL);
	chan->state->rc_pas_close++;

	switch (chan->chan_state) {
	case IBD_RC_STATE_PAS_ESTAB:
		if (is_timeout_close) {
			/* channel looks busy again; defer the reap */
			if ((chan->rcq_invoking != 0) ||
			    ((!chan->state->rc_enable_srq) &&
			    (chan->rx_wqe_list.dl_bufs_outstanding > 0))) {
				if (ibd_rc_re_add_to_pas_chan_list(chan)) {
					return (DDI_FAILURE);
				}
			}
		}
		/*
		 * First, stop receive interrupts; this stops the
		 * connection from handing up buffers to higher layers.
		 * Wait for receive buffers to be returned; give up
		 * after 5 seconds.
		 */
		ibt_set_cq_handler(chan->rcq_hdl, 0, 0);
		/* Wait 0.01 second to let ibt_set_cq_handler() take effect */
		delay(drv_usectohz(10000));
		if (!chan->state->rc_enable_srq) {
			times = 50;
			while (chan->rx_wqe_list.dl_bufs_outstanding > 0) {
				delay(drv_usectohz(100000));
				if (--times == 0) {
					DPRINT(40, "ibd_rc_pas_close : "
					    "reclaiming failed");
					/* re-arm Rx and report failure */
					ibd_rc_poll_rcq(chan, chan->rcq_hdl);
					ibt_set_cq_handler(chan->rcq_hdl,
					    ibd_rc_rcq_handler,
					    (void *)(uintptr_t)chan);
					return (DDI_FAILURE);
				}
			}
		}
		/* wait up to 5s for a racing rcq handler to finish */
		times = 50;
		while (chan->rcq_invoking != 0) {
			delay(drv_usectohz(100000));
			if (--times == 0) {
				DPRINT(40, "ibd_rc_pas_close : "
				    "rcq handler is being invoked");
				chan->state->rc_pas_close_rcq_invoking++;
				break;
			}
		}
		ibt_set_cq_handler(chan->scq_hdl, 0, 0);
		chan->chan_state = IBD_RC_STATE_PAS_CLOSED;
		DPRINT(30, "ibd_rc_pas_close-1: close and free chan, "
		    "chan_state=%d, chan=%p", chan->chan_state, chan);
		if (is_close_rc_chan) {
			ret = ibt_close_rc_channel(chan->chan_hdl,
			    IBT_BLOCKING|IBT_NOCALLBACKS, NULL, 0, NULL, NULL,
			    0);
			if (ret != IBT_SUCCESS) {
				DPRINT(40, "ibd_rc_pas_close: ibt_close_rc_"
				    "channel() fail, chan=%p, ret=%d", chan,
				    ret);
			} else {
				DPRINT(30, "ibd_rc_pas_close: ibt_close_rc_"
				    "channel() succ, chan=%p", chan);
			}
		}
		ibd_rc_free_chan(chan);
		break;
	case IBD_RC_STATE_PAS_REQ_RECV:
		chan->chan_state = IBD_RC_STATE_PAS_CLOSED;
		(void) ibt_flush_channel(chan->chan_hdl);
		ibd_rc_free_chan(chan);
		break;
	default:
		DPRINT(40, "ibd_rc_pas_close: default, chan_state=%d, chan=%p",
		    chan->chan_state, chan);
	}
	return (DDI_SUCCESS);
}

/*
 * Passive Side:
 * Handle an incoming CM REQ from active side.
 *
 * If success, this function allocates an ibd_rc_chan_t, then
 * assigns it to "*ret_conn".
 */
static ibt_cm_status_t
ibd_rc_handle_req(void *arg, ibd_rc_chan_t **ret_conn,
    ibt_cm_event_t *ibt_cm_event, ibt_cm_return_args_t *ret_args,
    void *ret_priv_data)
{
	ibd_rc_msg_hello_t *hello_msg;
	ibd_state_t *state = (ibd_state_t *)arg;
	ibd_rc_chan_t *chan;

	if (ibd_rc_alloc_chan(&chan, state, B_FALSE) != IBT_SUCCESS) {
		DPRINT(40, "ibd_rc_handle_req: ibd_rc_alloc_chan() failed");
		return (IBT_CM_REJECT);
	}

	ibd_rc_add_to_chan_list(&state->rc_pass_chan_list, chan);

	ibt_set_chan_private(chan->chan_hdl, (void *)(uintptr_t)chan);

	if (!state->rc_enable_srq) {
		/* per-channel Rx list is only needed when no SRQ is used */
		if (ibd_rc_init_rxlist(chan) != DDI_SUCCESS) {
			ibd_rc_free_chan(chan);
			DPRINT(40, "ibd_rc_handle_req: ibd_rc_init_rxlist() "
			    "failed");
			return (IBT_CM_REJECT);
		}
	}

	ret_args->cm_ret.rep.cm_channel = chan->chan_hdl;

	/* We don't do RDMA */
	ret_args->cm_ret.rep.cm_rdma_ra_out = 0;
	ret_args->cm_ret.rep.cm_rdma_ra_in = 0;

	ret_args->cm_ret.rep.cm_rnr_retry_cnt = 7;
	ret_args->cm_ret_len = sizeof (ibd_rc_msg_hello_t);

	hello_msg = (ibd_rc_msg_hello_t *)ibt_cm_event->cm_priv_data;
	DPRINT(30, "ibd_rc_handle_req(): peer qpn=0x%x, peer mtu=0x%x",
	    ntohl(hello_msg->reserved_qpn), ntohl(hello_msg->rx_mtu));

	/* reply with our own UD QPN and RC MTU in the REP private data */
	hello_msg = (ibd_rc_msg_hello_t *)ret_priv_data;
	hello_msg->reserved_qpn = htonl(state->id_qpnum);
	hello_msg->rx_mtu = htonl(state->rc_mtu);

	chan->chan_state = IBD_RC_STATE_PAS_REQ_RECV;	/* ready to receive */
	*ret_conn = chan;

	return (IBT_CM_ACCEPT);
}

/*
 * ibd_rc_handle_act_estab -- handler for connection established completion
 * for active side.
 */
static ibt_cm_status_t
ibd_rc_handle_act_estab(ibd_ace_t *ace)
{
	ibt_status_t result;

	switch (ace->ac_chan->chan_state) {
	case IBD_RC_STATE_ACT_REP_RECV:
		ace->ac_chan->chan_state = IBD_RC_STATE_ACT_ESTAB;
		result = ibt_enable_cq_notify(ace->ac_chan->rcq_hdl,
		    IBT_NEXT_COMPLETION);
		if (result != IBT_SUCCESS) {
			DPRINT(40, "ibd_rc_handle_act_estab: "
			    "ibt_enable_cq_notify(rcq) "
			    "failed: status %d", result);
			return (IBT_CM_REJECT);
		}
		break;
	default:
		DPRINT(40, "ibd_rc_handle_act_estab: default "
		    "branch, act_state=%d", ace->ac_chan->chan_state);
		return (IBT_CM_REJECT);
	}
	return (IBT_CM_ACCEPT);
}

/*
 * ibd_rc_handle_pas_estab -- handler for connection established completion
 * for passive side.
 */
static ibt_cm_status_t
ibd_rc_handle_pas_estab(ibd_rc_chan_t *chan)
{
	ibt_status_t result;

	switch (chan->chan_state) {
	case IBD_RC_STATE_PAS_REQ_RECV:
		chan->chan_state = IBD_RC_STATE_PAS_ESTAB;

		result = ibt_enable_cq_notify(chan->rcq_hdl,
		    IBT_NEXT_COMPLETION);
		if (result != IBT_SUCCESS) {
			DPRINT(40, "ibd_rc_handle_pas_estab: "
			    "ibt_enable_cq_notify(rcq) "
			    "failed: status %d", result);
			return (IBT_CM_REJECT);
		}
		break;
	default:
		DPRINT(40, "ibd_rc_handle_pas_estab: default "
		    "branch, chan_state=%d", chan->chan_state);
		return (IBT_CM_REJECT);
	}
	return (IBT_CM_ACCEPT);
}

/*
 * CM event dispatcher for the active (connecting) side; 'arg' is the
 * ibd_ace_t passed as oc_cm_clnt_private in ibd_rc_connect().
 */
/* ARGSUSED */
static ibt_cm_status_t
ibd_rc_dispatch_actv_mad(void *arg, ibt_cm_event_t *ibt_cm_event,
    ibt_cm_return_args_t *ret_args, void *ret_priv_data,
    ibt_priv_data_len_t ret_len_max)
{
	ibt_cm_status_t result = IBT_CM_ACCEPT;
	ibd_ace_t *ace = (ibd_ace_t *)(uintptr_t)arg;
	ibd_rc_chan_t *rc_chan;
	ibd_state_t *state;
	ibd_rc_msg_hello_t *hello_ack;

	switch (ibt_cm_event->cm_type) {
	case IBT_CM_EVENT_REP_RCV:
		ASSERT(ace->ac_chan != NULL);
		ASSERT(ace->ac_chan->chan_state == IBD_RC_STATE_INIT);
		hello_ack = (ibd_rc_msg_hello_t *)ibt_cm_event->cm_priv_data;
		DPRINT(30, "ibd_rc_handle_rep: hello_ack->mtu=0x%x, "
		    "hello_ack->qpn=0x%x", ntohl(hello_ack->rx_mtu),
		    ntohl(hello_ack->reserved_qpn));
		ace->ac_chan->chan_state = IBD_RC_STATE_ACT_REP_RECV;
		break;

	case IBT_CM_EVENT_CONN_EST:
		ASSERT(ace->ac_chan != NULL);
		DPRINT(30, "ibd_rc_dispatch_actv_mad: IBT_CM_EVENT_CONN_EST, "
		    "ace=%p, act_state=%d, chan=%p",
		    ace, ace->ac_chan->chan_state, ace->ac_chan);
		result = ibd_rc_handle_act_estab(ace);
		break;

	case IBT_CM_EVENT_CONN_CLOSED:
		rc_chan = ace->ac_chan;
		if (rc_chan == NULL) {
			DPRINT(40, "ibd_rc_dispatch_actv_mad: "
			    "rc_chan==NULL, IBT_CM_EVENT_CONN_CLOSED");
			return (IBT_CM_ACCEPT);
		}
		state = rc_chan->state;
		/*
		 * Only perform the close here if this channel is still the
		 * established one for the destination; otherwise another
		 * thread is already tearing it down.
		 */
		mutex_enter(&state->id_ac_mutex);
		if ((rc_chan->chan_state == IBD_RC_STATE_ACT_ESTAB) &&
		    ((ace = ibd_acache_find(state, &ace->ac_mac, B_FALSE, 0))
		    != NULL) && (ace == rc_chan->ace)) {
			rc_chan->chan_state = IBD_RC_STATE_ACT_CLOSING;
			ASSERT(ace->ac_mce == NULL);
			INC_REF(ace, 1);
			IBD_ACACHE_PULLOUT_ACTIVE(state, ace);
			mutex_exit(&state->id_ac_mutex);
			DPRINT(30, "ibd_rc_dispatch_actv_mad: "
			    "IBT_CM_EVENT_CONN_CLOSED, ace=%p, chan=%p, "
			    "reason=%d", ace, rc_chan,
			    ibt_cm_event->cm_event.closed);
		} else {
			mutex_exit(&state->id_ac_mutex);
			state->rc_act_close_simultaneous++;
			DPRINT(40, "ibd_rc_dispatch_actv_mad: other thread "
			    "is closing it, IBT_CM_EVENT_CONN_CLOSED, "
			    "chan_state=%d", rc_chan->chan_state);
			return (IBT_CM_ACCEPT);
		}
		ibd_rc_act_close(rc_chan, B_FALSE);
		mutex_enter(&state->id_ac_mutex);
		ace->ac_chan = NULL;
		ASSERT(ace->ac_ref != 0);
		atomic_dec_32(&ace->ac_ref);
		if ((ace->ac_ref == 0) || (ace->ac_ref == CYCLEVAL)) {
			IBD_ACACHE_INSERT_FREE(state, ace);
			ace->ac_ref = 0;
		} else {
			ace->ac_ref |= CYCLEVAL;
			state->rc_delay_ace_recycle++;
		}
		mutex_exit(&state->id_ac_mutex);
		break;

	case IBT_CM_EVENT_FAILURE:
		DPRINT(30, "ibd_rc_dispatch_actv_mad: IBT_CM_EVENT_FAILURE,"
		    "ace=%p, chan=%p, code: %d, msg: %d, reason=%d",
		    ace, ace->ac_chan,
		    ibt_cm_event->cm_event.failed.cf_code,
		    ibt_cm_event->cm_event.failed.cf_msg,
		    ibt_cm_event->cm_event.failed.cf_reason);
		/*
		 * Don't need free resource here. The resource is freed
		 * at function ibd_rc_connect()
		 */
		break;

	case IBT_CM_EVENT_MRA_RCV:
		DPRINT(40, "ibd_rc_dispatch_actv_mad: IBT_CM_EVENT_MRA_RCV");
		break;
	case IBT_CM_EVENT_LAP_RCV:
		DPRINT(40, "ibd_rc_dispatch_actv_mad: LAP message received");
		break;
	case IBT_CM_EVENT_APR_RCV:
		DPRINT(40, "ibd_rc_dispatch_actv_mad: APR message received");
		break;
	default:
		DPRINT(40, "ibd_rc_dispatch_actv_mad: default branch, "
		    "ibt_cm_event->cm_type=%d", ibt_cm_event->cm_type);
		break;
	}

	return (result);
}

/*
 * CM event dispatcher for the passive (listening) side; 'arg' is the
 * ibd_state_t registered as cm_private in ibd_rc_listen().
 */
/* ARGSUSED */
static ibt_cm_status_t
ibd_rc_dispatch_pass_mad(void *arg, ibt_cm_event_t *ibt_cm_event,
    ibt_cm_return_args_t *ret_args, void *ret_priv_data,
    ibt_priv_data_len_t ret_len_max)
{
	ibt_cm_status_t result = IBT_CM_ACCEPT;
	ibd_rc_chan_t *chan;

	if (ibt_cm_event->cm_type == IBT_CM_EVENT_REQ_RCV) {
		DPRINT(30, "ibd_rc_dispatch_pass_mad: IBT_CM_EVENT_REQ_RCV,"
		    "req_pkey=%x", ibt_cm_event->cm_event.req.req_pkey);
		/* Receive an incoming CM REQ from active side */
		result = ibd_rc_handle_req(arg, &chan, ibt_cm_event, ret_args,
		    ret_priv_data);
		return (result);
	}

	if (ibt_cm_event->cm_channel == 0) {
		DPRINT(30, "ibd_rc_dispatch_pass_mad: "
		    "ERROR ibt_cm_event->cm_channel == 0");
		return (IBT_CM_REJECT);
	}

	chan =
	    (ibd_rc_chan_t *)ibt_get_chan_private(ibt_cm_event->cm_channel);
	if (chan == NULL) {
		DPRINT(40, "ibd_rc_dispatch_pass_mad: conn == 0");
		return (IBT_CM_REJECT);
	}

	switch (ibt_cm_event->cm_type) {
	case IBT_CM_EVENT_CONN_EST:
		DPRINT(30, "ibd_rc_dispatch_pass_mad: IBT_CM_EVENT_CONN_EST, "
		    "chan=%p", chan);
		result = ibd_rc_handle_pas_estab(chan);
		break;
	case IBT_CM_EVENT_CONN_CLOSED:
		DPRINT(30, "ibd_rc_dispatch_pass_mad: IBT_CM_EVENT_CONN_CLOSED,"
		    " chan=%p, reason=%d", chan,
ibt_cm_event->cm_event.closed); 3303 chan = ibd_rc_rm_from_chan_list(&chan->state->rc_pass_chan_list, 3304 chan); 3305 if (chan != NULL) 3306 (void) ibd_rc_pas_close(chan, B_FALSE, B_FALSE); 3307 break; 3308 case IBT_CM_EVENT_FAILURE: 3309 DPRINT(30, "ibd_rc_dispatch_pass_mad: IBT_CM_EVENT_FAILURE," 3310 " chan=%p, code: %d, msg: %d, reason=%d", chan, 3311 ibt_cm_event->cm_event.failed.cf_code, 3312 ibt_cm_event->cm_event.failed.cf_msg, 3313 ibt_cm_event->cm_event.failed.cf_reason); 3314 chan = ibd_rc_rm_from_chan_list(&chan->state->rc_pass_chan_list, 3315 chan); 3316 if (chan != NULL) 3317 (void) ibd_rc_pas_close(chan, B_FALSE, B_FALSE); 3318 return (IBT_CM_ACCEPT); 3319 case IBT_CM_EVENT_MRA_RCV: 3320 DPRINT(40, "ibd_rc_dispatch_pass_mad: IBT_CM_EVENT_MRA_RCV"); 3321 break; 3322 case IBT_CM_EVENT_LAP_RCV: 3323 DPRINT(40, "ibd_rc_dispatch_pass_mad: LAP message received"); 3324 break; 3325 case IBT_CM_EVENT_APR_RCV: 3326 DPRINT(40, "ibd_rc_dispatch_pass_mad: APR message received"); 3327 break; 3328 default: 3329 DPRINT(40, "ibd_rc_dispatch_pass_mad: default, type=%d, " 3330 "chan=%p", ibt_cm_event->cm_type, chan); 3331 break; 3332 } 3333 3334 return (result); 3335 } 3336