1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21
22 /*
23  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright 2018 Joyent, Inc.
25  */
26 /* Copyright (c) 1990 Mentat Inc. */
27
28 /*
29  * An implementation of the IPoIB-CM standard based on PSARC 2009/593.
30  */
31 #include <sys/types.h>
32 #include <sys/conf.h>
33 #include <sys/ddi.h>
34 #include <sys/sunddi.h>
35 #include <sys/modctl.h>
36 #include <sys/stropts.h>
37 #include <sys/stream.h>
38 #include <sys/strsun.h>
39 #include <sys/strsubr.h>
40 #include <sys/dlpi.h>
41 #include <sys/mac_provider.h>
42
43 #include <sys/pattr.h>		/* for HCK_FULLCKSUM */
44 #include <sys/atomic.h>		/* for atomic_add*() */
45 #include <sys/ethernet.h>	/* for ETHERTYPE_IP */
46 #include <netinet/in.h>		/* for netinet/ip.h below */
47 #include <netinet/ip.h>		/* for struct ip */
48 #include <inet/common.h>	/* for inet/ip.h below */
49 #include <inet/ip.h>		/* for ipha_t */
50 #include <inet/ip_if.h>		/* for ETHERTYPE_IPV6 */
51 #include <inet/ip6.h>		/* for ip6_t */
52 #include <netinet/icmp6.h>	/* for icmp6_t */
53
54 #include <sys/ib/clients/ibd/ibd.h>
55
56 extern ibd_global_state_t ibd_gstate;
57 extern int ibd_rc_conn_timeout;
58 uint_t ibd_rc_tx_softintr = 1;
59 /*
60  * If the number of WRs in the receive queue of an RC connection drops
61  * below IBD_RC_RX_WR_THRESHOLD, we post more receive WRs into it.
62  */
63 #define	IBD_RC_RX_WR_THRESHOLD		0x20
64
65 /*
66  * If the number of free SWQEs (or large Tx bufs) is greater than or equal
67  * to IBD_RC_TX_FREE_THRESH, we call mac_tx_update() to notify GLD that it
68  * can resume transmitting packets.
69  */
70 #define	IBD_RC_TX_FREE_THRESH		8
71
72 #define	IBD_RC_QPN_TO_SID(qpn) \
73 	((uint64_t)(IBD_RC_SERVICE_ID | ((qpn) & 0xffffff)))
74
75 /* For interop with legacy OFED */
76 #define	IBD_RC_QPN_TO_SID_OFED_INTEROP(qpn) \
77 	((uint64_t)(IBD_RC_SERVICE_ID_OFED_INTEROP | ((qpn) & 0xffffff)))
78
79 /* Internet Header + 64 bits of Data Datagram.
Refer to RFC 792 */ 80 #define IBD_RC_IP_ICMP_RETURN_DATA_BYTES 64 81 82 83 /* Functions for Reliable Connected Mode */ 84 /* Connection Setup/Close Functions */ 85 static ibt_cm_status_t ibd_rc_dispatch_pass_mad(void *, 86 ibt_cm_event_t *, ibt_cm_return_args_t *, void *, ibt_priv_data_len_t); 87 static ibt_cm_status_t ibd_rc_dispatch_actv_mad(void *, 88 ibt_cm_event_t *, ibt_cm_return_args_t *, void *, ibt_priv_data_len_t); 89 static void ibd_rc_act_close(ibd_rc_chan_t *, boolean_t); 90 91 static inline void ibd_rc_add_to_chan_list(ibd_rc_chan_list_t *, 92 ibd_rc_chan_t *); 93 static inline ibd_rc_chan_t *ibd_rc_rm_header_chan_list( 94 ibd_rc_chan_list_t *); 95 static inline ibd_rc_chan_t *ibd_rc_rm_from_chan_list(ibd_rc_chan_list_t *, 96 ibd_rc_chan_t *); 97 98 /* CQ handlers */ 99 static void ibd_rc_rcq_handler(ibt_cq_hdl_t, void *); 100 static void ibd_rc_scq_handler(ibt_cq_hdl_t, void *); 101 static void ibd_rc_poll_rcq(ibd_rc_chan_t *, ibt_cq_hdl_t); 102 103 /* Receive Functions */ 104 static int ibd_rc_post_srq(ibd_state_t *, ibd_rwqe_t *); 105 static void ibd_rc_srq_freemsg_cb(char *); 106 static void ibd_rc_srq_free_rwqe(ibd_state_t *, ibd_rwqe_t *); 107 108 static int ibd_rc_post_rwqe(ibd_rc_chan_t *, ibd_rwqe_t *); 109 static void ibd_rc_freemsg_cb(char *); 110 static void ibd_rc_process_rx(ibd_rc_chan_t *, ibd_rwqe_t *, ibt_wc_t *); 111 static void ibd_rc_free_rwqe(ibd_rc_chan_t *, ibd_rwqe_t *); 112 static void ibd_rc_fini_rxlist(ibd_rc_chan_t *); 113 114 115 /* Send Functions */ 116 static void ibd_rc_release_swqe(ibd_rc_chan_t *, ibd_swqe_t *); 117 static int ibd_rc_init_txlist(ibd_rc_chan_t *); 118 static void ibd_rc_fini_txlist(ibd_rc_chan_t *); 119 static uint_t ibd_rc_tx_recycle(caddr_t); 120 121 122 void 123 ibd_async_rc_close_act_chan(ibd_state_t *state, ibd_req_t *req) 124 { 125 ibd_rc_chan_t *rc_chan = req->rq_ptr; 126 ibd_ace_t *ace; 127 128 while (rc_chan != NULL) { 129 ace = rc_chan->ace; 130 ASSERT(ace != NULL); 131 /* Close old RC channel */ 132 ibd_rc_act_close(rc_chan, B_TRUE); 133 mutex_enter(&state->id_ac_mutex); 134 ASSERT(ace->ac_ref != 0); 135 atomic_dec_32(&ace->ac_ref); 136 ace->ac_chan = NULL; 137 if ((ace->ac_ref == 0) || (ace->ac_ref == CYCLEVAL)) { 138 IBD_ACACHE_INSERT_FREE(state, ace); 139 ace->ac_ref = 0; 140 } else { 141 ace->ac_ref |= CYCLEVAL; 142 state->rc_delay_ace_recycle++; 143 } 144 mutex_exit(&state->id_ac_mutex); 145 rc_chan = ibd_rc_rm_header_chan_list( 146 &state->rc_obs_act_chan_list); 147 } 148 } 149 150 void 151 ibd_async_rc_recycle_ace(ibd_state_t *state, ibd_req_t *req) 152 { 153 ibd_ace_t *ace = req->rq_ptr; 154 ibd_rc_chan_t *rc_chan; 155 156 ASSERT(ace != NULL); 157 rc_chan = ace->ac_chan; 158 ASSERT(rc_chan != NULL); 159 /* Close old RC channel */ 160 ibd_rc_act_close(rc_chan, B_TRUE); 161 mutex_enter(&state->id_ac_mutex); 162 ASSERT(ace->ac_ref != 0); 163 atomic_dec_32(&ace->ac_ref); 164 ace->ac_chan = NULL; 165 if ((ace->ac_ref == 0) || (ace->ac_ref == CYCLEVAL)) { 166 IBD_ACACHE_INSERT_FREE(state, ace); 167 ace->ac_ref = 0; 168 } else { 169 ace->ac_ref |= CYCLEVAL; 170 state->rc_delay_ace_recycle++; 171 } 172 mutex_exit(&state->id_ac_mutex); 173 mutex_enter(&state->rc_ace_recycle_lock); 174 state->rc_ace_recycle = NULL; 175 mutex_exit(&state->rc_ace_recycle_lock); 176 } 177 178 /* Simple ICMP IP Header Template */ 179 static const ipha_t icmp_ipha = { 180 IP_SIMPLE_HDR_VERSION, 0, 0, 0, 0, 0, IPPROTO_ICMP 181 }; 182 183 /* Packet is too big. 
Send ICMP packet to GLD to request a smaller MTU */ 184 void 185 ibd_async_rc_process_too_big(ibd_state_t *state, ibd_req_t *req) 186 { 187 mblk_t *mp = req->rq_ptr; 188 ibd_ace_t *ace = req->rq_ptr2; 189 uint16_t mtu = state->id_mtu - IPOIB_HDRSIZE; 190 uint_t len_needed; 191 size_t msg_len; 192 mblk_t *pmtu_mp; 193 ushort_t sap; 194 ib_header_info_t *ibha; /* ib header for pmtu_pkt */ 195 /* 196 * ipha: IP header for pmtu_pkt 197 * old_ipha: IP header for old packet 198 */ 199 ipha_t *ipha, *old_ipha; 200 icmph_t *icmph; 201 202 sap = ntohs(((ipoib_hdr_t *)mp->b_rptr)->ipoib_type); 203 204 if (!pullupmsg(mp, -1)) { 205 DPRINT(40, "ibd_async_rc_process_too_big: pullupmsg fail"); 206 goto too_big_fail; 207 } 208 /* move to IP header. */ 209 mp->b_rptr += IPOIB_HDRSIZE; 210 old_ipha = (ipha_t *)mp->b_rptr; 211 212 len_needed = IPH_HDR_LENGTH(old_ipha); 213 if (old_ipha->ipha_protocol == IPPROTO_ENCAP) { 214 len_needed += IPH_HDR_LENGTH(((uchar_t *)old_ipha + 215 len_needed)); 216 } else if (old_ipha->ipha_protocol == IPPROTO_IPV6) { 217 ip6_t *ip6h = (ip6_t *)((uchar_t *)old_ipha 218 + len_needed); 219 len_needed += ip_hdr_length_v6(mp, ip6h); 220 } 221 len_needed += IBD_RC_IP_ICMP_RETURN_DATA_BYTES; 222 msg_len = msgdsize(mp); 223 if (msg_len > len_needed) { 224 (void) adjmsg(mp, len_needed - msg_len); 225 msg_len = len_needed; 226 } 227 228 if ((pmtu_mp = allocb(sizeof (ib_header_info_t) + sizeof (ipha_t) 229 + sizeof (icmph_t), BPRI_MED)) == NULL) { 230 DPRINT(40, "ibd_async_rc_process_too_big: allocb fail"); 231 goto too_big_fail; 232 } 233 pmtu_mp->b_cont = mp; 234 pmtu_mp->b_wptr = pmtu_mp->b_rptr + sizeof (ib_header_info_t) 235 + sizeof (ipha_t) + sizeof (icmph_t); 236 237 ibha = (ib_header_info_t *)pmtu_mp->b_rptr; 238 239 /* Fill IB header */ 240 bcopy(&state->id_macaddr, &ibha->ib_dst, IPOIB_ADDRL); 241 /* 242 * If the GRH is not valid, indicate to GLDv3 by setting 243 * the VerTcFlow field to 0. 
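 *
 * (For illustration: the leading GRH dword holds the IP version, traffic
 * class and flow label, and the version field of a real GRH is never
 * zero, so an all-zero VerTcFlow word is unambiguous. A consumer can
 * therefore test for GRH presence with a single compare, e.g.
 *
 *	if (ibha->ib_grh.ipoib_vertcflow == 0)
 *		... no GRH; addressing info starts at ib_dst ...
 *
 * ibd_rc_process_rx() below marks loaned-up Rx buffers the same way.)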
244 	 */
245 	ibha->ib_grh.ipoib_vertcflow = 0;
246 	ibha->ipib_rhdr.ipoib_type = htons(sap);
247 	ibha->ipib_rhdr.ipoib_mbz = 0;
248
249 	/* Fill IP header */
250 	ipha = (ipha_t *)&ibha[1];
251 	*ipha = icmp_ipha;
252 	ipha->ipha_src = old_ipha->ipha_dst;
253 	ipha->ipha_dst = old_ipha->ipha_src;
254 	ipha->ipha_ttl = old_ipha->ipha_ttl;
255 	msg_len += sizeof (icmp_ipha) + sizeof (icmph_t);
256 	if (msg_len > IP_MAXPACKET) {
257 		ibd_print_warn(state, "ibd_rc_process_too_big_pkt: msg_len(%d) "
258 		    "> IP_MAXPACKET", (uint32_t)msg_len);
259 		(void) adjmsg(mp, IP_MAXPACKET - msg_len);
260 		msg_len = IP_MAXPACKET;
261 	}
262 	ipha->ipha_length = htons((uint16_t)msg_len);
263 	ipha->ipha_hdr_checksum = 0;
264 	ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha);
265
266 	/* Fill ICMP body */
267 	icmph = (icmph_t *)&ipha[1];
268 	bzero(icmph, sizeof (icmph_t));
269 	icmph->icmph_type = ICMP_DEST_UNREACHABLE;
270 	icmph->icmph_code = ICMP_FRAGMENTATION_NEEDED;
271 	icmph->icmph_du_mtu = htons(mtu);
272 	icmph->icmph_checksum = 0;
273 	icmph->icmph_checksum = IP_CSUM(pmtu_mp,
274 	    (int32_t)sizeof (ib_header_info_t) + (int32_t)sizeof (ipha_t), 0);
275
276 	mac_hcksum_set(pmtu_mp, 0, 0, 0, 0, HCK_FULLCKSUM | HCK_FULLCKSUM_OK);
277
278 	DPRINT(30, "ibd_async_rc_process_too_big: sap=0x%x, ip_src=0x%x, "
279 	    "ip_dst=0x%x, ttl=%d, len_needed=%d, msg_len=%d",
280 	    sap, ipha->ipha_src, ipha->ipha_dst, ipha->ipha_ttl,
281 	    len_needed, (uint32_t)msg_len);
282
283 	mac_rx(state->id_mh, state->id_rh, pmtu_mp);
284
285 	mutex_enter(&ace->tx_too_big_mutex);
286 	ace->tx_too_big_ongoing = B_FALSE;
287 	mutex_exit(&ace->tx_too_big_mutex);
288 	return;
289
290 too_big_fail:
291 	/* Drop packet */
292 	freemsg(mp);
293 	mutex_enter(&ace->tx_too_big_mutex);
294 	ace->tx_too_big_ongoing = B_FALSE;
295 	mutex_exit(&ace->tx_too_big_mutex);
296 }
297
298 /*
299  * Check all active/passive channels. If any active/passive
300  * channel has not been used for a long time, close it.
301  */
302 void
303 ibd_rc_conn_timeout_call(void *carg)
304 {
305 	ibd_state_t *state = carg;
306 	ibd_ace_t *ace, *pre_ace;
307 	ibd_rc_chan_t *chan, *pre_chan, *next_chan;
308 	ibd_req_t *req;
309
310 	/* Check all active channels. If chan->is_used == B_FALSE, close it */
311 	mutex_enter(&state->id_ac_mutex);
312 	ace = list_head(&state->id_ah_active);
313 	while ((pre_ace = ace) != NULL) {
314 		ace = list_next(&state->id_ah_active, ace);
315 		if (pre_ace->ac_chan != NULL) {
316 			chan = pre_ace->ac_chan;
317 			ASSERT(state->id_enable_rc == B_TRUE);
318 			if (chan->chan_state == IBD_RC_STATE_ACT_ESTAB) {
319 				if (chan->is_used == B_FALSE) {
320 					state->rc_timeout_act++;
321 					INC_REF(pre_ace, 1);
322 					IBD_ACACHE_PULLOUT_ACTIVE(state,
323 					    pre_ace);
324 					chan->chan_state =
325 					    IBD_RC_STATE_ACT_CLOSING;
326 					ibd_rc_signal_act_close(state, pre_ace);
327 				} else {
328 					chan->is_used = B_FALSE;
329 				}
330 			}
331 		}
332 	}
333 	mutex_exit(&state->id_ac_mutex);
334
335 	/* Check all passive channels.
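	 * The reaper is deliberately two-phase: each tick clears is_used on
	 * every channel it leaves alone, and any channel still B_FALSE on
	 * the next tick has been idle for a full ibd_rc_conn_timeout
	 * interval and gets closed. A sketch of one channel under the
	 * reaper:
	 *
	 *	tick N:   is_used == B_TRUE  -> clear it, keep channel
	 *	(no traffic arrives; the data path never sets is_used)
	 *	tick N+1: is_used == B_FALSE -> queue IBD_ASYNC_RC_CLOSE_PAS_CHAN
	 *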
If chan->is_used == B_FALSE, close it */ 336 mutex_enter(&state->rc_pass_chan_list.chan_list_mutex); 337 next_chan = state->rc_pass_chan_list.chan_list; 338 pre_chan = NULL; 339 while ((chan = next_chan) != NULL) { 340 next_chan = chan->next; 341 if (chan->is_used == B_FALSE) { 342 req = kmem_cache_alloc(state->id_req_kmc, KM_NOSLEEP); 343 if (req != NULL) { 344 /* remove it */ 345 state->rc_timeout_pas++; 346 req->rq_ptr = chan; 347 ibd_queue_work_slot(state, req, 348 IBD_ASYNC_RC_CLOSE_PAS_CHAN); 349 } else { 350 ibd_print_warn(state, "ibd_rc_conn_timeout: " 351 "alloc ibd_req_t fail"); 352 if (pre_chan == NULL) { 353 state->rc_pass_chan_list.chan_list = 354 chan; 355 } else { 356 pre_chan->next = chan; 357 } 358 pre_chan = chan; 359 } 360 } else { 361 if (pre_chan == NULL) { 362 state->rc_pass_chan_list.chan_list = chan; 363 } else { 364 pre_chan->next = chan; 365 } 366 pre_chan = chan; 367 chan->is_used = B_FALSE; 368 } 369 } 370 if (pre_chan != NULL) { 371 pre_chan->next = NULL; 372 } else { 373 state->rc_pass_chan_list.chan_list = NULL; 374 } 375 mutex_exit(&state->rc_pass_chan_list.chan_list_mutex); 376 377 mutex_enter(&state->rc_timeout_lock); 378 if (state->rc_timeout_start == B_TRUE) { 379 state->rc_timeout = timeout(ibd_rc_conn_timeout_call, state, 380 SEC_TO_TICK(ibd_rc_conn_timeout)); 381 } 382 mutex_exit(&state->rc_timeout_lock); 383 } 384 385 #ifdef DEBUG 386 /* 387 * ibd_rc_update_stats - update driver private kstat counters 388 * 389 * This routine will dump the internal statistics counters for ibd's 390 * Reliable Connected Mode. The current stats dump values will 391 * be sent to the kernel status area. 392 */ 393 static int 394 ibd_rc_update_stats(kstat_t *ksp, int rw) 395 { 396 ibd_state_t *state; 397 ibd_rc_stat_t *ibd_rc_ksp; 398 399 if (rw == KSTAT_WRITE) 400 return (EACCES); 401 402 state = (ibd_state_t *)ksp->ks_private; 403 ASSERT(state != NULL); 404 ibd_rc_ksp = (ibd_rc_stat_t *)ksp->ks_data; 405 406 ibd_rc_ksp->rc_rcv_trans_byte.value.ul = state->rc_rcv_trans_byte; 407 ibd_rc_ksp->rc_rcv_trans_pkt.value.ul = state->rc_rcv_trans_pkt; 408 ibd_rc_ksp->rc_rcv_copy_byte.value.ul = state->rc_rcv_copy_byte; 409 ibd_rc_ksp->rc_rcv_copy_pkt.value.ul = state->rc_rcv_copy_pkt; 410 ibd_rc_ksp->rc_rcv_alloc_fail.value.ul = state->rc_rcv_alloc_fail; 411 412 ibd_rc_ksp->rc_rcq_err.value.ul = state->rc_rcq_err; 413 414 ibd_rc_ksp->rc_rwqe_short.value.ul = state->rc_rwqe_short; 415 416 ibd_rc_ksp->rc_xmt_bytes.value.ul = state->rc_xmt_bytes; 417 ibd_rc_ksp->rc_xmt_small_pkt.value.ul = state->rc_xmt_small_pkt; 418 ibd_rc_ksp->rc_xmt_fragmented_pkt.value.ul = 419 state->rc_xmt_fragmented_pkt; 420 ibd_rc_ksp->rc_xmt_map_fail_pkt.value.ul = state->rc_xmt_map_fail_pkt; 421 ibd_rc_ksp->rc_xmt_map_succ_pkt.value.ul = state->rc_xmt_map_succ_pkt; 422 ibd_rc_ksp->rc_ace_not_found.value.ul = state->rc_ace_not_found; 423 424 ibd_rc_ksp->rc_scq_no_swqe.value.ul = state->rc_scq_no_swqe; 425 ibd_rc_ksp->rc_scq_no_largebuf.value.ul = state->rc_scq_no_largebuf; 426 ibd_rc_ksp->rc_swqe_short.value.ul = state->rc_swqe_short; 427 ibd_rc_ksp->rc_swqe_mac_update.value.ul = state->rc_swqe_mac_update; 428 ibd_rc_ksp->rc_xmt_buf_short.value.ul = state->rc_xmt_buf_short; 429 ibd_rc_ksp->rc_xmt_buf_mac_update.value.ul = 430 state->rc_xmt_buf_mac_update; 431 432 ibd_rc_ksp->rc_conn_succ.value.ul = state->rc_conn_succ; 433 ibd_rc_ksp->rc_conn_fail.value.ul = state->rc_conn_fail; 434 ibd_rc_ksp->rc_null_conn.value.ul = state->rc_null_conn; 435 ibd_rc_ksp->rc_no_estab_conn.value.ul = state->rc_no_estab_conn; 
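	/*
	 * These are unlocked snapshots of counters that the data path
	 * updates; each value is read atomically, but the set as a whole
	 * is not a consistent cut. A reader would fetch them with, e.g.
	 * (the instance name here is hypothetical):
	 *
	 *	# kstat -m ibd -n statistics0_8001_1
	 */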
436 437 ibd_rc_ksp->rc_act_close.value.ul = state->rc_act_close; 438 ibd_rc_ksp->rc_pas_close.value.ul = state->rc_pas_close; 439 ibd_rc_ksp->rc_delay_ace_recycle.value.ul = state->rc_delay_ace_recycle; 440 ibd_rc_ksp->rc_act_close_simultaneous.value.ul = 441 state->rc_act_close_simultaneous; 442 ibd_rc_ksp->rc_reset_cnt.value.ul = state->rc_reset_cnt; 443 ibd_rc_ksp->rc_timeout_act.value.ul = state->rc_timeout_act; 444 ibd_rc_ksp->rc_timeout_pas.value.ul = state->rc_timeout_pas; 445 446 return (0); 447 } 448 449 450 /* 451 * ibd_rc_init_stats - initialize kstat data structures 452 * 453 * This routine will create and initialize the driver private 454 * statistics counters. 455 */ 456 int 457 ibd_rc_init_stats(ibd_state_t *state) 458 { 459 kstat_t *ksp; 460 ibd_rc_stat_t *ibd_rc_ksp; 461 char stat_name[KSTAT_STRLEN]; 462 int inst; 463 464 /* 465 * Create and init kstat 466 */ 467 inst = ddi_get_instance(state->id_dip); 468 (void) snprintf(stat_name, KSTAT_STRLEN, "statistics%d_%x_%u", inst, 469 state->id_pkey, state->id_plinkid); 470 ksp = kstat_create("ibd", 0, stat_name, "net", KSTAT_TYPE_NAMED, 471 sizeof (ibd_rc_stat_t) / sizeof (kstat_named_t), 0); 472 473 if (ksp == NULL) { 474 ibd_print_warn(state, "ibd_rc_init_stats: Could not create " 475 "kernel statistics"); 476 return (DDI_FAILURE); 477 } 478 479 state->rc_ksp = ksp; /* Fill in the ksp of ibd over RC mode */ 480 481 ibd_rc_ksp = (ibd_rc_stat_t *)ksp->ks_data; 482 483 /* 484 * Initialize all the statistics 485 */ 486 kstat_named_init(&ibd_rc_ksp->rc_rcv_trans_byte, "RC: Rx Bytes, " 487 "transfer mode", KSTAT_DATA_ULONG); 488 kstat_named_init(&ibd_rc_ksp->rc_rcv_trans_pkt, "RC: Rx Pkts, " 489 "transfer mode", KSTAT_DATA_ULONG); 490 kstat_named_init(&ibd_rc_ksp->rc_rcv_copy_byte, "RC: Rx Bytes, " 491 "copy mode", KSTAT_DATA_ULONG); 492 kstat_named_init(&ibd_rc_ksp->rc_rcv_copy_pkt, "RC: Rx Pkts, " 493 "copy mode", KSTAT_DATA_ULONG); 494 kstat_named_init(&ibd_rc_ksp->rc_rcv_alloc_fail, "RC: Rx alloc fail", 495 KSTAT_DATA_ULONG); 496 497 kstat_named_init(&ibd_rc_ksp->rc_rcq_err, "RC: fail in Recv CQ handler", 498 KSTAT_DATA_ULONG); 499 500 kstat_named_init(&ibd_rc_ksp->rc_rwqe_short, "RC: Short rwqe", 501 KSTAT_DATA_ULONG); 502 503 kstat_named_init(&ibd_rc_ksp->rc_xmt_bytes, "RC: Sent Bytes", 504 KSTAT_DATA_ULONG); 505 kstat_named_init(&ibd_rc_ksp->rc_xmt_small_pkt, 506 "RC: Tx pkt small size", KSTAT_DATA_ULONG); 507 kstat_named_init(&ibd_rc_ksp->rc_xmt_fragmented_pkt, 508 "RC: Tx pkt fragmentary", KSTAT_DATA_ULONG); 509 kstat_named_init(&ibd_rc_ksp->rc_xmt_map_fail_pkt, 510 "RC: Tx pkt fail ibt_map_mem_iov()", KSTAT_DATA_ULONG); 511 kstat_named_init(&ibd_rc_ksp->rc_xmt_map_succ_pkt, 512 "RC: Tx pkt succ ibt_map_mem_iov()", KSTAT_DATA_ULONG); 513 kstat_named_init(&ibd_rc_ksp->rc_ace_not_found, "RC: ace not found", 514 KSTAT_DATA_ULONG); 515 516 kstat_named_init(&ibd_rc_ksp->rc_scq_no_swqe, "RC: No swqe after " 517 "recycle", KSTAT_DATA_ULONG); 518 kstat_named_init(&ibd_rc_ksp->rc_scq_no_largebuf, "RC: No large tx buf " 519 "after recycle", KSTAT_DATA_ULONG); 520 kstat_named_init(&ibd_rc_ksp->rc_swqe_short, "RC: No swqe in ibd_send", 521 KSTAT_DATA_ULONG); 522 kstat_named_init(&ibd_rc_ksp->rc_swqe_mac_update, "RC: mac_tx_update " 523 "#, swqe available", KSTAT_DATA_ULONG); 524 kstat_named_init(&ibd_rc_ksp->rc_xmt_buf_short, "RC: No buf in " 525 "ibd_send", KSTAT_DATA_ULONG); 526 kstat_named_init(&ibd_rc_ksp->rc_xmt_buf_mac_update, "RC: " 527 "mac_tx_update #, buf available", KSTAT_DATA_ULONG); 528 529 
kstat_named_init(&ibd_rc_ksp->rc_conn_succ, "RC: succ connected",
530 	    KSTAT_DATA_ULONG);
531 	kstat_named_init(&ibd_rc_ksp->rc_conn_fail, "RC: fail connect",
532 	    KSTAT_DATA_ULONG);
533 	kstat_named_init(&ibd_rc_ksp->rc_null_conn, "RC: null conn for unicast "
534 	    "pkt", KSTAT_DATA_ULONG);
535 	kstat_named_init(&ibd_rc_ksp->rc_no_estab_conn, "RC: not in act estab "
536 	    "state", KSTAT_DATA_ULONG);
537
538 	kstat_named_init(&ibd_rc_ksp->rc_act_close, "RC: call ibd_rc_act_close",
539 	    KSTAT_DATA_ULONG);
540 	kstat_named_init(&ibd_rc_ksp->rc_pas_close, "RC: call ibd_rc_pas_close",
541 	    KSTAT_DATA_ULONG);
542 	kstat_named_init(&ibd_rc_ksp->rc_delay_ace_recycle, "RC: delay ace "
543 	    "recycle", KSTAT_DATA_ULONG);
544 	kstat_named_init(&ibd_rc_ksp->rc_act_close_simultaneous, "RC: "
545 	    "simultaneous ibd_rc_act_close", KSTAT_DATA_ULONG);
546 	kstat_named_init(&ibd_rc_ksp->rc_reset_cnt, "RC: Reset RC channel",
547 	    KSTAT_DATA_ULONG);
548 	kstat_named_init(&ibd_rc_ksp->rc_timeout_act, "RC: timeout act side",
549 	    KSTAT_DATA_ULONG);
550 	kstat_named_init(&ibd_rc_ksp->rc_timeout_pas, "RC: timeout pas side",
551 	    KSTAT_DATA_ULONG);
552
553 	/*
554 	 * Function to provide kernel stat update on demand
555 	 */
556 	ksp->ks_update = ibd_rc_update_stats;
557
558 	/*
559 	 * Pointer into provider's raw statistics
560 	 */
561 	ksp->ks_private = (void *)state;
562
563 	/*
564 	 * Add kstat to the system's kstat chain
565 	 */
566 	kstat_install(ksp);
567
568 	return (DDI_SUCCESS);
569 }
570 #endif
571
572 static ibt_status_t
573 ibd_rc_alloc_chan(ibd_rc_chan_t **ret_chan, ibd_state_t *state,
574     boolean_t is_tx_chan)
575 {
576 	ibt_status_t result;
577 	ibd_rc_chan_t *chan;
578 	ibt_rc_chan_alloc_args_t alloc_args;
579 	ibt_chan_alloc_flags_t alloc_flags;
580 	ibt_chan_sizes_t sizes;
581 	ibt_cq_attr_t cq_atts;
582 	int rv;
583
584 	chan = kmem_zalloc(sizeof (ibd_rc_chan_t), KM_SLEEP);
585
586 	chan->state = state;
587 	mutex_init(&chan->rx_wqe_list.dl_mutex, NULL, MUTEX_DRIVER, NULL);
588 	mutex_init(&chan->rx_free_list.dl_mutex, NULL, MUTEX_DRIVER, NULL);
589 	mutex_init(&chan->tx_wqe_list.dl_mutex, NULL, MUTEX_DRIVER, NULL);
590 	mutex_init(&chan->tx_rel_list.dl_mutex, NULL, MUTEX_DRIVER, NULL);
591 	mutex_init(&chan->tx_post_lock, NULL, MUTEX_DRIVER, NULL);
592 	mutex_init(&chan->tx_poll_lock, NULL, MUTEX_DRIVER, NULL);
593
594 	/* Allocate IB structures for a new RC channel.
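	 * A Tx channel gets a full-sized send CQ and a minimal receive CQ;
	 * an Rx channel is the mirror image, since bulk data only flows one
	 * way on each half of an ibd RC connection:
	 *
	 *	is_tx_chan	scq_size		rcq_size
	 *	B_TRUE		id_rc_num_swqe		IBD_RC_MIN_CQ_SIZE
	 *	B_FALSE		IBD_RC_MIN_CQ_SIZE	id_rc_num_rwqe
	 *
	 * ibt_alloc_cq() may round a requested size up and returns the
	 * actual size through its last argument, which is why chan->scq_size
	 * and chan->rcq_size are passed by address below.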
	 */
595 	if (is_tx_chan) {
596 		chan->scq_size = state->id_rc_num_swqe;
597 		chan->rcq_size = IBD_RC_MIN_CQ_SIZE;
598 	} else {
599 		chan->scq_size = IBD_RC_MIN_CQ_SIZE;
600 		chan->rcq_size = state->id_rc_num_rwqe;
601 	}
602 	cq_atts.cq_size = chan->scq_size;
603 	cq_atts.cq_sched = NULL;
604 	cq_atts.cq_flags = IBT_CQ_NO_FLAGS;
605 	result = ibt_alloc_cq(state->id_hca_hdl, &cq_atts, &chan->scq_hdl,
606 	    &chan->scq_size);
607 	if (result != IBT_SUCCESS) {
608 		DPRINT(40, "ibd_rc_alloc_chan: error <%d> "
609 		    "creating send completion queue (size <%d>)",
610 		    result, chan->scq_size);
611 		goto alloc_scq_err;
612 	}	/* if failure to alloc cq */
613
614 	if (ibt_modify_cq(chan->scq_hdl, state->id_rc_tx_comp_count,
615 	    state->id_rc_tx_comp_usec, 0) != IBT_SUCCESS) {
616 		DPRINT(30, "ibd_rc_alloc_chan: Send CQ "
617 		    "interrupt moderation failed");
618 	}
619
620 	ibt_set_cq_private(chan->scq_hdl, (void *) (uintptr_t)chan);
621 	ibt_set_cq_handler(chan->scq_hdl, ibd_rc_scq_handler,
622 	    (void *) (uintptr_t)chan);
623
624 	cq_atts.cq_size = chan->rcq_size;
625 	cq_atts.cq_sched = NULL;
626 	cq_atts.cq_flags = IBT_CQ_NO_FLAGS;
627 	result = ibt_alloc_cq(state->id_hca_hdl, &cq_atts, &chan->rcq_hdl,
628 	    &chan->rcq_size);
629 	if (result != IBT_SUCCESS) {
630 		ibd_print_warn(state, "ibd_rc_alloc_chan: error <%d> creating "
631 		    "rx completion queue (size <%d>)", result, chan->rcq_size);
632 		goto alloc_rcq_err;
633 	}	/* if failure to alloc cq */
634
635 	if (ibt_modify_cq(chan->rcq_hdl, state->id_rc_rx_comp_count,
636 	    state->id_rc_rx_comp_usec, 0) != IBT_SUCCESS) {
637 		DPRINT(30, "ibd_rc_alloc_chan: Receive CQ "
638 		    "interrupt moderation failed");
639 	}
640
641 	ibt_set_cq_private(chan->rcq_hdl, (void *) (uintptr_t)chan);
642 	ibt_set_cq_handler(chan->rcq_hdl, ibd_rc_rcq_handler,
643 	    (void *)(uintptr_t)chan);
644
645 	if (is_tx_chan) {
646 		chan->is_tx_chan = B_TRUE;
647 		if (ibd_rc_init_txlist(chan) != DDI_SUCCESS) {
648 			ibd_print_warn(state, "ibd_rc_alloc_chan: "
649 			    "ibd_rc_init_txlist failed");
650 			goto init_txlist_err;
651 		}
652 		if (ibd_rc_tx_softintr == 1) {
653 			if ((rv = ddi_add_softintr(state->id_dip,
654 			    DDI_SOFTINT_LOW, &chan->scq_softintr, NULL, NULL,
655 			    ibd_rc_tx_recycle, (caddr_t)chan)) !=
656 			    DDI_SUCCESS) {
657 				DPRINT(10, "ibd_rc_alloc_chan: failed in "
658 				    "ddi_add_softintr(scq_softintr), ret=%d",
659 				    rv);
660 				goto alloc_softintr_err;
661 			}
662 		}
663 	} else {
664 		chan->is_tx_chan = B_FALSE;
665 	}
666
667 	/*
668 	 * enable completions
669 	 */
670 	result = ibt_enable_cq_notify(chan->scq_hdl, IBT_NEXT_COMPLETION);
671 	if (result != IBT_SUCCESS) {
672 		ibd_print_warn(state, "ibd_rc_alloc_chan: ibt_enable_cq_notify"
673 		    "(scq) failed: status %d\n", result);
674 		goto alloc_scq_enable_err;
675 	}
676
677 	/* We will enable chan->rcq_hdl later. */
678
679 	/* alloc a RC channel */
680 	bzero(&alloc_args, sizeof (ibt_rc_chan_alloc_args_t));
681 	bzero(&sizes, sizeof (ibt_chan_sizes_t));
682
683 	alloc_args.rc_flags = IBT_WR_SIGNALED;
684 	alloc_args.rc_control = IBT_CEP_NO_FLAGS;
685
686 	alloc_args.rc_scq = chan->scq_hdl;
687 	alloc_args.rc_rcq = chan->rcq_hdl;
688 	alloc_args.rc_pd = state->id_pd_hdl;
689
690 	alloc_args.rc_hca_port_num = state->id_port;
691 	alloc_args.rc_clone_chan = NULL;
692
693 	/* scatter/gather */
694 	alloc_args.rc_sizes.cs_sq_sgl = state->rc_tx_max_sqseg;
695
696 	/*
697 	 * Use a single SGL element on the receive side: the ibd driver
698 	 * allocates one whole block of memory for each ibt_post_recv(),
699 	 * so a receive WR never needs to gather more than one buffer.
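	 *
	 * Each rwqe therefore carries exactly one SGE covering its copybuf
	 * past the reserved GRH area, as ibd_rc_init_rxlist() sets up later:
	 *
	 *	rwqe->rwqe_copybuf.ic_sgl.ds_va  = bufaddr + IPOIB_GRH_SIZE;
	 *	rwqe->rwqe_copybuf.ic_sgl.ds_len = state->rc_mtu;
	 *	rwqe->w_rwr.wr_nds = 1;
	 *	rwqe->w_rwr.wr_sgl = &rwqe->rwqe_copybuf.ic_sgl;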
700 */ 701 alloc_args.rc_sizes.cs_rq_sgl = 1; 702 703 /* The send queue size and the receive queue size */ 704 alloc_args.rc_sizes.cs_sq = chan->scq_size; 705 alloc_args.rc_sizes.cs_rq = chan->rcq_size; 706 707 if (state->id_hca_res_lkey_capab) { 708 alloc_args.rc_flags = IBT_FAST_REG_RES_LKEY; 709 } else { 710 DPRINT(40, "ibd_rc_alloc_chan: not support reserved lkey"); 711 } 712 713 if (state->rc_enable_srq) { 714 alloc_flags = IBT_ACHAN_USES_SRQ; 715 alloc_args.rc_srq = state->rc_srq_hdl; 716 } else { 717 alloc_flags = IBT_ACHAN_NO_FLAGS; 718 } 719 720 result = ibt_alloc_rc_channel(state->id_hca_hdl, 721 alloc_flags, &alloc_args, &chan->chan_hdl, &sizes); 722 if (result != IBT_SUCCESS) { 723 ibd_print_warn(state, "ibd_rc_alloc_chan: ibd_rc_open_channel" 724 " fail:<%d>", result); 725 goto alloc_scq_enable_err; 726 } 727 728 if (is_tx_chan) 729 atomic_inc_32(&state->rc_num_tx_chan); 730 else 731 atomic_inc_32(&state->rc_num_rx_chan); 732 733 /* For the connection reaper routine ibd_rc_conn_timeout_call() */ 734 chan->is_used = B_TRUE; 735 736 *ret_chan = chan; 737 return (IBT_SUCCESS); 738 739 alloc_scq_enable_err: 740 if (is_tx_chan) { 741 if (ibd_rc_tx_softintr == 1) { 742 ddi_remove_softintr(chan->scq_softintr); 743 } 744 } 745 alloc_softintr_err: 746 if (is_tx_chan) { 747 ibd_rc_fini_txlist(chan); 748 } 749 init_txlist_err: 750 (void) ibt_free_cq(chan->rcq_hdl); 751 alloc_rcq_err: 752 (void) ibt_free_cq(chan->scq_hdl); 753 alloc_scq_err: 754 mutex_destroy(&chan->tx_poll_lock); 755 mutex_destroy(&chan->tx_post_lock); 756 mutex_destroy(&chan->tx_rel_list.dl_mutex); 757 mutex_destroy(&chan->tx_wqe_list.dl_mutex); 758 mutex_destroy(&chan->rx_free_list.dl_mutex); 759 mutex_destroy(&chan->rx_wqe_list.dl_mutex); 760 kmem_free(chan, sizeof (ibd_rc_chan_t)); 761 return (result); 762 } 763 764 static void 765 ibd_rc_free_chan(ibd_rc_chan_t *chan) 766 { 767 ibt_status_t ret; 768 769 /* DPRINT(30, "ibd_rc_free_chan: chan=%p", chan); */ 770 771 if (chan->chan_hdl != NULL) { 772 ret = ibt_free_channel(chan->chan_hdl); 773 if (ret != IBT_SUCCESS) { 774 DPRINT(40, "ib_rc_free_chan: ibt_free_channel failed, " 775 "chan=%p, returned: %d", chan, ret); 776 return; 777 } 778 chan->chan_hdl = NULL; 779 } 780 781 if (chan->rcq_hdl != NULL) { 782 ret = ibt_free_cq(chan->rcq_hdl); 783 if (ret != IBT_SUCCESS) { 784 DPRINT(40, "ib_rc_free_chan: ibt_free_cq(rcq) failed, " 785 "chan=%p, returned: %d", chan, ret); 786 return; 787 } 788 chan->rcq_hdl = NULL; 789 } 790 791 if (chan->scq_hdl != NULL) { 792 ret = ibt_free_cq(chan->scq_hdl); 793 if (ret != IBT_SUCCESS) { 794 DPRINT(40, "ib_rc_free_chan: ibt_free_cq(scq) failed, " 795 "chan=%p, returned: %d", chan, ret); 796 return; 797 } 798 chan->scq_hdl = NULL; 799 } 800 801 /* Free buffers */ 802 if (chan->is_tx_chan) { 803 ibd_rc_fini_txlist(chan); 804 if (ibd_rc_tx_softintr == 1) { 805 ddi_remove_softintr(chan->scq_softintr); 806 } 807 atomic_dec_32(&chan->state->rc_num_tx_chan); 808 } else { 809 if (!chan->state->rc_enable_srq) { 810 ibd_rc_fini_rxlist(chan); 811 } 812 atomic_dec_32(&chan->state->rc_num_rx_chan); 813 } 814 815 mutex_destroy(&chan->tx_poll_lock); 816 mutex_destroy(&chan->tx_post_lock); 817 mutex_destroy(&chan->tx_rel_list.dl_mutex); 818 mutex_destroy(&chan->tx_wqe_list.dl_mutex); 819 mutex_destroy(&chan->rx_free_list.dl_mutex); 820 mutex_destroy(&chan->rx_wqe_list.dl_mutex); 821 822 /* 823 * If it is a passive channel, must make sure it has been removed 824 * from chan->state->rc_pass_chan_list 825 */ 826 kmem_free(chan, sizeof (ibd_rc_chan_t)); 
827 } 828 829 /* Add a RC channel */ 830 static inline void 831 ibd_rc_add_to_chan_list(ibd_rc_chan_list_t *list, ibd_rc_chan_t *chan) 832 { 833 mutex_enter(&list->chan_list_mutex); 834 if (list->chan_list == NULL) { 835 list->chan_list = chan; 836 chan->next = NULL; 837 } else { 838 chan->next = list->chan_list; 839 list->chan_list = chan; 840 } 841 mutex_exit(&list->chan_list_mutex); 842 } 843 844 static boolean_t 845 ibd_rc_re_add_to_pas_chan_list(ibd_rc_chan_t *chan) 846 { 847 ibd_state_t *state = chan->state; 848 849 mutex_enter(&state->rc_pass_chan_list.chan_list_mutex); 850 if ((state->id_mac_state & IBD_DRV_STARTED) == 0) { 851 mutex_exit(&state->rc_pass_chan_list.chan_list_mutex); 852 return (B_FALSE); 853 } else { 854 if (state->rc_pass_chan_list.chan_list == NULL) { 855 state->rc_pass_chan_list.chan_list = chan; 856 chan->next = NULL; 857 } else { 858 chan->next = state->rc_pass_chan_list.chan_list; 859 state->rc_pass_chan_list.chan_list = chan; 860 } 861 mutex_exit(&state->rc_pass_chan_list.chan_list_mutex); 862 return (B_TRUE); 863 } 864 } 865 866 /* Remove a RC channel */ 867 static inline ibd_rc_chan_t * 868 ibd_rc_rm_from_chan_list(ibd_rc_chan_list_t *list, ibd_rc_chan_t *chan) 869 { 870 ibd_rc_chan_t *pre_chan; 871 872 mutex_enter(&list->chan_list_mutex); 873 if (list->chan_list == chan) { 874 DPRINT(30, "ibd_rc_rm_from_chan_list(first): found chan(%p)" 875 " in chan_list", chan); 876 list->chan_list = chan->next; 877 } else { 878 pre_chan = list->chan_list; 879 while (pre_chan != NULL) { 880 if (pre_chan->next == chan) { 881 DPRINT(30, "ibd_rc_rm_from_chan_list" 882 "(middle): found chan(%p)", chan); 883 pre_chan->next = chan->next; 884 break; 885 } 886 pre_chan = pre_chan->next; 887 } 888 if (pre_chan == NULL) 889 chan = NULL; 890 } 891 mutex_exit(&list->chan_list_mutex); 892 return (chan); 893 } 894 895 static inline ibd_rc_chan_t * 896 ibd_rc_rm_header_chan_list(ibd_rc_chan_list_t *list) 897 { 898 ibd_rc_chan_t *rc_chan; 899 900 mutex_enter(&list->chan_list_mutex); 901 rc_chan = list->chan_list; 902 if (rc_chan != NULL) { 903 list->chan_list = rc_chan->next; 904 } 905 mutex_exit(&list->chan_list_mutex); 906 return (rc_chan); 907 } 908 909 static int 910 ibd_rc_alloc_srq_copybufs(ibd_state_t *state) 911 { 912 ibt_mr_attr_t mem_attr; 913 uint_t rc_rx_bufs_sz; 914 915 /* 916 * Allocate one big chunk for all regular rx copy bufs 917 */ 918 rc_rx_bufs_sz = (state->rc_mtu + IPOIB_GRH_SIZE) * state->rc_srq_size; 919 920 state->rc_srq_rx_bufs = kmem_zalloc(rc_rx_bufs_sz, KM_SLEEP); 921 922 state->rc_srq_rwqes = kmem_zalloc(state->rc_srq_size * 923 sizeof (ibd_rwqe_t), KM_SLEEP); 924 925 /* 926 * Do one memory registration on the entire rxbuf area 927 */ 928 mem_attr.mr_vaddr = (uint64_t)(uintptr_t)state->rc_srq_rx_bufs; 929 mem_attr.mr_len = rc_rx_bufs_sz; 930 mem_attr.mr_as = NULL; 931 mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE; 932 if (ibt_register_mr(state->id_hca_hdl, state->id_pd_hdl, &mem_attr, 933 &state->rc_srq_rx_mr_hdl, &state->rc_srq_rx_mr_desc) 934 != IBT_SUCCESS) { 935 DPRINT(40, "ibd_rc_alloc_srq_copybufs: ibt_register_mr() " 936 "failed"); 937 kmem_free(state->rc_srq_rwqes, 938 state->rc_srq_size * sizeof (ibd_rwqe_t)); 939 kmem_free(state->rc_srq_rx_bufs, rc_rx_bufs_sz); 940 state->rc_srq_rx_bufs = NULL; 941 state->rc_srq_rwqes = NULL; 942 return (DDI_FAILURE); 943 } 944 945 return (DDI_SUCCESS); 946 } 947 948 static void 949 ibd_rc_free_srq_copybufs(ibd_state_t *state) 950 { 951 uint_t rc_rx_buf_sz; 952 953 /* 954 * Don't change the value of 
state->rc_mtu between the call to
955  * ibd_rc_alloc_srq_copybufs() and the call to ibd_rc_free_srq_copybufs().
956  */
957 	rc_rx_buf_sz = state->rc_mtu + IPOIB_GRH_SIZE;
958
959 	/*
960 	 * Unregister rxbuf mr
961 	 */
962 	if (ibt_deregister_mr(state->id_hca_hdl,
963 	    state->rc_srq_rx_mr_hdl) != IBT_SUCCESS) {
964 		DPRINT(40, "ibd_rc_free_srq_copybufs: ibt_deregister_mr()"
965 		    " failed");
966 	}
967 	state->rc_srq_rx_mr_hdl = NULL;
968
969 	/*
970 	 * Free rxbuf memory
971 	 */
972 	kmem_free(state->rc_srq_rwqes,
973 	    state->rc_srq_size * sizeof (ibd_rwqe_t));
974 	kmem_free(state->rc_srq_rx_bufs, state->rc_srq_size * rc_rx_buf_sz);
975 	state->rc_srq_rwqes = NULL;
976 	state->rc_srq_rx_bufs = NULL;
977 }
978
979 /*
980  * Allocate and post a certain number of SRQ receive buffers and WRs.
981  */
982 int
983 ibd_rc_init_srq_list(ibd_state_t *state)
984 {
985 	ibd_rwqe_t *rwqe;
986 	ibt_lkey_t lkey;
987 	int i;
988 	uint_t len;
989 	uint8_t *bufaddr;
990 	ibt_srq_sizes_t srq_sizes;
991 	ibt_srq_sizes_t srq_real_sizes;
992 	ibt_status_t ret;
993
994 	srq_sizes.srq_sgl_sz = 1;
995 	srq_sizes.srq_wr_sz = state->id_rc_num_srq;
996 	ret = ibt_alloc_srq(state->id_hca_hdl, IBT_SRQ_NO_FLAGS,
997 	    state->id_pd_hdl, &srq_sizes, &state->rc_srq_hdl, &srq_real_sizes);
998 	if (ret != IBT_SUCCESS) {
999 		/*
1000 		 * The following code handles CR 6932460 (the ibd interface
1001 		 * could not be configured on 32-bit x86 systems). A 32-bit
1002 		 * x86 system has fewer memory resources than a 64-bit x86
1003 		 * system; if the current resource request can't be
1004 		 * satisfied, retry with a smaller request.
1005 		 */
1006 		len = state->id_rc_num_srq;
1007 		while ((ret == IBT_HCA_WR_EXCEEDED) &&
1008 		    (len >= 2 * IBD_RC_MIN_CQ_SIZE)) {
1009 			len = len / 2;
1010 			srq_sizes.srq_sgl_sz = 1;
1011 			srq_sizes.srq_wr_sz = len;
1012 			ret = ibt_alloc_srq(state->id_hca_hdl,
1013 			    IBT_SRQ_NO_FLAGS, state->id_pd_hdl, &srq_sizes,
1014 			    &state->rc_srq_hdl, &srq_real_sizes);
1015 		}
1016 		if (ret != IBT_SUCCESS) {
1017 			DPRINT(10, "ibd_rc_init_srq_list: ibt_alloc_srq failed. "
1018 "req_sgl_sz=%d, req_wr_sz=0x%x, final_req_wr_sz=" 1019 "0x%x, ret=%d", srq_sizes.srq_sgl_sz, 1020 srq_sizes.srq_wr_sz, len, ret); 1021 return (DDI_FAILURE); 1022 } 1023 state->id_rc_num_srq = len; 1024 state->id_rc_num_rwqe = state->id_rc_num_srq + 1; 1025 } 1026 1027 state->rc_srq_size = srq_real_sizes.srq_wr_sz; 1028 if (ibd_rc_alloc_srq_copybufs(state) != DDI_SUCCESS) { 1029 ret = ibt_free_srq(state->rc_srq_hdl); 1030 if (ret != IBT_SUCCESS) { 1031 ibd_print_warn(state, "ibd_rc_init_srq_list: " 1032 "ibt_free_srq fail, ret=%d", ret); 1033 } 1034 return (DDI_FAILURE); 1035 } 1036 1037 /* 1038 * Allocate and setup the rwqe list 1039 */ 1040 lkey = state->rc_srq_rx_mr_desc.md_lkey; 1041 rwqe = state->rc_srq_rwqes; 1042 bufaddr = state->rc_srq_rx_bufs; 1043 len = state->rc_mtu + IPOIB_GRH_SIZE; 1044 state->rc_srq_rwqe_list.dl_cnt = 0; 1045 state->rc_srq_rwqe_list.dl_bufs_outstanding = 0; 1046 for (i = 0; i < state->rc_srq_size; i++, rwqe++, bufaddr += len) { 1047 rwqe->w_state = state; 1048 rwqe->w_freeing_wqe = B_FALSE; 1049 rwqe->w_freemsg_cb.free_func = ibd_rc_srq_freemsg_cb; 1050 rwqe->w_freemsg_cb.free_arg = (char *)rwqe; 1051 rwqe->rwqe_copybuf.ic_bufaddr = bufaddr; 1052 1053 if ((rwqe->rwqe_im_mblk = desballoc(bufaddr, len, 0, 1054 &rwqe->w_freemsg_cb)) == NULL) { 1055 DPRINT(40, "ibd_rc_init_srq_list : desballoc() failed"); 1056 rwqe->rwqe_copybuf.ic_bufaddr = NULL; 1057 if (atomic_dec_32_nv(&state->id_running) != 0) { 1058 cmn_err(CE_WARN, "ibd_rc_init_srq_list: " 1059 "id_running was not 1\n"); 1060 } 1061 ibd_rc_fini_srq_list(state); 1062 atomic_inc_32(&state->id_running); 1063 return (DDI_FAILURE); 1064 } 1065 1066 rwqe->rwqe_copybuf.ic_sgl.ds_key = lkey; 1067 /* Leave IPOIB_GRH_SIZE space */ 1068 rwqe->rwqe_copybuf.ic_sgl.ds_va = 1069 (ib_vaddr_t)(uintptr_t)(bufaddr + IPOIB_GRH_SIZE); 1070 rwqe->rwqe_copybuf.ic_sgl.ds_len = state->rc_mtu; 1071 rwqe->w_rwr.wr_id = (ibt_wrid_t)(uintptr_t)rwqe; 1072 rwqe->w_rwr.wr_nds = 1; 1073 rwqe->w_rwr.wr_sgl = &rwqe->rwqe_copybuf.ic_sgl; 1074 (void) ibd_rc_post_srq(state, rwqe); 1075 } 1076 1077 mutex_enter(&state->rc_srq_free_list.dl_mutex); 1078 state->rc_srq_free_list.dl_head = NULL; 1079 state->rc_srq_free_list.dl_cnt = 0; 1080 mutex_exit(&state->rc_srq_free_list.dl_mutex); 1081 1082 return (DDI_SUCCESS); 1083 } 1084 1085 /* 1086 * Free the statically allocated Rx buffer list for SRQ. 
1087 */ 1088 void 1089 ibd_rc_fini_srq_list(ibd_state_t *state) 1090 { 1091 ibd_rwqe_t *rwqe; 1092 int i; 1093 ibt_status_t ret; 1094 1095 ASSERT(state->id_running == 0); 1096 ret = ibt_free_srq(state->rc_srq_hdl); 1097 if (ret != IBT_SUCCESS) { 1098 ibd_print_warn(state, "ibd_rc_fini_srq_list: " 1099 "ibt_free_srq fail, ret=%d", ret); 1100 } 1101 1102 mutex_enter(&state->rc_srq_rwqe_list.dl_mutex); 1103 rwqe = state->rc_srq_rwqes; 1104 for (i = 0; i < state->rc_srq_size; i++, rwqe++) { 1105 if (rwqe->rwqe_im_mblk != NULL) { 1106 rwqe->w_freeing_wqe = B_TRUE; 1107 freemsg(rwqe->rwqe_im_mblk); 1108 } 1109 } 1110 mutex_exit(&state->rc_srq_rwqe_list.dl_mutex); 1111 1112 ibd_rc_free_srq_copybufs(state); 1113 } 1114 1115 /* Repost the elements in state->ib_rc_free_list */ 1116 int 1117 ibd_rc_repost_srq_free_list(ibd_state_t *state) 1118 { 1119 ibd_rwqe_t *rwqe; 1120 ibd_wqe_t *list; 1121 uint_t len; 1122 1123 mutex_enter(&state->rc_srq_free_list.dl_mutex); 1124 if (state->rc_srq_free_list.dl_head != NULL) { 1125 /* repost them */ 1126 len = state->rc_mtu + IPOIB_GRH_SIZE; 1127 list = state->rc_srq_free_list.dl_head; 1128 state->rc_srq_free_list.dl_head = NULL; 1129 state->rc_srq_free_list.dl_cnt = 0; 1130 mutex_exit(&state->rc_srq_free_list.dl_mutex); 1131 while (list != NULL) { 1132 rwqe = WQE_TO_RWQE(list); 1133 if ((rwqe->rwqe_im_mblk == NULL) && 1134 ((rwqe->rwqe_im_mblk = desballoc( 1135 rwqe->rwqe_copybuf.ic_bufaddr, len, 0, 1136 &rwqe->w_freemsg_cb)) == NULL)) { 1137 DPRINT(40, "ibd_rc_repost_srq_free_list: " 1138 "failed in desballoc()"); 1139 do { 1140 ibd_rc_srq_free_rwqe(state, rwqe); 1141 list = list->w_next; 1142 rwqe = WQE_TO_RWQE(list); 1143 } while (list != NULL); 1144 return (DDI_FAILURE); 1145 } 1146 if (ibd_rc_post_srq(state, rwqe) == DDI_FAILURE) { 1147 ibd_rc_srq_free_rwqe(state, rwqe); 1148 } 1149 list = list->w_next; 1150 } 1151 return (DDI_SUCCESS); 1152 } 1153 mutex_exit(&state->rc_srq_free_list.dl_mutex); 1154 return (DDI_SUCCESS); 1155 } 1156 1157 /* 1158 * Free an allocated recv wqe. 1159 */ 1160 static void 1161 ibd_rc_srq_free_rwqe(ibd_state_t *state, ibd_rwqe_t *rwqe) 1162 { 1163 /* 1164 * desballoc() failed (no memory) or the posting of rwqe failed. 1165 * 1166 * This rwqe is placed on a free list so that it 1167 * can be reinstated in future. 1168 * 1169 * NOTE: no code currently exists to reinstate 1170 * these "lost" rwqes. 1171 */ 1172 mutex_enter(&state->rc_srq_free_list.dl_mutex); 1173 state->rc_srq_free_list.dl_cnt++; 1174 rwqe->rwqe_next = state->rc_srq_free_list.dl_head; 1175 state->rc_srq_free_list.dl_head = RWQE_TO_WQE(rwqe); 1176 mutex_exit(&state->rc_srq_free_list.dl_mutex); 1177 } 1178 1179 static void 1180 ibd_rc_srq_freemsg_cb(char *arg) 1181 { 1182 ibd_rwqe_t *rwqe = (ibd_rwqe_t *)arg; 1183 ibd_state_t *state = rwqe->w_state; 1184 1185 ASSERT(state->rc_enable_srq); 1186 1187 /* 1188 * If the driver is stopped, just free the rwqe. 
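 *
 * (atomic_add_32_nv(&state->id_running, 0) below is simply an atomic
 * read of id_running: adding zero changes nothing and returns the
 * current value, keeping this load consistent with the atomic
 * increment/decrement updates the driver makes elsewhere.)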
1189 */ 1190 if (atomic_add_32_nv(&state->id_running, 0) == 0) { 1191 if (!rwqe->w_freeing_wqe) { 1192 atomic_dec_32( 1193 &state->rc_srq_rwqe_list.dl_bufs_outstanding); 1194 DPRINT(6, "ibd_rc_srq_freemsg_cb: wqe being freed"); 1195 rwqe->rwqe_im_mblk = NULL; 1196 ibd_rc_srq_free_rwqe(state, rwqe); 1197 } 1198 return; 1199 } 1200 1201 atomic_dec_32(&state->rc_srq_rwqe_list.dl_bufs_outstanding); 1202 1203 ASSERT(state->rc_srq_rwqe_list.dl_cnt < state->rc_srq_size); 1204 ASSERT(!rwqe->w_freeing_wqe); 1205 1206 /* 1207 * Upper layer has released held mblk, so we have 1208 * no more use for keeping the old pointer in 1209 * our rwqe. 1210 */ 1211 rwqe->rwqe_im_mblk = desballoc(rwqe->rwqe_copybuf.ic_bufaddr, 1212 state->rc_mtu + IPOIB_GRH_SIZE, 0, &rwqe->w_freemsg_cb); 1213 if (rwqe->rwqe_im_mblk == NULL) { 1214 DPRINT(40, "ibd_rc_srq_freemsg_cb: desballoc failed"); 1215 ibd_rc_srq_free_rwqe(state, rwqe); 1216 return; 1217 } 1218 1219 if (ibd_rc_post_srq(state, rwqe) == DDI_FAILURE) { 1220 ibd_print_warn(state, "ibd_rc_srq_freemsg_cb: ibd_rc_post_srq" 1221 " failed"); 1222 ibd_rc_srq_free_rwqe(state, rwqe); 1223 return; 1224 } 1225 } 1226 1227 /* 1228 * Post a rwqe to the hardware and add it to the Rx list. 1229 */ 1230 static int 1231 ibd_rc_post_srq(ibd_state_t *state, ibd_rwqe_t *rwqe) 1232 { 1233 /* 1234 * Here we should add dl_cnt before post recv, because 1235 * we would have to make sure dl_cnt is updated before 1236 * the corresponding ibd_rc_process_rx() is called. 1237 */ 1238 ASSERT(state->rc_srq_rwqe_list.dl_cnt < state->rc_srq_size); 1239 atomic_inc_32(&state->rc_srq_rwqe_list.dl_cnt); 1240 if (ibt_post_srq(state->rc_srq_hdl, &rwqe->w_rwr, 1, NULL) != 1241 IBT_SUCCESS) { 1242 atomic_dec_32(&state->rc_srq_rwqe_list.dl_cnt); 1243 DPRINT(40, "ibd_rc_post_srq : ibt_post_srq() failed"); 1244 return (DDI_FAILURE); 1245 } 1246 1247 return (DDI_SUCCESS); 1248 } 1249 1250 /* 1251 * Post a rwqe to the hardware and add it to the Rx list. 1252 */ 1253 static int 1254 ibd_rc_post_rwqe(ibd_rc_chan_t *chan, ibd_rwqe_t *rwqe) 1255 { 1256 /* 1257 * Here we should add dl_cnt before post recv, because we would 1258 * have to make sure dl_cnt has already updated before 1259 * corresponding ibd_rc_process_rx() is called. 
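	 *
	 * That is, the safe ordering is:
	 *
	 *	atomic_inc_32(&dl_cnt);			(count it first)
	 *	if (ibt_post_recv(...) != IBT_SUCCESS)
	 *		atomic_dec_32(&dl_cnt);		(undo on failure)
	 *
	 * If the increment instead followed a successful post, a completion
	 * could reach ibd_rc_process_rx(), which decrements dl_cnt, before
	 * the increment, transiently under-counting the posted rwqes.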
1260 	 */
1261 	atomic_inc_32(&chan->rx_wqe_list.dl_cnt);
1262 	if (ibt_post_recv(chan->chan_hdl, &rwqe->w_rwr, 1, NULL) !=
1263 	    IBT_SUCCESS) {
1264 		atomic_dec_32(&chan->rx_wqe_list.dl_cnt);
1265 		DPRINT(40, "ibd_rc_post_rwqe: ibt_post_recv() failed");
1266 		return (DDI_FAILURE);
1267 	}
1268 	return (DDI_SUCCESS);
1269 }
1270
1271 static int
1272 ibd_rc_alloc_rx_copybufs(ibd_rc_chan_t *chan)
1273 {
1274 	ibd_state_t *state = chan->state;
1275 	ibt_mr_attr_t mem_attr;
1276 	uint_t rc_rx_bufs_sz;
1277
1278 	/*
1279 	 * Allocate one big chunk for all regular rx copy bufs
1280 	 */
1281 	rc_rx_bufs_sz = (state->rc_mtu + IPOIB_GRH_SIZE) * chan->rcq_size;
1282
1283 	chan->rx_bufs = kmem_zalloc(rc_rx_bufs_sz, KM_SLEEP);
1284
1285 	chan->rx_rwqes = kmem_zalloc(chan->rcq_size *
1286 	    sizeof (ibd_rwqe_t), KM_SLEEP);
1287
1288 	/*
1289 	 * Do one memory registration on the entire rxbuf area
1290 	 */
1291 	mem_attr.mr_vaddr = (uint64_t)(uintptr_t)chan->rx_bufs;
1292 	mem_attr.mr_len = rc_rx_bufs_sz;
1293 	mem_attr.mr_as = NULL;
1294 	mem_attr.mr_flags = IBT_MR_SLEEP | IBT_MR_ENABLE_LOCAL_WRITE;
1295 	if (ibt_register_mr(state->id_hca_hdl, state->id_pd_hdl, &mem_attr,
1296 	    &chan->rx_mr_hdl, &chan->rx_mr_desc) != IBT_SUCCESS) {
1297 		DPRINT(40, "ibd_rc_alloc_rx_copybufs: ibt_register_mr failed");
1298 		kmem_free(chan->rx_rwqes, chan->rcq_size * sizeof (ibd_rwqe_t));
1299 		kmem_free(chan->rx_bufs, rc_rx_bufs_sz);
1300 		chan->rx_bufs = NULL;
1301 		chan->rx_rwqes = NULL;
1302 		return (DDI_FAILURE);
1303 	}
1304
1305 	return (DDI_SUCCESS);
1306 }
1307
1308 static void
1309 ibd_rc_free_rx_copybufs(ibd_rc_chan_t *chan)
1310 {
1311 	ibd_state_t *state = chan->state;
1312 	uint_t rc_rx_buf_sz;
1313
1314 	ASSERT(!state->rc_enable_srq);
1315 	ASSERT(chan->rx_rwqes != NULL);
1316 	ASSERT(chan->rx_bufs != NULL);
1317
1318 	/*
1319 	 * Don't change the value of state->rc_mtu between the call to
1320 	 * ibd_rc_alloc_rx_copybufs() and the call to ibd_rc_free_rx_copybufs().
1321 	 */
1322 	rc_rx_buf_sz = state->rc_mtu + IPOIB_GRH_SIZE;
1323
1324 	/*
1325 	 * Unregister rxbuf mr
1326 	 */
1327 	if (ibt_deregister_mr(state->id_hca_hdl,
1328 	    chan->rx_mr_hdl) != IBT_SUCCESS) {
1329 		DPRINT(40, "ibd_rc_free_rx_copybufs: ibt_deregister_mr failed");
1330 	}
1331 	chan->rx_mr_hdl = NULL;
1332
1333 	/*
1334 	 * Free rxbuf memory
1335 	 */
1336 	kmem_free(chan->rx_rwqes, chan->rcq_size * sizeof (ibd_rwqe_t));
1337 	chan->rx_rwqes = NULL;
1338
1339 	kmem_free(chan->rx_bufs, chan->rcq_size * rc_rx_buf_sz);
1340 	chan->rx_bufs = NULL;
1341 }
1342
1343 /*
1344  * Post a certain number of receive buffers and WRs on a RC channel.
1345  */
1346 static int
1347 ibd_rc_init_rxlist(ibd_rc_chan_t *chan)
1348 {
1349 	ibd_state_t *state = chan->state;
1350 	ibd_rwqe_t *rwqe;
1351 	ibt_lkey_t lkey;
1352 	int i;
1353 	uint_t len;
1354 	uint8_t *bufaddr;
1355
1356 	ASSERT(!state->rc_enable_srq);
1357 	if (ibd_rc_alloc_rx_copybufs(chan) != DDI_SUCCESS)
1358 		return (DDI_FAILURE);
1359
1360 	/*
1361 	 * Allocate and setup the rwqe list
1362 	 */
1363 	lkey = chan->rx_mr_desc.md_lkey;
1364 	rwqe = chan->rx_rwqes;
1365 	bufaddr = chan->rx_bufs;
1366 	len = state->rc_mtu + IPOIB_GRH_SIZE;
1367 	for (i = 0; i < chan->rcq_size; i++, rwqe++, bufaddr += len) {
1368 		rwqe->w_state = state;
1369 		rwqe->w_chan = chan;
1370 		rwqe->w_freeing_wqe = B_FALSE;
1371 		rwqe->w_freemsg_cb.free_func = ibd_rc_freemsg_cb;
1372 		rwqe->w_freemsg_cb.free_arg = (char *)rwqe;
1373 		rwqe->rwqe_copybuf.ic_bufaddr = bufaddr;
1374
1375 		if ((rwqe->rwqe_im_mblk = desballoc(bufaddr, len, 0,
1376 		    &rwqe->w_freemsg_cb)) == NULL) {
1377 			DPRINT(40, "ibd_rc_init_rxlist: desballoc() failed");
1378 			rwqe->rwqe_copybuf.ic_bufaddr = NULL;
1379 			ibd_rc_fini_rxlist(chan);
1380 			return (DDI_FAILURE);
1381 		}
1382
1383 		rwqe->rwqe_copybuf.ic_sgl.ds_key = lkey;
1384 		rwqe->rwqe_copybuf.ic_sgl.ds_va =
1385 		    (ib_vaddr_t)(uintptr_t)(bufaddr + IPOIB_GRH_SIZE);
1386 		rwqe->rwqe_copybuf.ic_sgl.ds_len = state->rc_mtu;
1387 		rwqe->w_rwr.wr_id = (ibt_wrid_t)(uintptr_t)rwqe;
1388 		rwqe->w_rwr.wr_nds = 1;
1389 		rwqe->w_rwr.wr_sgl = &rwqe->rwqe_copybuf.ic_sgl;
1390 		(void) ibd_rc_post_rwqe(chan, rwqe);
1391 	}
1392
1393 	return (DDI_SUCCESS);
1394 }
1395
1396 /*
1397  * Free the statically allocated Rx buffer list of a RC channel.
1398  */
1399 static void
1400 ibd_rc_fini_rxlist(ibd_rc_chan_t *chan)
1401 {
1402 	ibd_rwqe_t *rwqe;
1403 	int i;
1404
1405 	if (chan->rx_bufs == NULL) {
1406 		DPRINT(40, "ibd_rc_fini_rxlist: empty chan->rx_bufs, quit");
1407 		return;
1408 	}
1409
1410 	/* bufs_outstanding must be 0 */
1411 	ASSERT((chan->rx_wqe_list.dl_head == NULL) ||
1412 	    (chan->rx_wqe_list.dl_bufs_outstanding == 0));
1413
1414 	mutex_enter(&chan->rx_wqe_list.dl_mutex);
1415 	rwqe = chan->rx_rwqes;
1416 	for (i = 0; i < chan->rcq_size; i++, rwqe++) {
1417 		if (rwqe->rwqe_im_mblk != NULL) {
1418 			rwqe->w_freeing_wqe = B_TRUE;
1419 			freemsg(rwqe->rwqe_im_mblk);
1420 		}
1421 	}
1422 	mutex_exit(&chan->rx_wqe_list.dl_mutex);
1423
1424 	ibd_rc_free_rx_copybufs(chan);
1425 }
1426
1427 /*
1428  * Free an allocated recv wqe.
1429  */
1430 static void
1431 ibd_rc_free_rwqe(ibd_rc_chan_t *chan, ibd_rwqe_t *rwqe)
1432 {
1433 	/*
1434 	 * desballoc() failed (no memory) or the posting of rwqe failed.
1435 	 *
1436 	 * This rwqe is placed on a free list so that it
1437 	 * can be reinstated in future.
1438 	 *
1439 	 * NOTE: no code currently exists to reinstate
1440 	 * these "lost" rwqes.
1441 	 */
1442 	mutex_enter(&chan->rx_free_list.dl_mutex);
1443 	chan->rx_free_list.dl_cnt++;
1444 	rwqe->rwqe_next = chan->rx_free_list.dl_head;
1445 	chan->rx_free_list.dl_head = RWQE_TO_WQE(rwqe);
1446 	mutex_exit(&chan->rx_free_list.dl_mutex);
1447 }
1448
1449 /*
1450  * Processing to be done after receipt of a packet; hand off to GLD
1451  * in the format expected by GLD.
1452  */
1453 static void
1454 ibd_rc_process_rx(ibd_rc_chan_t *chan, ibd_rwqe_t *rwqe, ibt_wc_t *wc)
1455 {
1456 	ibd_state_t *state = chan->state;
1457 	ib_header_info_t *phdr;
1458 	ipoib_hdr_t *ipibp;
1459 	mblk_t *mp;
1460 	mblk_t *mpc;
1461 	int rxcnt;
1462 	ip6_t *ip6h;
1463 	int len;
1464
1465 	/*
1466 	 * Track number handed to upper layer, and number still
1467 	 * available to receive packets.
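	 *
	 * The branch below also picks between the two Rx strategies: loan
	 * the mapped DMA buffer up to the stack (transfer mode) only while
	 * enough rwqes remain (rxcnt >= id_rc_rx_rwqe_thresh) and the packet
	 * is big enough to be worth it (wc_bytes_xfer > id_rc_rx_copy_thresh);
	 * otherwise copy into a fresh allocb() and repost the rwqe at once.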
1468 	 */
1469 	if (state->rc_enable_srq) {
1470 		rxcnt = atomic_dec_32_nv(&state->rc_srq_rwqe_list.dl_cnt);
1471 	} else {
1472 		rxcnt = atomic_dec_32_nv(&chan->rx_wqe_list.dl_cnt);
1473 	}
1474
1475 	/*
1476 	 * It cannot be an IBA multicast packet.
1477 	 */
1478 	ASSERT(!(wc->wc_flags & IBT_WC_GRH_PRESENT));
1479
1480 	/* For the connection reaper routine ibd_rc_conn_timeout_call() */
1481 	chan->is_used = B_TRUE;
1482
1483 #ifdef DEBUG
1484 	if (rxcnt < state->id_rc_rx_rwqe_thresh) {
1485 		state->rc_rwqe_short++;
1486 	}
1487 #endif
1488
1489 	/*
1490 	 * Loan the buffer up when resources allow; otherwise copy and repost.
1491 	 */
1492 	if ((rxcnt >= state->id_rc_rx_rwqe_thresh) &&
1493 	    (wc->wc_bytes_xfer > state->id_rc_rx_copy_thresh)) {
1494 		atomic_add_64(&state->rc_rcv_trans_byte, wc->wc_bytes_xfer);
1495 		atomic_inc_64(&state->rc_rcv_trans_pkt);
1496
1497 		/*
1498 		 * Record how many rwqes are held by the upper
1499 		 * network layer
1500 		 */
1501 		if (state->rc_enable_srq) {
1502 			atomic_inc_32(
1503 			    &state->rc_srq_rwqe_list.dl_bufs_outstanding);
1504 		} else {
1505 			atomic_inc_32(&chan->rx_wqe_list.dl_bufs_outstanding);
1506 		}
1507 		mp = rwqe->rwqe_im_mblk;
1508 	} else {
1509 		atomic_add_64(&state->rc_rcv_copy_byte, wc->wc_bytes_xfer);
1510 		atomic_inc_64(&state->rc_rcv_copy_pkt);
1511
1512 		if ((mp = allocb(wc->wc_bytes_xfer + IPOIB_GRH_SIZE,
1513 		    BPRI_HI)) == NULL) {	/* no memory */
1514 			DPRINT(40, "ibd_rc_process_rx: allocb() failed");
1515 			state->rc_rcv_alloc_fail++;
1516 			if (state->rc_enable_srq) {
1517 				if (ibd_rc_post_srq(state, rwqe) ==
1518 				    DDI_FAILURE) {
1519 					ibd_rc_srq_free_rwqe(state, rwqe);
1520 				}
1521 			} else {
1522 				if (ibd_rc_post_rwqe(chan, rwqe) ==
1523 				    DDI_FAILURE) {
1524 					ibd_rc_free_rwqe(chan, rwqe);
1525 				}
1526 			}
1527 			return;
1528 		}
1529
1530 		bcopy(rwqe->rwqe_im_mblk->b_rptr + IPOIB_GRH_SIZE,
1531 		    mp->b_wptr + IPOIB_GRH_SIZE, wc->wc_bytes_xfer);
1532
1533 		if (state->rc_enable_srq) {
1534 			if (ibd_rc_post_srq(state, rwqe) == DDI_FAILURE) {
1535 				ibd_rc_srq_free_rwqe(state, rwqe);
1536 			}
1537 		} else {
1538 			if (ibd_rc_post_rwqe(chan, rwqe) == DDI_FAILURE) {
1539 				ibd_rc_free_rwqe(chan, rwqe);
1540 			}
1541 		}
1542 	}
1543
1544 	ipibp = (ipoib_hdr_t *)((uchar_t *)mp->b_rptr + IPOIB_GRH_SIZE);
1545 	if (ntohs(ipibp->ipoib_type) == ETHERTYPE_IPV6) {
1546 		ip6h = (ip6_t *)((uchar_t *)ipibp + sizeof (ipoib_hdr_t));
1547 		len = ntohs(ip6h->ip6_plen);
1548 		if (ip6h->ip6_nxt == IPPROTO_ICMPV6) {
1549 			/* LINTED: E_CONSTANT_CONDITION */
1550 			IBD_PAD_NSNA(ip6h, len, IBD_RECV);
1551 		}
1552 	}
1553
1554 	phdr = (ib_header_info_t *)mp->b_rptr;
1555 	phdr->ib_grh.ipoib_vertcflow = 0;
1556 	ovbcopy(&state->id_macaddr, &phdr->ib_dst,
1557 	    sizeof (ipoib_mac_t));
1558 	mp->b_wptr = mp->b_rptr + wc->wc_bytes_xfer + IPOIB_GRH_SIZE;
1559
1560 	/*
1561 	 * Can RC mode in IB guarantee its checksum correctness?
1562 	 *
1563 	 *	mac_hcksum_set(mp, 0, 0, 0, 0, HCK_FULLCKSUM | HCK_FULLCKSUM_OK);
1564 	 */
1565
1566 	/*
1567 	 * Make sure this is NULL or we're in trouble.
1568 */ 1569 if (mp->b_next != NULL) { 1570 ibd_print_warn(state, 1571 "ibd_rc_process_rx: got duplicate mp from rcq?"); 1572 mp->b_next = NULL; 1573 } 1574 1575 /* 1576 * Add this mp to the list of processed mp's to send to 1577 * the nw layer 1578 */ 1579 if (state->rc_enable_srq) { 1580 mutex_enter(&state->rc_rx_lock); 1581 if (state->rc_rx_mp) { 1582 ASSERT(state->rc_rx_mp_tail != NULL); 1583 state->rc_rx_mp_tail->b_next = mp; 1584 } else { 1585 ASSERT(state->rc_rx_mp_tail == NULL); 1586 state->rc_rx_mp = mp; 1587 } 1588 1589 state->rc_rx_mp_tail = mp; 1590 state->rc_rx_mp_len++; 1591 1592 if (state->rc_rx_mp_len >= IBD_MAX_RX_MP_LEN) { 1593 mpc = state->rc_rx_mp; 1594 1595 state->rc_rx_mp = NULL; 1596 state->rc_rx_mp_tail = NULL; 1597 state->rc_rx_mp_len = 0; 1598 mutex_exit(&state->rc_rx_lock); 1599 mac_rx(state->id_mh, NULL, mpc); 1600 } else { 1601 mutex_exit(&state->rc_rx_lock); 1602 } 1603 } else { 1604 mutex_enter(&chan->rx_lock); 1605 if (chan->rx_mp) { 1606 ASSERT(chan->rx_mp_tail != NULL); 1607 chan->rx_mp_tail->b_next = mp; 1608 } else { 1609 ASSERT(chan->rx_mp_tail == NULL); 1610 chan->rx_mp = mp; 1611 } 1612 1613 chan->rx_mp_tail = mp; 1614 chan->rx_mp_len++; 1615 1616 if (chan->rx_mp_len >= IBD_MAX_RX_MP_LEN) { 1617 mpc = chan->rx_mp; 1618 1619 chan->rx_mp = NULL; 1620 chan->rx_mp_tail = NULL; 1621 chan->rx_mp_len = 0; 1622 mutex_exit(&chan->rx_lock); 1623 mac_rx(state->id_mh, NULL, mpc); 1624 } else { 1625 mutex_exit(&chan->rx_lock); 1626 } 1627 } 1628 } 1629 1630 /* 1631 * Callback code invoked from STREAMs when the recv data buffer is free 1632 * for recycling. 1633 */ 1634 static void 1635 ibd_rc_freemsg_cb(char *arg) 1636 { 1637 ibd_rwqe_t *rwqe = (ibd_rwqe_t *)arg; 1638 ibd_rc_chan_t *chan = rwqe->w_chan; 1639 ibd_state_t *state = rwqe->w_state; 1640 1641 /* 1642 * If the wqe is being destructed, do not attempt recycling. 1643 */ 1644 if (rwqe->w_freeing_wqe == B_TRUE) { 1645 return; 1646 } 1647 1648 ASSERT(!state->rc_enable_srq); 1649 ASSERT(chan->rx_wqe_list.dl_cnt < chan->rcq_size); 1650 1651 rwqe->rwqe_im_mblk = desballoc(rwqe->rwqe_copybuf.ic_bufaddr, 1652 state->rc_mtu + IPOIB_GRH_SIZE, 0, &rwqe->w_freemsg_cb); 1653 if (rwqe->rwqe_im_mblk == NULL) { 1654 DPRINT(40, "ibd_rc_freemsg_cb: desballoc() failed"); 1655 ibd_rc_free_rwqe(chan, rwqe); 1656 return; 1657 } 1658 1659 /* 1660 * Post back to h/w. We could actually have more than 1661 * id_num_rwqe WQEs on the list if there were multiple 1662 * ibd_freemsg_cb() calls outstanding (since the lock is 1663 * not held the entire time). This will start getting 1664 * corrected over subsequent ibd_freemsg_cb() calls. 1665 */ 1666 if (ibd_rc_post_rwqe(chan, rwqe) == DDI_FAILURE) { 1667 ibd_rc_free_rwqe(chan, rwqe); 1668 return; 1669 } 1670 atomic_dec_32(&chan->rx_wqe_list.dl_bufs_outstanding); 1671 } 1672 1673 /* 1674 * Common code for interrupt handling as well as for polling 1675 * for all completed wqe's while detaching. 1676 */ 1677 static void 1678 ibd_rc_poll_rcq(ibd_rc_chan_t *chan, ibt_cq_hdl_t cq_hdl) 1679 { 1680 ibd_wqe_t *wqe; 1681 ibt_wc_t *wc, *wcs; 1682 uint_t numwcs, real_numwcs; 1683 int i; 1684 1685 wcs = chan->rx_wc; 1686 numwcs = IBD_RC_MAX_CQ_WC; 1687 1688 while (ibt_poll_cq(cq_hdl, wcs, numwcs, &real_numwcs) == IBT_SUCCESS) { 1689 for (i = 0, wc = wcs; i < real_numwcs; i++, wc++) { 1690 wqe = (ibd_wqe_t *)(uintptr_t)wc->wc_id; 1691 if (wc->wc_status != IBT_WC_SUCCESS) { 1692 chan->state->rc_rcq_err++; 1693 /* 1694 * Channel being torn down. 
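				 *
				 * (IBT_WC_WR_FLUSHED_ERR is the expected
				 * status on teardown: once a channel enters
				 * the error state, every outstanding rwqe
				 * completes as flushed. Such completions are
				 * only accounted here; handing them to
				 * ibd_rc_process_rx() could repost buffers
				 * into a pool being torn down.)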
1695 */ 1696 DPRINT(40, "ibd_rc_poll_rcq: wc_status(%d) != " 1697 "SUCC, chan=%p", wc->wc_status, chan); 1698 if (wc->wc_status == IBT_WC_WR_FLUSHED_ERR) { 1699 /* 1700 * Do not invoke Rx handler because 1701 * it might add buffers to the Rx pool 1702 * when we are trying to deinitialize. 1703 */ 1704 continue; 1705 } 1706 } 1707 ibd_rc_process_rx(chan, WQE_TO_RWQE(wqe), wc); 1708 } 1709 } 1710 } 1711 1712 /* Receive CQ handler */ 1713 /* ARGSUSED */ 1714 static void 1715 ibd_rc_rcq_handler(ibt_cq_hdl_t cq_hdl, void *arg) 1716 { 1717 ibd_rc_chan_t *chan = (ibd_rc_chan_t *)arg; 1718 ibd_state_t *state = chan->state; 1719 1720 atomic_inc_32(&chan->rcq_invoking); 1721 ASSERT(chan->chan_state == IBD_RC_STATE_PAS_ESTAB); 1722 1723 /* 1724 * Poll for completed entries; the CQ will not interrupt any 1725 * more for incoming (or transmitted) packets. 1726 */ 1727 ibd_rc_poll_rcq(chan, chan->rcq_hdl); 1728 1729 /* 1730 * Now enable CQ notifications; all packets that arrive now 1731 * (or complete transmission) will cause new interrupts. 1732 */ 1733 if (ibt_enable_cq_notify(chan->rcq_hdl, IBT_NEXT_COMPLETION) != 1734 IBT_SUCCESS) { 1735 /* 1736 * We do not expect a failure here. 1737 */ 1738 DPRINT(40, "ibd_rc_rcq_handler: ibt_enable_cq_notify() failed"); 1739 } 1740 1741 /* 1742 * Repoll to catch all packets that might have arrived after 1743 * we finished the first poll loop and before interrupts got 1744 * armed. 1745 */ 1746 ibd_rc_poll_rcq(chan, chan->rcq_hdl); 1747 1748 if (state->rc_enable_srq) { 1749 mutex_enter(&state->rc_rx_lock); 1750 1751 if (state->rc_rx_mp != NULL) { 1752 mblk_t *mpc; 1753 mpc = state->rc_rx_mp; 1754 1755 state->rc_rx_mp = NULL; 1756 state->rc_rx_mp_tail = NULL; 1757 state->rc_rx_mp_len = 0; 1758 1759 mutex_exit(&state->rc_rx_lock); 1760 mac_rx(state->id_mh, NULL, mpc); 1761 } else { 1762 mutex_exit(&state->rc_rx_lock); 1763 } 1764 } else { 1765 mutex_enter(&chan->rx_lock); 1766 1767 if (chan->rx_mp != NULL) { 1768 mblk_t *mpc; 1769 mpc = chan->rx_mp; 1770 1771 chan->rx_mp = NULL; 1772 chan->rx_mp_tail = NULL; 1773 chan->rx_mp_len = 0; 1774 1775 mutex_exit(&chan->rx_lock); 1776 mac_rx(state->id_mh, NULL, mpc); 1777 } else { 1778 mutex_exit(&chan->rx_lock); 1779 } 1780 } 1781 atomic_dec_32(&chan->rcq_invoking); 1782 } 1783 1784 /* 1785 * Allocate the statically allocated Tx buffer list. 
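 *
 * The pool is one contiguous, singly-registered chunk carved into
 * rc_mtu-sized slices and threaded onto a free list, roughly:
 *
 *	desc[0].lb_buf = base;			desc[0].lb_next = &desc[1];
 *	desc[1].lb_buf = base + rc_mtu;		desc[1].lb_next = &desc[2];
 *	...
 *	desc[n-1].lb_next = NULL;
 *
 * with rc_tx_largebuf_free_head pointing at desc[0] and
 * rc_tx_largebuf_nfree counting the slices still free.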
1786 */ 1787 int 1788 ibd_rc_init_tx_largebuf_list(ibd_state_t *state) 1789 { 1790 ibd_rc_tx_largebuf_t *lbufp; 1791 ibd_rc_tx_largebuf_t *tail; 1792 uint8_t *memp; 1793 ibt_mr_attr_t mem_attr; 1794 uint32_t num_swqe; 1795 size_t mem_size; 1796 int i; 1797 1798 num_swqe = state->id_rc_num_swqe - 1; 1799 1800 /* 1801 * Allocate one big chunk for all Tx large copy bufs 1802 */ 1803 /* Don't transfer IPOIB_GRH_SIZE bytes (40 bytes) */ 1804 mem_size = num_swqe * state->rc_mtu; 1805 state->rc_tx_mr_bufs = kmem_zalloc(mem_size, KM_SLEEP); 1806 1807 mem_attr.mr_len = mem_size; 1808 mem_attr.mr_vaddr = (uint64_t)(uintptr_t)state->rc_tx_mr_bufs; 1809 mem_attr.mr_as = NULL; 1810 mem_attr.mr_flags = IBT_MR_SLEEP; 1811 if (ibt_register_mr(state->id_hca_hdl, state->id_pd_hdl, &mem_attr, 1812 &state->rc_tx_mr_hdl, &state->rc_tx_mr_desc) != IBT_SUCCESS) { 1813 DPRINT(40, "ibd_rc_init_tx_largebuf_list: ibt_register_mr " 1814 "failed"); 1815 kmem_free(state->rc_tx_mr_bufs, mem_size); 1816 state->rc_tx_mr_bufs = NULL; 1817 return (DDI_FAILURE); 1818 } 1819 1820 state->rc_tx_largebuf_desc_base = kmem_zalloc(num_swqe * 1821 sizeof (ibd_rc_tx_largebuf_t), KM_SLEEP); 1822 1823 /* 1824 * Set up the buf chain 1825 */ 1826 memp = state->rc_tx_mr_bufs; 1827 mutex_enter(&state->rc_tx_large_bufs_lock); 1828 lbufp = state->rc_tx_largebuf_desc_base; 1829 for (i = 0; i < num_swqe; i++) { 1830 lbufp->lb_buf = memp; 1831 lbufp->lb_next = lbufp + 1; 1832 1833 tail = lbufp; 1834 1835 memp += state->rc_mtu; 1836 lbufp++; 1837 } 1838 tail->lb_next = NULL; 1839 1840 /* 1841 * Set up the buffer information in ibd state 1842 */ 1843 state->rc_tx_largebuf_free_head = state->rc_tx_largebuf_desc_base; 1844 state->rc_tx_largebuf_nfree = num_swqe; 1845 mutex_exit(&state->rc_tx_large_bufs_lock); 1846 return (DDI_SUCCESS); 1847 } 1848 1849 void 1850 ibd_rc_fini_tx_largebuf_list(ibd_state_t *state) 1851 { 1852 uint32_t num_swqe; 1853 1854 num_swqe = state->id_rc_num_swqe - 1; 1855 1856 if (ibt_deregister_mr(state->id_hca_hdl, 1857 state->rc_tx_mr_hdl) != IBT_SUCCESS) { 1858 DPRINT(40, "ibd_rc_fini_tx_largebuf_list: ibt_deregister_mr() " 1859 "failed"); 1860 } 1861 state->rc_tx_mr_hdl = NULL; 1862 1863 kmem_free(state->rc_tx_mr_bufs, num_swqe * state->rc_mtu); 1864 state->rc_tx_mr_bufs = NULL; 1865 1866 kmem_free(state->rc_tx_largebuf_desc_base, 1867 num_swqe * sizeof (ibd_rc_tx_largebuf_t)); 1868 state->rc_tx_largebuf_desc_base = NULL; 1869 } 1870 1871 static int 1872 ibd_rc_alloc_tx_copybufs(ibd_rc_chan_t *chan) 1873 { 1874 ibt_mr_attr_t mem_attr; 1875 ibd_state_t *state; 1876 1877 state = chan->state; 1878 ASSERT(state != NULL); 1879 1880 /* 1881 * Allocate one big chunk for all regular tx copy bufs 1882 */ 1883 mem_attr.mr_len = chan->scq_size * state->id_rc_tx_copy_thresh; 1884 1885 chan->tx_mr_bufs = kmem_zalloc(mem_attr.mr_len, KM_SLEEP); 1886 1887 /* 1888 * Do one memory registration on the entire txbuf area 1889 */ 1890 mem_attr.mr_vaddr = (uint64_t)(uintptr_t)chan->tx_mr_bufs; 1891 mem_attr.mr_as = NULL; 1892 mem_attr.mr_flags = IBT_MR_SLEEP; 1893 if (ibt_register_mr(state->id_hca_hdl, state->id_pd_hdl, &mem_attr, 1894 &chan->tx_mr_hdl, &chan->tx_mr_desc) != IBT_SUCCESS) { 1895 DPRINT(40, "ibd_rc_alloc_tx_copybufs: ibt_register_mr failed"); 1896 ASSERT(mem_attr.mr_len == 1897 chan->scq_size * state->id_rc_tx_copy_thresh); 1898 kmem_free(chan->tx_mr_bufs, mem_attr.mr_len); 1899 chan->tx_mr_bufs = NULL; 1900 return (DDI_FAILURE); 1901 } 1902 1903 return (DDI_SUCCESS); 1904 } 1905 1906 /* 1907 * Allocate the statically allocated 
1906 /*
1907  * Allocate and initialize the Tx WQE list (and its copy buffers).
1908  */
1909 static int
1910 ibd_rc_init_txlist(ibd_rc_chan_t *chan)
1911 {
1912 	ibd_swqe_t *swqe;
1913 	int i;
1914 	ibt_lkey_t lkey;
1915 	ibd_state_t *state = chan->state;
1916 
1917 	if (ibd_rc_alloc_tx_copybufs(chan) != DDI_SUCCESS)
1918 		return (DDI_FAILURE);
1919 
1920 	/*
1921 	 * Allocate and set up the swqe list
1922 	 */
1923 	lkey = chan->tx_mr_desc.md_lkey;
1924 	chan->tx_wqes = kmem_zalloc(chan->scq_size *
1925 	    sizeof (ibd_swqe_t), KM_SLEEP);
1926 	swqe = chan->tx_wqes;
1927 	for (i = 0; i < chan->scq_size; i++, swqe++) {
1928 		swqe->swqe_next = NULL;
1929 		swqe->swqe_im_mblk = NULL;
1930 
1931 		swqe->swqe_copybuf.ic_sgl.ds_key = lkey;
1932 		swqe->swqe_copybuf.ic_sgl.ds_len = 0; /* set in send */
1933 
1934 		swqe->w_swr.wr_id = (ibt_wrid_t)(uintptr_t)swqe;
1935 		swqe->w_swr.wr_flags = IBT_WR_SEND_SIGNAL;
1936 		swqe->swqe_copybuf.ic_sgl.ds_va = (ib_vaddr_t)(uintptr_t)
1937 		    (chan->tx_mr_bufs + i * state->id_rc_tx_copy_thresh);
1938 		swqe->w_swr.wr_trans = IBT_RC_SRV;
1939 
1940 		/* Add to list */
1941 		mutex_enter(&chan->tx_wqe_list.dl_mutex);
1942 		chan->tx_wqe_list.dl_cnt++;
1943 		swqe->swqe_next = chan->tx_wqe_list.dl_head;
1944 		chan->tx_wqe_list.dl_head = SWQE_TO_WQE(swqe);
1945 		mutex_exit(&chan->tx_wqe_list.dl_mutex);
1946 	}
1947 
1948 	return (DDI_SUCCESS);
1949 }
1950 
1951 /*
1952  * Free the Tx WQE list and its copy buffers.
1953  */
1954 static void
1955 ibd_rc_fini_txlist(ibd_rc_chan_t *chan)
1956 {
1957 	ibd_state_t *state = chan->state;
1958 	if (chan->tx_mr_hdl != NULL) {
1959 		if (ibt_deregister_mr(chan->state->id_hca_hdl,
1960 		    chan->tx_mr_hdl) != IBT_SUCCESS) {
1961 			DPRINT(40, "ibd_rc_fini_txlist: ibt_deregister_mr "
1962 			    "failed");
1963 		}
1964 		chan->tx_mr_hdl = NULL;
1965 	}
1966 
1967 	if (chan->tx_mr_bufs != NULL) {
1968 		kmem_free(chan->tx_mr_bufs, chan->scq_size *
1969 		    state->id_rc_tx_copy_thresh);
1970 		chan->tx_mr_bufs = NULL;
1971 	}
1972 
1973 	if (chan->tx_wqes != NULL) {
1974 		kmem_free(chan->tx_wqes, chan->scq_size *
1975 		    sizeof (ibd_swqe_t));
1976 		chan->tx_wqes = NULL;
1977 	}
1978 }
1979 
1980 /*
1981  * Acquire a send wqe from the free list.
1982  * Returns the acquired swqe, or NULL if none is available.
1983  */
1984 ibd_swqe_t *
1985 ibd_rc_acquire_swqes(ibd_rc_chan_t *chan)
1986 {
1987 	ibd_swqe_t *wqe;
1988 
1989 	mutex_enter(&chan->tx_rel_list.dl_mutex);
1990 	if (chan->tx_rel_list.dl_head != NULL) {
1991 		/* transfer tx_rel_list to tx_wqe_list */
1992 		chan->tx_wqe_list.dl_head =
1993 		    chan->tx_rel_list.dl_head;
1994 		chan->tx_wqe_list.dl_cnt =
1995 		    chan->tx_rel_list.dl_cnt;
1996 		chan->tx_wqe_list.dl_pending_sends = B_FALSE;
1997 
1998 		/* clear tx_rel_list */
1999 		chan->tx_rel_list.dl_head = NULL;
2000 		chan->tx_rel_list.dl_cnt = 0;
2001 		mutex_exit(&chan->tx_rel_list.dl_mutex);
2002 
2003 		wqe = WQE_TO_SWQE(chan->tx_wqe_list.dl_head);
2004 		chan->tx_wqe_list.dl_cnt -= 1;
2005 		chan->tx_wqe_list.dl_head = wqe->swqe_next;
2006 	} else {	/* no free swqe */
2007 		mutex_exit(&chan->tx_rel_list.dl_mutex);
2008 		chan->tx_wqe_list.dl_pending_sends = B_TRUE;
2009 		wqe = NULL;
2010 	}
2011 	return (wqe);
2012 }
2013 
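/*
 * Editorial addition, not part of the original source: free swqes are
 * kept on two lists to reduce lock contention. The sender consumes
 * tx_wqe_list while completions refill tx_rel_list, and
 * ibd_rc_acquire_swqes() migrates the whole release list over only
 * when the send list runs dry. A Tx-path caller would do roughly:
 *
 *	swqe = ibd_rc_acquire_swqes(chan);
 *	if (swqe == NULL) {
 *		back-pressure: dl_pending_sends was set, so the
 *		completion path will call mac_tx_update() later
 *	}
 *
 * (hypothetical usage; the real caller is the ibd send path).
 */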
2014 /*
2015  * Release a send wqe back onto the free list.
2016  */
2017 static void
2018 ibd_rc_release_swqe(ibd_rc_chan_t *chan, ibd_swqe_t *swqe)
2019 {
2020 	/*
2021 	 * Add back on Tx list for reuse.
2022 	 */
2023 	swqe->swqe_next = NULL;
2024 	mutex_enter(&chan->tx_rel_list.dl_mutex);
2025 	chan->tx_rel_list.dl_pending_sends = B_FALSE;
2026 	swqe->swqe_next = chan->tx_rel_list.dl_head;
2027 	chan->tx_rel_list.dl_head = SWQE_TO_WQE(swqe);
2028 	chan->tx_rel_list.dl_cnt++;
2029 	mutex_exit(&chan->tx_rel_list.dl_mutex);
2030 }
2031 
2032 void
2033 ibd_rc_post_send(ibd_rc_chan_t *chan, ibd_swqe_t *node)
2034 {
2035 	uint_t i;
2036 	uint_t num_posted;
2037 	uint_t n_wrs;
2038 	ibt_status_t ibt_status;
2039 	ibt_send_wr_t wrs[IBD_MAX_TX_POST_MULTIPLE];
2040 	ibd_swqe_t *tx_head, *elem;
2041 	ibd_swqe_t *nodes[IBD_MAX_TX_POST_MULTIPLE];
2042 
2043 	/* post the one request, then check for more */
2044 	ibt_status = ibt_post_send(chan->chan_hdl,
2045 	    &node->w_swr, 1, NULL);
2046 	if (ibt_status != IBT_SUCCESS) {
2047 		ibd_print_warn(chan->state, "ibd_rc_post_send: "
2048 		    "posting one wr failed: ret=%d", ibt_status);
2049 		ibd_rc_tx_cleanup(node);
2050 	}
2051 
2052 	tx_head = NULL;
2053 	for (;;) {
2054 		if (tx_head == NULL) {
2055 			mutex_enter(&chan->tx_post_lock);
2056 			tx_head = chan->tx_head;
2057 			if (tx_head == NULL) {
2058 				chan->tx_busy = 0;
2059 				mutex_exit(&chan->tx_post_lock);
2060 				return;
2061 			}
2062 			chan->tx_head = NULL;
2063 			mutex_exit(&chan->tx_post_lock);
2064 		}
2065 
2066 		/*
2067 		 * Collect pending requests, IBD_MAX_TX_POST_MULTIPLE wrs
2068 		 * at a time if possible, and keep posting them.
2069 		 */
2070 		for (n_wrs = 0, elem = tx_head;
2071 		    (elem) && (n_wrs < IBD_MAX_TX_POST_MULTIPLE);
2072 		    elem = WQE_TO_SWQE(elem->swqe_next), n_wrs++) {
2073 			nodes[n_wrs] = elem;
2074 			wrs[n_wrs] = elem->w_swr;
2075 		}
2076 		tx_head = elem;
2077 
2078 		ASSERT(n_wrs != 0);
2079 
2080 		/*
2081 		 * If posting fails for some reason, we'll never receive
2082 		 * a completion notification for those wrs, so we must
2083 		 * clean them up here. But we need to make sure we don't
2084 		 * clean up nodes whose wrs have been successfully posted.
2085 		 * We assume that the HCA driver returns on the first
2086 		 * failure to post and therefore the first 'num_posted'
2087 		 * entries don't need cleanup here.
2088 		 */
2089 		num_posted = 0;
2090 		ibt_status = ibt_post_send(chan->chan_hdl,
2091 		    wrs, n_wrs, &num_posted);
2092 		if (ibt_status != IBT_SUCCESS) {
2093 			ibd_print_warn(chan->state, "ibd_rc_post_send: "
2094 			    "posting multiple wrs failed: "
2095 			    "requested=%d, done=%d, ret=%d",
2096 			    n_wrs, num_posted, ibt_status);
2097 
2098 			for (i = num_posted; i < n_wrs; i++)
2099 				ibd_rc_tx_cleanup(nodes[i]);
2100 		}
2101 	}
2102 }
2103 
2104 /*
2105  * Common code that deals with cleanup after a successful or
2106  * erroneous transmission attempt.
2107  */
2108 void
2109 ibd_rc_tx_cleanup(ibd_swqe_t *swqe)
2110 {
2111 	ibd_ace_t *ace = swqe->w_ahandle;
2112 	ibd_state_t *state;
2113 
2114 	ASSERT(ace != NULL);
2115 	ASSERT(ace->ac_chan != NULL);
2116 
2117 	state = ace->ac_chan->state;
2118 
2119 	/*
2120 	 * If this was a dynamic registration in ibd_send(),
2121 	 * deregister now.
2122 	 */
2123 	if (swqe->swqe_im_mblk != NULL) {
2124 		ASSERT(swqe->w_buftype == IBD_WQE_MAPPED);
2125 		if (swqe->w_buftype == IBD_WQE_MAPPED) {
2126 			ibd_unmap_mem(state, swqe);
2127 		}
2128 		freemsg(swqe->swqe_im_mblk);
2129 		swqe->swqe_im_mblk = NULL;
2130 	} else {
2131 		ASSERT(swqe->w_buftype != IBD_WQE_MAPPED);
2132 	}
2133 
2134 	if (swqe->w_buftype == IBD_WQE_RC_COPYBUF) {
2135 		ibd_rc_tx_largebuf_t *lbufp;
2136 
2137 		lbufp = swqe->w_rc_tx_largebuf;
2138 		ASSERT(lbufp != NULL);
2139 
2140 		mutex_enter(&state->rc_tx_large_bufs_lock);
2141 		lbufp->lb_next = state->rc_tx_largebuf_free_head;
2142 		state->rc_tx_largebuf_free_head = lbufp;
2143 		state->rc_tx_largebuf_nfree++;
2144 		mutex_exit(&state->rc_tx_large_bufs_lock);
2145 		swqe->w_rc_tx_largebuf = NULL;
2146 	}
2147 
2148 
2149 	/*
2150 	 * Release the send wqe for reuse.
2151 	 */
2152 	ibd_rc_release_swqe(ace->ac_chan, swqe);
2153 
2154 	/*
2155 	 * Drop the reference count on the AH; it can be reused
2156 	 * now for a different destination if there are no more
2157 	 * posted sends that will use it. This can be eliminated
2158 	 * if we can always associate each Tx buffer with an AH.
2159 	 * (Unlike the UD path, ace cannot be NULL here; it was
2160 	 * ASSERTed non-NULL on entry.)
2161 	 */
2162 	ibd_dec_ref_ace(state, ace);
2163 }
2164 
2165 void
2166 ibd_rc_drain_scq(ibd_rc_chan_t *chan, ibt_cq_hdl_t cq_hdl)
2167 {
2168 	ibd_state_t *state = chan->state;
2169 	ibd_wqe_t *wqe;
2170 	ibt_wc_t *wc, *wcs;
2171 	ibd_ace_t *ace;
2172 	uint_t numwcs, real_numwcs;
2173 	int i;
2174 	boolean_t encount_error;
2175 
2176 	wcs = chan->tx_wc;
2177 	numwcs = IBD_RC_MAX_CQ_WC;
2178 	encount_error = B_FALSE;
2179 
2180 	while (ibt_poll_cq(cq_hdl, wcs, numwcs, &real_numwcs) == IBT_SUCCESS) {
2181 		for (i = 0, wc = wcs; i < real_numwcs; i++, wc++) {
2182 			wqe = (ibd_wqe_t *)(uintptr_t)wc->wc_id;
2183 			if (wc->wc_status != IBT_WC_SUCCESS) {
2184 				if (encount_error == B_FALSE) {
2185 					/*
2186 					 * This RC channel is in an error
2187 					 * state; remove it.
2188 					 */
2189 					encount_error = B_TRUE;
2190 					mutex_enter(&state->id_ac_mutex);
2191 					if ((chan->chan_state ==
2192 					    IBD_RC_STATE_ACT_ESTAB) &&
2193 					    (chan->state->id_link_state ==
2194 					    LINK_STATE_UP) &&
2195 					    ((ace = ibd_acache_find(state,
2196 					    &chan->ace->ac_mac, B_FALSE, 0))
2197 					    != NULL) && (ace == chan->ace)) {
2198 						ASSERT(ace->ac_mce == NULL);
2199 						INC_REF(ace, 1);
2200 						IBD_ACACHE_PULLOUT_ACTIVE(
2201 						    state, ace);
2202 						chan->chan_state =
2203 						    IBD_RC_STATE_ACT_CLOSING;
2204 						mutex_exit(&state->id_ac_mutex);
2205 						state->rc_reset_cnt++;
2206 						DPRINT(30, "ibd_rc_drain_scq: "
2207 						    "wc_status(%d) != SUCC, "
2208 						    "chan=%p, ace=%p, "
2209 						    "link_state=%d, "
2210 						    "reset RC channel",
2211 						    wc->wc_status, chan,
2212 						    chan->ace, chan->state->
2213 						    id_link_state);
2214 						ibd_rc_signal_act_close(
2215 						    state, ace);
2216 					} else {
2217 						mutex_exit(&state->id_ac_mutex);
2218 						state->
2219 						    rc_act_close_simultaneous++;
2220 						DPRINT(40, "ibd_rc_drain_scq: "
2221 						    "wc_status(%d) != SUCC, "
2222 						    "chan=%p, chan_state=%d, "
2223 						    "ace=%p, link_state=%d. "
2224 						    "other thread is closing "
2225 						    "it", wc->wc_status, chan,
2226 						    chan->chan_state, chan->ace,
2227 						    chan->state->id_link_state);
2228 					}
2229 				}
2230 			}
2231 			ibd_rc_tx_cleanup(WQE_TO_SWQE(wqe));
2232 		}
2233 
2234 		mutex_enter(&state->id_sched_lock);
2235 		if (state->id_sched_needed == 0) {
2236 			mutex_exit(&state->id_sched_lock);
2237 		} else if (state->id_sched_needed & IBD_RSRC_RC_SWQE) {
2238 			mutex_enter(&chan->tx_wqe_list.dl_mutex);
2239 			mutex_enter(&chan->tx_rel_list.dl_mutex);
2240 			if ((chan->tx_rel_list.dl_cnt +
2241 			    chan->tx_wqe_list.dl_cnt) > IBD_RC_TX_FREE_THRESH) {
2242 				state->id_sched_needed &= ~IBD_RSRC_RC_SWQE;
2243 				mutex_exit(&chan->tx_rel_list.dl_mutex);
2244 				mutex_exit(&chan->tx_wqe_list.dl_mutex);
2245 				mutex_exit(&state->id_sched_lock);
2246 				state->rc_swqe_mac_update++;
2247 				mac_tx_update(state->id_mh);
2248 			} else {
2249 				state->rc_scq_no_swqe++;
2250 				mutex_exit(&chan->tx_rel_list.dl_mutex);
2251 				mutex_exit(&chan->tx_wqe_list.dl_mutex);
2252 				mutex_exit(&state->id_sched_lock);
2253 			}
2254 		} else if (state->id_sched_needed & IBD_RSRC_RC_TX_LARGEBUF) {
2255 			mutex_enter(&state->rc_tx_large_bufs_lock);
2256 			if (state->rc_tx_largebuf_nfree >
2257 			    IBD_RC_TX_FREE_THRESH) {
2258 				ASSERT(state->rc_tx_largebuf_free_head != NULL);
2259 				state->id_sched_needed &=
2260 				    ~IBD_RSRC_RC_TX_LARGEBUF;
2261 				mutex_exit(&state->rc_tx_large_bufs_lock);
2262 				mutex_exit(&state->id_sched_lock);
2263 				state->rc_xmt_buf_mac_update++;
2264 				mac_tx_update(state->id_mh);
2265 			} else {
2266 				state->rc_scq_no_largebuf++;
2267 				mutex_exit(&state->rc_tx_large_bufs_lock);
2268 				mutex_exit(&state->id_sched_lock);
2269 			}
2270 		} else if (state->id_sched_needed & IBD_RSRC_SWQE) {
2271 			mutex_enter(&state->id_tx_list.dl_mutex);
2272 			mutex_enter(&state->id_tx_rel_list.dl_mutex);
2273 			if ((state->id_tx_list.dl_cnt +
2274 			    state->id_tx_rel_list.dl_cnt)
2275 			    > IBD_FREE_SWQES_THRESH) {
2276 				state->id_sched_needed &= ~IBD_RSRC_SWQE;
2277 				state->id_sched_cnt++;
2278 				mutex_exit(&state->id_tx_rel_list.dl_mutex);
2279 				mutex_exit(&state->id_tx_list.dl_mutex);
2280 				mutex_exit(&state->id_sched_lock);
2281 				mac_tx_update(state->id_mh);
2282 			} else {
2283 				mutex_exit(&state->id_tx_rel_list.dl_mutex);
2284 				mutex_exit(&state->id_tx_list.dl_mutex);
2285 				mutex_exit(&state->id_sched_lock);
2286 			}
2287 		} else {
2288 			mutex_exit(&state->id_sched_lock);
2289 		}
2290 	}
2291 }
2292 
2293 /* Send CQ handler; calls ibd_rc_tx_cleanup() to recycle Tx buffers */
2294 /* ARGSUSED */
2295 static void
2296 ibd_rc_scq_handler(ibt_cq_hdl_t cq_hdl, void *arg)
2297 {
2298 	ibd_rc_chan_t *chan = (ibd_rc_chan_t *)arg;
2299 
2300 	if (ibd_rc_tx_softintr == 1) {
2301 		mutex_enter(&chan->tx_poll_lock);
2302 		if (chan->tx_poll_busy & IBD_CQ_POLLING) {
2303 			chan->tx_poll_busy |= IBD_REDO_CQ_POLLING;
2304 			mutex_exit(&chan->tx_poll_lock);
2305 			return;
2306 		} else {
2307 			mutex_exit(&chan->tx_poll_lock);
2308 			ddi_trigger_softintr(chan->scq_softintr);
2309 		}
2310 	} else
2311 		(void) ibd_rc_tx_recycle(arg);
2312 }
2313 
2314 static uint_t
2315 ibd_rc_tx_recycle(caddr_t arg)
2316 {
2317 	ibd_rc_chan_t *chan = (ibd_rc_chan_t *)arg;
2318 	ibd_state_t *state = chan->state;
2319 	int flag, redo_flag;
2320 	int redo = 1;
2321 
2322 	flag = IBD_CQ_POLLING;
2323 	redo_flag = IBD_REDO_CQ_POLLING;
2324 
2325 	mutex_enter(&chan->tx_poll_lock);
2326 	if (chan->tx_poll_busy & flag) {
2327 		ibd_print_warn(state, "ibd_rc_tx_recycle: multiple polling "
2328 		    "threads");
2329 		chan->tx_poll_busy |= redo_flag;
2330 		mutex_exit(&chan->tx_poll_lock);
2331 		return (DDI_INTR_CLAIMED);
2332 	}
2333 	chan->tx_poll_busy |= flag;
2334 	mutex_exit(&chan->tx_poll_lock);
2335 
2336 	/*
2337 	 * Poll for completed entries; the CQ will not interrupt any
2338 	 * more for completed packets.
2339 	 */
2340 	ibd_rc_drain_scq(chan, chan->scq_hdl);
2341 
2342 	/*
2343 	 * Now enable CQ notifications; all completions originating now
2344 	 * will cause new interrupts.
2345 	 */
2346 	do {
2347 		if (ibt_enable_cq_notify(chan->scq_hdl, IBT_NEXT_COMPLETION) !=
2348 		    IBT_SUCCESS) {
2349 			/*
2350 			 * We do not expect a failure here.
2351 			 */
2352 			DPRINT(40, "ibd_rc_tx_recycle: ibt_enable_cq_notify()"
2353 			    " failed");
2354 		}
2355 
2356 		ibd_rc_drain_scq(chan, chan->scq_hdl);
2357 
2358 		mutex_enter(&chan->tx_poll_lock);
2359 		if (chan->tx_poll_busy & redo_flag)
2360 			chan->tx_poll_busy &= ~redo_flag;
2361 		else {
2362 			chan->tx_poll_busy &= ~flag;
2363 			redo = 0;
2364 		}
2365 		mutex_exit(&chan->tx_poll_lock);
2366 
2367 	} while (redo);
2368 
2369 	return (DDI_INTR_CLAIMED);
2370 }
2371 
2372 static ibt_status_t
2373 ibd_register_service(ibt_srv_desc_t *srv, ib_svc_id_t sid,
2374     int num_sids, ibt_srv_hdl_t *srv_hdl, ib_svc_id_t *ret_sid)
2375 {
2376 	ibd_service_t *p;
2377 	ibt_status_t status;
2378 
2379 	mutex_enter(&ibd_gstate.ig_mutex);
2380 	for (p = ibd_gstate.ig_service_list; p != NULL; p = p->is_link) {
2381 		if (p->is_sid == sid) {
2382 			p->is_ref_cnt++;
2383 			*srv_hdl = p->is_srv_hdl;
2384 			*ret_sid = sid;
2385 			mutex_exit(&ibd_gstate.ig_mutex);
2386 			return (IBT_SUCCESS);
2387 		}
2388 	}
2389 	status = ibt_register_service(ibd_gstate.ig_ibt_hdl, srv, sid,
2390 	    num_sids, srv_hdl, ret_sid);
2391 	if (status == IBT_SUCCESS) {
2392 		p = kmem_alloc(sizeof (*p), KM_SLEEP);
2393 		p->is_srv_hdl = *srv_hdl;
2394 		p->is_sid = sid;
2395 		p->is_ref_cnt = 1;
2396 		p->is_link = ibd_gstate.ig_service_list;
2397 		ibd_gstate.ig_service_list = p;
2398 	}
2399 	mutex_exit(&ibd_gstate.ig_mutex);
2400 	return (status);
2401 }
2402 
2403 static ibt_status_t
2404 ibd_deregister_service(ibt_srv_hdl_t srv_hdl)
2405 {
2406 	ibd_service_t *p, **pp;
2407 	ibt_status_t status;
2408 
2409 	mutex_enter(&ibd_gstate.ig_mutex);
2410 	for (pp = &ibd_gstate.ig_service_list; *pp != NULL;
2411 	    pp = &((*pp)->is_link)) {
2412 		p = *pp;
2413 		if (p->is_srv_hdl == srv_hdl) {	/* Found it */
2414 			if (--p->is_ref_cnt == 0) {
2415 				status = ibt_deregister_service(
2416 				    ibd_gstate.ig_ibt_hdl, srv_hdl);
2417 				*pp = p->is_link;	/* link prev to next */
2418 				kmem_free(p, sizeof (*p));
2419 			} else {
2420 				status = IBT_SUCCESS;
2421 			}
2422 			mutex_exit(&ibd_gstate.ig_mutex);
2423 			return (status);
2424 		}
2425 	}
2426 	/* Should not ever get here */
2427 	mutex_exit(&ibd_gstate.ig_mutex);
2428 	return (IBT_FAILURE);
2429 }
2430 
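/*
 * Editorial addition, not part of the original source:
 * ibd_register_service()/ibd_deregister_service() above wrap the IBTF
 * service calls with a reference-counted registry so several ibd
 * instances can share one CM service ID. A caller pairs them as in
 * this sketch (error handling elided):
 *
 *	ibt_srv_hdl_t hdl;
 *	ib_svc_id_t out_sid;
 *
 *	if (ibd_register_service(&srvdesc, sid, 1, &hdl, &out_sid) ==
 *	    IBT_SUCCESS) {
 *		...
 *		(void) ibd_deregister_service(hdl);
 *	}
 *
 * Only the first registration and the last deregistration of a given
 * SID actually reach ibt_register_service()/ibt_deregister_service().
 */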
2431 /* Listen with corresponding service ID */
2432 ibt_status_t
2433 ibd_rc_listen(ibd_state_t *state)
2434 {
2435 	ibt_srv_desc_t srvdesc;
2436 	ib_svc_id_t ret_sid;
2437 	ibt_status_t status;
2438 	ib_gid_t gid;
2439 
2440 	if (state->rc_listen_hdl != NULL) {
2441 		DPRINT(40, "ibd_rc_listen: rc_listen_hdl should be NULL");
2442 		return (IBT_FAILURE);
2443 	}
2444 
2445 	bzero(&srvdesc, sizeof (ibt_srv_desc_t));
2446 	srvdesc.sd_handler = ibd_rc_dispatch_pass_mad;
2447 	srvdesc.sd_flags = IBT_SRV_NO_FLAGS;
2448 
2449 	/*
2450 	 * Register the service with the service ID; incoming
2451 	 * connection requests should arrive on it.
2452 	 */
2453 	status = ibd_register_service(&srvdesc,
2454 	    IBD_RC_QPN_TO_SID(state->id_qpnum),
2455 	    1, &state->rc_listen_hdl, &ret_sid);
2456 	if (status != IBT_SUCCESS) {
2457 		DPRINT(40, "ibd_rc_listen: Service Registration Failed, "
2458 		    "ret=%d", status);
2459 		return (status);
2460 	}
2461 
2462 	gid = state->id_sgid;
2463 
2464 	/* pass state as cm_private */
2465 	status = ibt_bind_service(state->rc_listen_hdl,
2466 	    gid, NULL, state, &state->rc_listen_bind);
2467 	if (status != IBT_SUCCESS) {
2468 		DPRINT(40, "ibd_rc_listen:"
2469 		    " failed to bind port: <%d>", status);
2470 		(void) ibd_deregister_service(state->rc_listen_hdl);
2471 		return (status);
2472 	}
2473 
2474 	/*
2475 	 * Legacy OFED used a wrong service ID (with one additional zero
2476 	 * digit) for many years. To interoperate with legacy OFED, we
2477 	 * also support that service ID here.
2478 	 */
2479 	ASSERT(state->rc_listen_hdl_OFED_interop == NULL);
2480 
2481 	bzero(&srvdesc, sizeof (ibt_srv_desc_t));
2482 	srvdesc.sd_handler = ibd_rc_dispatch_pass_mad;
2483 	srvdesc.sd_flags = IBT_SRV_NO_FLAGS;
2484 
2485 	/*
2486 	 * Register the service with the legacy service ID; incoming
2487 	 * connection requests from legacy OFED peers arrive on it.
2488 	 */
2489 	status = ibd_register_service(&srvdesc,
2490 	    IBD_RC_QPN_TO_SID_OFED_INTEROP(state->id_qpnum),
2491 	    1, &state->rc_listen_hdl_OFED_interop, &ret_sid);
2492 	if (status != IBT_SUCCESS) {
2493 		DPRINT(40,
2494 		    "ibd_rc_listen: Service Registration for Legacy OFED "
2495 		    "Failed %d", status);
2496 		(void) ibt_unbind_service(state->rc_listen_hdl,
2497 		    state->rc_listen_bind);
2498 		(void) ibd_deregister_service(state->rc_listen_hdl);
2499 		return (status);
2500 	}
2501 
2502 	gid = state->id_sgid;
2503 
2504 	/* pass state as cm_private */
2505 	status = ibt_bind_service(state->rc_listen_hdl_OFED_interop,
2506 	    gid, NULL, state, &state->rc_listen_bind_OFED_interop);
2507 	if (status != IBT_SUCCESS) {
2508 		DPRINT(40, "ibd_rc_listen: failed to bind port: <%d> for "
2509 		    "Legacy OFED listener", status);
2510 		(void) ibd_deregister_service(
2511 		    state->rc_listen_hdl_OFED_interop);
2512 		(void) ibt_unbind_service(state->rc_listen_hdl,
2513 		    state->rc_listen_bind);
2514 		(void) ibd_deregister_service(state->rc_listen_hdl);
2515 		return (status);
2516 	}
2517 
2518 	return (IBT_SUCCESS);
2519 }
2520 
2521 void
2522 ibd_rc_stop_listen(ibd_state_t *state)
2523 {
2524 	int ret;
2525 
2526 	/* Disable incoming connection requests */
2527 	if (state->rc_listen_hdl != NULL) {
2528 		ret = ibt_unbind_all_services(state->rc_listen_hdl);
2529 		if (ret != 0) {
2530 			DPRINT(40, "ibd_rc_stop_listen:"
2531 			    "ibt_unbind_all_services() failed, ret=%d", ret);
2532 		}
2533 		ret = ibd_deregister_service(state->rc_listen_hdl);
2534 		if (ret != 0) {
2535 			DPRINT(40, "ibd_rc_stop_listen:"
2536 			    "ibd_deregister_service() failed, ret=%d", ret);
2537 		} else {
2538 			state->rc_listen_hdl = NULL;
2539 		}
2540 	}
2541 
2542 	/* Same for the legacy OFED interop listener */
2543 	if (state->rc_listen_hdl_OFED_interop != NULL) {
2544 		ret = ibt_unbind_all_services(
2545 		    state->rc_listen_hdl_OFED_interop);
2546 		if (ret != 0) {
2547 			DPRINT(40, "ibd_rc_stop_listen:"
2548 			    "ibt_unbind_all_services() failed: %d", ret);
2549 		}
2550 		ret = ibd_deregister_service(state->rc_listen_hdl_OFED_interop);
2551 		if (ret != 0) {
2552 			DPRINT(40, "ibd_rc_stop_listen:"
2553 			    "ibd_deregister_service() failed: %d", ret);
2554 		} else {
2555 			state->rc_listen_hdl_OFED_interop = NULL;
2556 		}
2557 	}
2558 }
2559 
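/*
 * Editorial addition, not part of the original source:
 * ibd_rc_close_all_chan() below tears everything down in a fixed
 * order: (1) unhook every passive channel's Rx CQ handler, (2) if SRQ
 * is enabled, wait (bounded, 10 x 100ms) for outstanding Rx buffers,
 * (3) close all passive channels, (4) move active channels onto
 * rc_obs_act_chan_list and close them, and (5) wait up to 40 seconds
 * (400 x 100ms) for any channels still being closed by other threads.
 */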
2560 void
2561 ibd_rc_close_all_chan(ibd_state_t *state)
2562 {
2563 	ibd_rc_chan_t *rc_chan;
2564 	ibd_ace_t *ace, *pre_ace;
2565 	uint_t attempts;
2566 
2567 	/* Disable all Rx routines */
2568 	mutex_enter(&state->rc_pass_chan_list.chan_list_mutex);
2569 	rc_chan = state->rc_pass_chan_list.chan_list;
2570 	while (rc_chan != NULL) {
2571 		ibt_set_cq_handler(rc_chan->rcq_hdl, 0, 0);
2572 		rc_chan = rc_chan->next;
2573 	}
2574 	mutex_exit(&state->rc_pass_chan_list.chan_list_mutex);
2575 
2576 	if (state->rc_enable_srq) {
2577 		attempts = 10;
2578 		while (state->rc_srq_rwqe_list.dl_bufs_outstanding > 0) {
2579 			DPRINT(30, "ibd_rc_close_all_chan: outstanding > 0");
2580 			delay(drv_usectohz(100000));
2581 			if (--attempts == 0) {
2582 				/*
2583 				 * The network layer is still holding some
2584 				 * receive buffers. We have already waited
2585 				 * one second (10 x 100ms) for them to be
2586 				 * returned; there is no point in waiting
2587 				 * any longer, so give up and proceed with
2588 				 * the channel teardown anyway.
2589 				 */
2590 				break;
2591 			}
2592 		}
2593 	}
2594 
2595 	/* Close all passive RC channels */
2596 	rc_chan = ibd_rc_rm_header_chan_list(&state->rc_pass_chan_list);
2597 	while (rc_chan != NULL) {
2598 		(void) ibd_rc_pas_close(rc_chan, B_TRUE, B_FALSE);
2599 		rc_chan = ibd_rc_rm_header_chan_list(&state->rc_pass_chan_list);
2600 	}
2601 
2602 	/* Close all active RC channels */
2603 	mutex_enter(&state->id_ac_mutex);
2604 	state->id_ac_hot_ace = NULL;
2605 	ace = list_head(&state->id_ah_active);
2606 	while ((pre_ace = ace) != NULL) {
2607 		ace = list_next(&state->id_ah_active, ace);
2608 		if (pre_ace->ac_chan != NULL) {
2609 			INC_REF(pre_ace, 1);
2610 			IBD_ACACHE_PULLOUT_ACTIVE(state, pre_ace);
2611 			pre_ace->ac_chan->chan_state = IBD_RC_STATE_ACT_CLOSING;
2612 			ibd_rc_add_to_chan_list(&state->rc_obs_act_chan_list,
2613 			    pre_ace->ac_chan);
2614 		}
2615 	}
2616 	mutex_exit(&state->id_ac_mutex);
2617 
2618 	rc_chan = ibd_rc_rm_header_chan_list(&state->rc_obs_act_chan_list);
2619 	while (rc_chan != NULL) {
2620 		ace = rc_chan->ace;
2621 		ibd_rc_act_close(rc_chan, B_TRUE);
2622 		if (ace != NULL) {
2623 			mutex_enter(&state->id_ac_mutex);
2624 			ASSERT(ace->ac_ref != 0);
2625 			atomic_dec_32(&ace->ac_ref);
2626 			ace->ac_chan = NULL;
2627 			if ((ace->ac_ref == 0) || (ace->ac_ref == CYCLEVAL)) {
2628 				IBD_ACACHE_INSERT_FREE(state, ace);
2629 				ace->ac_ref = 0;
2630 			} else {
2631 				ace->ac_ref |= CYCLEVAL;
2632 				state->rc_delay_ace_recycle++;
2633 			}
2634 			mutex_exit(&state->id_ac_mutex);
2635 		}
2636 		rc_chan = ibd_rc_rm_header_chan_list(
2637 		    &state->rc_obs_act_chan_list);
2638 	}
2639 
2640 	attempts = 400;
2641 	while (((state->rc_num_tx_chan != 0) ||
2642 	    (state->rc_num_rx_chan != 0)) && (attempts > 0)) {
2643 		/* Another thread is closing a CM channel; wait for it */
2644 		delay(drv_usectohz(100000));
2645 		attempts--;
2646 	}
2647 }
2648 
2649 void
2650 ibd_rc_try_connect(ibd_state_t *state, ibd_ace_t *ace, ibt_path_info_t *path)
2651 {
2652 	ibt_status_t status;
2653 
2654 	if ((state->id_mac_state & IBD_DRV_STARTED) == 0)
2655 		return;
2656 
2657 	status = ibd_rc_connect(state, ace, path,
2658 	    IBD_RC_SERVICE_ID_OFED_INTEROP);
2659 
2660 	if (status != IBT_SUCCESS) {
2661 		/* wait for the peer to remove the stale channel */
2662 		delay(drv_usectohz(10000));
2663 		if ((state->id_mac_state & IBD_DRV_STARTED) == 0)
2664 			return;
2665 		status = ibd_rc_connect(state, ace, path,
2666 		    IBD_RC_SERVICE_ID_OFED_INTEROP);
2667 	}
2668 
2669 	if (status != IBT_SUCCESS) {
2670 		/* wait for the peer to remove the stale channel */
2671 		delay(drv_usectohz(10000));
2672 		if ((state->id_mac_state & IBD_DRV_STARTED) == 0)
2673 			return;
2674 		(void) ibd_rc_connect(state, ace, path,
2675 		    IBD_RC_SERVICE_ID);
2676 	}
2677 }
2678 
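/*
 * Editorial addition, not part of the original source: the CM service
 * ID filled into pi_sid by ibd_rc_connect() below embeds the peer's
 * UD QPN in the low 24 bits:
 *
 *	path->pi_sid = service_id_base | (peer_qpn & 0xffffff);
 *
 * so ibd_rc_try_connect() above only has to choose the base, trying
 * IBD_RC_SERVICE_ID_OFED_INTEROP twice before falling back to
 * IBD_RC_SERVICE_ID, with a 10ms delay between attempts to give the
 * peer time to reap a stale channel.
 */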
2679 /*
2680  * Allocate an RC channel, set ace->ac_chan to it, and then open
2681  * the channel.
2682  */
2683 ibt_status_t
2684 ibd_rc_connect(ibd_state_t *state, ibd_ace_t *ace, ibt_path_info_t *path,
2685     uint64_t ietf_cm_service_id)
2686 {
2687 	ibt_status_t status = 0;
2688 	ibt_rc_returns_t open_returns;
2689 	ibt_chan_open_args_t open_args;
2690 	ibd_rc_msg_hello_t hello_req_msg;
2691 	ibd_rc_msg_hello_t *hello_ack_msg;
2692 	ibd_rc_chan_t *chan;
2693 	ibt_ud_dest_query_attr_t dest_attrs;
2694 
2695 	ASSERT(ace != NULL);
2696 	ASSERT(ace->ac_mce == NULL);
2697 	ASSERT(ace->ac_chan == NULL);
2698 
2699 	if ((status = ibd_rc_alloc_chan(&chan, state, B_TRUE)) != IBT_SUCCESS) {
2700 		DPRINT(10, "ibd_rc_connect: ibd_rc_alloc_chan() failed");
2701 		return (status);
2702 	}
2703 
2704 	ace->ac_chan = chan;
2705 	chan->state = state;
2706 	chan->ace = ace;
2707 
2708 	ibt_set_chan_private(chan->chan_hdl, (void *)(uintptr_t)ace);
2709 
2710 	hello_ack_msg = kmem_zalloc(sizeof (ibd_rc_msg_hello_t), KM_SLEEP);
2711 
2712 	/*
2713 	 * open the channel
2714 	 */
2715 	bzero(&open_args, sizeof (ibt_chan_open_args_t));
2716 	bzero(&open_returns, sizeof (ibt_rc_returns_t));
2717 
2718 	open_args.oc_cm_handler = ibd_rc_dispatch_actv_mad;
2719 	open_args.oc_cm_clnt_private = (void *)(uintptr_t)ace;
2720 
2721 	/*
2722 	 * update path record with the SID
2723 	 */
2724 	if ((status = ibt_query_ud_dest(ace->ac_dest, &dest_attrs))
2725 	    != IBT_SUCCESS) {
2726 		DPRINT(40, "ibd_rc_connect: ibt_query_ud_dest() failed, "
2727 		    "ret=%d", status);
		/* do not leak the partially set up channel or ack buffer */
		kmem_free(hello_ack_msg, sizeof (ibd_rc_msg_hello_t));
		ibd_rc_free_chan(chan);
		ace->ac_chan = NULL;
2728 		return (status);
2729 	}
2730 
2731 	path->pi_sid =
2732 	    ietf_cm_service_id | ((dest_attrs.ud_dst_qpn) & 0xffffff);
2733 
2734 
2735 	/* pre-allocate memory for hello ack message */
2736 	open_returns.rc_priv_data_len = sizeof (ibd_rc_msg_hello_t);
2737 	open_returns.rc_priv_data = hello_ack_msg;
2738 
2739 	open_args.oc_path = path;
2740 
2741 	open_args.oc_path_rnr_retry_cnt = 1;
2742 	open_args.oc_path_retry_cnt = 1;
2743 
2744 	/* We don't do RDMA */
2745 	open_args.oc_rdma_ra_out = 0;
2746 	open_args.oc_rdma_ra_in = 0;
2747 
2748 	hello_req_msg.reserved_qpn = htonl(state->id_qpnum);
2749 	hello_req_msg.rx_mtu = htonl(state->rc_mtu);
2750 	open_args.oc_priv_data_len = sizeof (ibd_rc_msg_hello_t);
2751 	open_args.oc_priv_data = (void *)(&hello_req_msg);
2752 
2753 	ASSERT(open_args.oc_priv_data_len <= IBT_REQ_PRIV_DATA_SZ);
2754 	ASSERT(open_returns.rc_priv_data_len <= IBT_REP_PRIV_DATA_SZ);
2755 	ASSERT(open_args.oc_cm_handler != NULL);
2756 
2757 	status = ibt_open_rc_channel(chan->chan_hdl, IBT_OCHAN_NO_FLAGS,
2758 	    IBT_BLOCKING, &open_args, &open_returns);
2759 
2760 	if (status == IBT_SUCCESS) {
2761 		/* Success! */
2762 		DPRINT(2, "ibd_rc_connect: ibt_open_rc_channel() succeeded");
2763 		state->rc_conn_succ++;
2764 		kmem_free(hello_ack_msg, sizeof (ibd_rc_msg_hello_t));
2765 		return (IBT_SUCCESS);
2766 	}
2767 
2768 	/* failure */
2769 	(void) ibt_flush_channel(chan->chan_hdl);
2770 	ibd_rc_free_chan(chan);
2771 	ace->ac_chan = NULL;
2772 
2773 	/* report the error from open_returns and exit */
2774 	DPRINT(30, "ibd_rc_connect: ibt_open_rc_channel() failed: "
2775 	    "ret status = %d, reason=%d, ace=%p, mtu=0x%x, qpn=0x%x,"
2776 	    " peer qpn=0x%x", status, (int)open_returns.rc_status, ace,
2777 	    hello_req_msg.rx_mtu, hello_req_msg.reserved_qpn,
2778 	    dest_attrs.ud_dst_qpn);
2779 	kmem_free(hello_ack_msg, sizeof (ibd_rc_msg_hello_t));
2780 	return (status);
2781 }
2782 
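/*
 * Editorial addition, not part of the original source: the private
 * data exchanged during connection setup is a fixed ibd_rc_msg_hello_t
 * carrying the sender's UD QPN and Rx MTU in network byte order. The
 * active side fills the REQ as shown in ibd_rc_connect() above:
 *
 *	hello_req_msg.reserved_qpn = htonl(state->id_qpnum);
 *	hello_req_msg.rx_mtu = htonl(state->rc_mtu);
 *
 * and the passive side answers in kind from ibd_rc_handle_req(); each
 * end applies ntohl() before using the peer's values.
 */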
2783 void
2784 ibd_rc_signal_act_close(ibd_state_t *state, ibd_ace_t *ace)
2785 {
2786 	ibd_req_t *req;
2787 
2788 	req = kmem_cache_alloc(state->id_req_kmc, KM_NOSLEEP);
2789 	if (req == NULL) {
2790 		ibd_print_warn(state, "ibd_rc_signal_act_close: failed to "
2791 		    "allocate an ibd_req_t");
2792 		mutex_enter(&state->rc_obs_act_chan_list.chan_list_mutex);
2793 		ace->ac_chan->next = state->rc_obs_act_chan_list.chan_list;
2794 		state->rc_obs_act_chan_list.chan_list = ace->ac_chan;
2795 		mutex_exit(&state->rc_obs_act_chan_list.chan_list_mutex);
2796 	} else {
2797 		req->rq_ptr = ace->ac_chan;
2798 		ibd_queue_work_slot(state, req, IBD_ASYNC_RC_CLOSE_ACT_CHAN);
2799 	}
2800 }
2801 
2802 void
2803 ibd_rc_signal_ace_recycle(ibd_state_t *state, ibd_ace_t *ace)
2804 {
2805 	ibd_req_t *req;
2806 
2807 	mutex_enter(&state->rc_ace_recycle_lock);
2808 	if (state->rc_ace_recycle != NULL) {
2809 		mutex_exit(&state->rc_ace_recycle_lock);
2810 		return;
2811 	}
2812 
2813 	req = kmem_cache_alloc(state->id_req_kmc, KM_NOSLEEP);
2814 	if (req == NULL) {
2815 		mutex_exit(&state->rc_ace_recycle_lock);
2816 		return;
2817 	}
2818 
2819 	state->rc_ace_recycle = ace;
2820 	mutex_exit(&state->rc_ace_recycle_lock);
2821 	ASSERT(ace->ac_mce == NULL);
2822 	INC_REF(ace, 1);
2823 	IBD_ACACHE_PULLOUT_ACTIVE(state, ace);
2824 	req->rq_ptr = ace;
2825 	ibd_queue_work_slot(state, req, IBD_ASYNC_RC_RECYCLE_ACE);
2826 }
2827 
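/*
 * Editorial addition, not part of the original source: both signal
 * routines above follow the driver's deferred-work pattern: allocate
 * an ibd_req_t with KM_NOSLEEP (they may run in interrupt context),
 * stash the object in rq_ptr and hand it to the async thread:
 *
 *	req->rq_ptr = obj;
 *	ibd_queue_work_slot(state, req, IBD_ASYNC_...);
 *
 * If the allocation fails, ibd_rc_signal_act_close() falls back to
 * parking the channel on rc_obs_act_chan_list so the close request
 * is not lost.
 */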
2828 /*
2829  * Close an active channel
2830  *
2831  * is_close_rc_chan: if B_TRUE, we will call ibt_close_rc_channel()
2832  */
2833 static void
2834 ibd_rc_act_close(ibd_rc_chan_t *chan, boolean_t is_close_rc_chan)
2835 {
2836 	ibd_state_t *state;
2837 	ibd_ace_t *ace;
2838 	uint_t times;
2839 	ibt_status_t ret;
2840 
2841 	ASSERT(chan != NULL);
2842 
2843 	chan->state->rc_act_close++;
2844 	switch (chan->chan_state) {
2845 	case IBD_RC_STATE_ACT_CLOSING:	/* stale, close it */
2846 	case IBD_RC_STATE_ACT_ESTAB:
2847 		DPRINT(30, "ibd_rc_act_close-1: close and free chan, "
2848 		    "act_state=%d, chan=%p", chan->chan_state, chan);
2849 		chan->chan_state = IBD_RC_STATE_ACT_CLOSED;
2850 		ibt_set_cq_handler(chan->rcq_hdl, 0, 0);
2851 		/*
2852 		 * Wait for the send queue to drain. The timeout was
2853 		 * originally 50 iterations (5 seconds), but that is not
2854 		 * always enough for IBTL to return all buffers and release
2855 		 * ace->ac_ref; IBTL has been observed to take about 17
2856 		 * seconds to clean up a stale RC channel, so allow 25.
2857 		 */
2858 		times = 250;
2859 		ace = chan->ace;
2860 		ASSERT(ace != NULL);
2861 		state = chan->state;
2862 		ASSERT(state != NULL);
2863 		mutex_enter(&state->id_ac_mutex);
2864 		mutex_enter(&chan->tx_wqe_list.dl_mutex);
2865 		mutex_enter(&chan->tx_rel_list.dl_mutex);
2866 		while (((chan->tx_wqe_list.dl_cnt + chan->tx_rel_list.dl_cnt)
2867 		    != chan->scq_size) || ((ace->ac_ref != 1) &&
2868 		    (ace->ac_ref != (CYCLEVAL+1)))) {
2869 			mutex_exit(&chan->tx_rel_list.dl_mutex);
2870 			mutex_exit(&chan->tx_wqe_list.dl_mutex);
2871 			mutex_exit(&state->id_ac_mutex);
2872 			times--;
2873 			if (times == 0) {
2874 				state->rc_act_close_not_clean++;
2875 				DPRINT(40, "ibd_rc_act_close: dl_cnt(tx_wqe_"
2876 				    "list=%d, tx_rel_list=%d) != chan->"
2877 				    "scq_size=%d, OR ac_ref(=%d) not clean",
2878 				    chan->tx_wqe_list.dl_cnt,
2879 				    chan->tx_rel_list.dl_cnt,
2880 				    chan->scq_size, ace->ac_ref);
2881 				break;
2882 			}
2883 			mutex_enter(&chan->tx_poll_lock);
2884 			if (chan->tx_poll_busy & IBD_CQ_POLLING) {
2885 				DPRINT(40, "ibd_rc_act_close: multiple "
2886 				    "polling threads");
2887 				mutex_exit(&chan->tx_poll_lock);
2888 			} else {
2889 				chan->tx_poll_busy = IBD_CQ_POLLING;
2890 				mutex_exit(&chan->tx_poll_lock);
2891 				ibd_rc_drain_scq(chan, chan->scq_hdl);
2892 				mutex_enter(&chan->tx_poll_lock);
2893 				chan->tx_poll_busy = 0;
2894 				mutex_exit(&chan->tx_poll_lock);
2895 			}
2896 			delay(drv_usectohz(100000));
2897 			mutex_enter(&state->id_ac_mutex);
2898 			mutex_enter(&chan->tx_wqe_list.dl_mutex);
2899 			mutex_enter(&chan->tx_rel_list.dl_mutex);
2900 		}
2901 		if (times != 0) {
2902 			mutex_exit(&chan->tx_rel_list.dl_mutex);
2903 			mutex_exit(&chan->tx_wqe_list.dl_mutex);
2904 			mutex_exit(&state->id_ac_mutex);
2905 		}
2906 
2907 		ibt_set_cq_handler(chan->scq_hdl, 0, 0);
2908 		if (is_close_rc_chan) {
2909 			ret = ibt_close_rc_channel(chan->chan_hdl,
2910 			    IBT_BLOCKING|IBT_NOCALLBACKS, NULL, 0, NULL, NULL,
2911 			    0);
2912 			if (ret != IBT_SUCCESS) {
2913 				DPRINT(40, "ibd_rc_act_close: ibt_close_rc_"
2914 				    "channel fail, chan=%p, ret=%d",
2915 				    chan, ret);
2916 			} else {
2917 				DPRINT(30, "ibd_rc_act_close: ibt_close_rc_"
2918 				    "channel succ, chan=%p", chan);
2919 			}
2920 		}
2921 
2922 		ibd_rc_free_chan(chan);
2923 		break;
2924 	case IBD_RC_STATE_ACT_REP_RECV:
2925 		chan->chan_state = IBD_RC_STATE_ACT_CLOSED;
2926 		(void) ibt_flush_channel(chan->chan_hdl);
2927 		ibd_rc_free_chan(chan);
2928 		break;
2929 	case IBD_RC_STATE_ACT_ERROR:
2930 		DPRINT(40, "ibd_rc_act_close: IBD_RC_STATE_ERROR branch");
2931 		break;
2932 	default:
2933 		DPRINT(40, "ibd_rc_act_close: default branch, act_state=%d, "
2934 		    "chan=%p", chan->chan_state, chan);
2935 	}
2936 }
2937 
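/*
 * Editorial addition, not part of the original source: the wait loop
 * in ibd_rc_act_close() above treats the channel as drained only when
 * both conditions hold:
 *
 *	tx_wqe_list.dl_cnt + tx_rel_list.dl_cnt == scq_size
 *		(every swqe is back on a free list), and
 *	ace->ac_ref == 1 || ace->ac_ref == CYCLEVAL + 1
 *		(only the closer's own reference remains).
 *
 * Each of the up-to-250 iterations drains the send CQ itself (unless
 * another poller holds IBD_CQ_POLLING) and then sleeps for 100ms.
 */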
2938 /*
2939  * Close a passive channel
2940  *
2941  * is_close_rc_chan: if B_TRUE, we will call ibt_close_rc_channel()
2942  *
2943  * is_timeout_close: if B_TRUE, this function is called by the connection
2944  * reaper (see ibd_rc_conn_timeout_call()). When the connection reaper
2945  * calls ibd_rc_pas_close() and finds that dl_bufs_outstanding or
2946  * chan->rcq_invoking is non-zero, it simply puts the channel back on
2947  * the passive channel list and moves on, since that may indicate the
2948  * channel became active again by the time its cleanup started. Tearing
2949  * the channel down and then re-establishing it is far more expensive
2950  * than rechecking later, so it pays to be conservative about the
2951  * cleanup.
2952  */
2953 int
2954 ibd_rc_pas_close(ibd_rc_chan_t *chan, boolean_t is_close_rc_chan,
2955     boolean_t is_timeout_close)
2956 {
2957 	uint_t times;
2958 	ibt_status_t ret;
2959 
2960 	ASSERT(chan != NULL);
2961 	chan->state->rc_pas_close++;
2962 
2963 	switch (chan->chan_state) {
2964 	case IBD_RC_STATE_PAS_ESTAB:
2965 		if (is_timeout_close) {
2966 			if ((chan->rcq_invoking != 0) ||
2967 			    ((!chan->state->rc_enable_srq) &&
2968 			    (chan->rx_wqe_list.dl_bufs_outstanding > 0))) {
2969 				if (ibd_rc_re_add_to_pas_chan_list(chan)) {
2970 					return (DDI_FAILURE);
2971 				}
2972 			}
2973 		}
2974 		/*
2975 		 * First, stop receive interrupts; this stops the
2976 		 * connection from handing up buffers to higher layers.
2977 		 * Wait for receive buffers to be returned; give up
2978 		 * after 5 seconds.
2979 		 */
2980 		ibt_set_cq_handler(chan->rcq_hdl, 0, 0);
2981 		/* Wait 0.01 second to let ibt_set_cq_handler() take effect */
2982 		delay(drv_usectohz(10000));
2983 		if (!chan->state->rc_enable_srq) {
2984 			times = 50;
2985 			while (chan->rx_wqe_list.dl_bufs_outstanding > 0) {
2986 				delay(drv_usectohz(100000));
2987 				if (--times == 0) {
2988 					DPRINT(40, "ibd_rc_pas_close: "
2989 					    "reclaiming failed");
2990 					ibd_rc_poll_rcq(chan, chan->rcq_hdl);
2991 					ibt_set_cq_handler(chan->rcq_hdl,
2992 					    ibd_rc_rcq_handler,
2993 					    (void *)(uintptr_t)chan);
2994 					return (DDI_FAILURE);
2995 				}
2996 			}
2997 		}
2998 		times = 50;
2999 		while (chan->rcq_invoking != 0) {
3000 			delay(drv_usectohz(100000));
3001 			if (--times == 0) {
3002 				DPRINT(40, "ibd_rc_pas_close: "
3003 				    "rcq handler is being invoked");
3004 				chan->state->rc_pas_close_rcq_invoking++;
3005 				break;
3006 			}
3007 		}
3008 		ibt_set_cq_handler(chan->scq_hdl, 0, 0);
3009 		chan->chan_state = IBD_RC_STATE_PAS_CLOSED;
3010 		DPRINT(30, "ibd_rc_pas_close-1: close and free chan, "
3011 		    "chan_state=%d, chan=%p", chan->chan_state, chan);
3012 		if (is_close_rc_chan) {
3013 			ret = ibt_close_rc_channel(chan->chan_hdl,
3014 			    IBT_BLOCKING|IBT_NOCALLBACKS, NULL, 0, NULL, NULL,
3015 			    0);
3016 			if (ret != IBT_SUCCESS) {
3017 				DPRINT(40, "ibd_rc_pas_close: ibt_close_rc_"
3018 				    "channel() fail, chan=%p, ret=%d", chan,
3019 				    ret);
3020 			} else {
3021 				DPRINT(30, "ibd_rc_pas_close: ibt_close_rc_"
3022 				    "channel() succ, chan=%p", chan);
3023 			}
3024 		}
3025 		ibd_rc_free_chan(chan);
3026 		break;
3027 	case IBD_RC_STATE_PAS_REQ_RECV:
3028 		chan->chan_state = IBD_RC_STATE_PAS_CLOSED;
3029 		(void) ibt_flush_channel(chan->chan_hdl);
3030 		ibd_rc_free_chan(chan);
3031 		break;
3032 	default:
3033 		DPRINT(40, "ibd_rc_pas_close: default, chan_state=%d, chan=%p",
3034 		    chan->chan_state, chan);
3035 	}
3036 	return (DDI_SUCCESS);
3037 }
3038 
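/*
 * Editorial addition, not part of the original source: a caller
 * driving the timeout path of ibd_rc_pas_close() above would treat
 * DDI_FAILURE as "try again later", roughly:
 *
 *	if (ibd_rc_pas_close(chan, B_TRUE, B_TRUE) != DDI_SUCCESS) {
 *		chan is back on (or still on) the passive channel
 *		list; the connection reaper revisits it next pass
 *	}
 *
 * (hypothetical usage; see ibd_rc_conn_timeout_call() for the real
 * caller).
 */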
3039 /*
3040  * Passive Side:
3041  * Handle an incoming CM REQ from the active side.
3042  *
3043  * On success, this function allocates an ibd_rc_chan_t and
3044  * assigns it to "*ret_conn".
3045  */
3046 static ibt_cm_status_t
3047 ibd_rc_handle_req(void *arg, ibd_rc_chan_t **ret_conn,
3048     ibt_cm_event_t *ibt_cm_event, ibt_cm_return_args_t *ret_args,
3049     void *ret_priv_data)
3050 {
3051 	ibd_rc_msg_hello_t *hello_msg;
3052 	ibd_state_t *state = (ibd_state_t *)arg;
3053 	ibd_rc_chan_t *chan;
3054 
3055 	if (ibd_rc_alloc_chan(&chan, state, B_FALSE) != IBT_SUCCESS) {
3056 		DPRINT(40, "ibd_rc_handle_req: ibd_rc_alloc_chan() failed");
3057 		return (IBT_CM_REJECT);
3058 	}
3059 
3060 	ibd_rc_add_to_chan_list(&state->rc_pass_chan_list, chan);
3061 
3062 	ibt_set_chan_private(chan->chan_hdl, (void *)(uintptr_t)chan);
3063 
3064 	if (!state->rc_enable_srq) {
3065 		if (ibd_rc_init_rxlist(chan) != DDI_SUCCESS) {
3066 			ibd_rc_free_chan(chan);
3067 			DPRINT(40, "ibd_rc_handle_req: ibd_rc_init_rxlist() "
3068 			    "failed");
3069 			return (IBT_CM_REJECT);
3070 		}
3071 	}
3072 
3073 	ret_args->cm_ret.rep.cm_channel = chan->chan_hdl;
3074 
3075 	/* We don't do RDMA */
3076 	ret_args->cm_ret.rep.cm_rdma_ra_out = 0;
3077 	ret_args->cm_ret.rep.cm_rdma_ra_in = 0;
3078 
3079 	ret_args->cm_ret.rep.cm_rnr_retry_cnt = 7;
3080 	ret_args->cm_ret_len = sizeof (ibd_rc_msg_hello_t);
3081 
3082 	hello_msg = (ibd_rc_msg_hello_t *)ibt_cm_event->cm_priv_data;
3083 	DPRINT(30, "ibd_rc_handle_req(): peer qpn=0x%x, peer mtu=0x%x",
3084 	    ntohl(hello_msg->reserved_qpn), ntohl(hello_msg->rx_mtu));
3085 
3086 	hello_msg = (ibd_rc_msg_hello_t *)ret_priv_data;
3087 	hello_msg->reserved_qpn = htonl(state->id_qpnum);
3088 	hello_msg->rx_mtu = htonl(state->rc_mtu);
3089 
3090 	chan->chan_state = IBD_RC_STATE_PAS_REQ_RECV;	/* ready to receive */
3091 	*ret_conn = chan;
3092 
3093 	return (IBT_CM_ACCEPT);
3094 }
3095 
3096 /*
3097  * ibd_rc_handle_act_estab -- handler for connection-established
3098  * completion on the active side.
3099  */
3100 static ibt_cm_status_t
3101 ibd_rc_handle_act_estab(ibd_ace_t *ace)
3102 {
3103 	ibt_status_t result;
3104 
3105 	switch (ace->ac_chan->chan_state) {
3106 	case IBD_RC_STATE_ACT_REP_RECV:
3107 		ace->ac_chan->chan_state = IBD_RC_STATE_ACT_ESTAB;
3108 		result = ibt_enable_cq_notify(ace->ac_chan->rcq_hdl,
3109 		    IBT_NEXT_COMPLETION);
3110 		if (result != IBT_SUCCESS) {
3111 			DPRINT(40, "ibd_rc_handle_act_estab: "
3112 			    "ibt_enable_cq_notify(rcq) "
3113 			    "failed: status %d", result);
3114 			return (IBT_CM_REJECT);
3115 		}
3116 		break;
3117 	default:
3118 		DPRINT(40, "ibd_rc_handle_act_estab: default "
3119 		    "branch, act_state=%d", ace->ac_chan->chan_state);
3120 		return (IBT_CM_REJECT);
3121 	}
3122 	return (IBT_CM_ACCEPT);
3123 }
3124 
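/*
 * Editorial addition, not part of the original source: the two
 * *_estab handlers implement the final step of the CM state machines:
 *
 *	active:  INIT -> ACT_REP_RECV -> ACT_ESTAB
 *	passive: PAS_REQ_RECV -> PAS_ESTAB
 *
 * each arming the receive CQ exactly once when the connection is
 * established; any other starting state rejects the transition.
 */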
3125 /*
3126  * ibd_rc_handle_pas_estab -- handler for connection-established
3127  * completion on the passive side.
3128  */
3129 static ibt_cm_status_t
3130 ibd_rc_handle_pas_estab(ibd_rc_chan_t *chan)
3131 {
3132 	ibt_status_t result;
3133 
3134 	switch (chan->chan_state) {
3135 	case IBD_RC_STATE_PAS_REQ_RECV:
3136 		chan->chan_state = IBD_RC_STATE_PAS_ESTAB;
3137 
3138 		result = ibt_enable_cq_notify(chan->rcq_hdl,
3139 		    IBT_NEXT_COMPLETION);
3140 		if (result != IBT_SUCCESS) {
3141 			DPRINT(40, "ibd_rc_handle_pas_estab: "
3142 			    "ibt_enable_cq_notify(rcq) "
3143 			    "failed: status %d", result);
3144 			return (IBT_CM_REJECT);
3145 		}
3146 		break;
3147 	default:
3148 		DPRINT(40, "ibd_rc_handle_pas_estab: default "
3149 		    "branch, chan_state=%d", chan->chan_state);
3150 		return (IBT_CM_REJECT);
3151 	}
3152 	return (IBT_CM_ACCEPT);
3153 }
3154 
3155 /* ARGSUSED */
3156 static ibt_cm_status_t
3157 ibd_rc_dispatch_actv_mad(void *arg, ibt_cm_event_t *ibt_cm_event,
3158     ibt_cm_return_args_t *ret_args, void *ret_priv_data,
3159     ibt_priv_data_len_t ret_len_max)
3160 {
3161 	ibt_cm_status_t result = IBT_CM_ACCEPT;
3162 	ibd_ace_t *ace = (ibd_ace_t *)(uintptr_t)arg;
3163 	ibd_rc_chan_t *rc_chan;
3164 	ibd_state_t *state;
3165 	ibd_rc_msg_hello_t *hello_ack;
3166 
3167 	switch (ibt_cm_event->cm_type) {
3168 	case IBT_CM_EVENT_REP_RCV:
3169 		ASSERT(ace->ac_chan != NULL);
3170 		ASSERT(ace->ac_chan->chan_state == IBD_RC_STATE_INIT);
3171 		hello_ack = (ibd_rc_msg_hello_t *)ibt_cm_event->cm_priv_data;
3172 		DPRINT(30, "ibd_rc_dispatch_actv_mad: hello_ack->mtu=0x%x, "
3173 		    "hello_ack->qpn=0x%x", ntohl(hello_ack->rx_mtu),
3174 		    ntohl(hello_ack->reserved_qpn));
3175 		ace->ac_chan->chan_state = IBD_RC_STATE_ACT_REP_RECV;
3176 		break;
3177 
3178 	case IBT_CM_EVENT_CONN_EST:
3179 		ASSERT(ace->ac_chan != NULL);
3180 		DPRINT(30, "ibd_rc_dispatch_actv_mad: IBT_CM_EVENT_CONN_EST, "
3181 		    "ace=%p, act_state=%d, chan=%p",
3182 		    ace, ace->ac_chan->chan_state, ace->ac_chan);
3183 		result = ibd_rc_handle_act_estab(ace);
3184 		break;
3185 
3186 	case IBT_CM_EVENT_CONN_CLOSED:
3187 		rc_chan = ace->ac_chan;
3188 		if (rc_chan == NULL) {
3189 			DPRINT(40, "ibd_rc_dispatch_actv_mad: "
3190 			    "rc_chan==NULL, IBT_CM_EVENT_CONN_CLOSED");
3191 			return (IBT_CM_ACCEPT);
3192 		}
3193 		state = rc_chan->state;
3194 		mutex_enter(&state->id_ac_mutex);
3195 		if ((rc_chan->chan_state == IBD_RC_STATE_ACT_ESTAB) &&
3196 		    ((ace = ibd_acache_find(state, &ace->ac_mac, B_FALSE, 0))
3197 		    != NULL) && (ace == rc_chan->ace)) {
3198 			rc_chan->chan_state = IBD_RC_STATE_ACT_CLOSING;
3199 			ASSERT(ace->ac_mce == NULL);
3200 			INC_REF(ace, 1);
3201 			IBD_ACACHE_PULLOUT_ACTIVE(state, ace);
3202 			mutex_exit(&state->id_ac_mutex);
3203 			DPRINT(30, "ibd_rc_dispatch_actv_mad: "
3204 			    "IBT_CM_EVENT_CONN_CLOSED, ace=%p, chan=%p, "
3205 			    "reason=%d", ace, rc_chan,
3206 			    ibt_cm_event->cm_event.closed);
3207 		} else {
3208 			mutex_exit(&state->id_ac_mutex);
3209 			state->rc_act_close_simultaneous++;
3210 			DPRINT(40, "ibd_rc_dispatch_actv_mad: other thread "
3211 			    "is closing it, IBT_CM_EVENT_CONN_CLOSED, "
3212 			    "chan_state=%d", rc_chan->chan_state);
3213 			return (IBT_CM_ACCEPT);
3214 		}
3215 		ibd_rc_act_close(rc_chan, B_FALSE);
3216 		mutex_enter(&state->id_ac_mutex);
3217 		ace->ac_chan = NULL;
3218 		ASSERT(ace->ac_ref != 0);
3219 		atomic_dec_32(&ace->ac_ref);
3220 		if ((ace->ac_ref == 0) || (ace->ac_ref == CYCLEVAL)) {
3221 			IBD_ACACHE_INSERT_FREE(state, ace);
3222 			ace->ac_ref = 0;
3223 		} else {
3224 			ace->ac_ref |= CYCLEVAL;
3225 			state->rc_delay_ace_recycle++;
3226 		}
3227 		mutex_exit(&state->id_ac_mutex);
3228 		break;
3229 
3230 	case IBT_CM_EVENT_FAILURE:
3231 		DPRINT(30, "ibd_rc_dispatch_actv_mad: IBT_CM_EVENT_FAILURE, "
3232 		    "ace=%p, chan=%p, code: %d, msg: %d, reason=%d",
3233 		    ace, ace->ac_chan,
3234 		    ibt_cm_event->cm_event.failed.cf_code,
3235 		    ibt_cm_event->cm_event.failed.cf_msg,
3236 		    ibt_cm_event->cm_event.failed.cf_reason);
3237 		/*
3238 		 * No need to free resources here; they are freed in
3239 		 * ibd_rc_connect().
3240 		 */
3241 		break;
3242 
3243 	case IBT_CM_EVENT_MRA_RCV:
3244 		DPRINT(40, "ibd_rc_dispatch_actv_mad: IBT_CM_EVENT_MRA_RCV");
3245 		break;
3246 	case IBT_CM_EVENT_LAP_RCV:
3247 		DPRINT(40, "ibd_rc_dispatch_actv_mad: LAP message received");
3248 		break;
3249 	case IBT_CM_EVENT_APR_RCV:
3250 		DPRINT(40, "ibd_rc_dispatch_actv_mad: APR message received");
3251 		break;
3252 	default:
3253 		DPRINT(40, "ibd_rc_dispatch_actv_mad: default branch, "
3254 		    "ibt_cm_event->cm_type=%d", ibt_cm_event->cm_type);
3255 		break;
3256 	}
3257 
3258 	return (result);
3259 }
3260 
3261 /* ARGSUSED */
3262 static ibt_cm_status_t
3263 ibd_rc_dispatch_pass_mad(void *arg, ibt_cm_event_t *ibt_cm_event,
3264     ibt_cm_return_args_t *ret_args, void *ret_priv_data,
3265     ibt_priv_data_len_t ret_len_max)
3266 {
3267 	ibt_cm_status_t result = IBT_CM_ACCEPT;
3268 	ibd_rc_chan_t *chan;
3269 
3270 	if (ibt_cm_event->cm_type == IBT_CM_EVENT_REQ_RCV) {
3271 		DPRINT(30, "ibd_rc_dispatch_pass_mad: IBT_CM_EVENT_REQ_RCV, "
3272 		    "req_pkey=%x", ibt_cm_event->cm_event.req.req_pkey);
3273 		/* Receive an incoming CM REQ from active side */
3274 		result = ibd_rc_handle_req(arg, &chan, ibt_cm_event, ret_args,
3275 		    ret_priv_data);
3276 		return (result);
3277 	}
3278 
3279 	if (ibt_cm_event->cm_channel == 0) {
3280 		DPRINT(30, "ibd_rc_dispatch_pass_mad: "
3281 		    "ERROR ibt_cm_event->cm_channel == 0");
3282 		return (IBT_CM_REJECT);
3283 	}
3284 
3285 	chan =
3286 	    (ibd_rc_chan_t *)ibt_get_chan_private(ibt_cm_event->cm_channel);
3287 	if (chan == NULL) {
3288 		DPRINT(40, "ibd_rc_dispatch_pass_mad: chan == NULL");
3289 		return (IBT_CM_REJECT);
3290 	}
3291 
3292 	switch (ibt_cm_event->cm_type) {
3293 	case IBT_CM_EVENT_CONN_EST:
3294 		DPRINT(30, "ibd_rc_dispatch_pass_mad: IBT_CM_EVENT_CONN_EST, "
3295 		    "chan=%p", chan);
3296 		result = ibd_rc_handle_pas_estab(chan);
3297 		break;
3298 	case IBT_CM_EVENT_CONN_CLOSED:
3299 		DPRINT(30, "ibd_rc_dispatch_pass_mad: IBT_CM_EVENT_CONN_CLOSED,"
3300 		    " chan=%p, reason=%d", chan, ibt_cm_event->cm_event.closed);
3301 		chan = ibd_rc_rm_from_chan_list(&chan->state->rc_pass_chan_list,
3302 		    chan);
3303 		if (chan != NULL)
3304 			(void) ibd_rc_pas_close(chan, B_FALSE, B_FALSE);
3305 		break;
3306 	case IBT_CM_EVENT_FAILURE:
3307 		DPRINT(30, "ibd_rc_dispatch_pass_mad: IBT_CM_EVENT_FAILURE,"
3308 		    " chan=%p, code: %d, msg: %d, reason=%d", chan,
3309 		    ibt_cm_event->cm_event.failed.cf_code,
3310 		    ibt_cm_event->cm_event.failed.cf_msg,
3311 		    ibt_cm_event->cm_event.failed.cf_reason);
3312 		chan = ibd_rc_rm_from_chan_list(&chan->state->rc_pass_chan_list,
3313 		    chan);
3314 		if (chan != NULL)
3315 			(void) ibd_rc_pas_close(chan, B_FALSE, B_FALSE);
3316 		return (IBT_CM_ACCEPT);
3317 	case IBT_CM_EVENT_MRA_RCV:
3318 		DPRINT(40, "ibd_rc_dispatch_pass_mad: IBT_CM_EVENT_MRA_RCV");
3319 		break;
3320 	case IBT_CM_EVENT_LAP_RCV:
3321 		DPRINT(40, "ibd_rc_dispatch_pass_mad: LAP message received");
3322 		break;
3323 	case IBT_CM_EVENT_APR_RCV:
3324 		DPRINT(40, "ibd_rc_dispatch_pass_mad: APR message received");
3325 		break;
3326 	default:
3327 		DPRINT(40, "ibd_rc_dispatch_pass_mad: default, type=%d, "
3328 		    "chan=%p", ibt_cm_event->cm_type, chan);
3329 		break;
3330 	}
3331 
3332 	return (result);
3333 }
3334