1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #ifndef _SYS_IB_MGT_IBMF_IBMF_IMPL_H 27 #define _SYS_IB_MGT_IBMF_IBMF_IMPL_H 28 29 30 /* 31 * This file contains the IBMF implementation dependent structures and defines. 32 */ 33 34 #ifdef __cplusplus 35 extern "C" { 36 #endif 37 38 #include <sys/types.h> 39 #include <sys/conf.h> 40 #include <sys/modctl.h> 41 #include <sys/kmem.h> 42 #include <sys/ksynch.h> 43 #include <sys/taskq.h> 44 #include <sys/sunddi.h> 45 #include <sys/disp.h> 46 #include <sys/ib/ibtl/ibvti.h> 47 #include <sys/ib/mgt/ibmf/ibmf.h> 48 #include <sys/ib/mgt/ibmf/ibmf_rmpp.h> 49 #include <sys/ib/mgt/ibmf/ibmf_kstat.h> 50 #include <sys/ib/mgt/ibmf/ibmf_trace.h> 51 52 #define IBMF_MEM_PER_WQE (IBMF_MAD_SIZE + sizeof (ib_grh_t)) 53 #define IBMF_MAX_SQ_WRE 64 54 #define IBMF_MAX_RQ_WRE 64 55 #define IBMF_MAX_POSTED_RQ_PER_QP 512 56 #define IBMF_MAX_POSTED_SQ_PER_QP 512 57 #define IBMF_MAX_SQ_WR_SGL_ELEMENTS 1 58 #define IBMF_MAX_RQ_WR_SGL_ELEMENTS 1 59 #define IBMF_MGMT_Q_KEY 0x80010000 60 #define IBMF_P_KEY_DEF_FULL 0xFFFF 61 #define IBMF_P_KEY_DEF_LIMITED 0x7FFF 62 #define IBMF_P_KEY_BASE_MASK 0x7FFF 63 #define IBMF_PKEY_MEMBERSHIP_MASK 0x8000 64 65 #define IBMF_TASKQ_1THREAD 1 66 #define IBMF_TASKQ_NTHREADS 128 67 68 /* 69 * Work request ID format used for receive requests. 70 * 71 * bit 0 set to 1 72 */ 73 #define IBMF_RCV_CQE 0x1 74 75 /* 76 * Convenience macro used in the RMPP protocol to obtain R_Method field 77 * of MAD header with Response bit flipped. 78 */ 79 #define IBMF_FLIP_RESP_BIT(r_method) \ 80 (((r_method & 0x80) ^ 0x80) | (r_method & 0x7F)) 81 82 /* Work Request ID macros */ 83 #define IBMF_IS_RECV_WR_ID(id) \ 84 (((uint64_t)(id) & IBMF_RCV_CQE) ? B_TRUE : B_FALSE) 85 #define IBMF_IS_SEND_WR_ID(id) \ 86 (!(IBMF_IS_RECV_WR_ID((id)))) 87 88 /* Decrement IBMF message reference count */ 89 #define IBMF_MSG_DECR_REFCNT(msg) { \ 90 ASSERT(MUTEX_HELD(&(msg)->im_mutex)); \ 91 (msg)->im_ref_count--; \ 92 } 93 94 /* Increment IBMF message reference count */ 95 #define IBMF_MSG_INCR_REFCNT(msg) \ 96 (msg)->im_ref_count++; 97 98 /* Callback setup/cleanup macros */ 99 #define IBMF_RECV_CB_SETUP(clp) { \ 100 ASSERT(MUTEX_HELD(&(clp)->ic_mutex)); \ 101 (clp)->ic_flags |= IBMF_CLIENT_RECV_CB_ACTIVE; \ 102 (clp)->ic_recvs_active++; \ 103 mutex_enter(&(clp)->ic_kstat_mutex); \ 104 IBMF_ADD32_KSTATS((clp), recvs_active, 1); \ 105 mutex_exit(&(clp)->ic_kstat_mutex); \ 106 } 107 108 #define IBMF_RECV_CB_CLEANUP(clp) { \ 109 ASSERT(MUTEX_HELD(&(clp)->ic_mutex)); \ 110 (clp)->ic_recvs_active--; \ 111 mutex_enter(&(clp)->ic_kstat_mutex); \ 112 IBMF_SUB32_KSTATS((clp), recvs_active, 1); \ 113 mutex_exit(&(clp)->ic_kstat_mutex); \ 114 if ((clp)->ic_recvs_active == 0) \ 115 (clp)->ic_flags &= ~IBMF_CLIENT_RECV_CB_ACTIVE; \ 116 if ((((clp)->ic_flags & IBMF_CLIENT_RECV_CB_ACTIVE) == 0) && \ 117 (((clp)->ic_flags & IBMF_CLIENT_TEAR_DOWN_CB) != 0)) \ 118 cv_signal(&(clp)->ic_recv_cb_teardown_cv); \ 119 } 120 121 #define IBMF_ALT_RECV_CB_SETUP(altqp) { \ 122 ASSERT(MUTEX_HELD(&(altqp)->isq_mutex)); \ 123 (altqp)->isq_flags |= IBMF_CLIENT_RECV_CB_ACTIVE; \ 124 (altqp)->isq_recvs_active++; \ 125 mutex_enter(&(altqp)->isq_client_hdl->ic_kstat_mutex); \ 126 IBMF_ADD32_KSTATS((altqp)->isq_client_hdl, recvs_active, 1); \ 127 mutex_exit(&(altqp)->isq_client_hdl->ic_kstat_mutex); \ 128 } 129 130 #define IBMF_ALT_RECV_CB_CLEANUP(altqp) { \ 131 ASSERT(MUTEX_HELD(&(altqp)->isq_mutex)); \ 132 (altqp)->isq_recvs_active--; \ 133 mutex_enter(&(altqp)->isq_client_hdl->ic_kstat_mutex); \ 134 IBMF_SUB32_KSTATS((altqp)->isq_client_hdl, recvs_active, 1); \ 135 mutex_exit(&(altqp)->isq_client_hdl->ic_kstat_mutex); \ 136 if ((altqp)->isq_recvs_active == 0) \ 137 (altqp)->isq_flags &= ~IBMF_CLIENT_RECV_CB_ACTIVE; \ 138 if ((((altqp)->isq_flags & IBMF_CLIENT_RECV_CB_ACTIVE) == 0) && \ 139 (((altqp)->isq_flags & IBMF_CLIENT_TEAR_DOWN_CB) != 0)) \ 140 cv_signal(&(altqp)->isq_recv_cb_teardown_cv); \ 141 } 142 143 /* warlock annotations for ibmf.h and ibmf_msg.h structures */ 144 _NOTE(READ_ONLY_DATA(_ibmf_msg::im_msgbufs_send.im_bufs_cl_data 145 _ibmf_msg::im_msgbufs_send.im_bufs_cl_data_len 146 _ibmf_msg::im_msgbufs_send.im_bufs_cl_hdr 147 _ibmf_msg::im_msgbufs_send.im_bufs_cl_hdr_len 148 _ibmf_msg::im_msgbufs_send.im_bufs_mad_hdr 149 _ib_mad_hdr_t)) 150 151 /* 152 * WQE pool management contexts 153 */ 154 typedef struct _ibmf_wqe_mgt { 155 struct _ibmf_wqe_mgt *wqe_mgt_next; /* next wqe management entry */ 156 void *wqes_kmem; /* kmem allocated for WQEs */ 157 uint64_t wqes_kmem_sz; /* sizeof WQE kmem allocated */ 158 ib_vaddr_t wqes_ib_mem; /* Registered memory */ 159 ibt_lkey_t wqes_ib_lkey; /* Lkey that goes with it */ 160 ibt_mr_hdl_t wqes_ib_mem_hdl; /* IB mem handle */ 161 kmutex_t wqes_mutex; /* WQE mgt context mutex */ 162 } ibmf_wqe_mgt_t; 163 _NOTE(MUTEX_PROTECTS_DATA(ibmf_wqe_mgt_t::wqes_mutex, 164 ibmf_wqe_mgt_t::wqes_kmem 165 ibmf_wqe_mgt_t::wqes_kmem_sz 166 ibmf_wqe_mgt_t::wqes_ib_mem 167 ibmf_wqe_mgt_t::wqes_ib_lkey 168 ibmf_wqe_mgt_t::wqes_ib_mem_hdl)) 169 170 /* 171 * structure used to keep track of qp handles 172 */ 173 typedef struct _ibmf_qp_t { 174 struct _ibmf_qp_t *iq_next; /* next in the list */ 175 ibt_qp_hdl_t iq_qp_handle; /* qp handle from IB xport */ 176 int iq_port_num; /* port num for this qp */ 177 int iq_qp_num; /* qp num */ 178 int iq_qp_ref; /* no. of clients using this */ 179 uint_t iq_flags; /* for implementing state m/c */ 180 uint_t iq_rwqes_posted; /* posted receive wqes */ 181 kmutex_t iq_mutex; /* mutex for some fields */ 182 } ibmf_qp_t; 183 _NOTE(READ_ONLY_DATA(ibmf_qp_t::iq_port_num ibmf_qp_t::iq_qp_handle)) 184 _NOTE(MUTEX_PROTECTS_DATA(ibmf_qp_t::iq_mutex, 185 ibmf_qp_t::iq_rwqes_posted)) 186 187 /* defines for iq_flags */ 188 #define IBMF_QP_FLAGS_INVALID 0x0001 189 #define IBMF_QP_FLAGS_INITING 0x0002 190 #define IBMF_QP_FLAGS_INITED 0x0004 191 #define IBMF_QP_FLAGS_UNINITING 0x0008 192 193 /* 194 * structure used to keep track of qp handles for qps other than 195 * the special qps 196 */ 197 typedef struct _ibmf_alt_qp_t { 198 struct _ibmf_alt_qp_t *isq_next; /* next qp ctx on list */ 199 ibt_qp_hdl_t isq_qp_handle; /* qp handle from IB xport */ 200 ibt_chan_sizes_t isq_qp_sizes; /* qp sizes returned by alloc */ 201 struct _ibmf_client *isq_client_hdl; /* associated client handle */ 202 ibmf_msg_cb_t isq_recv_cb; /* recv callback for this qp */ 203 void *isq_recv_cb_arg; /* arg for recv cb */ 204 kcondvar_t isq_recv_cb_teardown_cv; /* wait on teardown */ 205 kmutex_t isq_mutex; /* qp context mutex */ 206 int isq_flags; /* to keep track of state */ 207 int isq_sends_active; /* outstanding sends */ 208 int isq_recvs_active; /* outstanding recvs */ 209 ib_qpn_t isq_qpn; /* qp number */ 210 ib_pkey_t isq_pkey; /* qp's partition key */ 211 ib_qkey_t isq_qkey; /* qp's queue keye */ 212 int isq_port_num; /* port num for this qp */ 213 boolean_t isq_supports_rmpp; /* qp supports rmpp */ 214 kcondvar_t isq_sqd_cv; /* wait on SQD event */ 215 int isq_wqes_alloced; /* wqes allocated for QP */ 216 kcondvar_t isq_wqes_cv; /* wait on wqes destruction */ 217 uint_t isq_rwqes_posted; /* posted receive wqes */ 218 219 /* Manage Send/Receive WQEs for Special QPs */ 220 struct kmem_cache *isq_send_wqes_cache; /* Send WQE cache */ 221 struct kmem_cache *isq_recv_wqes_cache; /* Receive WQE cache */ 222 vmem_t *isq_wqe_ib_vmem; /* IB virtual address arena */ 223 kmutex_t isq_wqe_mutex; /* WQE management list mutex */ 224 ibmf_wqe_mgt_t *isq_wqe_mgt_list; /* WQE management list */ 225 } ibmf_alt_qp_t; 226 _NOTE(MUTEX_PROTECTS_DATA(ibmf_alt_qp_t::isq_mutex, 227 ibmf_alt_qp_t::isq_sends_active 228 ibmf_alt_qp_t::isq_recvs_active 229 ibmf_alt_qp_t::isq_pkey 230 ibmf_alt_qp_t::isq_qkey 231 ibmf_alt_qp_t::isq_recv_cb 232 ibmf_alt_qp_t::isq_recv_cb_arg 233 ibmf_alt_qp_t::isq_flags 234 ibmf_alt_qp_t::isq_rwqes_posted)) 235 _NOTE(MUTEX_PROTECTS_DATA(ibmf_alt_qp_t::isq_wqe_mutex, 236 ibmf_alt_qp_t::isq_wqe_mgt_list)) 237 _NOTE(READ_ONLY_DATA(ibmf_alt_qp_t::isq_port_num)) 238 239 #define IBMF_MSG_FLAGS_QUEUED 0x00001000 /* in the ib xport */ 240 #define IBMF_MSG_FLAGS_DONE 0x00002000 /* xport done */ 241 #define IBMF_MSG_FLAGS_BLOCKING 0x00004000 /* sync command */ 242 243 /* 244 * This structure is used to keep track of IBT returned ibt_ud_dest_t 245 * structures. 246 */ 247 typedef struct ibmf_ud_dest_s { 248 ibt_ud_dest_t ud_dest; 249 struct ibmf_ud_dest_s *ud_next; 250 } ibmf_ud_dest_t; 251 252 /* 253 * ibmf_msg_impl definition 254 * The IBMF client initializes various members of the msg while sending 255 * the message. IBMF fills in the various members of the msg when a message 256 * is received. 257 */ 258 typedef struct _ibmf_msg_impl { 259 ibmf_addr_info_t im_local_addr; /* local addressing info */ 260 ibmf_global_addr_info_t im_global_addr; /* global addressing info */ 261 int32_t im_msg_status; /* completion status */ 262 uint32_t im_msg_flags; /* flags */ 263 size_t im_msg_sz_limit; /* max. message size */ 264 ibmf_msg_bufs_t im_msgbufs_send; /* input data to ibmf */ 265 ibmf_msg_bufs_t im_msgbufs_recv; /* output data from ibmf */ 266 struct _ibmf_msg_impl *im_msg_next; /* next message on the list */ 267 struct _ibmf_msg_impl *im_msg_prev; /* prev message on the list */ 268 void *im_client; /* client that allocd the pkt */ 269 ibmf_qp_handle_t im_qp_hdl; /* qp handle */ 270 ibt_ud_dest_t *im_ud_dest; /* ptr to the pkt's ud_dest */ 271 ibmf_ud_dest_t *im_ibmf_ud_dest; /* ptr to the pkt's ud_dest */ 272 ibmf_msg_cb_t im_trans_cb; /* transaction completion cb */ 273 void *im_trans_cb_arg; /* arg for completion cb */ 274 uint64_t im_tid; /* transaction ID */ 275 uint8_t im_mgt_class; /* management class */ 276 kmutex_t im_mutex; /* protects trans context */ 277 uint32_t im_state; /* message state */ 278 uint32_t im_transp_op_flags; /* transaction operation */ 279 uint32_t im_flags; /* message flags */ 280 uint32_t im_trans_state_flags; /* state flags */ 281 kcondvar_t im_trans_cv; /* wait for op completion */ 282 ibmf_rmpp_ctx_t im_rmpp_ctx; /* RMPP context */ 283 ibmf_retrans_t im_retrans; /* retransmission info */ 284 timeout_id_t im_rp_timeout_id; /* response timeout ID */ 285 timeout_id_t im_tr_timeout_id; /* transaction timeout ID */ 286 timeout_id_t im_rp_unset_timeout_id; /* id for untimeout() */ 287 timeout_id_t im_tr_unset_timeout_id; /* id for untimeout() */ 288 int im_ref_count; /* reference count */ 289 boolean_t im_unsolicited; /* msg was unsolicited recv */ 290 int im_pending_send_compls; /* send completions */ 291 } ibmf_msg_impl_t; 292 _NOTE(READ_ONLY_DATA(ibmf_msg_impl_t::im_trans_cb 293 ibmf_msg_impl_t::im_trans_cb_arg 294 ibmf_msg_impl_t::im_transp_op_flags 295 ibmf_msg_impl_t::im_local_addr 296 ibmf_msg_impl_t::im_unsolicited 297 ibmf_msg_impl_t::im_client)) 298 _NOTE(MUTEX_PROTECTS_DATA(ibmf_msg_impl_t::im_mutex, 299 ibmf_msg_impl_t::im_flags 300 ibmf_msg_impl_t::im_trans_state_flags 301 ibmf_msg_impl_t::im_msgbufs_recv 302 ibmf_msg_impl_t::im_msg_status 303 ibmf_msg_impl_t::im_rmpp_ctx)) 304 305 /* im_flags */ 306 #define IBMF_MSG_FLAGS_SEQUENCED 0x1 307 #define IBMF_MSG_FLAGS_SEND_RMPP 0x2 308 #define IBMF_MSG_FLAGS_RECV_RMPP 0x4 309 #define IBMF_MSG_FLAGS_NOT_RMPP 0x8 310 #define IBMF_MSG_FLAGS_BUSY 0x10 311 #define IBMF_MSG_FLAGS_FREE 0x20 312 #define IBMF_MSG_FLAGS_ON_LIST 0x40 313 #define IBMF_MSG_FLAGS_SET_TERMINATION 0x80 314 #define IBMF_MSG_FLAGS_TERMINATION 0x100 315 316 /* retransmission parameter defaults for im_retrans field */ 317 #define IBMF_RETRANS_DEF_RTV 4000000 /* 4 seconds */ 318 #define IBMF_RETRANS_DEF_RTTV 100000 /* 100 milliseconds */ 319 #define IBMF_RETRANS_DEF_TRANS_TO 40000000 /* 40 seconds */ 320 #define IBMF_RETRANS_DEF_RETRIES 0 321 322 /* 323 * Transaction state flags (im_trans_state_flags) definitions 324 * Don't use 0x0 as a flag value since clients OR and AND the flags 325 */ 326 #define IBMF_TRANS_STATE_FLAG_UNINIT 0x1 327 #define IBMF_TRANS_STATE_FLAG_INIT 0x2 328 #define IBMF_TRANS_STATE_FLAG_WAIT 0x4 329 #define IBMF_TRANS_STATE_FLAG_DONE 0x8 330 #define IBMF_TRANS_STATE_FLAG_SIGNALED 0x10 331 #define IBMF_TRANS_STATE_FLAG_TIMEOUT 0x20 332 #define IBMF_TRANS_STATE_FLAG_RECV_ACTIVE 0x40 333 #define IBMF_TRANS_STATE_FLAG_RECV_DONE 0x80 334 #define IBMF_TRANS_STATE_FLAG_SEND_DONE 0x100 335 336 /* Timer types */ 337 typedef enum _ibmf_timer_t { 338 IBMF_RESP_TIMER = 1, 339 IBMF_TRANS_TIMER = 2 340 } ibmf_timer_t; 341 342 /* 343 * structure to hold specific client info taken from ibmf_register_info_t 344 * since we can register for more than one client at a time, but each specific 345 * ibmf_client_t only holds one client itself. 346 */ 347 typedef struct _ibmf_client_info { 348 ib_guid_t ci_guid; 349 uint_t port_num; 350 ibmf_client_type_t client_class; 351 } ibmf_client_info_t; 352 353 /* 354 * Defines for the client type (agent/manager/agent+manager) 355 * Bits 16-19 of the client_class specify the client type. 356 */ 357 #define IBMF_AGENT_ID 0x00010000 358 #define IBMF_MANAGER_ID 0x00020000 359 #define IBMF_AGENT_MANAGER_ID 0x00030000 360 361 /* 362 * structure used to keep track of clients 363 */ 364 typedef struct _ibmf_client { 365 void *ic_client_sig; /* set for valid handles */ 366 struct _ibmf_ci *ic_myci; /* pointer to CI */ 367 struct _ibmf_client *ic_next; /* next client on list */ 368 struct _ibmf_client *ic_prev; /* previous client on list */ 369 370 taskq_t *ic_send_taskq; /* taskq for send cb */ 371 taskq_t *ic_recv_taskq; /* taskq for receive cb */ 372 uint_t ic_init_state_class; /* taskq initialization */ 373 374 ibmf_msg_impl_t *ic_msg_list; /* protected by ic_mutex */ 375 ibmf_msg_impl_t *ic_msg_last; /* last message on list */ 376 ibmf_msg_impl_t *ic_term_msg_list; /* termination loop mesgs */ 377 ibmf_msg_impl_t *ic_term_msg_last; /* last message on list */ 378 kmutex_t ic_msg_mutex; /* protect the message list */ 379 380 /* IBTL asynchronous event callback (eg. HCA offline) */ 381 ibmf_async_event_cb_t ic_async_cb; /* async/unsolicited handling */ 382 void *ic_async_cb_arg; /* args for async cb */ 383 384 /* Asynchronous/Unsolicited message handler */ 385 ibmf_msg_cb_t ic_recv_cb; 386 void *ic_recv_cb_arg; 387 kcondvar_t ic_recv_cb_teardown_cv; /* wait on teardown */ 388 389 ibmf_client_info_t ic_client_info; /* client registration info */ 390 ibmf_qp_t *ic_qp; /* special qp context */ 391 ibt_hca_hdl_t ic_ci_handle; /* == ic_myci->ic_ci_handle */ 392 kmutex_t ic_mutex; /* prot the client struct */ 393 int ic_flags; /* to keep track of state */ 394 int ic_reg_flags; /* flags specified during */ 395 /* registration */ 396 397 /* Statistics */ 398 int ic_msgs_alloced; /* no. msgs alloced by/for */ 399 int ic_msgs_active; /* no. msgs active */ 400 int ic_trans_active; /* outstanding transacts */ 401 int ic_sends_active; /* outstanding sends */ 402 int ic_recvs_active; /* outstanding recvs */ 403 404 ib_lid_t ic_base_lid; /* used to calculate pathbits */ 405 kmutex_t ic_kstat_mutex; /* protect the kstat */ 406 struct kstat *ic_kstatp; /* kstats for client */ 407 } ibmf_client_t; 408 _NOTE(READ_ONLY_DATA(ibmf_client_t::ic_ci_handle 409 ibmf_client_t::ic_client_info 410 ibmf_client_t::ic_client_sig)) 411 _NOTE(MUTEX_PROTECTS_DATA(ibmf_client_t::ic_msg_mutex, 412 ibmf_client_t::ic_msg_list 413 ibmf_client_t::ic_msg_last 414 ibmf_client_t::ic_term_msg_list 415 ibmf_client_t::ic_term_msg_last)) 416 _NOTE(MUTEX_PROTECTS_DATA(ibmf_client_t::ic_mutex, 417 ibmf_client_t::ic_msgs_alloced 418 ibmf_client_t::ic_flags 419 ibmf_client_t::ic_recv_cb 420 ibmf_client_t::ic_recv_cb_arg)) 421 _NOTE(MUTEX_PROTECTS_DATA(ibmf_client_t::ic_kstat_mutex, 422 ibmf_client_t::ic_kstatp)) 423 424 #define IBMF_CLIENT_RECV_CB_ACTIVE 0x00000001 /* rcv CB active */ 425 #define IBMF_CLIENT_SEND_CB_ACTIVE 0x00000010 /* send CB active */ 426 #define IBMF_CLIENT_TEAR_DOWN_CB 0x00000100 /* client wants to */ 427 /* remove recv_cb */ 428 429 /* IBMF_MAD_ONLY is used by the alternate QP context only (isq_flags) */ 430 #define IBMF_MAD_ONLY 0x00002000 431 #define IBMF_RAW_ONLY 0x00004000 432 433 #define IBMF_REG_MSG_LIST 0 434 #define IBMF_TERM_MSG_LIST 1 435 436 /* 437 * Send WQE context 438 */ 439 typedef struct _ibmf_send_wqe { 440 struct _ibmf_send_wqe *send_wqe_next; 441 ibt_send_wr_t send_wr; /* IBT send work request */ 442 ibmf_client_t *send_client; /* client that sent this */ 443 void *send_mem; /* memory used in send */ 444 ib_vaddr_t send_sg_mem; /* registered memory */ 445 ibt_lkey_t send_sg_lkey; /* Lkey that goes with it */ 446 ibt_mr_hdl_t send_mem_hdl; /* == ci_send_mr_handle in ci */ 447 uint_t send_wqe_flags; 448 uchar_t send_port_num; /* port this is posted to */ 449 ibt_qp_hdl_t send_qp_handle; /* qp handle for this wqe */ 450 ibmf_qp_handle_t send_ibmf_qp_handle; /* ibmf qp handle */ 451 ibmf_msg_impl_t *send_msg; /* message context */ 452 uint32_t send_status; /* completion status */ 453 uint32_t send_rmpp_segment; /* rmpp segment */ 454 } ibmf_send_wqe_t; 455 456 /* 457 * Receive WQE context 458 */ 459 typedef struct _ibmf_recv_wqe { 460 struct _ibmf_recv_wqe *recv_wqe_next; 461 ibt_recv_wr_t recv_wr; 462 ibmf_client_t *recv_client; /* client that received this */ 463 void *recv_mem; /* memory used in WQEs */ 464 ibmf_qp_t *recv_qpp; /* qp this is posted */ 465 ibt_wc_t recv_wc; /* corresponding cqe */ 466 ib_vaddr_t recv_sg_mem; /* registered mem */ 467 ibt_lkey_t recv_sg_lkey; /* Lkey that goes with it */ 468 ibt_mr_hdl_t recv_mem_hdl; /* == ci_recv_mr_handle in ci */ 469 uint_t recv_wqe_flags; 470 uchar_t recv_port_num; /* port this is posted to */ 471 ibt_qp_hdl_t recv_qp_handle; /* ibt qp handle for this wqe */ 472 ibmf_qp_handle_t recv_ibmf_qp_handle; /* ibmf qp handle */ 473 ibmf_msg_impl_t *recv_msg; /* message context */ 474 } ibmf_recv_wqe_t; 475 476 #define IBMF_RECV_WQE_FREE 0x00000001 /* WQE is free */ 477 478 /* 479 * Struct that keeps track of the underlying IB channel interface. There 480 * is one per CI. Each clients on a given ci gets a reference to the CI. 481 * References are tracked used ci_ref field; when ci_ref drops to 0, the 482 * structure can be freed. 483 */ 484 typedef struct _ibmf_ci { 485 struct _ibmf_ci *ci_next; 486 kmutex_t ci_mutex; /* protects the CI struct */ 487 ibmf_client_t *ci_clients; /* list of clients;head */ 488 ibmf_client_t *ci_clients_last; /* tail */ 489 kmutex_t ci_clients_mutex; /* protect the client list */ 490 ib_guid_t ci_node_guid; /* node GUID */ 491 ibt_hca_hdl_t ci_ci_handle; /* HCA handle */ 492 ibt_pd_hdl_t ci_pd; /* protection domain */ 493 ibmf_qp_t *ci_qp_list; /* sp. QP list for all ports */ 494 ibmf_qp_t *ci_qp_list_tail; 495 kcondvar_t ci_qp_cv; /* wait for QP valid state */ 496 ibt_cq_hdl_t ci_cq_handle; /* CQ handle for sp. QPs */ 497 ibt_cq_hdl_t ci_alt_cq_handle; /* CQ handle for alt. QPs */ 498 ibmf_alt_qp_t *ci_alt_qp_list; /* alternate QP list */ 499 500 /* UD destination resources */ 501 uint32_t ci_ud_dest_list_count; /* resources in pool */ 502 kmutex_t ci_ud_dest_list_mutex; /* UD dest list mutex */ 503 ibmf_ud_dest_t *ci_ud_dest_list_head; /* start of list */ 504 505 /* Send/Receive WQEs for Special QPs */ 506 struct kmem_cache *ci_send_wqes_cache; /* Send WQE cache */ 507 struct kmem_cache *ci_recv_wqes_cache; /* Receive WQE cache */ 508 vmem_t *ci_wqe_ib_vmem; /* IB virtual address arena */ 509 kmutex_t ci_wqe_mutex; /* WQE management list mutex */ 510 ibmf_wqe_mgt_t *ci_wqe_mgt_list; /* WQE management list */ 511 512 uint_t ci_nports; /* num ports on the CI */ 513 uint32_t ci_vendor_id:24; /* HCA vendor ID */ 514 uint16_t ci_device_id; /* HCA device ID */ 515 uint_t ci_ref; /* reference count */ 516 uint16_t ci_state; /* CI context state */ 517 uint16_t ci_state_flags; /* CI context state flags */ 518 kcondvar_t ci_state_cv; /* wait on a state change */ 519 uint_t ci_init_state; /* used in cleanup */ 520 521 /* free QP synchronization with WQE completion processing */ 522 int ci_wqes_alloced; /* wqes alloced for sp QPs */ 523 kcondvar_t ci_wqes_cv; /* wait on wqes destruction */ 524 525 /* port kstats */ 526 struct kstat *ci_port_kstatp; /* kstats for client */ 527 } ibmf_ci_t; 528 _NOTE(MUTEX_PROTECTS_DATA(ibmf_ci_t::ci_ud_dest_list_mutex, 529 ibmf_ci_t::ci_ud_dest_list_count 530 ibmf_ci_t::ci_ud_dest_list_head)) 531 _NOTE(MUTEX_PROTECTS_DATA(ibmf_ci_t::ci_mutex, 532 ibmf_ci_t::ci_state 533 ibmf_ci_t::ci_port_kstatp)) 534 _NOTE(MUTEX_PROTECTS_DATA(ibmf_ci_t::ci_clients_mutex, 535 ibmf_ci_t::ci_clients 536 ibmf_ci_t::ci_clients_last)) 537 _NOTE(MUTEX_PROTECTS_DATA(ibmf_ci_t::ci_mutex, 538 ibmf_qp_t::iq_next 539 ibmf_qp_t::iq_flags)) 540 _NOTE(MUTEX_PROTECTS_DATA(ibmf_ci_t::ci_wqe_mutex, 541 ibmf_ci_t::ci_wqe_mgt_list)) 542 _NOTE(READ_ONLY_DATA(ibmf_ci_t::ci_cq_handle)) 543 544 #define IBMF_CI_BLOCKED_ON_SEND_WQE 0x00000001 /* blockers on wqe */ 545 546 /* defines for ci_init_state */ 547 #define IBMF_CI_INIT_HCA_INITED 0x0001 548 #define IBMF_CI_INIT_MUTEX_CV_INITED 0x0002 549 #define IBMF_CI_INIT_SEND_TASKQ_DONE 0x0004 550 #define IBMF_CI_INIT_RECV_TASKQ_DONE 0x0008 551 #define IBMF_CI_INIT_CQ_INITED 0x0010 552 #define IBMF_CI_INIT_WQES_ALLOCED 0x0020 553 #define IBMF_CI_INIT_HCA_LINKED 0x0040 554 #define IBMF_CI_INIT_QP_LIST_INITED 0x0080 555 556 /* defines for ci_state */ 557 #define IBMF_CI_STATE_PRESENT 0x0001 558 #define IBMF_CI_STATE_INITED 0x0002 559 #define IBMF_CI_STATE_GONE 0x0003 560 561 /* defines for ci_state_flags */ 562 #define IBMF_CI_STATE_INIT_WAIT 0x0001 563 #define IBMF_CI_STATE_UNINIT_WAIT 0x0002 564 #define IBMF_CI_STATE_VALIDATE_WAIT 0x0004 565 566 #define IBMF_CI_STATE_INVALIDATING 0x0100 567 #define IBMF_CI_STATE_VALIDATING 0x0200 568 #define IBMF_CI_STATE_UNINITING 0x0400 569 #define IBMF_CI_STATE_INITING 0x0800 570 571 /* 572 * for keeping track of ibmf state 573 */ 574 typedef struct _ibmf_state { 575 struct _ibmf_ci *ibmf_ci_list; 576 struct _ibmf_ci *ibmf_ci_list_tail; 577 ibt_clnt_hdl_t ibmf_ibt_handle; 578 ibt_cq_handler_t ibmf_cq_handler; 579 kmutex_t ibmf_mutex; 580 ibt_clnt_modinfo_t ibmf_ibt_modinfo; 581 taskq_t *ibmf_taskq; /* taskq for MAD processing */ 582 /* for classes not registered */ 583 } ibmf_state_t; 584 _NOTE(MUTEX_PROTECTS_DATA(ibmf_state_t::ibmf_mutex, 585 ibmf_ci_t::ci_next)) 586 587 /* UD Destination resource cache definitions */ 588 /* 589 * It is preferred that the difference between the hi and lo water 590 * marks be only a few ud_dest resources. The intent is that a 591 * thread that needs to run ibmf_i_populate_ud_dest_list() does not 592 * spend too much time in this ud_dest resource population process 593 * before it returns to its caller. A benefit of a higher lo water 594 * mark is that the larger available pool of resources supports high 595 * stress scenarios better. 596 */ 597 #define IBMF_UD_DEST_HI_WATER_MARK 512 598 #define IBMF_UD_DEST_LO_WATER_MARK 500 599 600 /* 601 * Prototypes 602 */ 603 /* ci related functions */ 604 int ibmf_i_validate_ci_guid_and_port(ib_guid_t hca_guid, uint8_t port_num); 605 int ibmf_i_get_ci(ibmf_register_info_t *client_infop, ibmf_ci_t **cipp); 606 void ibmf_i_release_ci(ibmf_ci_t *cip); 607 608 /* client related functions */ 609 int ibmf_i_validate_classes_and_port(ibmf_ci_t *ibmf_cip, 610 ibmf_register_info_t *client_infop); 611 int ibmf_i_validate_class_mask(ibmf_register_info_t *client_infop); 612 int ibmf_i_alloc_client(ibmf_register_info_t *client_infop, uint_t flags, 613 ibmf_client_t **clientpp); 614 void ibmf_i_add_client(ibmf_ci_t *ibmf_ci, ibmf_client_t *ibmf_clientp); 615 616 void ibmf_i_free_client(ibmf_client_t *clientp); 617 void ibmf_i_delete_client(ibmf_ci_t *ibmf_ci, ibmf_client_t *ibmf_clientp); 618 int ibmf_i_lookup_client_by_mgmt_class(ibmf_ci_t *ibmf_cip, int port_num, 619 ibmf_client_type_t class, ibmf_client_t **clientpp); 620 621 /* qp related functions */ 622 int ibmf_i_get_qp(ibmf_ci_t *ibmf_cip, uint_t port_num, 623 ibmf_client_type_t class, ibmf_qp_t **qppp); 624 void ibmf_i_release_qp(ibmf_ci_t *ibmf_cip, ibmf_qp_t **qpp); 625 int ibmf_i_alloc_qp(ibmf_client_t *clientp, ib_pkey_t p_key, 626 ib_qkey_t q_key, uint_t flags, ibmf_qp_handle_t *ibmf_qp_handlep); 627 int ibmf_i_free_qp(ibmf_qp_handle_t ibmf_qp_handle, uint_t flags); 628 int ibmf_i_query_qp(ibmf_qp_handle_t ibmf_qp_handle, uint_t flags, 629 uint_t *qp_nump, ib_pkey_t *p_keyp, ib_qkey_t *q_keyp, uint8_t *portnump); 630 int ibmf_i_modify_qp(ibmf_qp_handle_t ibmf_qp_handle, ib_pkey_t p_key, 631 ib_qkey_t q_key, uint_t flags); 632 int ibmf_i_get_pkeyix(ibt_hca_hdl_t hca_handle, ib_pkey_t pkey, 633 uint8_t port, ib_pkey_t *pkeyixp); 634 int ibmf_i_pkey_ix_to_key(ibmf_ci_t *cip, uint_t port_num, uint_t pkey_ix, 635 ib_pkey_t *pkeyp); 636 637 /* pkt related functions */ 638 int ibmf_i_issue_pkt(ibmf_client_t *clientp, ibmf_msg_impl_t *msgp, 639 ibmf_qp_handle_t ibmf_qp_handle, ibmf_send_wqe_t *send_wqep); 640 int ibmf_i_alloc_ud_dest(ibmf_client_t *clientp, 641 ibmf_msg_impl_t *msgimplp, ibt_ud_dest_hdl_t *ud_dest_p, boolean_t block); 642 void ibmf_i_free_ud_dest(ibmf_client_t *clientp, 643 ibmf_msg_impl_t *msgimplp); 644 void ibmf_i_init_ud_dest(ibmf_ci_t *cip); 645 void ibmf_i_fini_ud_dest(ibmf_ci_t *cip); 646 ibmf_ud_dest_t *ibmf_i_get_ud_dest(ibmf_ci_t *cip); 647 void ibmf_i_put_ud_dest(ibmf_ci_t *cip, ibmf_ud_dest_t *ud_dest); 648 void ibmf_i_pop_ud_dest_thread(void *argp); 649 void ibmf_i_clean_ud_dest_list(ibmf_ci_t *cip, boolean_t all); 650 int ibmf_i_alloc_send_resources(ibmf_ci_t *cip, ibmf_msg_impl_t *msgp, 651 boolean_t block, ibmf_send_wqe_t **swqepp); 652 void ibmf_i_free_send_resources(ibmf_ci_t *cip, ibmf_msg_impl_t *msgimplp, 653 ibmf_send_wqe_t *swqep); 654 int ibmf_i_post_recv_buffer(ibmf_ci_t *cip, ibmf_qp_t *qpp, boolean_t block, 655 ibmf_qp_handle_t ibmf_qp_handle); 656 int ibmf_i_is_ibmf_handle_valid(ibmf_handle_t ibmf_handle); 657 int ibmf_i_is_qp_handle_valid(ibmf_handle_t ibmf_handle, 658 ibmf_qp_handle_t ibmf_qp_handle); 659 int ibmf_i_check_for_loopback(ibmf_msg_impl_t *msgimplp, ibmf_msg_cb_t msgp, 660 void *msg_cb_args, ibmf_retrans_t *retrans, boolean_t *loopback); 661 int ibmf_i_ibt_to_ibmf_status(ibt_status_t ibt_status); 662 int ibmf_i_ibt_wc_to_ibmf_status(ibt_wc_status_t ibt_wc_status); 663 int ibmf_i_send_pkt(ibmf_client_t *clientp, ibmf_qp_handle_t ibmf_qp_handle, 664 ibmf_msg_impl_t *msgimplp, int block); 665 int ibmf_i_send_single_pkt(ibmf_client_t *clientp, 666 ibmf_qp_handle_t ibmf_qp_handle, ibmf_msg_impl_t *msgimplp, int block); 667 668 /* WQE related functions */ 669 int ibmf_i_init_wqes(ibmf_ci_t *cip); 670 void ibmf_i_fini_wqes(ibmf_ci_t *cip); 671 void ibmf_i_init_send_wqe(ibmf_client_t *clientp, 672 ibmf_msg_impl_t *msgimplp, ibt_wr_ds_t *sglp, ibmf_send_wqe_t *wqep, 673 ibt_ud_dest_hdl_t ud_dest, ibt_qp_hdl_t ibt_qp_handle, 674 ibmf_qp_handle_t ibmf_qp_handle); 675 void ibmf_i_init_recv_wqe(ibmf_qp_t *qpp, ibt_wr_ds_t *sglp, 676 ibmf_recv_wqe_t *wqep, ibt_qp_hdl_t ibt_qp_handle, 677 ibmf_qp_handle_t ibmf_qp_handle); 678 void ibmf_i_mad_completions(ibt_cq_hdl_t cq_handle, void *arg); 679 #ifdef DEBUG 680 void ibmf_i_dump_wcp(ibmf_ci_t *cip, ibt_wc_t *wcp, ibmf_recv_wqe_t *recv_wqep); 681 #endif 682 683 void ibmf_ibt_async_handler(void *clnt_private, ibt_hca_hdl_t hca_hdl, 684 ibt_async_code_t code, ibt_async_event_t *event); 685 686 /* msg related functions */ 687 void ibmf_i_init_msg(ibmf_msg_impl_t *msgimplp, ibmf_msg_cb_t trans_cb, 688 void *trans_cb_arg, ibmf_retrans_t *retrans, boolean_t block); 689 void ibmf_i_client_add_msg(ibmf_client_t *clientp, ibmf_msg_impl_t *msgimplp); 690 void ibmf_i_client_rem_msg(ibmf_client_t *clientp, ibmf_msg_impl_t *msgimplp, 691 uint_t *refcnt); 692 int ibmf_i_alloc_msg(ibmf_client_t *clientp, ibmf_msg_impl_t **msgp, 693 int km_flags); 694 void ibmf_i_free_msg(ibmf_msg_impl_t *msgimplp); 695 int ibmf_i_msg_transport(ibmf_client_t *clientp, 696 ibmf_qp_handle_t ibmf_qp_handle, ibmf_msg_impl_t *msgimplp, int blocking); 697 void ibmf_i_decrement_ref_count(ibmf_msg_impl_t *msgimplp); 698 void ibmf_i_handle_send_completion(ibmf_ci_t *cip, ibt_wc_t *wcp); 699 void ibmf_i_handle_recv_completion(ibmf_ci_t *cip, ibt_wc_t *wcp); 700 int ibmf_setup_recvbuf_on_error(ibmf_msg_impl_t *msgimplp, uchar_t *mad); 701 702 /* transaction related functions */ 703 void ibmf_i_terminate_transaction(ibmf_client_t *clientp, 704 ibmf_msg_impl_t *msgimplp, uint32_t status); 705 void ibmf_i_notify_client(ibmf_msg_impl_t *msgimplp); 706 void ibmf_i_notify_sequence(ibmf_client_t *clientp, ibmf_msg_impl_t *msgimplp, 707 int msg_flags); 708 709 /* timer related functions */ 710 void ibmf_i_set_timer(void (*func)(void *), ibmf_msg_impl_t *msgimplp, 711 ibmf_timer_t type); 712 void ibmf_i_unset_timer(ibmf_msg_impl_t *msgimplp, ibmf_timer_t type); 713 void ibmf_i_recv_timeout(void *argp); 714 void ibmf_i_send_timeout(void *argp); 715 void ibmf_i_err_terminate_timeout(void *msgp); 716 717 /* rmpp related functions */ 718 boolean_t ibmf_i_find_msg_client(ibmf_client_t *cl, ibmf_msg_impl_t *msgimplp, 719 boolean_t inc_refcnt); 720 boolean_t ibmf_i_is_rmpp(ibmf_client_t *clientp, 721 ibmf_qp_handle_t ibmf_qp_handle); 722 void ibmf_i_mgt_class_to_hdr_sz_off(uint32_t mgt_class, uint32_t *szp, 723 uint32_t *offp); 724 ibmf_msg_impl_t *ibmf_i_find_msg(ibmf_client_t *clientp, uint64_t tid, 725 uint8_t mgt_class, uint8_t r_method, ib_lid_t lid, ib_gid_t *gid, 726 boolean_t gid_pr, ibmf_rmpp_hdr_t *rmpp_hdr, boolean_t msg_list); 727 #ifdef NOTDEF 728 ibmf_msg_impl_t *ibmf_i_find_term_msg(ibmf_client_t *clientp, uint64_t tid, 729 uint8_t mgt_class, ib_lid_t lid, ib_gid_t *gid, boolean_t gid_pr, 730 ibmf_rmpp_hdr_t *rmpp_hd); 731 #endif 732 void ibmf_i_handle_rmpp(ibmf_client_t *clientp, ibmf_qp_handle_t qp_hdl, 733 ibmf_msg_impl_t *msgimpl, uchar_t *madp); 734 int ibmf_i_send_rmpp(ibmf_msg_impl_t *msgimplp, uint8_t rmpp_type, 735 uint8_t rmpp_status, uint32_t segno, uint32_t nwl, int block); 736 int ibmf_i_send_rmpp_pkts(ibmf_client_t *clientp, 737 ibmf_qp_handle_t ibmf_qp_handle, ibmf_msg_impl_t *msgimplp, boolean_t isDS, 738 int block); 739 void ibmf_i_send_rmpp_window(ibmf_msg_impl_t *msgimplp, int block); 740 int ibmf_setup_term_ctx(ibmf_client_t *clientp, ibmf_msg_impl_t *regmsgimplp); 741 742 /* Alternate QP WQE cache functions */ 743 int ibmf_altqp_send_wqe_cache_constructor(void *buf, void *cdrarg, 744 int kmflags); 745 void ibmf_altqp_send_wqe_cache_destructor(void *buf, void *cdrarg); 746 int ibmf_altqp_recv_wqe_cache_constructor(void *buf, void *cdrarg, 747 int kmflags); 748 void ibmf_altqp_recv_wqe_cache_destructor(void *buf, void *cdrarg); 749 int ibmf_i_init_altqp_wqes(ibmf_alt_qp_t *qp_ctx); 750 void ibmf_i_fini_altqp_wqes(ibmf_alt_qp_t *qp_ctx); 751 int ibmf_i_extend_wqe_cache(ibmf_ci_t *cip, ibmf_qp_handle_t ibmf_qp_handle, 752 boolean_t block); 753 754 /* Receive callback functions */ 755 void ibmf_i_recv_cb_setup(ibmf_client_t *clientp); 756 void ibmf_i_recv_cb_cleanup(ibmf_client_t *clientp); 757 void ibmf_i_alt_recv_cb_setup(ibmf_alt_qp_t *qpp); 758 void ibmf_i_alt_recv_cb_cleanup(ibmf_alt_qp_t *qpp); 759 760 /* UD Dest population thread */ 761 int ibmf_ud_dest_tq_disp(ibmf_ci_t *cip); 762 763 #ifdef __cplusplus 764 } 765 #endif 766 767 #endif /* _SYS_IB_MGT_IBMF_IBMF_IMPL_H */ 768