1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 /* 25 * Copyright (c) 2007, The Ohio State University. All rights reserved. 26 * 27 * Portions of this source code is developed by the team members of 28 * The Ohio State University's Network-Based Computing Laboratory (NBCL), 29 * headed by Professor Dhabaleswar K. (DK) Panda. 30 * 31 * Acknowledgements to contributions from developors: 32 * Ranjit Noronha: noronha@cse.ohio-state.edu 33 * Lei Chai : chail@cse.ohio-state.edu 34 * Weikuan Yu : yuw@cse.ohio-state.edu 35 * 36 */ 37 38 39 #ifndef _IB_H 40 #define _IB_H 41 42 /* 43 * ib.h, rpcib plugin interface. 44 */ 45 46 #include <sys/types.h> 47 #include <sys/ddi.h> 48 #include <sys/sunddi.h> 49 #include <sys/conf.h> 50 #include <sys/stat.h> 51 #include <rpc/rpc.h> 52 #include <rpc/rpc_rdma.h> 53 #include <sys/ib/ibtl/ibti.h> 54 #include <sys/avl.h> 55 56 #ifdef __cplusplus 57 extern "C" { 58 #endif 59 60 #define MAX_BUFS 1024 /* max no. of buffers per pool */ 61 62 #define DEF_CQ_SIZE 4096 - 1 /* default CQ size */ 63 /* 64 * Tavor returns the next higher power of 2 65 * CQ entries than the requested size. 66 * For instance, if you request (2^12 - 1) 67 * CQ entries, Tavor returns 2^12 entries. 68 * 4K CQ entries suffice. Hence, 4096 - 1. 69 */ 70 #define DEF_SQ_SIZE 128 /* default SendQ size */ 71 #define DEF_RQ_SIZE 256 /* default RecvQ size */ 72 #define DSEG_MAX 2 73 #define RQ_DSEG_MAX 1 /* default RQ data seg */ 74 #define IBSRM_HB 0x8000 /* high order bit of pkey */ 75 76 /* max no. of refresh attempts on IBT_CM_CONN_STALE error */ 77 #define REFRESH_ATTEMPTS 3 78 79 typedef struct rib_hca_s rib_hca_t; 80 typedef struct rib_qp_s rib_qp_t; 81 typedef struct rib_cq_s rib_cq_t; 82 83 /* 84 * Notification for RDMA_DONE is based on xid 85 */ 86 struct rdma_done_list { 87 uint32_t xid; /* XID waiting for RDMA_DONE */ 88 kcondvar_t rdma_done_cv; /* cv for RDMA_DONE */ 89 struct rdma_done_list *next; 90 struct rdma_done_list *prev; 91 }; 92 93 /* 94 * State of the plugin. 95 * ACCEPT = accepting new connections and requests 96 * NO_ACCEPT = not accepting new connection and requests 97 */ 98 #define ACCEPT 1 99 #define NO_ACCEPT 2 100 101 /* 102 * Send Wait states 103 */ 104 #define SEND_WAIT -1 105 106 /* 107 * Reply states 108 */ 109 #define REPLY_WAIT -1 110 111 typedef void * rib_pvoid; 112 typedef rib_pvoid RIB_SYNCMEM_HANDLE; 113 114 /* 115 * IB buffer pool management structure 116 */ 117 118 /* 119 * Buffer pool info 120 */ 121 typedef struct { 122 kmutex_t buflock; /* lock for this structure */ 123 caddr_t buf; /* pool address */ 124 uint32_t bufhandle; /* rkey for this pool */ 125 ulong_t bufsize; /* size of pool */ 126 int rsize; /* size of each element */ 127 int numelems; /* no. of elements allocated */ 128 int buffree; /* no. of free elements */ 129 void *buflist[1]; /* free elements in pool */ 130 } bufpool_t; 131 132 typedef struct { 133 bufpool_t *bpool; 134 ibt_mr_hdl_t *mr_hdl; 135 ibt_mr_desc_t *mr_desc; /* vaddr, lkey, rkey */ 136 } rib_bufpool_t; 137 138 /* 139 * ATS relsted defines and structures. 140 */ 141 #define ATS_AR_DATA_LEN 16 142 143 144 /* 145 * Service types supported by RPCIB 146 * For now only NFS is supported. 147 */ 148 #define NFS 1 149 #define NLM 2 150 151 /* 152 * Tracks consumer state (client or server). 153 */ 154 typedef enum { 155 RIB_SERVER, 156 RIB_CLIENT 157 } rib_mode_t; 158 159 /* 160 * CQ structure 161 */ 162 struct rib_cq_s { 163 rib_hca_t *rib_hca; 164 ibt_cq_hdl_t rib_cq_hdl; 165 }; 166 167 /* 168 * Each registered service's data structure. 169 */ 170 typedef struct rib_service_s rib_service_t; 171 struct rib_service_s { 172 uint32_t srv_type; /* i.e, NFS, NLM, v4CBD */ 173 ibt_srv_hdl_t srv_hdl; /* from ibt_register call */ 174 ib_svc_id_t srv_id; 175 rib_service_t *next; 176 }; 177 178 /* 179 * RPCIB plugin state 180 */ 181 typedef struct rpcib_state { 182 ibt_clnt_hdl_t ibt_clnt_hdl; 183 uint32_t hca_count; 184 uint32_t nhca_inited; 185 rib_hca_t *hcas_list; 186 krwlock_t hcas_list_lock; /* protects hcas_list */ 187 int refcount; 188 kmutex_t open_hca_lock; 189 queue_t *q; /* up queue for a serv_type */ 190 void *private; 191 rib_service_t *service_list; 192 krwlock_t service_list_lock; 193 kmutex_t listen_lock; 194 } rpcib_state_t; 195 196 /* 197 * Connection lists 198 */ 199 typedef struct { 200 krwlock_t conn_lock; /* list lock */ 201 CONN *conn_hd; /* list head */ 202 } rib_conn_list_t; 203 204 enum hca_state { 205 HCA_DETACHED, /* hca in detached state */ 206 HCA_INITED, /* hca in up and running state */ 207 }; 208 209 typedef struct rib_hca_service_s rib_hca_service_t; 210 struct rib_hca_service_s { 211 ib_svc_id_t srv_id; 212 ib_gid_t gid; 213 ibt_sbind_hdl_t sbind_hdl; 214 rib_hca_service_t *next; 215 }; 216 217 /* 218 * RPCIB per HCA structure 219 */ 220 struct rib_hca_s { 221 ibt_clnt_hdl_t ibt_clnt_hdl; 222 223 /* 224 * per HCA. 225 */ 226 ibt_hca_hdl_t hca_hdl; /* HCA handle */ 227 ibt_hca_attr_t hca_attrs; /* HCA attributes */ 228 ibt_pd_hdl_t pd_hdl; 229 rib_hca_service_t *bound_services; 230 krwlock_t bound_services_lock; 231 ib_guid_t hca_guid; 232 uint32_t hca_nports; 233 ibt_hca_portinfo_t *hca_ports; 234 size_t hca_pinfosz; 235 enum hca_state state; /* state of HCA */ 236 krwlock_t state_lock; /* protects state field */ 237 bool_t inuse; /* indicates HCA usage */ 238 kmutex_t inuse_lock; /* protects inuse field */ 239 240 rib_conn_list_t cl_conn_list; /* client conn list */ 241 rib_conn_list_t srv_conn_list; /* server conn list */ 242 243 rib_cq_t *clnt_scq; 244 rib_cq_t *clnt_rcq; 245 rib_cq_t *svc_scq; 246 rib_cq_t *svc_rcq; 247 kmutex_t cb_lock; 248 kcondvar_t cb_cv; 249 250 rib_bufpool_t *recv_pool; /* recv buf pool */ 251 rib_bufpool_t *send_pool; /* send buf pool */ 252 253 void *iblock; /* interrupt cookie */ 254 255 kmem_cache_t *server_side_cache; /* long reply pool */ 256 avl_tree_t avl_tree; 257 kmutex_t avl_lock; 258 krwlock_t avl_rw_lock; 259 volatile bool_t avl_init; 260 kmutex_t cache_allocation_lock; 261 ddi_taskq_t *cleanup_helper; 262 ib_svc_id_t srv_id; 263 ibt_srv_hdl_t srv_hdl; 264 uint_t reg_state; 265 266 volatile uint64_t cache_allocation; 267 uint64_t cache_hits; 268 uint64_t cache_misses; 269 uint64_t cache_cold_misses; 270 uint64_t cache_hot_misses; 271 uint64_t cache_misses_above_the_limit; 272 273 struct rib_hca_s *next; 274 }; 275 276 277 /* 278 * Structure on wait state of a post send 279 */ 280 struct send_wid { 281 uint32_t xid; 282 int cv_sig; 283 kmutex_t sendwait_lock; 284 kcondvar_t wait_cv; 285 uint_t status; 286 rib_qp_t *qp; 287 int nsbufs; /* # of send buffers posted */ 288 uint64_t sbufaddr[DSEG_MAX]; /* posted send buffers */ 289 caddr_t c; 290 caddr_t c1; 291 int l1; 292 caddr_t c2; 293 int l2; 294 int wl, rl; 295 }; 296 297 /* 298 * Structure on reply descriptor for recv queue. 299 * Different from the above posting of a descriptor. 300 */ 301 struct reply { 302 uint32_t xid; 303 uint_t status; 304 uint64_t vaddr_cq; /* buf addr from CQ */ 305 uint_t bytes_xfer; 306 kcondvar_t wait_cv; 307 struct reply *next; 308 struct reply *prev; 309 }; 310 311 struct svc_recv { 312 rib_qp_t *qp; 313 uint64_t vaddr; 314 uint_t bytes_xfer; 315 }; 316 317 struct recv_wid { 318 uint32_t xid; 319 rib_qp_t *qp; 320 uint64_t addr; /* posted buf addr */ 321 }; 322 323 /* 324 * Per QP data structure 325 */ 326 struct rib_qp_s { 327 rib_hca_t *hca; 328 rib_mode_t mode; /* RIB_SERVER or RIB_CLIENT */ 329 CONN rdmaconn; 330 ibt_channel_hdl_t qp_hdl; 331 uint_t port_num; 332 ib_qpn_t qpn; 333 int chan_flags; 334 clock_t timeout; 335 ibt_rc_chan_query_attr_t qp_q_attrs; 336 rib_cq_t *send_cq; /* send CQ */ 337 rib_cq_t *recv_cq; /* recv CQ */ 338 339 /* 340 * Number of pre-posted rbufs 341 */ 342 uint_t n_posted_rbufs; 343 kcondvar_t posted_rbufs_cv; 344 kmutex_t posted_rbufs_lock; 345 346 /* 347 * Number of SENDs pending completion 348 */ 349 350 uint_t n_send_rbufs; 351 kcondvar_t send_rbufs_cv; 352 kmutex_t send_rbufs_lock; 353 354 /* 355 * RPC reply 356 */ 357 uint_t rep_list_size; 358 struct reply *replylist; 359 kmutex_t replylist_lock; 360 361 /* 362 * server only, RDMA_DONE 363 */ 364 struct rdma_done_list *rdlist; 365 kmutex_t rdlist_lock; 366 367 kmutex_t cb_lock; 368 kcondvar_t cb_conn_cv; 369 370 caddr_t q; /* upstream queue */ 371 struct send_wid wd; 372 }; 373 374 #define ctoqp(conn) ((rib_qp_t *)((conn)->c_private)) 375 #define qptoc(rqp) ((CONN *)&((rqp)->rdmaconn)) 376 377 /* 378 * Timeout for various calls 379 */ 380 #define CONN_WAIT_TIME 40 381 #define SEND_WAIT_TIME 40 /* time for send completion */ 382 383 #define REPLY_WAIT_TIME 40 /* time to get reply from remote QP */ 384 385 #ifdef __cplusplus 386 } 387 #endif 388 389 #endif /* !_IB_H */ 390