1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 2007, The Ohio State University. All rights reserved. 27 * 28 * Portions of this source code is developed by the team members of 29 * The Ohio State University's Network-Based Computing Laboratory (NBCL), 30 * headed by Professor Dhabaleswar K. (DK) Panda. 31 * 32 * Acknowledgements to contributions from developors: 33 * Ranjit Noronha: noronha@cse.ohio-state.edu 34 * Lei Chai : chail@cse.ohio-state.edu 35 * Weikuan Yu : yuw@cse.ohio-state.edu 36 * 37 */ 38 39 40 #ifndef _IB_H 41 #define _IB_H 42 43 /* 44 * ib.h, rpcib plugin interface. 45 */ 46 47 #include <sys/types.h> 48 #include <sys/ddi.h> 49 #include <sys/sunddi.h> 50 #include <sys/conf.h> 51 #include <sys/stat.h> 52 #include <rpc/rpc.h> 53 #include <rpc/rpc_rdma.h> 54 #include <sys/ib/ibtl/ibti.h> 55 #include <sys/avl.h> 56 57 #ifdef __cplusplus 58 extern "C" { 59 #endif 60 61 #define MAX_BUFS 1024 /* max no. of buffers per pool */ 62 63 #define DEF_CQ_SIZE 4096 - 1 /* default CQ size */ 64 /* 65 * Tavor returns the next higher power of 2 66 * CQ entries than the requested size. 67 * For instance, if you request (2^12 - 1) 68 * CQ entries, Tavor returns 2^12 entries. 69 * 4K CQ entries suffice. Hence, 4096 - 1. 70 */ 71 #define DEF_SQ_SIZE 128 /* default SendQ size */ 72 #define DEF_RQ_SIZE 256 /* default RecvQ size */ 73 #define DSEG_MAX 2 74 #define RQ_DSEG_MAX 1 /* default RQ data seg */ 75 #define IBSRM_HB 0x8000 /* high order bit of pkey */ 76 77 /* max no. of refresh attempts on IBT_CM_CONN_STALE error */ 78 #define REFRESH_ATTEMPTS 3 79 80 typedef struct rib_hca_s rib_hca_t; 81 typedef struct rib_qp_s rib_qp_t; 82 typedef struct rib_cq_s rib_cq_t; 83 84 /* 85 * Notification for RDMA_DONE is based on xid 86 */ 87 struct rdma_done_list { 88 uint32_t xid; /* XID waiting for RDMA_DONE */ 89 kcondvar_t rdma_done_cv; /* cv for RDMA_DONE */ 90 struct rdma_done_list *next; 91 struct rdma_done_list *prev; 92 }; 93 94 /* 95 * State of the plugin. 96 * ACCEPT = accepting new connections and requests 97 * NO_ACCEPT = not accepting new connection and requests 98 */ 99 #define ACCEPT 1 100 #define NO_ACCEPT 2 101 102 /* 103 * Send Wait states 104 */ 105 #define SEND_WAIT -1 106 107 /* 108 * Reply states 109 */ 110 #define REPLY_WAIT -1 111 112 typedef void * rib_pvoid; 113 typedef rib_pvoid RIB_SYNCMEM_HANDLE; 114 115 /* 116 * IB buffer pool management structure 117 */ 118 119 /* 120 * Buffer pool info 121 */ 122 typedef struct { 123 kmutex_t buflock; /* lock for this structure */ 124 caddr_t buf; /* pool address */ 125 uint32_t bufhandle; /* rkey for this pool */ 126 ulong_t bufsize; /* size of pool */ 127 int rsize; /* size of each element */ 128 int numelems; /* no. of elements allocated */ 129 int buffree; /* no. of free elements */ 130 void *buflist[1]; /* free elements in pool */ 131 } bufpool_t; 132 133 typedef struct { 134 bufpool_t *bpool; 135 ibt_mr_hdl_t *mr_hdl; 136 ibt_mr_desc_t *mr_desc; /* vaddr, lkey, rkey */ 137 } rib_bufpool_t; 138 139 /* 140 * ATS relsted defines and structures. 141 */ 142 #define ATS_AR_DATA_LEN 16 143 #define IBD_NAME "ibd" 144 #define N_IBD_INSTANCES 4 145 146 147 /* 148 * Service types supported by RPCIB 149 * For now only NFS is supported. 150 */ 151 #define NFS 1 152 #define NLM 2 153 154 /* 155 * Tracks consumer state (client or server). 156 */ 157 typedef enum { 158 RIB_SERVER, 159 RIB_CLIENT 160 } rib_mode_t; 161 162 /* 163 * CQ structure 164 */ 165 struct rib_cq_s { 166 rib_hca_t *rib_hca; 167 ibt_cq_hdl_t rib_cq_hdl; 168 }; 169 170 /* 171 * Each registered service's data structure. 172 */ 173 typedef struct rib_service_s rib_service_t; 174 struct rib_service_s { 175 uint32_t srv_type; /* i.e, NFS, NLM, v4CBD */ 176 ibt_srv_hdl_t srv_hdl; /* from ibt_register call */ 177 ib_svc_id_t srv_id; 178 rib_service_t *next; 179 }; 180 181 /* 182 * RPCIB plugin state 183 */ 184 typedef struct rpcib_state { 185 ibt_clnt_hdl_t ibt_clnt_hdl; 186 uint32_t hca_count; 187 uint32_t nhca_inited; 188 rib_hca_t *hcas_list; 189 krwlock_t hcas_list_lock; /* protects hcas_list */ 190 int refcount; 191 kmutex_t open_hca_lock; 192 queue_t *q; /* up queue for a serv_type */ 193 void *private; 194 rib_service_t *service_list; 195 krwlock_t service_list_lock; 196 kmutex_t listen_lock; 197 } rpcib_state_t; 198 199 /* 200 * Connection lists 201 */ 202 typedef struct { 203 krwlock_t conn_lock; /* list lock */ 204 CONN *conn_hd; /* list head */ 205 } rib_conn_list_t; 206 207 enum hca_state { 208 HCA_DETACHED, /* hca in detached state */ 209 HCA_INITED, /* hca in up and running state */ 210 }; 211 212 typedef struct rib_hca_service_s rib_hca_service_t; 213 struct rib_hca_service_s { 214 ib_svc_id_t srv_id; 215 ib_gid_t gid; 216 ibt_sbind_hdl_t sbind_hdl; 217 rib_hca_service_t *next; 218 }; 219 220 /* 221 * RPCIB per HCA structure 222 */ 223 struct rib_hca_s { 224 ibt_clnt_hdl_t ibt_clnt_hdl; 225 226 /* 227 * per HCA. 228 */ 229 ibt_hca_hdl_t hca_hdl; /* HCA handle */ 230 ibt_hca_attr_t hca_attrs; /* HCA attributes */ 231 ibt_pd_hdl_t pd_hdl; 232 rib_hca_service_t *bound_services; 233 krwlock_t bound_services_lock; 234 ib_guid_t hca_guid; 235 uint32_t hca_nports; 236 ibt_hca_portinfo_t *hca_ports; 237 size_t hca_pinfosz; 238 enum hca_state state; /* state of HCA */ 239 krwlock_t state_lock; /* protects state field */ 240 bool_t inuse; /* indicates HCA usage */ 241 kmutex_t inuse_lock; /* protects inuse field */ 242 243 rib_conn_list_t cl_conn_list; /* client conn list */ 244 rib_conn_list_t srv_conn_list; /* server conn list */ 245 246 rib_cq_t *clnt_scq; 247 rib_cq_t *clnt_rcq; 248 rib_cq_t *svc_scq; 249 rib_cq_t *svc_rcq; 250 kmutex_t cb_lock; 251 kcondvar_t cb_cv; 252 253 rib_bufpool_t *recv_pool; /* recv buf pool */ 254 rib_bufpool_t *send_pool; /* send buf pool */ 255 256 void *iblock; /* interrupt cookie */ 257 258 kmem_cache_t *server_side_cache; /* long reply pool */ 259 avl_tree_t avl_tree; 260 kmutex_t avl_lock; 261 krwlock_t avl_rw_lock; 262 volatile bool_t avl_init; 263 kmutex_t cache_allocation_lock; 264 ddi_taskq_t *cleanup_helper; 265 ib_svc_id_t srv_id; 266 ibt_srv_hdl_t srv_hdl; 267 uint_t reg_state; 268 269 volatile uint64_t cache_allocation; 270 uint64_t cache_hits; 271 uint64_t cache_misses; 272 uint64_t cache_cold_misses; 273 uint64_t cache_hot_misses; 274 uint64_t cache_misses_above_the_limit; 275 276 struct rib_hca_s *next; 277 }; 278 279 280 /* 281 * Structure on wait state of a post send 282 */ 283 struct send_wid { 284 uint32_t xid; 285 int cv_sig; 286 kmutex_t sendwait_lock; 287 kcondvar_t wait_cv; 288 uint_t status; 289 rib_qp_t *qp; 290 int nsbufs; /* # of send buffers posted */ 291 uint64_t sbufaddr[DSEG_MAX]; /* posted send buffers */ 292 caddr_t c; 293 caddr_t c1; 294 int l1; 295 caddr_t c2; 296 int l2; 297 int wl, rl; 298 }; 299 300 /* 301 * Structure on reply descriptor for recv queue. 302 * Different from the above posting of a descriptor. 303 */ 304 struct reply { 305 uint32_t xid; 306 uint_t status; 307 uint64_t vaddr_cq; /* buf addr from CQ */ 308 uint_t bytes_xfer; 309 kcondvar_t wait_cv; 310 struct reply *next; 311 struct reply *prev; 312 }; 313 314 struct svc_recv { 315 rib_qp_t *qp; 316 uint64_t vaddr; 317 uint_t bytes_xfer; 318 }; 319 320 struct recv_wid { 321 uint32_t xid; 322 rib_qp_t *qp; 323 uint64_t addr; /* posted buf addr */ 324 }; 325 326 /* 327 * Per QP data structure 328 */ 329 struct rib_qp_s { 330 rib_hca_t *hca; 331 rib_mode_t mode; /* RIB_SERVER or RIB_CLIENT */ 332 CONN rdmaconn; 333 ibt_channel_hdl_t qp_hdl; 334 uint_t port_num; 335 ib_qpn_t qpn; 336 int chan_flags; 337 clock_t timeout; 338 ibt_rc_chan_query_attr_t qp_q_attrs; 339 rib_cq_t *send_cq; /* send CQ */ 340 rib_cq_t *recv_cq; /* recv CQ */ 341 342 /* 343 * Number of pre-posted rbufs 344 */ 345 uint_t n_posted_rbufs; 346 kcondvar_t posted_rbufs_cv; 347 kmutex_t posted_rbufs_lock; 348 349 /* 350 * Number of SENDs pending completion 351 */ 352 353 uint_t n_send_rbufs; 354 kcondvar_t send_rbufs_cv; 355 kmutex_t send_rbufs_lock; 356 357 /* 358 * RPC reply 359 */ 360 uint_t rep_list_size; 361 struct reply *replylist; 362 kmutex_t replylist_lock; 363 364 /* 365 * server only, RDMA_DONE 366 */ 367 struct rdma_done_list *rdlist; 368 kmutex_t rdlist_lock; 369 370 kmutex_t cb_lock; 371 kcondvar_t cb_conn_cv; 372 373 caddr_t q; /* upstream queue */ 374 struct send_wid wd; 375 }; 376 377 #define ctoqp(conn) ((rib_qp_t *)((conn)->c_private)) 378 #define qptoc(rqp) ((CONN *)&((rqp)->rdmaconn)) 379 380 /* 381 * Timeout for various calls 382 */ 383 #define CONN_WAIT_TIME 40 384 #define SEND_WAIT_TIME 40 /* time for send completion */ 385 386 #define REPLY_WAIT_TIME 40 /* time to get reply from remote QP */ 387 388 #ifdef __cplusplus 389 } 390 #endif 391 392 #endif /* !_IB_H */ 393