1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 2007, The Ohio State University. All rights reserved. 27 * 28 * Portions of this source code is developed by the team members of 29 * The Ohio State University's Network-Based Computing Laboratory (NBCL), 30 * headed by Professor Dhabaleswar K. (DK) Panda. 31 * 32 * Acknowledgements to contributions from developors: 33 * Ranjit Noronha: noronha@cse.ohio-state.edu 34 * Lei Chai : chail@cse.ohio-state.edu 35 * Weikuan Yu : yuw@cse.ohio-state.edu 36 * 37 */ 38 39 40 #ifndef _IB_H 41 #define _IB_H 42 43 /* 44 * ib.h, rpcib plugin interface. 45 */ 46 47 #include <sys/types.h> 48 #include <sys/ddi.h> 49 #include <sys/sunddi.h> 50 #include <sys/conf.h> 51 #include <sys/stat.h> 52 #include <rpc/rpc.h> 53 #include <rpc/rpc_rdma.h> 54 #include <sys/ib/ibtl/ibti.h> 55 #include <sys/avl.h> 56 57 #ifdef __cplusplus 58 extern "C" { 59 #endif 60 61 #define MAX_BUFS 1024 /* max no. of buffers per pool */ 62 63 #define DEF_CQ_SIZE 4096 - 1 /* default CQ size */ 64 /* 65 * Tavor returns the next higher power of 2 66 * CQ entries than the requested size. 67 * For instance, if you request (2^12 - 1) 68 * CQ entries, Tavor returns 2^12 entries. 69 * 4K CQ entries suffice. Hence, 4096 - 1. 70 */ 71 #define DEF_SQ_SIZE 128 /* default SendQ size */ 72 #define DEF_RQ_SIZE 256 /* default RecvQ size */ 73 #define DSEG_MAX 2 74 #define RQ_DSEG_MAX 1 /* default RQ data seg */ 75 #define IBSRM_HB 0x8000 /* high order bit of pkey */ 76 77 /* max no. of refresh attempts on IBT_CM_CONN_STALE error */ 78 #define REFRESH_ATTEMPTS 3 79 80 typedef struct rib_hca_s rib_hca_t; 81 typedef struct rib_qp_s rib_qp_t; 82 typedef struct rib_cq_s rib_cq_t; 83 84 /* 85 * Notification for RDMA_DONE is based on xid 86 */ 87 struct rdma_done_list { 88 uint32_t xid; /* XID waiting for RDMA_DONE */ 89 kcondvar_t rdma_done_cv; /* cv for RDMA_DONE */ 90 struct rdma_done_list *next; 91 struct rdma_done_list *prev; 92 }; 93 94 /* 95 * State of the plugin. 96 * ACCEPT = accepting new connections and requests 97 * NO_ACCEPT = not accepting new connection and requests 98 */ 99 #define ACCEPT 1 100 #define NO_ACCEPT 2 101 102 /* 103 * Send Wait states 104 */ 105 #define SEND_WAIT -1 106 107 /* 108 * Reply states 109 */ 110 #define REPLY_WAIT -1 111 112 typedef void * rib_pvoid; 113 typedef rib_pvoid RIB_SYNCMEM_HANDLE; 114 115 /* 116 * IB buffer pool management structure 117 */ 118 119 /* 120 * Buffer pool info 121 */ 122 typedef struct { 123 kmutex_t buflock; /* lock for this structure */ 124 caddr_t buf; /* pool address */ 125 uint32_t bufhandle; /* rkey for this pool */ 126 ulong_t bufsize; /* size of pool */ 127 int rsize; /* size of each element */ 128 int numelems; /* no. of elements allocated */ 129 int buffree; /* no. of free elements */ 130 void *buflist[1]; /* free elements in pool */ 131 } bufpool_t; 132 133 typedef struct { 134 bufpool_t *bpool; 135 ibt_mr_hdl_t *mr_hdl; 136 ibt_mr_desc_t *mr_desc; /* vaddr, lkey, rkey */ 137 } rib_bufpool_t; 138 139 /* 140 * ATS relsted defines and structures. 141 */ 142 #define ATS_AR_DATA_LEN 16 143 #define IBD_NAME "ibd" 144 #define N_IBD_INSTANCES 4 145 146 147 /* 148 * Service types supported by RPCIB 149 * For now only NFS is supported. 150 */ 151 #define NFS 1 152 #define NLM 2 153 154 /* 155 * Tracks consumer state (client or server). 156 */ 157 typedef enum { 158 RIB_SERVER, 159 RIB_CLIENT 160 } rib_mode_t; 161 162 /* 163 * CQ structure 164 */ 165 struct rib_cq_s { 166 rib_hca_t *rib_hca; 167 ibt_cq_hdl_t rib_cq_hdl; 168 }; 169 170 /* 171 * RPCIB plugin state 172 */ 173 typedef struct rpcib_state { 174 ibt_clnt_hdl_t ibt_clnt_hdl; 175 uint32_t hca_count; 176 uint32_t nhca_inited; 177 ib_guid_t *hca_guids; 178 rib_hca_t *hcas; 179 int refcount; 180 kmutex_t open_hca_lock; 181 rib_hca_t *hca; /* the hca being used */ 182 queue_t *q; /* up queue for a serv_type */ 183 uint32_t service_type; /* NFS, NLM, etc */ 184 void *private; 185 } rpcib_state_t; 186 187 /* 188 * Each registered service's data structure. 189 * Each HCA has a list of these structures, which are the registered 190 * services on this HCA. 191 */ 192 typedef struct rib_service rib_service_t; 193 struct rib_service { 194 uint32_t srv_type; /* i.e, NFS, NLM, v4CBD */ 195 ibt_srv_hdl_t srv_hdl; /* from ibt_register call */ 196 rib_service_t *srv_next; 197 }; 198 199 /* 200 * Connection lists 201 */ 202 typedef struct { 203 krwlock_t conn_lock; /* list lock */ 204 CONN *conn_hd; /* list head */ 205 } rib_conn_list_t; 206 207 enum hca_state { 208 HCA_DETACHED, /* hca in detached state */ 209 HCA_INITED, /* hca in up and running state */ 210 }; 211 212 /* 213 * RPCIB per HCA structure 214 */ 215 struct rib_hca_s { 216 ibt_clnt_hdl_t ibt_clnt_hdl; 217 218 /* 219 * per HCA. 220 */ 221 ibt_hca_hdl_t hca_hdl; /* HCA handle */ 222 ibt_hca_attr_t hca_attrs; /* HCA attributes */ 223 ibt_pd_hdl_t pd_hdl; 224 ib_guid_t hca_guid; 225 uint32_t hca_nports; 226 ibt_hca_portinfo_t *hca_ports; 227 size_t hca_pinfosz; 228 enum hca_state state; /* state of HCA */ 229 krwlock_t state_lock; /* protects state field */ 230 bool_t inuse; /* indicates HCA usage */ 231 kmutex_t inuse_lock; /* protects inuse field */ 232 /* 233 * List of services registered on all ports available 234 * on this HCA. Only one consumer of KRPC can register 235 * its services at one time or tear them down at one 236 * time. 237 */ 238 rib_service_t *service_list; 239 krwlock_t service_list_lock; 240 241 242 rib_conn_list_t cl_conn_list; /* client conn list */ 243 rib_conn_list_t srv_conn_list; /* server conn list */ 244 245 rib_cq_t *clnt_scq; 246 rib_cq_t *clnt_rcq; 247 rib_cq_t *svc_scq; 248 rib_cq_t *svc_rcq; 249 kmutex_t cb_lock; 250 kcondvar_t cb_cv; 251 252 rib_bufpool_t *recv_pool; /* recv buf pool */ 253 rib_bufpool_t *send_pool; /* send buf pool */ 254 255 void *iblock; /* interrupt cookie */ 256 257 kmem_cache_t *server_side_cache; /* long reply pool */ 258 avl_tree_t avl_tree; 259 kmutex_t avl_lock; 260 krwlock_t avl_rw_lock; 261 volatile bool_t avl_init; 262 kmutex_t cache_allocation; 263 ddi_taskq_t *cleanup_helper; 264 ib_svc_id_t srv_id; 265 ibt_srv_hdl_t srv_hdl; 266 uint_t reg_state; 267 }; 268 269 270 /* 271 * Structure on wait state of a post send 272 */ 273 struct send_wid { 274 uint32_t xid; 275 int cv_sig; 276 kmutex_t sendwait_lock; 277 kcondvar_t wait_cv; 278 uint_t status; 279 rib_qp_t *qp; 280 int nsbufs; /* # of send buffers posted */ 281 uint64_t sbufaddr[DSEG_MAX]; /* posted send buffers */ 282 caddr_t c; 283 caddr_t c1; 284 int l1; 285 caddr_t c2; 286 int l2; 287 int wl, rl; 288 }; 289 290 /* 291 * Structure on reply descriptor for recv queue. 292 * Different from the above posting of a descriptor. 293 */ 294 struct reply { 295 uint32_t xid; 296 uint_t status; 297 uint64_t vaddr_cq; /* buf addr from CQ */ 298 uint_t bytes_xfer; 299 kcondvar_t wait_cv; 300 struct reply *next; 301 struct reply *prev; 302 }; 303 304 struct svc_recv { 305 rib_qp_t *qp; 306 uint64_t vaddr; 307 uint_t bytes_xfer; 308 }; 309 310 struct recv_wid { 311 uint32_t xid; 312 rib_qp_t *qp; 313 uint64_t addr; /* posted buf addr */ 314 }; 315 316 /* 317 * Per QP data structure 318 */ 319 struct rib_qp_s { 320 rib_hca_t *hca; 321 rib_mode_t mode; /* RIB_SERVER or RIB_CLIENT */ 322 CONN rdmaconn; 323 ibt_channel_hdl_t qp_hdl; 324 uint_t port_num; 325 ib_qpn_t qpn; 326 int chan_flags; 327 clock_t timeout; 328 ibt_rc_chan_query_attr_t qp_q_attrs; 329 rib_cq_t *send_cq; /* send CQ */ 330 rib_cq_t *recv_cq; /* recv CQ */ 331 332 /* 333 * Number of pre-posted rbufs 334 */ 335 uint_t n_posted_rbufs; 336 kcondvar_t posted_rbufs_cv; 337 kmutex_t posted_rbufs_lock; 338 339 /* 340 * Number of SENDs pending completion 341 */ 342 343 uint_t n_send_rbufs; 344 kcondvar_t send_rbufs_cv; 345 kmutex_t send_rbufs_lock; 346 347 /* 348 * RPC reply 349 */ 350 uint_t rep_list_size; 351 struct reply *replylist; 352 kmutex_t replylist_lock; 353 354 /* 355 * server only, RDMA_DONE 356 */ 357 struct rdma_done_list *rdlist; 358 kmutex_t rdlist_lock; 359 360 kmutex_t cb_lock; 361 kcondvar_t cb_conn_cv; 362 363 caddr_t q; /* upstream queue */ 364 struct send_wid wd; 365 }; 366 367 #define ctoqp(conn) ((rib_qp_t *)((conn)->c_private)) 368 #define qptoc(rqp) ((CONN *)&((rqp)->rdmaconn)) 369 370 /* 371 * Timeout for various calls 372 */ 373 #define CONN_WAIT_TIME 40 374 #define SEND_WAIT_TIME 40 /* time for send completion */ 375 376 #define REPLY_WAIT_TIME 40 /* time to get reply from remote QP */ 377 378 #ifdef __cplusplus 379 } 380 #endif 381 382 #endif /* !_IB_H */ 383