1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #ifndef _IB_H 28 #define _IB_H 29 30 #pragma ident "%Z%%M% %I% %E% SMI" 31 32 /* 33 * ib.h, rpcib plugin interface. 34 */ 35 36 #include <sys/types.h> 37 #include <sys/ddi.h> 38 #include <sys/sunddi.h> 39 #include <sys/conf.h> 40 #include <sys/stat.h> 41 #include <rpc/rpc.h> 42 #include <rpc/rpc_rdma.h> 43 #include <sys/ib/ibtl/ibti.h> 44 45 #ifdef __cplusplus 46 extern "C" { 47 #endif 48 49 #define MAX_BUFS 256 /* max no. of buffers per pool */ 50 #define DEF_CQ_SIZE 4096 - 1 /* default CQ size */ 51 /* 52 * Tavor returns the next higher power of 2 53 * CQ entries than the requested size. 54 * For instance, if you request (2^12 - 1) 55 * CQ entries, Tavor returns 2^12 entries. 56 * 4K CQ entries suffice. Hence, 4096 - 1. 57 */ 58 #define DEF_SQ_SIZE 128 /* default SendQ size */ 59 #define DEF_RQ_SIZE 256 /* default RecvQ size */ 60 #define DSEG_MAX 2 61 #define RQ_DSEG_MAX 1 /* default RQ data seg */ 62 #define IBSRM_HB 0x8000 /* high order bit of pkey */ 63 #define NFS_SEC_KEY0 0x6878 /* randomly selected NFS security key */ 64 #define NFS_SEC_KEY1 0x8679 65 66 /* max no. of refresh attempts on IBT_CM_CONN_STALE error */ 67 #define REFRESH_ATTEMPTS 3 68 69 typedef struct rib_hca_s rib_hca_t; 70 typedef struct rib_qp_s rib_qp_t; 71 typedef struct rib_cq_s rib_cq_t; 72 73 /* 74 * Notification for RDMA_DONE is based on xid 75 */ 76 struct rdma_done_list { 77 uint32_t xid; /* XID waiting for RDMA_DONE */ 78 kcondvar_t rdma_done_cv; /* cv for RDMA_DONE */ 79 struct rdma_done_list *next; 80 struct rdma_done_list *prev; 81 }; 82 83 /* 84 * State of the plugin. 85 * ACCEPT = accepting new connections and requests 86 * NO_ACCEPT = not accepting new connection and requests 87 */ 88 #define ACCEPT 1 89 #define NO_ACCEPT 2 90 91 /* 92 * Send Wait states 93 */ 94 #define SEND_WAIT -1 95 96 /* 97 * Reply states 98 */ 99 #define REPLY_WAIT -1 100 101 typedef void * rib_pvoid; 102 typedef rib_pvoid RIB_SYNCMEM_HANDLE; 103 104 /* 105 * IB buffer pool management structure 106 */ 107 108 /* 109 * Buffer pool info 110 */ 111 typedef struct { 112 kmutex_t buflock; /* lock for this structure */ 113 caddr_t buf; /* pool address */ 114 uint32_t bufhandle; /* rkey for this pool */ 115 ulong_t bufsize; /* size of pool */ 116 int rsize; /* size of each element */ 117 int numelems; /* no. of elements allocated */ 118 int buffree; /* no. of free elements */ 119 void *buflist[1]; /* free elements in pool */ 120 } bufpool_t; 121 122 typedef struct { 123 bufpool_t *bpool; 124 ibt_mr_hdl_t *mr_hdl; 125 ibt_mr_desc_t *mr_desc; /* vaddr, lkey, rkey */ 126 } rib_bufpool_t; 127 128 /* 129 * ATS relsted defines and structures. 130 */ 131 #define ATS_AR_DATA_LEN 16 132 #define IBD_NAME "ibd" 133 #define N_IBD_INSTANCES 4 134 135 typedef struct rpcib_ats_s { 136 int ras_inst; 137 ib_pkey_t ras_pkey; 138 ib_gid_t ras_port_gid; 139 sa_family_t ras_inet_type; 140 union { 141 struct sockaddr_in ras_sockaddr; 142 struct sockaddr_in6 ras_sockaddr6; 143 } ra_sin; 144 #define ras_sin ra_sin.ras_sockaddr 145 #define ras_sin6 ra_sin.ras_sockaddr6 146 } rpcib_ats_t; 147 148 typedef struct rpcib_ibd_insts_s { 149 int rib_ibd_alloc; 150 int rib_ibd_cnt; 151 rpcib_ats_t *rib_ats; 152 } rpcib_ibd_insts_t; 153 154 /* 155 * Service types supported by RPCIB 156 * For now only NFS is supported. 157 */ 158 #define NFS 1 159 #define NLM 2 160 161 /* 162 * Tracks consumer state (client or server). 163 */ 164 typedef enum { 165 RIB_SERVER, 166 RIB_CLIENT 167 } rib_mode_t; 168 169 /* 170 * CQ structure 171 */ 172 struct rib_cq_s { 173 rib_hca_t *rib_hca; 174 ibt_cq_hdl_t rib_cq_hdl; 175 }; 176 177 /* 178 * RPCIB plugin state 179 */ 180 typedef struct rpcib_state { 181 ibt_clnt_hdl_t ibt_clnt_hdl; 182 uint32_t hca_count; 183 uint32_t nhca_inited; 184 ib_guid_t *hca_guids; 185 rib_hca_t *hcas; 186 int refcount; 187 kmutex_t open_hca_lock; 188 rib_hca_t *hca; /* the hca being used */ 189 queue_t *q; /* up queue for a serv_type */ 190 uint32_t service_type; /* NFS, NLM, etc */ 191 void *private; 192 } rpcib_state_t; 193 194 /* 195 * Each registered service's data structure. 196 * Each HCA has a list of these structures, which are the registered 197 * services on this HCA. 198 */ 199 typedef struct rib_service rib_service_t; 200 struct rib_service { 201 uint32_t srv_type; /* i.e, NFS, NLM, v4CBD */ 202 203 /* 204 * service name, i.e, <IP>::NFS or <IP>::NLM. Since 205 * each type of service can be registered with many 206 * IP addrs(srv_name) and is running on all ports 207 * for all HCAs. 208 */ 209 char *srv_name; 210 211 uint32_t srv_port; /* port on which registered */ 212 ib_svc_id_t srv_id; /* from ibt_register call */ 213 ibt_srv_hdl_t srv_hdl; /* from ibt_register call */ 214 ibt_sbind_hdl_t *srv_sbind_hdl; /* from ibt_bind call */ 215 ibt_ar_t srv_ar; 216 217 /* 218 * pointer to the next service registered on this 219 * particular HCA 220 */ 221 rib_service_t *srv_next; 222 }; 223 224 /* 225 * Connection lists 226 */ 227 typedef struct { 228 krwlock_t conn_lock; /* list lock */ 229 CONN *conn_hd; /* list head */ 230 } rib_conn_list_t; 231 232 enum hca_state { 233 HCA_INITED, /* hca in up and running state */ 234 HCA_DETACHED /* hca in detached state */ 235 }; 236 237 /* 238 * RPCIB per HCA structure 239 */ 240 struct rib_hca_s { 241 ibt_clnt_hdl_t ibt_clnt_hdl; 242 243 /* 244 * per HCA. 245 */ 246 ibt_hca_hdl_t hca_hdl; /* HCA handle */ 247 ibt_hca_attr_t hca_attrs; /* HCA attributes */ 248 ibt_pd_hdl_t pd_hdl; 249 ib_guid_t hca_guid; 250 uint32_t hca_nports; 251 ibt_hca_portinfo_t *hca_ports; 252 size_t hca_pinfosz; 253 enum hca_state state; /* state of HCA */ 254 krwlock_t state_lock; /* protects state field */ 255 bool_t inuse; /* indicates HCA usage */ 256 kmutex_t inuse_lock; /* protects inuse field */ 257 /* 258 * List of services registered on all ports available 259 * on this HCA. Only one consumer of KRPC can register 260 * its services at one time or tear them down at one 261 * time. 262 */ 263 rib_service_t *service_list; 264 krwlock_t service_list_lock; 265 266 rib_service_t *ats_list; /* Service list for ATS */ 267 268 rib_conn_list_t cl_conn_list; /* client conn list */ 269 rib_conn_list_t srv_conn_list; /* server conn list */ 270 271 rib_cq_t *clnt_scq; 272 rib_cq_t *clnt_rcq; 273 rib_cq_t *svc_scq; 274 rib_cq_t *svc_rcq; 275 kmutex_t cb_lock; 276 kcondvar_t cb_cv; 277 278 rib_bufpool_t *recv_pool; /* recv buf pool */ 279 rib_bufpool_t *send_pool; /* send buf pool */ 280 281 void *iblock; /* interrupt cookie */ 282 }; 283 284 285 /* 286 * Structure on wait state of a post send 287 */ 288 struct send_wid { 289 uint32_t xid; 290 int cv_sig; 291 kmutex_t sendwait_lock; 292 kcondvar_t wait_cv; 293 uint_t status; 294 rib_qp_t *qp; 295 int nsbufs; /* # of send buffers posted */ 296 uint64_t sbufaddr[DSEG_MAX]; /* posted send buffers */ 297 }; 298 299 /* 300 * Structure on reply descriptor for recv queue. 301 * Different from the above posting of a descriptor. 302 */ 303 struct reply { 304 uint32_t xid; 305 uint_t status; 306 uint64_t vaddr_cq; /* buf addr from CQ */ 307 uint_t bytes_xfer; 308 kcondvar_t wait_cv; 309 struct reply *next; 310 struct reply *prev; 311 }; 312 313 struct svc_recv { 314 rib_qp_t *qp; 315 uint64_t vaddr; 316 uint_t bytes_xfer; 317 }; 318 319 struct recv_wid { 320 uint32_t xid; 321 rib_qp_t *qp; 322 uint64_t addr; /* posted buf addr */ 323 }; 324 325 /* 326 * Per QP data structure 327 */ 328 struct rib_qp_s { 329 rib_hca_t *hca; 330 rib_mode_t mode; /* RIB_SERVER or RIB_CLIENT */ 331 CONN rdmaconn; 332 ibt_channel_hdl_t qp_hdl; 333 uint_t port_num; 334 ib_qpn_t qpn; 335 int chan_flags; 336 clock_t timeout; 337 ibt_rc_chan_query_attr_t qp_q_attrs; 338 rib_cq_t *send_cq; /* send CQ */ 339 rib_cq_t *recv_cq; /* recv CQ */ 340 341 /* 342 * Number of pre-posted rbufs 343 */ 344 uint_t n_posted_rbufs; 345 kcondvar_t posted_rbufs_cv; 346 kmutex_t posted_rbufs_lock; 347 348 /* 349 * RPC reply 350 */ 351 uint_t rep_list_size; 352 struct reply *replylist; 353 kmutex_t replylist_lock; 354 355 /* 356 * server only, RDMA_DONE 357 */ 358 struct rdma_done_list *rdlist; 359 kmutex_t rdlist_lock; 360 361 kmutex_t cb_lock; 362 kcondvar_t cb_conn_cv; 363 364 caddr_t q; /* upstream queue */ 365 }; 366 367 #define ctoqp(conn) ((rib_qp_t *)((conn)->c_private)) 368 #define qptoc(rqp) ((CONN *)&((rqp)->rdmaconn)) 369 370 /* 371 * Timeout for various calls 372 */ 373 #define CONN_WAIT_TIME 40 374 #define SEND_WAIT_TIME 40 /* time for send completion */ 375 376 #define REPLY_WAIT_TIME 40 /* time to get reply from remote QP */ 377 378 #ifdef __cplusplus 379 } 380 #endif 381 382 #endif /* !_IB_H */ 383