1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #ifndef _RPC_RPC_RDMA_H 28 #define _RPC_RPC_RDMA_H 29 30 #pragma ident "%Z%%M% %I% %E% SMI" 31 32 #include <rpc/rpc.h> 33 #include <rpc/rpc_sztypes.h> 34 #include <sys/sunddi.h> 35 #include <sys/sunldi.h> 36 37 #ifdef __cplusplus 38 extern "C" { 39 #endif 40 41 #define RPCRDMA_VERS 0 /* Version of the RPC over RDMA protocol */ 42 #define RDMATF_VERS 1 /* Version of the API used by RPC for RDMA */ 43 #define RDMATF_VERS_1 1 /* Current version of RDMATF */ 44 45 /* 46 * The size of an RPC call or reply message 47 */ 48 #define RPC_MSG_SZ 1024 49 50 /* 51 * Storage for a chunk list 52 */ 53 #define RPC_CL_SZ 1024 54 55 /* 56 * Chunk size 57 */ 58 #define MINCHUNK 1024 59 60 /* 61 * Size of receive buffer 62 */ 63 #define RPC_BUF_SIZE 2048 64 65 #define NOWAIT 0 /* don't wait for operation of complete */ 66 #define WAIT 1 /* wait and ensure that operation is complete */ 67 68 /* 69 * RDMA xdr buffer control and other control flags. Add new flags here, 70 * set them in private structure for xdr over RDMA in xdr_rdma.c 71 */ 72 #define RDMA_NOCHUNK 0x1 73 74 /* 75 * Return codes from RDMA operations 76 */ 77 typedef enum { 78 79 RDMA_SUCCESS = 0, /* successful operation */ 80 81 RDMA_INVAL = 1, /* invalid parameter */ 82 RDMA_TIMEDOUT = 2, /* operation timed out */ 83 RDMA_INTR = 3, /* operation interrupted */ 84 RDMA_NORESOURCE = 4, /* insufficient resource */ 85 /* 86 * connection errors 87 */ 88 RDMA_REJECT = 5, /* connection req rejected */ 89 RDMA_NOLISTENER = 6, /* no listener on server */ 90 RDMA_UNREACHABLE = 7, /* host unreachable */ 91 RDMA_CONNLOST = 8, /* connection lost */ 92 93 RDMA_XPRTFAILED = 9, /* RDMA transport failed */ 94 RDMA_PROTECTERR = 10, /* memory protection error */ 95 RDMA_OVERRUN = 11, /* transport overrun */ 96 RDMA_RECVQEMPTY = 12, /* incoming pkt dropped, recv q empty */ 97 RDMA_PROTFAILED = 13, /* RDMA protocol failed */ 98 RDMA_NOTSUPP = 14, /* requested feature not supported */ 99 RDMA_REMOTERR = 15, /* error at remote end */ 100 /* 101 * RDMATF errors 102 */ 103 RDMA_BADVERS = 16, /* mismatch RDMATF versions */ 104 RDMA_REG_EXIST = 17, /* RDMATF registration already exists */ 105 106 /* 107 * fallback error 108 */ 109 RDMA_FAILED = 18 /* generic error */ 110 } rdma_stat; 111 112 /* 113 * Memory region context. This is an RDMA provider generated 114 * handle for a registered arbitrary size contiguous virtual 115 * memory. The RDMA Interface Adapter needs this for local or 116 * remote memory access. 117 * 118 * The mrc_rmr field holds the remote memory region context 119 * which is sent over-the-wire to provide the remote host 120 * with RDMA access to the memory region. 121 */ 122 struct mrc { 123 uint32_t mrc_rmr; /* Remote MR context, sent OTW */ 124 union { 125 struct mr { 126 uint32_t lmr; /* Local MR context */ 127 uint64_t linfo; /* Local memory info */ 128 } mr; 129 } lhdl; 130 }; 131 132 #define mrc_lmr lhdl.mr.lmr 133 #define mrc_linfo lhdl.mr.linfo 134 135 /* 136 * The XDR offset value is used by the XDR 137 * routine to identify the position in the 138 * RPC message where the opaque object would 139 * normally occur. Neither the data content 140 * of the chunk, nor its size field are included 141 * in the RPC message. The XDR offset is calculated 142 * as if the chunks were present. 143 * 144 * The remaining fields identify the chunk of data 145 * on the sender. The c_memhandle identifies a 146 * registered RDMA memory region and the c_addr 147 * and c_len fields identify the chunk within it. 148 */ 149 struct clist { 150 uint32 c_xdroff; /* XDR offset */ 151 uint32 c_len; /* Length */ 152 struct mrc c_smemhandle; /* src memory handle */ 153 uint64 c_ssynchandle; /* src sync handle */ 154 uint64 c_saddr; /* src address */ 155 struct mrc c_dmemhandle; /* dst memory handle */ 156 uint64 c_dsynchandle; /* dst sync handle */ 157 uint64 c_daddr; /* dst address */ 158 struct clist *c_next; /* Next chunk */ 159 }; 160 161 typedef struct clist clist; 162 163 enum rdma_proc { 164 RDMA_MSG = 0, /* chunk list and RPC msg follow */ 165 RDMA_NOMSG = 1, /* only chunk list follows */ 166 RDMA_MSGP = 2, /* chunk list and RPC msg with padding follow */ 167 RDMA_DONE = 3 /* signal completion of chunk transfer */ 168 }; 169 170 /* 171 * Listener information for a service 172 */ 173 struct rdma_svc_data { 174 queue_t q; /* queue_t to place incoming pkts */ 175 int active; /* If active, after registeration startup */ 176 rdma_stat err_code; /* Error code from plugin layer */ 177 int32_t svcid; /* RDMA based service identifier */ 178 }; 179 180 /* 181 * Per RDMA plugin module information. 182 * Will be populated by each plugin 183 * module during its initialization. 184 */ 185 typedef struct rdma_mod { 186 char *rdma_api; /* "kvipl", "ibtf", etc */ 187 uint_t rdma_version; /* RDMATF API version */ 188 int rdma_count; /* # of devices */ 189 struct rdmaops *rdma_ops; /* rdma op vector for api */ 190 } rdma_mod_t; 191 192 /* 193 * Registry of RDMA plugins 194 */ 195 typedef struct rdma_registry { 196 rdma_mod_t *r_mod; /* plugin mod info */ 197 struct rdma_registry *r_next; /* next registered RDMA plugin */ 198 } rdma_registry_t; 199 200 /* 201 * RDMA transport information 202 */ 203 typedef struct rdma_info { 204 uint_t addrlen; /* address length */ 205 uint_t mts; /* max transfer size */ 206 uint_t mtu; /* native mtu size of unlerlying network */ 207 } rdma_info_t; 208 209 /* 210 * RDMA Connection information 211 */ 212 typedef struct conn { 213 rdma_mod_t *c_rdmamod; /* RDMA transport info for conn */ 214 struct netbuf c_raddr; /* remote address */ 215 struct netbuf c_laddr; /* local address */ 216 int c_ref; /* no. of clients of connection */ 217 struct conn *c_next; /* next in list of connections */ 218 struct conn *c_prev; /* prev in list of connections */ 219 caddr_t c_private; /* transport specific stuff */ 220 221 #define C_IDLE 0x80000000 222 #define C_CONN_PEND 0x40000000 223 #define C_CONNECTED 0x20000000 224 #define C_ERROR 0x10000000 225 #define C_DISCONN_PEND 0x08000000 226 #define C_REMOTE_DOWN 0x04000000 227 228 uint_t c_state; /* state of connection */ 229 kmutex_t c_lock; /* protect c_state and c_ref fields */ 230 kcondvar_t c_cv; /* to signal when pending is done */ 231 } CONN; 232 233 234 /* 235 * Memory management for the RDMA buffers 236 */ 237 /* 238 * RDMA buffer types 239 */ 240 typedef enum { 241 SEND_BUFFER, /* buf for send msg */ 242 SEND_DESCRIPTOR, /* buf used for send msg descriptor in plugins only */ 243 RECV_BUFFER, /* buf for recv msg */ 244 RECV_DESCRIPTOR, /* buf used for recv msg descriptor in plugins only */ 245 CHUNK_BUFFER /* chunk buf used in RDMATF only and not in plugins */ 246 } rdma_btype; 247 248 /* 249 * RDMA buffer information 250 */ 251 typedef struct rdma_buf { 252 rdma_btype type; /* buffer type */ 253 int len; /* length of buffer */ 254 caddr_t addr; /* buffer address */ 255 struct mrc handle; /* buffer registration handle */ 256 } rdma_buf_t; 257 258 /* 259 * Data transferred from plugin interrupt to svc_queuereq() 260 */ 261 struct recv_data { 262 CONN *conn; 263 int status; 264 rdma_buf_t rpcmsg; 265 }; 266 267 /* 268 * Operations vector for RDMA transports. 269 */ 270 typedef struct rdmaops { 271 /* Network */ 272 rdma_stat (*rdma_reachable)(int addr_type, struct netbuf *, 273 void **handle); 274 /* Connection */ 275 rdma_stat (*rdma_get_conn)(struct netbuf *, int addr_type, 276 void *, CONN **); 277 rdma_stat (*rdma_rel_conn)(CONN *); 278 /* Server side listner start and stop routines */ 279 void (*rdma_svc_listen)(struct rdma_svc_data *); 280 void (*rdma_svc_stop)(struct rdma_svc_data *); 281 /* Memory */ 282 rdma_stat (*rdma_regmem)(CONN *, caddr_t, uint_t, struct mrc *); 283 rdma_stat (*rdma_deregmem)(CONN *, caddr_t, struct mrc); 284 rdma_stat (*rdma_regmemsync)(CONN *, caddr_t, uint_t, 285 struct mrc *, void **); 286 rdma_stat (*rdma_deregmemsync)(CONN *, caddr_t, struct mrc, 287 void *); 288 rdma_stat (*rdma_syncmem)(CONN *, void *, caddr_t, int, int); 289 /* Buffer */ 290 rdma_stat (*rdma_buf_alloc)(CONN *, rdma_buf_t *); 291 void (*rdma_buf_free)(CONN *, rdma_buf_t *); 292 /* Transfer */ 293 rdma_stat (*rdma_send)(CONN *, clist *, uint32_t); 294 rdma_stat (*rdma_send_resp)(CONN *, clist *, uint32_t); 295 rdma_stat (*rdma_clnt_recvbuf)(CONN *, clist *, uint32_t); 296 rdma_stat (*rdma_svc_recvbuf)(CONN *, clist *); 297 rdma_stat (*rdma_recv)(CONN *, clist **, uint32_t); 298 /* RDMA */ 299 rdma_stat (*rdma_read)(CONN *, clist *, int); 300 rdma_stat (*rdma_write)(CONN *, clist *, int); 301 /* INFO */ 302 rdma_stat (*rdma_getinfo)(rdma_info_t *info); 303 304 } rdmaops_t; 305 306 /* 307 * RDMA operations. 308 */ 309 #define RDMA_REACHABLE(rdma_ops, addr_type, addr, handle) \ 310 (*(rdma_ops)->rdma_reachable)(addr_type, addr, handle) 311 312 #define RDMA_GET_CONN(rdma_ops, addr, addr_type, handle, conn) \ 313 (*(rdma_ops)->rdma_get_conn)(addr, addr_type, handle, conn) 314 315 #define RDMA_REL_CONN(conn) \ 316 (*(conn)->c_rdmamod->rdma_ops->rdma_rel_conn)(conn) 317 318 #define RDMA_REGMEM(conn, buff, len, handle) \ 319 (*(conn)->c_rdmamod->rdma_ops->rdma_regmem)(conn, buff, len, handle) 320 321 #define RDMA_DEREGMEM(conn, buff, handle) \ 322 (*(conn)->c_rdmamod->rdma_ops->rdma_deregmem)(conn, buff, handle) 323 324 #define RDMA_REGMEMSYNC(conn, buff, len, handle, synchandle) \ 325 (*(conn)->c_rdmamod->rdma_ops->rdma_regmemsync)(conn, buff, \ 326 len, handle, synchandle) 327 328 #define RDMA_DEREGMEMSYNC(conn, buff, handle, synchandle) \ 329 (*(conn)->c_rdmamod->rdma_ops->rdma_deregmemsync)(conn, buff, \ 330 handle, synchandle) 331 332 #define RDMA_SYNCMEM(conn, handle, buff, len, direction) \ 333 (*(conn)->c_rdmamod->rdma_ops->rdma_syncmem)(conn, handle, \ 334 buff, len, direction) 335 336 #define RDMA_BUF_ALLOC(conn, rbuf) \ 337 (*(conn)->c_rdmamod->rdma_ops->rdma_buf_alloc)(conn, rbuf) 338 339 #define RDMA_BUF_FREE(conn, rbuf) \ 340 (*(conn)->c_rdmamod->rdma_ops->rdma_buf_free)(conn, rbuf) 341 342 #define RDMA_SEND(conn, sendlist, xid) \ 343 (*(conn)->c_rdmamod->rdma_ops->rdma_send)(conn, sendlist, xid) 344 345 #define RDMA_SEND_RESP(conn, sendlist, xid) \ 346 (*(conn)->c_rdmamod->rdma_ops->rdma_send_resp)(conn, sendlist, xid) 347 348 #define RDMA_CLNT_RECVBUF(conn, cl, xid) \ 349 (*(conn)->c_rdmamod->rdma_ops->rdma_clnt_recvbuf)(conn, cl, xid) 350 351 #define RDMA_SVC_RECVBUF(conn, cl) \ 352 (*(conn)->c_rdmamod->rdma_ops->rdma_svc_recvbuf)(conn, cl) 353 354 #define RDMA_RECV(conn, recvlist, xid) \ 355 (*(conn)->c_rdmamod->rdma_ops->rdma_recv)(conn, recvlist, xid) 356 357 #define RDMA_READ(conn, cl, wait) \ 358 (*(conn)->c_rdmamod->rdma_ops->rdma_read)(conn, cl, wait) 359 360 #define RDMA_WRITE(conn, cl, wait) \ 361 (*(conn)->c_rdmamod->rdma_ops->rdma_write)(conn, cl, wait) 362 363 #define RDMA_GETINFO(rdma_mod, info) \ 364 (*(rdma_mod)->rdma_ops->rdma_getinfo)(info) 365 366 #ifdef _KERNEL 367 extern rdma_registry_t *rdma_mod_head; 368 extern krwlock_t rdma_lock; /* protects rdma_mod_head list */ 369 extern int rdma_modloaded; /* flag for loading RDMA plugins */ 370 extern int rdma_dev_available; /* rdma device is loaded or not */ 371 extern kmutex_t rdma_modload_lock; /* protects rdma_modloaded flag */ 372 extern uint_t rdma_minchunk; 373 extern ldi_ident_t rpcmod_li; /* needed by layed driver framework */ 374 375 /* 376 * General RDMA routines 377 */ 378 extern void clist_add(struct clist **clp, uint32_t xdroff, int len, 379 struct mrc *shandle, caddr_t saddr, 380 struct mrc *dhandle, caddr_t daddr); 381 extern void clist_free(struct clist *cl); 382 extern int clist_register(CONN *conn, struct clist *cl, bool_t src); 383 extern int clist_deregister(CONN *conn, struct clist *cl, bool_t src); 384 rdma_stat rdma_clnt_postrecv(CONN *conn, uint32_t xid); 385 rdma_stat rdma_svc_postrecv(CONN *conn); 386 extern rdma_stat clist_syncmem(CONN *conn, struct clist *cl, bool_t src); 387 extern rdma_stat rdma_register_mod(rdma_mod_t *mod); 388 extern rdma_stat rdma_unregister_mod(rdma_mod_t *mod); 389 extern void rdma_buf_free(CONN *conn, rdma_buf_t *rbuf); 390 extern int rdma_modload(); 391 392 /* 393 * RDMA XDR 394 */ 395 extern void xdrrdma_create(XDR *, caddr_t, uint_t, int, struct clist *, 396 enum xdr_op, CONN *); 397 extern void xdrrdma_destroy(XDR *); 398 extern struct clist *xdrrdma_clist(XDR *); 399 extern uint_t xdrrdma_getpos(XDR *); 400 extern bool_t xdrrdma_setpos(XDR *, uint_t); 401 extern bool_t xdr_clist(XDR *, clist *); 402 extern bool_t xdr_do_clist(XDR *, clist **); 403 extern uint_t xdr_getbufsize(XDR *); 404 unsigned int xdrrdma_sizeof(xdrproc_t func, void *data, int min_chunk); 405 unsigned int xdrrdma_authsize(AUTH *auth, struct cred *cred, int min_chunk); 406 #endif /* _KERNEL */ 407 408 #ifdef __cplusplus 409 } 410 #endif 411 412 #endif /* _RPC_RPC_RDMA_H */ 413