/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License, Version 1.0 only * (the "License"). You may not use this file except in compliance * with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2005 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #ifndef _RPC_RPC_RDMA_H #define _RPC_RPC_RDMA_H #pragma ident "%Z%%M% %I% %E% SMI" #include #include #include #include #ifdef __cplusplus extern "C" { #endif #define RPCRDMA_VERS 0 /* Version of the RPC over RDMA protocol */ #define RDMATF_VERS 1 /* Version of the API used by RPC for RDMA */ #define RDMATF_VERS_1 1 /* Current version of RDMATF */ /* * The size of an RPC call or reply message */ #define RPC_MSG_SZ 1024 /* * Storage for a chunk list */ #define RPC_CL_SZ 1024 /* * Chunk size */ #define MINCHUNK 1024 /* * Size of receive buffer */ #define RPC_BUF_SIZE 2048 #define NOWAIT 0 /* don't wait for operation of complete */ #define WAIT 1 /* wait and ensure that operation is complete */ /* * RDMA xdr buffer control and other control flags. Add new flags here, * set them in private structure for xdr over RDMA in xdr_rdma.c */ #define RDMA_NOCHUNK 0x1 /* * Return codes from RDMA operations */ typedef enum { RDMA_SUCCESS = 0, /* successful operation */ RDMA_INVAL = 1, /* invalid parameter */ RDMA_TIMEDOUT = 2, /* operation timed out */ RDMA_INTR = 3, /* operation interrupted */ RDMA_NORESOURCE = 4, /* insufficient resource */ /* * connection errors */ RDMA_REJECT = 5, /* connection req rejected */ RDMA_NOLISTENER = 6, /* no listener on server */ RDMA_UNREACHABLE = 7, /* host unreachable */ RDMA_CONNLOST = 8, /* connection lost */ RDMA_XPRTFAILED = 9, /* RDMA transport failed */ RDMA_PROTECTERR = 10, /* memory protection error */ RDMA_OVERRUN = 11, /* transport overrun */ RDMA_RECVQEMPTY = 12, /* incoming pkt dropped, recv q empty */ RDMA_PROTFAILED = 13, /* RDMA protocol failed */ RDMA_NOTSUPP = 14, /* requested feature not supported */ RDMA_REMOTERR = 15, /* error at remote end */ /* * RDMATF errors */ RDMA_BADVERS = 16, /* mismatch RDMATF versions */ RDMA_REG_EXIST = 17, /* RDMATF registration already exists */ /* * fallback error */ RDMA_FAILED = 18 /* generic error */ } rdma_stat; /* * Memory region context. This is an RDMA provider generated * handle for a registered arbitrary size contiguous virtual * memory. The RDMA Interface Adapter needs this for local or * remote memory access. * * The mrc_rmr field holds the remote memory region context * which is sent over-the-wire to provide the remote host * with RDMA access to the memory region. */ struct mrc { uint32_t mrc_rmr; /* Remote MR context, sent OTW */ union { struct mr { uint32_t lmr; /* Local MR context */ uint64_t linfo; /* Local memory info */ } mr; } lhdl; }; #define mrc_lmr lhdl.mr.lmr #define mrc_linfo lhdl.mr.linfo /* * The XDR offset value is used by the XDR * routine to identify the position in the * RPC message where the opaque object would * normally occur. Neither the data content * of the chunk, nor its size field are included * in the RPC message. The XDR offset is calculated * as if the chunks were present. * * The remaining fields identify the chunk of data * on the sender. The c_memhandle identifies a * registered RDMA memory region and the c_addr * and c_len fields identify the chunk within it. */ struct clist { uint32 c_xdroff; /* XDR offset */ uint32 c_len; /* Length */ struct mrc c_smemhandle; /* src memory handle */ uint64 c_ssynchandle; /* src sync handle */ uint64 c_saddr; /* src address */ struct mrc c_dmemhandle; /* dst memory handle */ uint64 c_dsynchandle; /* dst sync handle */ uint64 c_daddr; /* dst address */ struct clist *c_next; /* Next chunk */ }; typedef struct clist clist; enum rdma_proc { RDMA_MSG = 0, /* chunk list and RPC msg follow */ RDMA_NOMSG = 1, /* only chunk list follows */ RDMA_MSGP = 2, /* chunk list and RPC msg with padding follow */ RDMA_DONE = 3 /* signal completion of chunk transfer */ }; /* * Listener information for a service */ struct rdma_svc_data { queue_t q; /* queue_t to place incoming pkts */ int active; /* If active, after registeration startup */ rdma_stat err_code; /* Error code from plugin layer */ int32_t svcid; /* RDMA based service identifier */ }; /* * Per RDMA plugin module information. * Will be populated by each plugin * module during its initialization. */ typedef struct rdma_mod { char *rdma_api; /* "kvipl", "ibtf", etc */ uint_t rdma_version; /* RDMATF API version */ int rdma_count; /* # of devices */ struct rdmaops *rdma_ops; /* rdma op vector for api */ } rdma_mod_t; /* * Registry of RDMA plugins */ typedef struct rdma_registry { rdma_mod_t *r_mod; /* plugin mod info */ struct rdma_registry *r_next; /* next registered RDMA plugin */ } rdma_registry_t; /* * RDMA transport information */ typedef struct rdma_info { uint_t addrlen; /* address length */ uint_t mts; /* max transfer size */ uint_t mtu; /* native mtu size of unlerlying network */ } rdma_info_t; /* * RDMA Connection information */ typedef struct conn { rdma_mod_t *c_rdmamod; /* RDMA transport info for conn */ struct netbuf c_raddr; /* remote address */ struct netbuf c_laddr; /* local address */ int c_ref; /* no. of clients of connection */ struct conn *c_next; /* next in list of connections */ struct conn *c_prev; /* prev in list of connections */ caddr_t c_private; /* transport specific stuff */ #define C_IDLE 0x80000000 #define C_CONN_PEND 0x40000000 #define C_CONNECTED 0x20000000 #define C_ERROR 0x10000000 #define C_DISCONN_PEND 0x08000000 #define C_REMOTE_DOWN 0x04000000 uint_t c_state; /* state of connection */ kmutex_t c_lock; /* protect c_state and c_ref fields */ kcondvar_t c_cv; /* to signal when pending is done */ } CONN; /* * Memory management for the RDMA buffers */ /* * RDMA buffer types */ typedef enum { SEND_BUFFER, /* buf for send msg */ SEND_DESCRIPTOR, /* buf used for send msg descriptor in plugins only */ RECV_BUFFER, /* buf for recv msg */ RECV_DESCRIPTOR, /* buf used for recv msg descriptor in plugins only */ CHUNK_BUFFER /* chunk buf used in RDMATF only and not in plugins */ } rdma_btype; /* * RDMA buffer information */ typedef struct rdma_buf { rdma_btype type; /* buffer type */ int len; /* length of buffer */ caddr_t addr; /* buffer address */ struct mrc handle; /* buffer registration handle */ } rdma_buf_t; /* * Data transferred from plugin interrupt to svc_queuereq() */ struct recv_data { CONN *conn; int status; rdma_buf_t rpcmsg; }; /* * Operations vector for RDMA transports. */ typedef struct rdmaops { /* Network */ rdma_stat (*rdma_reachable)(int addr_type, struct netbuf *, void **handle); /* Connection */ rdma_stat (*rdma_get_conn)(struct netbuf *, int addr_type, void *, CONN **); rdma_stat (*rdma_rel_conn)(CONN *); /* Server side listner start and stop routines */ void (*rdma_svc_listen)(struct rdma_svc_data *); void (*rdma_svc_stop)(struct rdma_svc_data *); /* Memory */ rdma_stat (*rdma_regmem)(CONN *, caddr_t, uint_t, struct mrc *); rdma_stat (*rdma_deregmem)(CONN *, caddr_t, struct mrc); rdma_stat (*rdma_regmemsync)(CONN *, caddr_t, uint_t, struct mrc *, void **); rdma_stat (*rdma_deregmemsync)(CONN *, caddr_t, struct mrc, void *); rdma_stat (*rdma_syncmem)(CONN *, void *, caddr_t, int, int); /* Buffer */ rdma_stat (*rdma_buf_alloc)(CONN *, rdma_buf_t *); void (*rdma_buf_free)(CONN *, rdma_buf_t *); /* Transfer */ rdma_stat (*rdma_send)(CONN *, clist *, uint32_t); rdma_stat (*rdma_send_resp)(CONN *, clist *, uint32_t); rdma_stat (*rdma_clnt_recvbuf)(CONN *, clist *, uint32_t); rdma_stat (*rdma_svc_recvbuf)(CONN *, clist *); rdma_stat (*rdma_recv)(CONN *, clist **, uint32_t); /* RDMA */ rdma_stat (*rdma_read)(CONN *, clist *, int); rdma_stat (*rdma_write)(CONN *, clist *, int); /* INFO */ rdma_stat (*rdma_getinfo)(rdma_info_t *info); } rdmaops_t; /* * RDMA operations. */ #define RDMA_REACHABLE(rdma_ops, addr_type, addr, handle) \ (*(rdma_ops)->rdma_reachable)(addr_type, addr, handle) #define RDMA_GET_CONN(rdma_ops, addr, addr_type, handle, conn) \ (*(rdma_ops)->rdma_get_conn)(addr, addr_type, handle, conn) #define RDMA_REL_CONN(conn) \ (*(conn)->c_rdmamod->rdma_ops->rdma_rel_conn)(conn) #define RDMA_REGMEM(conn, buff, len, handle) \ (*(conn)->c_rdmamod->rdma_ops->rdma_regmem)(conn, buff, len, handle) #define RDMA_DEREGMEM(conn, buff, handle) \ (*(conn)->c_rdmamod->rdma_ops->rdma_deregmem)(conn, buff, handle) #define RDMA_REGMEMSYNC(conn, buff, len, handle, synchandle) \ (*(conn)->c_rdmamod->rdma_ops->rdma_regmemsync)(conn, buff, \ len, handle, synchandle) #define RDMA_DEREGMEMSYNC(conn, buff, handle, synchandle) \ (*(conn)->c_rdmamod->rdma_ops->rdma_deregmemsync)(conn, buff, \ handle, synchandle) #define RDMA_SYNCMEM(conn, handle, buff, len, direction) \ (*(conn)->c_rdmamod->rdma_ops->rdma_syncmem)(conn, handle, \ buff, len, direction) #define RDMA_BUF_ALLOC(conn, rbuf) \ (*(conn)->c_rdmamod->rdma_ops->rdma_buf_alloc)(conn, rbuf) #define RDMA_BUF_FREE(conn, rbuf) \ (*(conn)->c_rdmamod->rdma_ops->rdma_buf_free)(conn, rbuf) #define RDMA_SEND(conn, sendlist, xid) \ (*(conn)->c_rdmamod->rdma_ops->rdma_send)(conn, sendlist, xid) #define RDMA_SEND_RESP(conn, sendlist, xid) \ (*(conn)->c_rdmamod->rdma_ops->rdma_send_resp)(conn, sendlist, xid) #define RDMA_CLNT_RECVBUF(conn, cl, xid) \ (*(conn)->c_rdmamod->rdma_ops->rdma_clnt_recvbuf)(conn, cl, xid) #define RDMA_SVC_RECVBUF(conn, cl) \ (*(conn)->c_rdmamod->rdma_ops->rdma_svc_recvbuf)(conn, cl) #define RDMA_RECV(conn, recvlist, xid) \ (*(conn)->c_rdmamod->rdma_ops->rdma_recv)(conn, recvlist, xid) #define RDMA_READ(conn, cl, wait) \ (*(conn)->c_rdmamod->rdma_ops->rdma_read)(conn, cl, wait) #define RDMA_WRITE(conn, cl, wait) \ (*(conn)->c_rdmamod->rdma_ops->rdma_write)(conn, cl, wait) #define RDMA_GETINFO(rdma_mod, info) \ (*(rdma_mod)->rdma_ops->rdma_getinfo)(info) #ifdef _KERNEL extern rdma_registry_t *rdma_mod_head; extern krwlock_t rdma_lock; /* protects rdma_mod_head list */ extern int rdma_modloaded; /* flag for loading RDMA plugins */ extern int rdma_dev_available; /* rdma device is loaded or not */ extern kmutex_t rdma_modload_lock; /* protects rdma_modloaded flag */ extern uint_t rdma_minchunk; extern ldi_ident_t rpcmod_li; /* needed by layed driver framework */ /* * General RDMA routines */ extern void clist_add(struct clist **clp, uint32_t xdroff, int len, struct mrc *shandle, caddr_t saddr, struct mrc *dhandle, caddr_t daddr); extern void clist_free(struct clist *cl); extern int clist_register(CONN *conn, struct clist *cl, bool_t src); extern int clist_deregister(CONN *conn, struct clist *cl, bool_t src); rdma_stat rdma_clnt_postrecv(CONN *conn, uint32_t xid); rdma_stat rdma_svc_postrecv(CONN *conn); extern rdma_stat clist_syncmem(CONN *conn, struct clist *cl, bool_t src); extern rdma_stat rdma_register_mod(rdma_mod_t *mod); extern rdma_stat rdma_unregister_mod(rdma_mod_t *mod); extern void rdma_buf_free(CONN *conn, rdma_buf_t *rbuf); extern int rdma_modload(); /* * RDMA XDR */ extern void xdrrdma_create(XDR *, caddr_t, uint_t, int, struct clist *, enum xdr_op, CONN *); extern void xdrrdma_destroy(XDR *); extern struct clist *xdrrdma_clist(XDR *); extern uint_t xdrrdma_getpos(XDR *); extern bool_t xdrrdma_setpos(XDR *, uint_t); extern bool_t xdr_clist(XDR *, clist *); extern bool_t xdr_do_clist(XDR *, clist **); extern uint_t xdr_getbufsize(XDR *); unsigned int xdrrdma_sizeof(xdrproc_t func, void *data, int min_chunk); unsigned int xdrrdma_authsize(AUTH *auth, struct cred *cred, int min_chunk); #endif /* _KERNEL */ #ifdef __cplusplus } #endif #endif /* _RPC_RPC_RDMA_H */