xref: /illumos-gate/usr/src/uts/common/sys/ib/clients/rds/rdsib_ep.h (revision 00a3eaf3896a33935e11fd5c5fb5c1714225c067)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /*
26  * Copyright (c) 2005 SilverStorm Technologies, Inc. All rights reserved.
27  *
28  * This software is available to you under a choice of one of two
29  * licenses.  You may choose to be licensed under the terms of the GNU
30  * General Public License (GPL) Version 2, available from the file
31  * COPYING in the main directory of this source tree, or the
32  * OpenIB.org BSD license below:
33  *
34  *     Redistribution and use in source and binary forms, with or
35  *     without modification, are permitted provided that the following
36  *     conditions are met:
37  *
38  *	- Redistributions of source code must retain the above
39  *	  copyright notice, this list of conditions and the following
40  *	  disclaimer.
41  *
42  *	- Redistributions in binary form must reproduce the above
43  *	  copyright notice, this list of conditions and the following
44  *	  disclaimer in the documentation and/or other materials
45  *	  provided with the distribution.
46  *
47  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
48  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
49  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
50  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
51  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
52  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
53  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
54  * SOFTWARE.
55  *
56  */
57 /*
58  * Sun elects to include this software in Sun product
59  * under the OpenIB BSD license.
60  *
61  *
62  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
63  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
64  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
65  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
66  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
67  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
68  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
69  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
70  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
71  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
72  * POSSIBILITY OF SUCH DAMAGE.
73  */
74 
75 #ifndef _RDSIB_EP_H
76 #define	_RDSIB_EP_H
77 
78 #ifdef __cplusplus
79 extern "C" {
80 #endif
81 
82 #include <netinet/in.h>
83 
84 /*
85  * Control channel or Data channel
86  */
typedef enum rds_ep_type_s {
	RDS_EP_TYPE_CTRL = 1,	/* endpoint is the control channel */
	RDS_EP_TYPE_DATA = 2	/* endpoint is the data channel */
} rds_ep_type_t;
91 
92 /*
93  * Channel States
94  *
95  * RDS_EP_STATE_UNCONNECTED - Initial state when rds_ep_t is created
96  * RDS_EP_STATE_ACTIVE_PENDING - Active side connection in progress
97  * RDS_EP_STATE_PASSIVE_PENDING - Passive side connection in progress
98  * RDS_EP_STATE_CONNECTED - Channel is connected
99  * RDS_EP_STATE_CLOSED - Channel is closed
100  */
typedef enum rds_ep_state_s {
	/* Initial state when the rds_ep_t is created */
	RDS_EP_STATE_UNCONNECTED = 0,
	/* Active side connection in progress */
	RDS_EP_STATE_ACTIVE_PENDING = 1,
	/* Passive side connection in progress */
	RDS_EP_STATE_PASSIVE_PENDING = 2,
	/* Channel is connected */
	RDS_EP_STATE_CONNECTED = 3,
	/* NOTE(review): presumably a close is in progress - confirm */
	RDS_EP_STATE_CLOSING = 4,
	/* Channel is closed */
	RDS_EP_STATE_CLOSED = 5,
	/* NOTE(review): presumably the channel hit an error - confirm */
	RDS_EP_STATE_ERROR = 6
} rds_ep_state_t;
110 
111 /*
112  * Session State Machine Diagram
113  *
114  *                     -----------------
115  *                    |       (6)       |
116  *                    |                 |
117  *                    v                 |
118  *             --> (Created)-------->(Failed)
119  *            |     |         (5)       ^
120  *            |     |(1)                |
121  *            |     |                   |(9)
122  *            |     v                   |
123  *            |    (Init)<--------------|
124  *            |     | |       (8)       |
125  *            |     | |                 |
126  *            |  (2)|  --------------   |
127  *        (11)|     |         (7)    |  |
128  *            |     v                v  |
129  *            |    (Connected)------>(Error)
130  *            |     |         (10)
131  *            |     |(3)
132  *            |     |
133  *            |     v
134  *            |    (Closed)
135  *            |     |
136  *            |     |(4)
137  *            |     |
138  *            |     v
139  *             --- (Fini) ------->(Destroy)
140  *                         (12)
141  *
142  *	(1) rds_session_init()
143  *	(2) rds_session_open()
144  *	(3) rds_session_close()
145  *	(4) rds_session_fini()
146  *	(4) rds_passive_session_fini()
147  *	(5) Failure in rds_session_init()
148  *	(6) rds_sendmsg(3SOCKET)/Incoming CM REQ
149  *	(7) Failure in rds_session_open()
150  *	(8) rds_session_close(), rds_get_ibaddr() and rds_session_reinit()
151  *	(9) rds_session_close() and rds_session_fini()
152  *	(9) rds_cleanup_passive_session() and rds_passive_session_fini()
153  *	(10) Connection Error/Incoming REQ
154  *	(11) rds_sendmsg(3SOCKET)/Incoming REQ
155  *
156  *
157  * Created   - Session is allocated and inserted into the sessionlist but
158  *             not all members are initialized.
159  * Init      - All members are initialized, send buffer pool is allocated.
160  * Connected - Data and ctrl RC channels are opened.
161  * Closed    - Data and ctrl RC channels are closed.
162  * Fini      - Send buffer pool and buffers in the receive pool are freed.
163  * Destroy   - Session is removed from the session list and is ready to be
164  *             freed.
165  * Failed    - Session initialization has failed (send buffer pool allocation).
166  * Error     - (1) Failed to open the RC channels.
167  *             (2) An error occurred on the RC channels while sending.
168  *             (3) Received a new CM REQ message on the existing connection.
169  */
typedef enum rds_session_state_s {
	/* Allocated and on the session list; not all members initialized */
	RDS_SESSION_STATE_CREATED = 0,
	/* Session initialization failed (send buffer pool allocation) */
	RDS_SESSION_STATE_FAILED = 1,
	/* All members initialized, send buffer pool allocated */
	RDS_SESSION_STATE_INIT = 2,
	/* Data and ctrl RC channels are opened */
	RDS_SESSION_STATE_CONNECTED = 3,
	/* NOTE(review): not shown in the state diagram - confirm meaning */
	RDS_SESSION_STATE_HCA_CLOSING = 4,
	/*
	 * RC channels failed to open, an error occurred on them while
	 * sending, or a new CM REQ arrived on the existing connection
	 */
	RDS_SESSION_STATE_ERROR = 5,
	/* NOTE(review): not shown in the state diagram - confirm meaning */
	RDS_SESSION_STATE_ACTIVE_CLOSING = 6,
	/* NOTE(review): not shown in the state diagram - confirm meaning */
	RDS_SESSION_STATE_PASSIVE_CLOSING = 7,
	/* Data and ctrl RC channels are closed */
	RDS_SESSION_STATE_CLOSED = 8,
	/* Send buffer pool and buffers in the receive pool are freed */
	RDS_SESSION_STATE_FINI = 9,
	/* Removed from the session list and ready to be freed */
	RDS_SESSION_STATE_DESTROY = 10
} rds_session_state_t;
183 
/*
 * Move session 'sp' to 'state': take session_lock as writer, store the
 * new value in session_state, then drop the lock.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement.  Without the wrapper, using it as the body of an
 * unbraced if/else would guard only the rw_enter() call, and the state
 * store and rw_exit() would run unconditionally.  The arguments are
 * parenthesized so that arbitrary expressions can be passed.  'sp' is
 * evaluated more than once and therefore must not have side effects.
 */
#define	RDS_SESSION_TRANSITION(sp, state)			\
	do {							\
		rw_enter(&(sp)->session_lock, RW_WRITER);	\
		(sp)->session_state = (state);			\
		rw_exit(&(sp)->session_lock);			\
	} while (0)
188 
/*
 * Which end of the session this node is: active or passive.
 * NOTE(review): presumably the values kept in rds_session_t:session_type
 * and passed as 'type' to rds_session_create() - confirm against callers.
 */
#define	RDS_SESSION_ACTIVE	1	/* active end of the session */
#define	RDS_SESSION_PASSIVE	2	/* passive end of the session */
192 
193 /*
194  * RDS QP Information
195  *
196  * lock  - Synchronize access
197  * depth - Max number of WRs that can be posted.
198  * level - Number of outstanding WRs in the QP
199  * lwm   - Water mark at which to post more receive WRs.
200  * taskqpending - Indicates if a taskq thread is dispatched to post receive
201  *		WRs in the RQ
202  */
203 typedef struct rds_qp_s {
204 	kmutex_t		qp_lock;
205 	uint32_t		qp_depth;
206 	uint32_t		qp_level;
207 	uint32_t		qp_lwm;
208 	boolean_t		qp_taskqpending;
209 } rds_qp_t;
210 
211 /*
212  * RDS EndPoint(One end of RC connection)
213  *
214  * sp        - Parent Session
215  * type      - Control or Data Channel
216  * remip     - Same as session_remip
217  * myip      - Same as session_myip
218  * snd_lkey  - LKey for the send buffer pool
219  * hca_guid  - HCA guid
220  * snd_mrhdl - Memory handle for the send buffer pool
221  * lock      - Protects the members
222  * state     - See rds_ep_state_t
223  * chanhdl   - RC channel handle
224  * sendcq    - Send CQ handle
225  * recvcq    - Recv CQ handle
226  * sndpool   - Send buffer Pool
227  * rcvpool   - Recv buffer Pool
228  * segfbp    - First packet of a segmented message.
229  * seglbp    - Last packet of a segmented message.
230  * lbufid    - Last successful buffer that was received by the remote.
231  *             Valid only during session failover/reconnect.
232  * rbufid    - Last buffer (remote buffer) that was received successfully
233  *             from the remote node.
234  * ackds     - SGL used for send acknowledgement.
235  * ackwr     - WR to send acknowledgement.
236  * ackhdl    - Memory handle for 'ack_addr'.
237  * ack_rkey  - RKey for 'ack_addr'.
238  * ack_addr  - Memory region to receive RDMA acknowledgement from remote.
239  */
240 typedef struct rds_ep_s {
241 	struct rds_session_s	*ep_sp;
242 	rds_ep_type_t		ep_type;
243 	ipaddr_t		ep_remip;
244 	ipaddr_t		ep_myip;
245 	ibt_lkey_t		ep_snd_lkey;
246 	ib_guid_t		ep_hca_guid;
247 	ibt_mr_hdl_t		ep_snd_mrhdl;
248 	kmutex_t		ep_lock;
249 	rds_ep_state_t		ep_state;
250 	ibt_channel_hdl_t	ep_chanhdl;
251 	ibt_cq_hdl_t		ep_sendcq;
252 	ibt_cq_hdl_t		ep_recvcq;
253 	rds_bufpool_t		ep_sndpool;
254 	rds_bufpool_t		ep_rcvpool;
255 	rds_qp_t		ep_recvqp;
256 	uint_t			ep_rdmacnt;
257 	rds_buf_t		*ep_segfbp;
258 	rds_buf_t		*ep_seglbp;
259 	uintptr_t		ep_lbufid;
260 	uintptr_t		ep_rbufid;
261 	ibt_wr_ds_t		ep_ackds;
262 	ibt_send_wr_t		ep_ackwr;
263 	ibt_mr_hdl_t		ep_ackhdl;
264 	ibt_rkey_t		ep_ack_rkey;
265 	uintptr_t		ep_ack_addr;
266 } rds_ep_t;
267 
268 /*
269  * One end of an RDS session
270  *
271  * nextp   - Pointer to the next session in the session list.
272  *           This is protected by rds_state_t:rds_sessionlock.
273  * remip   - IP address of the node having the remote end of the session.
274  * myip    - IP address of this end of the session.
275  * lgid    - IB local (source) gid, hosting "myip".
276  * rgid    - IB remote (destination) gid, hosting "remip".
277  * lock    - Provides read/write access to members of the session.
278  * type    - Identifies which end of session (active or passive).
279  * state   - State of session (rds_session_state_t).
280  * dataep  - Data endpoint
281  * ctrlep  - Control endpoint
282  * failover- Flag to indicate that an error occurred and the session is
283  *           re-connecting.
284  * portmap_lock - To serialize access to portmap.
285  * portmap - Bitmap of sockets.
286  *           The maximum number of sockets seems to be 65536; the portmap has
287  *           1 bit for each remote socket. A set bit indicates that the
288  *           corresponding remote socket is stalled and vice versa.
289  */
290 typedef struct rds_session_s {
291 	struct rds_session_s	*session_nextp;
292 	ipaddr_t		session_remip;
293 	ipaddr_t		session_myip;
294 	ib_guid_t		session_hca_guid;
295 	ib_gid_t		session_lgid;
296 	ib_gid_t		session_rgid;
297 	krwlock_t		session_lock;
298 	uint8_t			session_type;
299 	uint8_t			session_state;
300 	struct rds_ep_s		session_dataep;
301 	struct rds_ep_s		session_ctrlep;
302 	uint_t			session_failover;
303 	krwlock_t		session_local_portmap_lock;
304 	krwlock_t		session_remote_portmap_lock;
305 	uint8_t			session_local_portmap[RDS_PORT_MAP_SIZE];
306 	uint8_t			session_remote_portmap[RDS_PORT_MAP_SIZE];
307 	ibt_path_info_t		session_pinfo;
308 } rds_session_t;
309 
310 /* defined in rds_ep.c */
311 int rds_ep_init(rds_ep_t *ep, ib_guid_t hca_guid);
312 rds_session_t *rds_session_create(rds_state_t *statep, ipaddr_t destip,
313     ipaddr_t srcip, ibt_cm_req_rcv_t *reqp, uint8_t type);
314 int rds_session_init(rds_session_t *sp);
315 int rds_session_reinit(rds_session_t *sp, ib_gid_t lgid);
316 void rds_session_open(rds_session_t *sp);
317 void rds_session_close(rds_session_t *sp, ibt_execution_mode_t mode,
318     uint_t wait);
319 rds_session_t *rds_session_lkup(rds_state_t *statep, ipaddr_t destip,
320     ib_guid_t node_guid);
321 void rds_recycle_session(rds_session_t *sp);
322 void rds_session_active(rds_session_t *sp);
323 void rds_close_sessions(void *arg);
324 void rds_received_msg(rds_ep_t *ep, rds_buf_t *bp);
325 void rds_handle_control_message(rds_session_t *sp, rds_ctrl_pkt_t *cp);
326 void rds_handle_send_error(rds_ep_t *ep);
327 void rds_session_fini(rds_session_t *sp);
328 void rds_passive_session_fini(rds_session_t *sp);
329 void rds_cleanup_passive_session(void *arg);
330 
331 /* defined in rds_ib.c */
332 ibt_channel_hdl_t rds_ep_alloc_rc_channel(rds_ep_t *ep, uint8_t hca_port);
333 void rds_ep_free_rc_channel(rds_ep_t *ep);
334 void rds_post_recv_buf(void *arg);
335 void rds_poll_send_completions(ibt_cq_hdl_t cq, struct rds_ep_s *ep,
336     boolean_t lock);
337 
338 /* defined in rds_cm.c */
339 int rds_open_rc_channel(rds_ep_t *ep, ibt_path_info_t *pinfo,
340     ibt_execution_mode_t mode, ibt_channel_hdl_t *chanhdl);
341 int rds_close_rc_channel(ibt_channel_hdl_t chanhdl, ibt_execution_mode_t mode);
342 
343 int rds_deliver_new_msg(mblk_t *mp, ipaddr_t local_addr, ipaddr_t rem_addr,
344     in_port_t local_port, in_port_t rem_port, zoneid_t zoneid);
345 
346 /* defined in rds_sc.c */
347 int rds_sc_path_lookup(ipaddr_t *localip, ipaddr_t *remip);
348 
349 #ifdef __cplusplus
350 }
351 #endif
352 
353 #endif	/* _RDSIB_EP_H */
354