1 /*-
2 * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
3 *
4 * Copyright (c) 2015 - 2023 Intel Corporation
5 *
6 * This software is available to you under a choice of one of two
7 * licenses. You may choose to be licensed under the terms of the GNU
8 * General Public License (GPL) Version 2, available from the file
9 * COPYING in the main directory of this source tree, or the
10 * OpenFabrics.org BSD license below:
11 *
12 * Redistribution and use in source and binary forms, with or
13 * without modification, are permitted provided that the following
14 * conditions are met:
15 *
16 * - Redistributions of source code must retain the above
17 * copyright notice, this list of conditions and the following
18 * disclaimer.
19 *
20 * - Redistributions in binary form must reproduce the above
21 * copyright notice, this list of conditions and the following
22 * disclaimer in the documentation and/or other materials
23 * provided with the distribution.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE.
33 */
34
35 #ifndef IRDMA_CM_H
36 #define IRDMA_CM_H
37
/* Selector values for an MPA request: accept (1) or reject (2). */
#define IRDMA_MPA_REQUEST_ACCEPT	1
#define IRDMA_MPA_REQUEST_REJECT	2

/*
 * IETF MPA -- defines
 *
 * NOTE: the "IEFT" spelling of the two key macros is kept on purpose;
 * renaming them would break any code that already uses these names.
 */
#define IEFT_MPA_KEY_REQ	"MPA ID Req Frame"
#define IEFT_MPA_KEY_REP	"MPA ID Rep Frame"
/* Both key strings above are exactly this many characters (NUL excluded). */
#define IETF_MPA_KEY_SIZE	16
#define IETF_MPA_VER		1
#define IETF_MAX_PRIV_DATA_LEN	512
/* Equals 16-byte key + 1-byte flags + 1-byte rev + 2-byte length. */
#define IETF_MPA_FRAME_SIZE	20
/* Equals two 16-bit fields (cf. struct ietf_rtr_msg). */
#define IETF_RTR_MSG_SIZE	4
#define IETF_MPA_V2_FLAG	0x10
#define SNDMARKER_SEQNMASK	0x000001ff
#define IRDMA_MAX_IETF_SIZE	32

/*
 * IETF RTR MSG Fields
 *
 * 0x8000 and 0x4000 are each used by two names below, and
 * IETF_NO_IRD_ORD covers the remaining low 14 bits of a 16-bit value.
 * NOTE(review): which 16-bit field each name applies to is not visible
 * here -- confirm against the users of these macros.
 */
#define IETF_PEER_TO_PEER	0x8000
#define IETF_FLPDU_ZERO_LEN	0x4000
#define IETF_RDMA0_WRITE	0x8000
#define IETF_RDMA0_READ		0x4000
#define IETF_NO_IRD_ORD		0x3fff
59
#define MAX_PORTS			65536

#define IRDMA_PASSIVE_STATE_INDICATED	0
#define IRDMA_DO_NOT_SEND_RESET_EVENT	1
#define IRDMA_SEND_RESET_EVENT		2

#define MAX_IRDMA_IFS			4

/* Each SET_* value occupies its own single bit, so they can be OR-ed. */
#define SET_ACK				0x1
#define SET_SYN				0x2
#define SET_FIN				0x4
#define SET_RST				0x8

#define TCP_OPTIONS_PADDING		3

#define IRDMA_DEFAULT_RETRYS		64
#define IRDMA_DEFAULT_RETRANS		32
#define IRDMA_DEFAULT_TTL		0x40
#define IRDMA_DEFAULT_RTT_VAR		6
#define IRDMA_DEFAULT_SS_THRESH		0x3fffffff
#define IRDMA_DEFAULT_REXMIT_THRESH	8

/*
 * Timeouts.  Those written in terms of HZ rely on HZ being provided by
 * the including file.
 * NOTE(review): the unit of IRDMA_SHORT_TIME is not visible here.
 */
#define IRDMA_RETRY_TIMEOUT		HZ
#define IRDMA_SHORT_TIME		10
#define IRDMA_LONG_TIME			(2 * HZ)
#define IRDMA_MAX_TIMEOUT		((unsigned long)(12 * HZ))
86
/* Connection-manager sizing constants and default values. */
#define IRDMA_CM_HASHTABLE_SIZE		1024
#define IRDMA_CM_TCP_TIMER_INTERVAL	3000
#define IRDMA_CM_DEFAULT_MTU		1540
#define IRDMA_CM_DEFAULT_FRAME_CNT	10
#define IRDMA_CM_THREAD_STACK_SIZE	256
#define IRDMA_CM_DEFAULT_RCV_WND	64240
/* Numerically equal to 0xffff << IRDMA_CM_DEFAULT_RCV_WND_SCALE. */
#define IRDMA_CM_DEFAULT_RCV_WND_SCALED	0x3fffc
#define IRDMA_CM_DEFAULT_RCV_WND_SCALE	2
#define IRDMA_CM_DEFAULT_FREE_PKTS	10
#define IRDMA_CM_FREE_PKT_LO_WATERMARK	2
#define IRDMA_CM_DEFAULT_MSS		536
#define IRDMA_CM_DEFAULT_MPA_VER	2
#define IRDMA_CM_DEFAULT_SEQ		0x159bf75f
#define IRDMA_CM_DEFAULT_LOCAL_ID	0x3b47
#define IRDMA_CM_DEFAULT_SEQ2		0x18ed5740
#define IRDMA_CM_DEFAULT_LOCAL_ID2	0xb807
/* Sum of IRDMA_MAX_IETF_SIZE and IETF_MAX_PRIV_DATA_LEN. */
#define IRDMA_MAX_CM_BUF		(IRDMA_MAX_IETF_SIZE + IETF_MAX_PRIV_DATA_LEN)
104
/* Single-bit flag values occupying bits 5..7 of an 8-bit value. */
enum ietf_mpa_flags {
	IETF_MPA_FLAGS_REJECT	= 1 << 5,	/* 0x20 */
	IETF_MPA_FLAGS_CRC	= 1 << 6,	/* 0x40 */
	IETF_MPA_FLAGS_MARKERS	= 1 << 7,	/* 0x80 */
};
110
/* Kind of CM timer entry (passed to irdma_schedule_cm_timer()). */
enum irdma_timer_type {
	IRDMA_TIMER_TYPE_SEND	= 0,
	IRDMA_TIMER_TYPE_CLOSE	= 1,
};
115
/*
 * Option numbers.  Values 0..5 coincide with the standard TCP option
 * kind numbers (EOL, NOP, MSS, window scale, SACK-permitted, SACK).
 * NOTE(review): the origin of 0xbc for OPTION_NUM_WRITE0 is not visible
 * in this header.
 */
enum option_nums {
	OPTION_NUM_EOL		= 0,
	OPTION_NUM_NONE		= 1,
	OPTION_NUM_MSS		= 2,
	OPTION_NUM_WINDOW_SCALE	= 3,
	OPTION_NUM_SACK_PERM	= 4,
	OPTION_NUM_SACK		= 5,
	OPTION_NUM_WRITE0	= 0xbc,
};
125
/*
 * cm node transition states
 *
 * Enumerators are numbered consecutively from 0 (UNKNOWN) through
 * 19 (CLOSED); the order below therefore fixes their numeric values.
 */
enum irdma_cm_node_state {
	IRDMA_CM_STATE_UNKNOWN = 0,
	IRDMA_CM_STATE_INITED,
	IRDMA_CM_STATE_LISTENING,
	IRDMA_CM_STATE_SYN_RCVD,
	IRDMA_CM_STATE_SYN_SENT,
	IRDMA_CM_STATE_ONE_SIDE_ESTABLISHED,
	IRDMA_CM_STATE_ESTABLISHED,
	IRDMA_CM_STATE_ACCEPTING,
	IRDMA_CM_STATE_MPAREQ_SENT,
	IRDMA_CM_STATE_MPAREQ_RCVD,
	IRDMA_CM_STATE_MPAREJ_RCVD,
	IRDMA_CM_STATE_OFFLOADED,
	IRDMA_CM_STATE_FIN_WAIT1,
	IRDMA_CM_STATE_FIN_WAIT2,
	IRDMA_CM_STATE_CLOSE_WAIT,
	IRDMA_CM_STATE_TIME_WAIT,
	IRDMA_CM_STATE_LAST_ACK,
	IRDMA_CM_STATE_CLOSING,
	IRDMA_CM_STATE_LISTENER_DESTROYED,
	IRDMA_CM_STATE_CLOSED,
};
149
/* MPA frame revision numbers; see struct ietf_mpa_v1 / struct ietf_mpa_v2. */
enum mpa_frame_ver {
	IETF_MPA_V1	= 1,
	IETF_MPA_V2	= 2,
};
154
/* Selects between the request (0) and reply (1) MPA key. */
enum mpa_frame_key {
	MPA_KEY_REQUEST	= 0,
	MPA_KEY_REPLY	= 1,
};
159
/* Zero-length RDMA operation selector: read (1) or write (2). */
enum send_rdma0 {
	SEND_RDMA_READ_ZERO	= 1,
	SEND_RDMA_WRITE_ZERO	= 2,
};
164
/* Packet classification used by the CM; numbered consecutively from 0. */
enum irdma_tcpip_pkt_type {
	IRDMA_PKT_TYPE_UNKNOWN	= 0,
	IRDMA_PKT_TYPE_SYN	= 1,
	IRDMA_PKT_TYPE_SYNACK	= 2,
	IRDMA_PKT_TYPE_ACK	= 3,
	IRDMA_PKT_TYPE_FIN	= 4,
	IRDMA_PKT_TYPE_RST	= 5,
};
173
/*
 * Listener state selector.  PASSIVE and ACTIVE are distinct bits, and
 * EITHER is the bitwise OR of the two (numerically 3).
 */
enum irdma_cm_listener_state {
	IRDMA_CM_LISTENER_PASSIVE_STATE	= 1,
	IRDMA_CM_LISTENER_ACTIVE_STATE	= 2,
	IRDMA_CM_LISTENER_EITHER_STATE	= IRDMA_CM_LISTENER_PASSIVE_STATE |
					  IRDMA_CM_LISTENER_ACTIVE_STATE,
};
179
/* CM event codes, numbered consecutively from 0 (UNKNOWN) to 9 (ABORTED). */
enum irdma_cm_event_type {
	IRDMA_CM_EVENT_UNKNOWN = 0,
	IRDMA_CM_EVENT_ESTABLISHED,
	IRDMA_CM_EVENT_MPA_REQ,
	IRDMA_CM_EVENT_MPA_CONNECT,
	IRDMA_CM_EVENT_MPA_ACCEPT,
	IRDMA_CM_EVENT_MPA_REJECT,
	IRDMA_CM_EVENT_MPA_ESTABLISHED,
	IRDMA_CM_EVENT_CONNECTED,
	IRDMA_CM_EVENT_RESET,
	IRDMA_CM_EVENT_ABORTED,
};
193
194 struct ietf_mpa_v1 {
195 u8 key[IETF_MPA_KEY_SIZE];
196 u8 flags;
197 u8 rev;
198 __be16 priv_data_len;
199 u8 priv_data[];
200 };
201
202 struct ietf_rtr_msg {
203 __be16 ctrl_ird;
204 __be16 ctrl_ord;
205 };
206
207 struct ietf_mpa_v2 {
208 u8 key[IETF_MPA_KEY_SIZE];
209 u8 flags;
210 u8 rev;
211 __be16 priv_data_len;
212 struct ietf_rtr_msg rtr_msg;
213 u8 priv_data[];
214 };
215
216 struct option_base {
217 u8 optionnum;
218 u8 len;
219 };
220
221 struct option_mss {
222 u8 optionnum;
223 u8 len;
224 __be16 mss;
225 };
226
227 struct option_windowscale {
228 u8 optionnum;
229 u8 len;
230 u8 shiftcount;
231 };
232
233 union all_known_options {
234 char eol;
235 struct option_base base;
236 struct option_mss mss;
237 struct option_windowscale windowscale;
238 };
239
240 struct irdma_timer_entry {
241 struct list_head list;
242 unsigned long timetosend; /* jiffies */
243 struct irdma_puda_buf *sqbuf;
244 u32 type;
245 u32 retrycount;
246 u32 retranscount;
247 u32 context;
248 u32 send_retrans;
249 int close_when_complete;
250 };
251
252 /* CM context params */
253 struct irdma_cm_tcp_context {
254 u8 client;
255 u32 loc_seq_num;
256 u32 loc_ack_num;
257 u32 rem_ack_num;
258 u32 rcv_nxt;
259 u32 loc_id;
260 u32 rem_id;
261 u32 snd_wnd;
262 u32 max_snd_wnd;
263 u32 rcv_wnd;
264 u32 mss;
265 u8 snd_wscale;
266 u8 rcv_wscale;
267 };
268
269 struct irdma_apbvt_entry {
270 struct hlist_node hlist;
271 u32 use_cnt;
272 u16 port;
273 };
274
275 struct irdma_cm_listener {
276 struct list_head list;
277 struct iw_cm_id *cm_id;
278 struct irdma_cm_core *cm_core;
279 struct irdma_device *iwdev;
280 struct list_head child_listen_list;
281 struct irdma_apbvt_entry *apbvt_entry;
282 enum irdma_cm_listener_state listener_state;
283 atomic_t refcnt;
284 atomic_t pend_accepts_cnt;
285 u32 loc_addr[4];
286 u32 reused_node;
287 int backlog;
288 u16 loc_port;
289 u16 vlan_id;
290 u8 loc_mac[ETHER_ADDR_LEN];
291 u8 user_pri;
292 u8 tos;
293 bool qhash_set:1;
294 bool ipv4:1;
295 };
296
297 struct irdma_kmem_info {
298 void *addr;
299 u32 size;
300 };
301
302 struct irdma_mpa_priv_info {
303 const void *addr;
304 u32 size;
305 };
306
307 struct irdma_cm_node {
308 struct irdma_qp *iwqp;
309 struct irdma_device *iwdev;
310 struct irdma_sc_dev *dev;
311 struct irdma_cm_tcp_context tcp_cntxt;
312 struct irdma_cm_core *cm_core;
313 struct irdma_timer_entry *send_entry;
314 struct irdma_timer_entry *close_entry;
315 struct irdma_cm_listener *listener;
316 struct list_head timer_entry;
317 struct list_head reset_entry;
318 struct list_head teardown_entry;
319 struct irdma_apbvt_entry *apbvt_entry;
320 struct rcu_head rcu_head;
321 struct irdma_mpa_priv_info pdata;
322 struct irdma_sc_ah *ah;
323 struct irdma_kmem_info mpa_hdr;
324 struct iw_cm_id *cm_id;
325 struct hlist_node list;
326 struct completion establish_comp;
327 spinlock_t retrans_list_lock; /* protect CM node rexmit updates*/
328 atomic_t passive_state;
329 atomic_t refcnt;
330 enum irdma_cm_node_state state;
331 enum send_rdma0 send_rdma0_op;
332 enum mpa_frame_ver mpa_frame_rev;
333 u32 loc_addr[4], rem_addr[4];
334 u16 loc_port, rem_port;
335 int apbvt_set;
336 int accept_pend;
337 u16 vlan_id;
338 u16 ird_size;
339 u16 ord_size;
340 u16 mpav2_ird_ord;
341 u16 lsmm_size;
342 u8 pdata_buf[IETF_MAX_PRIV_DATA_LEN];
343 u8 loc_mac[ETHER_ADDR_LEN];
344 u8 rem_mac[ETHER_ADDR_LEN];
345 u8 user_pri;
346 u8 tos;
347 bool ack_rcvd:1;
348 bool qhash_set:1;
349 bool ipv4:1;
350 bool snd_mark_en:1;
351 bool rcv_mark_en:1;
352 bool do_lpb:1;
353 bool accelerated:1;
354 struct ietf_mpa_v2 mpa_v2_frame;
355 };
356
357 /* Used by internal CM APIs to pass CM information*/
358 struct irdma_cm_info {
359 struct iw_cm_id *cm_id;
360 struct irdma_cqp_request *cqp_request;
361 u16 loc_port;
362 u16 rem_port;
363 u32 loc_addr[4];
364 u32 rem_addr[4];
365 u32 qh_qpid;
366 u16 vlan_id;
367 int backlog;
368 u8 user_pri;
369 u8 tos;
370 bool ipv4;
371 };
372
373 struct irdma_cm_event {
374 enum irdma_cm_event_type type;
375 struct irdma_cm_info cm_info;
376 struct work_struct event_work;
377 struct irdma_cm_node *cm_node;
378 };
379
380 struct irdma_cm_core {
381 struct irdma_device *iwdev;
382 struct irdma_sc_dev *dev;
383 struct list_head listen_list;
384 DECLARE_HASHTABLE(cm_hash_tbl, 8);
385 DECLARE_HASHTABLE(apbvt_hash_tbl, 8);
386 struct timer_list tcp_timer;
387 struct workqueue_struct *event_wq;
388 spinlock_t ht_lock; /* protect CM node (active side) list */
389 spinlock_t listen_list_lock; /* protect listener list */
390 spinlock_t apbvt_lock; /*serialize apbvt add/del entries*/
391 u64 stats_nodes_created;
392 u64 stats_nodes_destroyed;
393 u64 stats_listen_created;
394 u64 stats_listen_destroyed;
395 u64 stats_listen_nodes_created;
396 u64 stats_listen_nodes_destroyed;
397 u64 stats_lpbs;
398 u64 stats_accepts;
399 u64 stats_rejects;
400 u64 stats_connect_errs;
401 u64 stats_passive_errs;
402 u64 stats_pkt_retrans;
403 u64 stats_backlog_drops;
404 struct irdma_puda_buf *(*form_cm_frame)(struct irdma_cm_node *cm_node,
405 struct irdma_kmem_info *options,
406 struct irdma_kmem_info *hdr,
407 struct irdma_mpa_priv_info *pdata,
408 u8 flags);
409 int (*cm_create_ah)(struct irdma_cm_node *cm_node, bool wait);
410 void (*cm_free_ah)(struct irdma_cm_node *cm_node);
411 };
412
/* Bundle of device, CM info and listener pointers passed as one argument. */
struct irdma_add_mqh_cbs {
	struct irdma_device		*iwdev;
	struct irdma_cm_info		*cm_info;
	struct irdma_cm_listener	*cm_listen_node;
};
418
/*
 * irdma_schedule_cm_timer - defined outside this header.
 * @cm_node: CM node the timer entry belongs to
 * @sqbuf: buffer associated with the entry
 * @type: IRDMA_TIMER_TYPE_SEND or IRDMA_TIMER_TYPE_CLOSE
 * @send_retrans: NOTE(review): presumably stored in
 *	irdma_timer_entry.send_retrans -- confirm in the definition
 * @close_when_complete: NOTE(review): presumably stored in
 *	irdma_timer_entry.close_when_complete -- confirm in the definition
 *
 * Returns an int status; its meaning is set by the definition.
 */
int irdma_schedule_cm_timer(struct irdma_cm_node *cm_node,
			    struct irdma_puda_buf *sqbuf,
			    enum irdma_timer_type type,
			    int send_retrans,
			    int close_when_complete);
423
/**
 * irdma_tos2dscp - extract the DSCP field from a ToS byte
 * @tos: 8-bit type-of-service value
 *
 * Returns bits 7:2 of @tos shifted down to bit 0 (a 6-bit value).
 *
 * IRDMA_DSCP_S and IRDMA_DSCP are defined here and remain visible to
 * the rest of the translation unit after this function.
 */
static inline u8 irdma_tos2dscp(u8 tos)
{
#define IRDMA_DSCP_S 2
#define IRDMA_DSCP GENMASK(7, 2)
	return FIELD_GET(IRDMA_DSCP, tos);
}
430
431 int irdma_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
432 int irdma_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len);
433 int irdma_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
434 int irdma_create_listen(struct iw_cm_id *cm_id, int backlog);
435 int irdma_destroy_listen(struct iw_cm_id *cm_id);
436 int irdma_add_arp(struct irdma_pci_f *rf, u32 *ip, const u8 *mac);
437 int irdma_cm_start(struct irdma_device *dev);
438 int irdma_cm_stop(struct irdma_device *dev);
439 bool irdma_ipv4_is_lpb(u32 loc_addr, u32 rem_addr);
440 bool irdma_ipv6_is_lpb(u32 *loc_addr, u32 *rem_addr);
441 int irdma_arp_table(struct irdma_pci_f *rf, u32 *ip_addr,
442 const u8 *mac_addr, u32 action);
443 bool irdma_port_in_use(struct irdma_cm_core *cm_core, u16 port);
444 void irdma_send_ack(struct irdma_cm_node *cm_node);
445 void irdma_lpb_nop(struct irdma_sc_qp *qp);
446 void irdma_rem_ref_cm_node(struct irdma_cm_node *cm_node);
447 void irdma_add_conn_est_qh(struct irdma_cm_node *cm_node);
448 #endif /* IRDMA_CM_H */
449