xref: /freebsd/sys/dev/irdma/irdma_cm.c (revision d09a955a605d03471c5ab7bd17b8a6186fdc148c)
1 /*-
2  * SPDX-License-Identifier: GPL-2.0 or Linux-OpenIB
3  *
4  * Copyright (c) 2015 - 2022 Intel Corporation
5  *
6  * This software is available to you under a choice of one of two
7  * licenses.  You may choose to be licensed under the terms of the GNU
8  * General Public License (GPL) Version 2, available from the file
9  * COPYING in the main directory of this source tree, or the
10  * OpenFabrics.org BSD license below:
11  *
12  *   Redistribution and use in source and binary forms, with or
13  *   without modification, are permitted provided that the following
14  *   conditions are met:
15  *
16  *    - Redistributions of source code must retain the above
17  *	copyright notice, this list of conditions and the following
18  *	disclaimer.
19  *
20  *    - Redistributions in binary form must reproduce the above
21  *	copyright notice, this list of conditions and the following
22  *	disclaimer in the documentation and/or other materials
23  *	provided with the distribution.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32  * SOFTWARE.
33  */
34 /*$FreeBSD$*/
35 
36 #include "irdma_main.h"
37 
38 static void irdma_cm_post_event(struct irdma_cm_event *event);
39 static void irdma_disconnect_worker(struct work_struct *work);
40 
41 /**
42  * irdma_free_sqbuf - put back puda buffer if refcount is 0
43  * @vsi: The VSI structure of the device
44  * @bufp: puda buffer to free
45  */
46 void
47 irdma_free_sqbuf(struct irdma_sc_vsi *vsi, void *bufp)
48 {
49 	struct irdma_puda_buf *buf = bufp;
50 	struct irdma_puda_rsrc *ilq = vsi->ilq;
51 
52 	if (atomic_dec_and_test(&buf->refcount))
53 		irdma_puda_ret_bufpool(ilq, buf);
54 }
55 
56 /**
57  * irdma_record_ird_ord - Record IRD/ORD passed in
58  * @cm_node: connection's node
59  * @conn_ird: connection IRD
60  * @conn_ord: connection ORD
61  */
62 static void
63 irdma_record_ird_ord(struct irdma_cm_node *cm_node, u32 conn_ird,
64 		     u32 conn_ord)
65 {
66 	if (conn_ird > cm_node->dev->hw_attrs.max_hw_ird)
67 		conn_ird = cm_node->dev->hw_attrs.max_hw_ird;
68 
69 	if (conn_ord > cm_node->dev->hw_attrs.max_hw_ord)
70 		conn_ord = cm_node->dev->hw_attrs.max_hw_ord;
71 	else if (!conn_ord && cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO)
72 		conn_ord = 1;
73 	cm_node->ird_size = conn_ird;
74 	cm_node->ord_size = conn_ord;
75 }
76 
77 /**
78  * irdma_copy_ip_ntohl - copy IP address from  network to host
79  * @dst: IP address in host order
80  * @src: IP address in network order (big endian)
81  */
82 void
83 irdma_copy_ip_ntohl(u32 *dst, __be32 *src)
84 {
85 	*dst++ = ntohl(*src++);
86 	*dst++ = ntohl(*src++);
87 	*dst++ = ntohl(*src++);
88 	*dst = ntohl(*src);
89 }
90 
91 /**
92  * irdma_copy_ip_htonl - copy IP address from host to network order
93  * @dst: IP address in network order (big endian)
94  * @src: IP address in host order
95  */
96 void
97 irdma_copy_ip_htonl(__be32 *dst, u32 *src)
98 {
99 	*dst++ = htonl(*src++);
100 	*dst++ = htonl(*src++);
101 	*dst++ = htonl(*src++);
102 	*dst = htonl(*src);
103 }
104 
105 /**
106  * irdma_get_addr_info
107  * @cm_node: contains ip/tcp info
108  * @cm_info: to get a copy of the cm_node ip/tcp info
109  */
110 static void
111 irdma_get_addr_info(struct irdma_cm_node *cm_node,
112 		    struct irdma_cm_info *cm_info)
113 {
114 	memset(cm_info, 0, sizeof(*cm_info));
115 	cm_info->ipv4 = cm_node->ipv4;
116 	cm_info->vlan_id = cm_node->vlan_id;
117 	memcpy(cm_info->loc_addr, cm_node->loc_addr, sizeof(cm_info->loc_addr));
118 	memcpy(cm_info->rem_addr, cm_node->rem_addr, sizeof(cm_info->rem_addr));
119 	cm_info->loc_port = cm_node->loc_port;
120 	cm_info->rem_port = cm_node->rem_port;
121 }
122 
123 /**
124  * irdma_fill_sockaddr4 - fill in addr info for IPv4 connection
125  * @cm_node: connection's node
126  * @event: upper layer's cm event
127  */
128 static inline void
129 irdma_fill_sockaddr4(struct irdma_cm_node *cm_node,
130 		     struct iw_cm_event *event)
131 {
132 	struct sockaddr_in *laddr = (struct sockaddr_in *)&event->local_addr;
133 	struct sockaddr_in *raddr = (struct sockaddr_in *)&event->remote_addr;
134 
135 	laddr->sin_family = AF_INET;
136 	raddr->sin_family = AF_INET;
137 
138 	laddr->sin_port = htons(cm_node->loc_port);
139 	raddr->sin_port = htons(cm_node->rem_port);
140 
141 	laddr->sin_addr.s_addr = htonl(cm_node->loc_addr[0]);
142 	raddr->sin_addr.s_addr = htonl(cm_node->rem_addr[0]);
143 }
144 
145 /**
146  * irdma_fill_sockaddr6 - fill in addr info for IPv6 connection
147  * @cm_node: connection's node
148  * @event: upper layer's cm event
149  */
150 static inline void
151 irdma_fill_sockaddr6(struct irdma_cm_node *cm_node,
152 		     struct iw_cm_event *event)
153 {
154 	struct sockaddr_in6 *laddr6 = (struct sockaddr_in6 *)&event->local_addr;
155 	struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *)&event->remote_addr;
156 
157 	laddr6->sin6_family = AF_INET6;
158 	raddr6->sin6_family = AF_INET6;
159 
160 	laddr6->sin6_port = htons(cm_node->loc_port);
161 	raddr6->sin6_port = htons(cm_node->rem_port);
162 
163 	irdma_copy_ip_htonl(laddr6->sin6_addr.__u6_addr.__u6_addr32,
164 			    cm_node->loc_addr);
165 	irdma_copy_ip_htonl(raddr6->sin6_addr.__u6_addr.__u6_addr32,
166 			    cm_node->rem_addr);
167 }
168 
169 /**
170  * irdma_get_cmevent_info - for cm event upcall
171  * @cm_node: connection's node
172  * @cm_id: upper layers cm struct for the event
173  * @event: upper layer's cm event
174  */
175 static inline void
176 irdma_get_cmevent_info(struct irdma_cm_node *cm_node,
177 		       struct iw_cm_id *cm_id,
178 		       struct iw_cm_event *event)
179 {
180 	memcpy(&event->local_addr, &cm_id->m_local_addr,
181 	       sizeof(event->local_addr));
182 	memcpy(&event->remote_addr, &cm_id->m_remote_addr,
183 	       sizeof(event->remote_addr));
184 	if (cm_node) {
185 		event->private_data = cm_node->pdata_buf;
186 		event->private_data_len = (u8)cm_node->pdata.size;
187 		event->ird = cm_node->ird_size;
188 		event->ord = cm_node->ord_size;
189 	}
190 }
191 
192 /**
193  * irdma_send_cm_event - upcall cm's event handler
194  * @cm_node: connection's node
195  * @cm_id: upper layer's cm info struct
196  * @type: Event type to indicate
197  * @status: status for the event type
198  */
199 static int
200 irdma_send_cm_event(struct irdma_cm_node *cm_node,
201 		    struct iw_cm_id *cm_id,
202 		    enum iw_cm_event_type type, int status)
203 {
204 	struct iw_cm_event event = {0};
205 
206 	event.event = type;
207 	event.status = status;
208 
209 	irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM,
210 		    "cm_node %p cm_id=%p state=%d accel=%d event_type=%d status=%d\n",
211 		    cm_node, cm_id, cm_node->accelerated, cm_node->state, type,
212 		    status);
213 
214 	switch (type) {
215 	case IW_CM_EVENT_CONNECT_REQUEST:
216 		if (cm_node->ipv4)
217 			irdma_fill_sockaddr4(cm_node, &event);
218 		else
219 			irdma_fill_sockaddr6(cm_node, &event);
220 		event.provider_data = cm_node;
221 		event.private_data = cm_node->pdata_buf;
222 		event.private_data_len = (u8)cm_node->pdata.size;
223 		event.ird = cm_node->ird_size;
224 		break;
225 	case IW_CM_EVENT_CONNECT_REPLY:
226 		irdma_get_cmevent_info(cm_node, cm_id, &event);
227 		break;
228 	case IW_CM_EVENT_ESTABLISHED:
229 		event.ird = cm_node->ird_size;
230 		event.ord = cm_node->ord_size;
231 		break;
232 	case IW_CM_EVENT_DISCONNECT:
233 	case IW_CM_EVENT_CLOSE:
234 		/* Wait if we are in RTS but havent issued the iwcm event upcall */
235 		if (!cm_node->accelerated)
236 			wait_for_completion(&cm_node->establish_comp);
237 		break;
238 	default:
239 		return -EINVAL;
240 	}
241 
242 	return cm_id->event_handler(cm_id, &event);
243 }
244 
245 /**
246  * irdma_timer_list_prep - add connection nodes to a list to perform timer tasks
247  * @cm_core: cm's core
248  * @timer_list: a timer list to which cm_node will be selected
249  */
250 static void
251 irdma_timer_list_prep(struct irdma_cm_core *cm_core,
252 		      struct list_head *timer_list)
253 {
254 	struct irdma_cm_node *cm_node;
255 	int bkt;
256 
257 	HASH_FOR_EACH_RCU(cm_core->cm_hash_tbl, bkt, cm_node, list) {
258 		if ((cm_node->close_entry || cm_node->send_entry) &&
259 		    atomic_inc_not_zero(&cm_node->refcnt))
260 			list_add(&cm_node->timer_entry, timer_list);
261 	}
262 }
263 
264 /**
265  * irdma_create_event - create cm event
266  * @cm_node: connection's node
267  * @type: Event type to generate
268  */
269 static struct irdma_cm_event *
270 irdma_create_event(struct irdma_cm_node *cm_node,
271 		   enum irdma_cm_event_type type)
272 {
273 	struct irdma_cm_event *event;
274 
275 	if (!cm_node->cm_id)
276 		return NULL;
277 
278 	event = kzalloc(sizeof(*event), GFP_ATOMIC);
279 
280 	if (!event)
281 		return NULL;
282 
283 	event->type = type;
284 	event->cm_node = cm_node;
285 	memcpy(event->cm_info.rem_addr, cm_node->rem_addr,
286 	       sizeof(event->cm_info.rem_addr));
287 	memcpy(event->cm_info.loc_addr, cm_node->loc_addr,
288 	       sizeof(event->cm_info.loc_addr));
289 	event->cm_info.rem_port = cm_node->rem_port;
290 	event->cm_info.loc_port = cm_node->loc_port;
291 	event->cm_info.cm_id = cm_node->cm_id;
292 	irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM,
293 		    "node=%p event=%p type=%u dst=%pI4 src=%pI4\n",
294 		    cm_node,
295 		    event, type, event->cm_info.loc_addr,
296 		    event->cm_info.rem_addr);
297 	irdma_cm_post_event(event);
298 
299 	return event;
300 }
301 
302 /**
303  * irdma_free_retrans_entry - free send entry
304  * @cm_node: connection's node
305  */
306 static void
307 irdma_free_retrans_entry(struct irdma_cm_node *cm_node)
308 {
309 	struct irdma_device *iwdev = cm_node->iwdev;
310 	struct irdma_timer_entry *send_entry;
311 
312 	send_entry = cm_node->send_entry;
313 	if (!send_entry)
314 		return;
315 
316 	cm_node->send_entry = NULL;
317 	irdma_free_sqbuf(&iwdev->vsi, send_entry->sqbuf);
318 	kfree(send_entry);
319 	atomic_dec(&cm_node->refcnt);
320 }
321 
322 /**
323  * irdma_cleanup_retrans_entry - free send entry with lock
324  * @cm_node: connection's node
325  */
326 static void
327 irdma_cleanup_retrans_entry(struct irdma_cm_node *cm_node)
328 {
329 	unsigned long flags;
330 
331 	spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
332 	irdma_free_retrans_entry(cm_node);
333 	spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
334 }
335 
336 /**
337  * irdma_form_ah_cm_frame - get a free packet and build frame with address handle
338  * @cm_node: connection's node ionfo to use in frame
339  * @options: pointer to options info
340  * @hdr: pointer mpa header
341  * @pdata: pointer to private data
342  * @flags:  indicates FIN or ACK
343  */
344 static struct irdma_puda_buf *
345 irdma_form_ah_cm_frame(struct irdma_cm_node *cm_node,
346 		       struct irdma_kmem_info *options,
347 		       struct irdma_kmem_info *hdr,
348 		       struct irdma_mpa_priv_info *pdata,
349 		       u8 flags)
350 {
351 	struct irdma_puda_buf *sqbuf;
352 	struct irdma_sc_vsi *vsi = &cm_node->iwdev->vsi;
353 	u8 *buf;
354 	struct tcphdr *tcph;
355 	u16 pktsize;
356 	u32 opts_len = 0;
357 	u32 pd_len = 0;
358 	u32 hdr_len = 0;
359 
360 	if (!cm_node->ah || !cm_node->ah->ah_info.ah_valid) {
361 		irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "AH invalid\n");
362 		return NULL;
363 	}
364 
365 	sqbuf = irdma_puda_get_bufpool(vsi->ilq);
366 	if (!sqbuf) {
367 		irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "SQ buf NULL\n");
368 		return NULL;
369 	}
370 
371 	sqbuf->ah_id = cm_node->ah->ah_info.ah_idx;
372 	buf = sqbuf->mem.va;
373 	if (options)
374 		opts_len = (u32)options->size;
375 
376 	if (hdr)
377 		hdr_len = hdr->size;
378 
379 	if (pdata)
380 		pd_len = pdata->size;
381 
382 	pktsize = sizeof(*tcph) + opts_len + hdr_len + pd_len;
383 
384 	memset(buf, 0, pktsize);
385 
386 	sqbuf->totallen = pktsize;
387 	sqbuf->tcphlen = sizeof(*tcph) + opts_len;
388 	sqbuf->scratch = cm_node;
389 
390 	tcph = (struct tcphdr *)buf;
391 	buf += sizeof(*tcph);
392 
393 	tcph->th_sport = htons(cm_node->loc_port);
394 	tcph->th_dport = htons(cm_node->rem_port);
395 	tcph->th_seq = htonl(cm_node->tcp_cntxt.loc_seq_num);
396 
397 	if (flags & SET_ACK) {
398 		cm_node->tcp_cntxt.loc_ack_num = cm_node->tcp_cntxt.rcv_nxt;
399 		tcph->th_ack = htonl(cm_node->tcp_cntxt.loc_ack_num);
400 		tcph->th_flags |= TH_ACK;
401 	} else {
402 		tcph->th_ack = 0;
403 	}
404 
405 	if (flags & SET_SYN) {
406 		cm_node->tcp_cntxt.loc_seq_num++;
407 		tcph->th_flags |= TH_SYN;
408 	} else {
409 		cm_node->tcp_cntxt.loc_seq_num += hdr_len + pd_len;
410 	}
411 
412 	if (flags & SET_FIN) {
413 		cm_node->tcp_cntxt.loc_seq_num++;
414 		tcph->th_flags |= TH_FIN;
415 	}
416 
417 	if (flags & SET_RST)
418 		tcph->th_flags |= TH_RST;
419 
420 	tcph->th_off = (u16)((sizeof(*tcph) + opts_len + 3) >> 2);
421 	sqbuf->tcphlen = tcph->th_off << 2;
422 	tcph->th_win = htons(cm_node->tcp_cntxt.rcv_wnd);
423 	tcph->th_urp = 0;
424 
425 	if (opts_len) {
426 		memcpy(buf, options->addr, opts_len);
427 		buf += opts_len;
428 	}
429 
430 	if (hdr_len) {
431 		memcpy(buf, hdr->addr, hdr_len);
432 		buf += hdr_len;
433 	}
434 
435 	if (pdata && pdata->addr)
436 		memcpy(buf, pdata->addr, pdata->size);
437 
438 	atomic_set(&sqbuf->refcount, 1);
439 
440 	irdma_debug_buf(vsi->dev, IRDMA_DEBUG_ILQ, "TRANSMIT ILQ BUFFER",
441 			sqbuf->mem.va, sqbuf->totallen);
442 
443 	return sqbuf;
444 }
445 
446 /**
447  * irdma_form_uda_cm_frame - get a free packet and build frame full tcpip packet
448  * @cm_node: connection's node ionfo to use in frame
449  * @options: pointer to options info
450  * @hdr: pointer mpa header
451  * @pdata: pointer to private data
452  * @flags:  indicates FIN or ACK
453  */
454 static struct irdma_puda_buf *
455 irdma_form_uda_cm_frame(struct irdma_cm_node *cm_node,
456 			struct irdma_kmem_info *options,
457 			struct irdma_kmem_info *hdr,
458 			struct irdma_mpa_priv_info *pdata,
459 			u8 flags)
460 {
461 	struct irdma_puda_buf *sqbuf;
462 	struct irdma_sc_vsi *vsi = &cm_node->iwdev->vsi;
463 	u8 *buf;
464 
465 	struct tcphdr *tcph;
466 	struct ip *iph;
467 	struct ip6_hdr *ip6h;
468 	struct ether_header *ethh;
469 	u16 pktsize;
470 	u16 eth_hlen = ETH_HLEN;
471 	u32 opts_len = 0;
472 	u32 pd_len = 0;
473 	u32 hdr_len = 0;
474 
475 	u16 vtag;
476 
477 	sqbuf = irdma_puda_get_bufpool(vsi->ilq);
478 	if (!sqbuf)
479 		return NULL;
480 
481 	buf = sqbuf->mem.va;
482 
483 	if (options)
484 		opts_len = (u32)options->size;
485 
486 	if (hdr)
487 		hdr_len = hdr->size;
488 
489 	if (pdata)
490 		pd_len = pdata->size;
491 
492 	if (cm_node->vlan_id < VLAN_N_VID)
493 		eth_hlen += 4;
494 
495 	if (cm_node->ipv4)
496 		pktsize = sizeof(*iph) + sizeof(*tcph);
497 	else
498 		pktsize = sizeof(*ip6h) + sizeof(*tcph);
499 	pktsize += opts_len + hdr_len + pd_len;
500 
501 	memset(buf, 0, eth_hlen + pktsize);
502 
503 	sqbuf->totallen = pktsize + eth_hlen;
504 	sqbuf->maclen = eth_hlen;
505 	sqbuf->tcphlen = sizeof(*tcph) + opts_len;
506 	sqbuf->scratch = cm_node;
507 
508 	ethh = (struct ether_header *)buf;
509 	buf += eth_hlen;
510 
511 	if (cm_node->do_lpb)
512 		sqbuf->do_lpb = true;
513 
514 	if (cm_node->ipv4) {
515 		sqbuf->ipv4 = true;
516 
517 		iph = (struct ip *)buf;
518 		buf += sizeof(*iph);
519 		tcph = (struct tcphdr *)buf;
520 		buf += sizeof(*tcph);
521 
522 		ether_addr_copy(ethh->ether_dhost, cm_node->rem_mac);
523 		ether_addr_copy(ethh->ether_shost, cm_node->loc_mac);
524 		if (cm_node->vlan_id < VLAN_N_VID) {
525 			((struct ether_vlan_header *)ethh)->evl_proto =
526 			    htons(ETH_P_8021Q);
527 			vtag = (cm_node->user_pri << VLAN_PRIO_SHIFT) |
528 			    cm_node->vlan_id;
529 			((struct ether_vlan_header *)ethh)->evl_tag = htons(vtag);
530 
531 			((struct ether_vlan_header *)ethh)->evl_encap_proto =
532 			    htons(ETH_P_IP);
533 		} else {
534 			ethh->ether_type = htons(ETH_P_IP);
535 		}
536 
537 		iph->ip_v = IPVERSION;
538 		iph->ip_hl = 5;	/* 5 * 4Byte words, IP headr len */
539 		iph->ip_tos = cm_node->tos;
540 		iph->ip_len = htons(pktsize);
541 		iph->ip_id = htons(++cm_node->tcp_cntxt.loc_id);
542 
543 		iph->ip_off = htons(0x4000);
544 		iph->ip_ttl = 0x40;
545 		iph->ip_p = IPPROTO_TCP;
546 		iph->ip_src.s_addr = htonl(cm_node->loc_addr[0]);
547 		iph->ip_dst.s_addr = htonl(cm_node->rem_addr[0]);
548 	} else {
549 		sqbuf->ipv4 = false;
550 		ip6h = (struct ip6_hdr *)buf;
551 		buf += sizeof(*ip6h);
552 		tcph = (struct tcphdr *)buf;
553 		buf += sizeof(*tcph);
554 
555 		ether_addr_copy(ethh->ether_dhost, cm_node->rem_mac);
556 		ether_addr_copy(ethh->ether_shost, cm_node->loc_mac);
557 		if (cm_node->vlan_id < VLAN_N_VID) {
558 			((struct ether_vlan_header *)ethh)->evl_proto =
559 			    htons(ETH_P_8021Q);
560 			vtag = (cm_node->user_pri << VLAN_PRIO_SHIFT) |
561 			    cm_node->vlan_id;
562 			((struct ether_vlan_header *)ethh)->evl_tag = htons(vtag);
563 			((struct ether_vlan_header *)ethh)->evl_encap_proto =
564 			    htons(ETH_P_IPV6);
565 		} else {
566 			ethh->ether_type = htons(ETH_P_IPV6);
567 		}
568 		ip6h->ip6_vfc = 6 << 4;
569 		ip6h->ip6_vfc |= cm_node->tos >> 4;
570 		ip6h->ip6_flow = cm_node->tos << 20;
571 		ip6h->ip6_plen = htons(pktsize - sizeof(*ip6h));
572 		ip6h->ip6_nxt = 6;
573 		ip6h->ip6_hops = 128;
574 		irdma_copy_ip_htonl(ip6h->ip6_src.__u6_addr.__u6_addr32,
575 				    cm_node->loc_addr);
576 		irdma_copy_ip_htonl(ip6h->ip6_dst.__u6_addr.__u6_addr32,
577 				    cm_node->rem_addr);
578 	}
579 
580 	tcph->th_sport = htons(cm_node->loc_port);
581 	tcph->th_dport = htons(cm_node->rem_port);
582 	tcph->th_seq = htonl(cm_node->tcp_cntxt.loc_seq_num);
583 
584 	if (flags & SET_ACK) {
585 		cm_node->tcp_cntxt.loc_ack_num = cm_node->tcp_cntxt.rcv_nxt;
586 		tcph->th_ack = htonl(cm_node->tcp_cntxt.loc_ack_num);
587 		tcph->th_flags |= TH_ACK;
588 	} else {
589 		tcph->th_ack = 0;
590 	}
591 
592 	if (flags & SET_SYN) {
593 		cm_node->tcp_cntxt.loc_seq_num++;
594 		tcph->th_flags |= TH_SYN;
595 	} else {
596 		cm_node->tcp_cntxt.loc_seq_num += hdr_len + pd_len;
597 	}
598 
599 	if (flags & SET_FIN) {
600 		cm_node->tcp_cntxt.loc_seq_num++;
601 		tcph->th_flags |= TH_FIN;
602 	}
603 
604 	if (flags & SET_RST)
605 		tcph->th_flags |= TH_RST;
606 
607 	tcph->th_off = (u16)((sizeof(*tcph) + opts_len + 3) >> 2);
608 	sqbuf->tcphlen = tcph->th_off << 2;
609 	tcph->th_win = htons(cm_node->tcp_cntxt.rcv_wnd);
610 	tcph->th_urp = 0;
611 
612 	if (opts_len) {
613 		memcpy(buf, options->addr, opts_len);
614 		buf += opts_len;
615 	}
616 
617 	if (hdr_len) {
618 		memcpy(buf, hdr->addr, hdr_len);
619 		buf += hdr_len;
620 	}
621 
622 	if (pdata && pdata->addr)
623 		memcpy(buf, pdata->addr, pdata->size);
624 
625 	atomic_set(&sqbuf->refcount, 1);
626 
627 	irdma_debug_buf(vsi->dev, IRDMA_DEBUG_ILQ, "TRANSMIT ILQ BUFFER",
628 			sqbuf->mem.va, sqbuf->totallen);
629 
630 	return sqbuf;
631 }
632 
633 /**
634  * irdma_send_reset - Send RST packet
635  * @cm_node: connection's node
636  */
637 int
638 irdma_send_reset(struct irdma_cm_node *cm_node)
639 {
640 	struct irdma_puda_buf *sqbuf;
641 	int flags = SET_RST | SET_ACK;
642 
643 	sqbuf = cm_node->cm_core->form_cm_frame(cm_node, NULL, NULL, NULL,
644 						flags);
645 	if (!sqbuf)
646 		return -ENOMEM;
647 
648 	irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM,
649 		    "caller: %pS cm_node %p cm_id=%p accel=%d state=%d rem_port=0x%04x, loc_port=0x%04x rem_addr=%pI4 loc_addr=%pI4\n",
650 		    __builtin_return_address(0), cm_node, cm_node->cm_id,
651 		    cm_node->accelerated, cm_node->state, cm_node->rem_port,
652 		    cm_node->loc_port, cm_node->rem_addr, cm_node->loc_addr);
653 
654 	return irdma_schedule_cm_timer(cm_node, sqbuf, IRDMA_TIMER_TYPE_SEND, 0,
655 				       1);
656 }
657 
658 /**
659  * irdma_active_open_err - send event for active side cm error
660  * @cm_node: connection's node
661  * @reset: Flag to send reset or not
662  */
663 static void
664 irdma_active_open_err(struct irdma_cm_node *cm_node, bool reset)
665 {
666 	irdma_cleanup_retrans_entry(cm_node);
667 	cm_node->cm_core->stats_connect_errs++;
668 	if (reset) {
669 		irdma_debug(&cm_node->iwdev->rf->sc_dev,
670 			    IRDMA_DEBUG_CM, "cm_node=%p state=%d\n", cm_node,
671 			    cm_node->state);
672 		atomic_inc(&cm_node->refcnt);
673 		irdma_send_reset(cm_node);
674 	}
675 
676 	cm_node->state = IRDMA_CM_STATE_CLOSED;
677 	irdma_create_event(cm_node, IRDMA_CM_EVENT_ABORTED);
678 }
679 
680 /**
681  * irdma_passive_open_err - handle passive side cm error
682  * @cm_node: connection's node
683  * @reset: send reset or just free cm_node
684  */
685 static void
686 irdma_passive_open_err(struct irdma_cm_node *cm_node, bool reset)
687 {
688 	irdma_cleanup_retrans_entry(cm_node);
689 	cm_node->cm_core->stats_passive_errs++;
690 	cm_node->state = IRDMA_CM_STATE_CLOSED;
691 	irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM,
692 		    "cm_node=%p state =%d\n",
693 		    cm_node, cm_node->state);
694 	if (reset)
695 		irdma_send_reset(cm_node);
696 	else
697 		irdma_rem_ref_cm_node(cm_node);
698 }
699 
700 /**
701  * irdma_event_connect_error - to create connect error event
702  * @event: cm information for connect event
703  */
704 static void
705 irdma_event_connect_error(struct irdma_cm_event *event)
706 {
707 	struct irdma_qp *iwqp;
708 	struct iw_cm_id *cm_id;
709 
710 	cm_id = event->cm_node->cm_id;
711 	if (!cm_id)
712 		return;
713 
714 	iwqp = cm_id->provider_data;
715 
716 	if (!iwqp || !iwqp->iwdev)
717 		return;
718 
719 	iwqp->cm_id = NULL;
720 	cm_id->provider_data = NULL;
721 	irdma_send_cm_event(event->cm_node, cm_id, IW_CM_EVENT_CONNECT_REPLY,
722 			    -ECONNRESET);
723 	irdma_rem_ref_cm_node(event->cm_node);
724 }
725 
726 /**
727  * irdma_process_options - process options from TCP header
728  * @cm_node: connection's node
729  * @optionsloc: point to start of options
730  * @optionsize: size of all options
731  * @syn_pkt: flag if syn packet
732  */
733 static int
734 irdma_process_options(struct irdma_cm_node *cm_node, u8 *optionsloc,
735 		      u32 optionsize, u32 syn_pkt)
736 {
737 	u32 tmp;
738 	u32 offset = 0;
739 	union all_known_options *all_options;
740 	char got_mss_option = 0;
741 
742 	while (offset < optionsize) {
743 		all_options = (union all_known_options *)(optionsloc + offset);
744 		switch (all_options->base.optionnum) {
745 		case OPTION_NUM_EOL:
746 			offset = optionsize;
747 			break;
748 		case OPTION_NUM_NONE:
749 			offset += 1;
750 			continue;
751 		case OPTION_NUM_MSS:
752 			irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM,
753 				    "MSS Length: %d Offset: %d Size: %d\n",
754 				    all_options->mss.len, offset, optionsize);
755 			got_mss_option = 1;
756 			if (all_options->mss.len != 4)
757 				return -EINVAL;
758 			tmp = ntohs(all_options->mss.mss);
759 			if ((cm_node->ipv4 &&
760 			     (tmp + IRDMA_MTU_TO_MSS_IPV4) < IRDMA_MIN_MTU_IPV4) ||
761 			    (!cm_node->ipv4 &&
762 			     (tmp + IRDMA_MTU_TO_MSS_IPV6) < IRDMA_MIN_MTU_IPV6))
763 				return -EINVAL;
764 			if (tmp < cm_node->tcp_cntxt.mss)
765 				cm_node->tcp_cntxt.mss = tmp;
766 			break;
767 		case OPTION_NUM_WINDOW_SCALE:
768 			cm_node->tcp_cntxt.snd_wscale =
769 			    all_options->windowscale.shiftcount;
770 			break;
771 		default:
772 			irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM,
773 				    "Unsupported TCP Option: %x\n",
774 				    all_options->base.optionnum);
775 			break;
776 		}
777 		offset += all_options->base.len;
778 	}
779 	if (!got_mss_option && syn_pkt)
780 		cm_node->tcp_cntxt.mss = IRDMA_CM_DEFAULT_MSS;
781 
782 	return 0;
783 }
784 
785 /**
786  * irdma_handle_tcp_options - setup TCP context info after parsing TCP options
787  * @cm_node: connection's node
788  * @tcph: pointer tcp header
789  * @optionsize: size of options rcvd
790  * @passive: active or passive flag
791  */
792 static int
793 irdma_handle_tcp_options(struct irdma_cm_node *cm_node,
794 			 struct tcphdr *tcph, int optionsize,
795 			 int passive)
796 {
797 	u8 *optionsloc = (u8 *)&tcph[1];
798 	int ret;
799 
800 	if (optionsize) {
801 		ret = irdma_process_options(cm_node, optionsloc, optionsize,
802 					    (u32)tcph->th_flags & TH_SYN);
803 		if (ret) {
804 			irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM,
805 				    "Node %p, Sending Reset\n",
806 				    cm_node);
807 			if (passive)
808 				irdma_passive_open_err(cm_node, true);
809 			else
810 				irdma_active_open_err(cm_node, true);
811 			return ret;
812 		}
813 	}
814 
815 	cm_node->tcp_cntxt.snd_wnd = ntohs(tcph->th_win)
816 	    << cm_node->tcp_cntxt.snd_wscale;
817 
818 	if (cm_node->tcp_cntxt.snd_wnd > cm_node->tcp_cntxt.max_snd_wnd)
819 		cm_node->tcp_cntxt.max_snd_wnd = cm_node->tcp_cntxt.snd_wnd;
820 
821 	return 0;
822 }
823 
824 /**
825  * irdma_build_mpa_v1 - build a MPA V1 frame
826  * @cm_node: connection's node
827  * @start_addr: address where to build frame
828  * @mpa_key: to do read0 or write0
829  */
830 static void
831 irdma_build_mpa_v1(struct irdma_cm_node *cm_node, void *start_addr,
832 		   u8 mpa_key)
833 {
834 	struct ietf_mpa_v1 *mpa_frame = start_addr;
835 
836 	switch (mpa_key) {
837 	case MPA_KEY_REQUEST:
838 		memcpy(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE);
839 		break;
840 	case MPA_KEY_REPLY:
841 		memcpy(mpa_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE);
842 		break;
843 	default:
844 		break;
845 	}
846 	mpa_frame->flags = IETF_MPA_FLAGS_CRC;
847 	mpa_frame->rev = cm_node->mpa_frame_rev;
848 	mpa_frame->priv_data_len = htons(cm_node->pdata.size);
849 }
850 
851 /**
852  * irdma_build_mpa_v2 - build a MPA V2 frame
853  * @cm_node: connection's node
854  * @start_addr: buffer start address
855  * @mpa_key: to do read0 or write0
856  */
857 static void
858 irdma_build_mpa_v2(struct irdma_cm_node *cm_node, void *start_addr,
859 		   u8 mpa_key)
860 {
861 	struct ietf_mpa_v2 *mpa_frame = start_addr;
862 	struct ietf_rtr_msg *rtr_msg = &mpa_frame->rtr_msg;
863 	u16 ctrl_ird, ctrl_ord;
864 
865 	/* initialize the upper 5 bytes of the frame */
866 	irdma_build_mpa_v1(cm_node, start_addr, mpa_key);
867 	mpa_frame->flags |= IETF_MPA_V2_FLAG;
868 	if (cm_node->iwdev->iw_ooo) {
869 		mpa_frame->flags |= IETF_MPA_FLAGS_MARKERS;
870 		cm_node->rcv_mark_en = true;
871 	}
872 	mpa_frame->priv_data_len = cpu_to_be16(be16_to_cpu(mpa_frame->priv_data_len) +
873 					       IETF_RTR_MSG_SIZE);
874 
875 	/* initialize RTR msg */
876 	if (cm_node->mpav2_ird_ord == IETF_NO_IRD_ORD) {
877 		ctrl_ird = IETF_NO_IRD_ORD;
878 		ctrl_ord = IETF_NO_IRD_ORD;
879 	} else {
880 		ctrl_ird = (cm_node->ird_size > IETF_NO_IRD_ORD) ?
881 		    IETF_NO_IRD_ORD :
882 		    cm_node->ird_size;
883 		ctrl_ord = (cm_node->ord_size > IETF_NO_IRD_ORD) ?
884 		    IETF_NO_IRD_ORD :
885 		    cm_node->ord_size;
886 	}
887 	ctrl_ird |= IETF_PEER_TO_PEER;
888 
889 	switch (mpa_key) {
890 	case MPA_KEY_REQUEST:
891 		ctrl_ord |= IETF_RDMA0_WRITE;
892 		ctrl_ord |= IETF_RDMA0_READ;
893 		break;
894 	case MPA_KEY_REPLY:
895 		switch (cm_node->send_rdma0_op) {
896 		case SEND_RDMA_WRITE_ZERO:
897 			ctrl_ord |= IETF_RDMA0_WRITE;
898 			break;
899 		case SEND_RDMA_READ_ZERO:
900 			ctrl_ord |= IETF_RDMA0_READ;
901 			break;
902 		}
903 		break;
904 	default:
905 		break;
906 	}
907 	rtr_msg->ctrl_ird = htons(ctrl_ird);
908 	rtr_msg->ctrl_ord = htons(ctrl_ord);
909 }
910 
911 /**
912  * irdma_cm_build_mpa_frame - build mpa frame for mpa version 1 or version 2
913  * @cm_node: connection's node
914  * @mpa: mpa: data buffer
915  * @mpa_key: to do read0 or write0
916  */
917 static int
918 irdma_cm_build_mpa_frame(struct irdma_cm_node *cm_node,
919 			 struct irdma_kmem_info *mpa, u8 mpa_key)
920 {
921 	int hdr_len = 0;
922 
923 	switch (cm_node->mpa_frame_rev) {
924 	case IETF_MPA_V1:
925 		hdr_len = sizeof(struct ietf_mpa_v1);
926 		irdma_build_mpa_v1(cm_node, mpa->addr, mpa_key);
927 		break;
928 	case IETF_MPA_V2:
929 		hdr_len = sizeof(struct ietf_mpa_v2);
930 		irdma_build_mpa_v2(cm_node, mpa->addr, mpa_key);
931 		break;
932 	default:
933 		break;
934 	}
935 
936 	return hdr_len;
937 }
938 
939 /**
940  * irdma_send_mpa_request - active node send mpa request to passive node
941  * @cm_node: connection's node
942  */
943 static int
944 irdma_send_mpa_request(struct irdma_cm_node *cm_node)
945 {
946 	struct irdma_puda_buf *sqbuf;
947 
948 	cm_node->mpa_hdr.addr = &cm_node->mpa_v2_frame;
949 	cm_node->mpa_hdr.size = irdma_cm_build_mpa_frame(cm_node,
950 							 &cm_node->mpa_hdr,
951 							 MPA_KEY_REQUEST);
952 	if (!cm_node->mpa_hdr.size) {
953 		irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM,
954 			    "mpa size = %d\n",
955 			    cm_node->mpa_hdr.size);
956 		return -EINVAL;
957 	}
958 
959 	sqbuf = cm_node->cm_core->form_cm_frame(cm_node, NULL,
960 						&cm_node->mpa_hdr,
961 						&cm_node->pdata, SET_ACK);
962 	if (!sqbuf)
963 		return -ENOMEM;
964 
965 	return irdma_schedule_cm_timer(cm_node, sqbuf, IRDMA_TIMER_TYPE_SEND, 1,
966 				       0);
967 }
968 
969 /**
970  * irdma_send_mpa_reject -
971  * @cm_node: connection's node
972  * @pdata: reject data for connection
973  * @plen: length of reject data
974  */
975 static int
976 irdma_send_mpa_reject(struct irdma_cm_node *cm_node,
977 		      const void *pdata, u8 plen)
978 {
979 	struct irdma_puda_buf *sqbuf;
980 	struct irdma_mpa_priv_info priv_info;
981 
982 	cm_node->mpa_hdr.addr = &cm_node->mpa_v2_frame;
983 	cm_node->mpa_hdr.size = irdma_cm_build_mpa_frame(cm_node,
984 							 &cm_node->mpa_hdr,
985 							 MPA_KEY_REPLY);
986 
987 	cm_node->mpa_v2_frame.flags |= IETF_MPA_FLAGS_REJECT;
988 	priv_info.addr = pdata;
989 	priv_info.size = plen;
990 
991 	sqbuf = cm_node->cm_core->form_cm_frame(cm_node, NULL,
992 						&cm_node->mpa_hdr, &priv_info,
993 						SET_ACK | SET_FIN);
994 	if (!sqbuf)
995 		return -ENOMEM;
996 
997 	cm_node->state = IRDMA_CM_STATE_FIN_WAIT1;
998 
999 	return irdma_schedule_cm_timer(cm_node, sqbuf, IRDMA_TIMER_TYPE_SEND, 1,
1000 				       0);
1001 }
1002 
1003 /**
1004  * irdma_negotiate_mpa_v2_ird_ord - negotiate MPAv2 IRD/ORD
1005  * @cm_node: connection's node
1006  * @buf: Data pointer
1007  */
1008 static int
1009 irdma_negotiate_mpa_v2_ird_ord(struct irdma_cm_node *cm_node,
1010 			       u8 *buf)
1011 {
1012 	struct ietf_mpa_v2 *mpa_v2_frame;
1013 	struct ietf_rtr_msg *rtr_msg;
1014 	u16 ird_size;
1015 	u16 ord_size;
1016 	u16 ctrl_ord;
1017 	u16 ctrl_ird;
1018 
1019 	mpa_v2_frame = (struct ietf_mpa_v2 *)buf;
1020 	rtr_msg = &mpa_v2_frame->rtr_msg;
1021 
1022 	/* parse rtr message */
1023 	ctrl_ord = ntohs(rtr_msg->ctrl_ord);
1024 	ctrl_ird = ntohs(rtr_msg->ctrl_ird);
1025 	ird_size = ctrl_ird & IETF_NO_IRD_ORD;
1026 	ord_size = ctrl_ord & IETF_NO_IRD_ORD;
1027 
1028 	if (!(ctrl_ird & IETF_PEER_TO_PEER))
1029 		return -EOPNOTSUPP;
1030 
1031 	if (ird_size == IETF_NO_IRD_ORD || ord_size == IETF_NO_IRD_ORD) {
1032 		cm_node->mpav2_ird_ord = IETF_NO_IRD_ORD;
1033 		goto negotiate_done;
1034 	}
1035 
1036 	if (cm_node->state != IRDMA_CM_STATE_MPAREQ_SENT) {
1037 		/* responder */
1038 		if (!ord_size && (ctrl_ord & IETF_RDMA0_READ))
1039 			cm_node->ird_size = 1;
1040 		if (cm_node->ord_size > ird_size)
1041 			cm_node->ord_size = ird_size;
1042 	} else {
1043 		/* initiator */
1044 		if (!ird_size && (ctrl_ord & IETF_RDMA0_READ))
1045 			/* Remote peer doesn't support RDMA0_READ */
1046 			return -EOPNOTSUPP;
1047 
1048 		if (cm_node->ord_size > ird_size)
1049 			cm_node->ord_size = ird_size;
1050 
1051 		if (cm_node->ird_size < ord_size)
1052 			/* no resources available */
1053 			return -EINVAL;
1054 	}
1055 
1056 negotiate_done:
1057 	if (ctrl_ord & IETF_RDMA0_READ)
1058 		cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO;
1059 	else if (ctrl_ord & IETF_RDMA0_WRITE)
1060 		cm_node->send_rdma0_op = SEND_RDMA_WRITE_ZERO;
1061 	else
1062 		/* Not supported RDMA0 operation */
1063 		return -EOPNOTSUPP;
1064 
1065 	irdma_debug(&cm_node->iwdev->rf->sc_dev,
1066 		    IRDMA_DEBUG_CM, "MPAV2 Negotiated ORD: %d, IRD: %d\n",
1067 		    cm_node->ord_size, cm_node->ird_size);
1068 	return 0;
1069 }
1070 
1071 /**
1072  * irdma_parse_mpa - process an IETF MPA frame
1073  * @cm_node: connection's node
1074  * @buf: Data pointer
1075  * @type: to return accept or reject
1076  * @len: Len of mpa buffer
1077  */
1078 static int
1079 irdma_parse_mpa(struct irdma_cm_node *cm_node, u8 *buf, u32 *type,
1080 		u32 len)
1081 {
1082 	struct ietf_mpa_v1 *mpa_frame;
1083 	int mpa_hdr_len, priv_data_len, ret;
1084 
1085 	*type = IRDMA_MPA_REQUEST_ACCEPT;
1086 
1087 	if (len < sizeof(struct ietf_mpa_v1)) {
1088 		irdma_debug(&cm_node->iwdev->rf->sc_dev,
1089 			    IRDMA_DEBUG_CM, "ietf buffer small (%x)\n", len);
1090 		return -EINVAL;
1091 	}
1092 
1093 	mpa_frame = (struct ietf_mpa_v1 *)buf;
1094 	mpa_hdr_len = sizeof(struct ietf_mpa_v1);
1095 	priv_data_len = ntohs(mpa_frame->priv_data_len);
1096 
1097 	if (priv_data_len > IETF_MAX_PRIV_DATA_LEN) {
1098 		irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM,
1099 			    "private_data too big %d\n",
1100 			    priv_data_len);
1101 		return -EOVERFLOW;
1102 	}
1103 
1104 	if (mpa_frame->rev != IETF_MPA_V1 && mpa_frame->rev != IETF_MPA_V2) {
1105 		irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM,
1106 			    "unsupported mpa rev = %d\n",
1107 			    mpa_frame->rev);
1108 		return -EINVAL;
1109 	}
1110 
1111 	if (mpa_frame->rev > cm_node->mpa_frame_rev) {
1112 		irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM,
1113 			    "rev %d\n",
1114 			    mpa_frame->rev);
1115 		return -EINVAL;
1116 	}
1117 
1118 	cm_node->mpa_frame_rev = mpa_frame->rev;
1119 	if (cm_node->state != IRDMA_CM_STATE_MPAREQ_SENT) {
1120 		if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REQ,
1121 			   IETF_MPA_KEY_SIZE)) {
1122 			irdma_debug(&cm_node->iwdev->rf->sc_dev,
1123 				    IRDMA_DEBUG_CM, "Unexpected MPA Key received\n");
1124 			return -EINVAL;
1125 		}
1126 	} else {
1127 		if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REP,
1128 			   IETF_MPA_KEY_SIZE)) {
1129 			irdma_debug(&cm_node->iwdev->rf->sc_dev,
1130 				    IRDMA_DEBUG_CM, "Unexpected MPA Key received\n");
1131 			return -EINVAL;
1132 		}
1133 	}
1134 
1135 	if (priv_data_len + mpa_hdr_len > len) {
1136 		irdma_debug(&cm_node->iwdev->rf->sc_dev,
1137 			    IRDMA_DEBUG_CM, "ietf buffer len(%x + %x != %x)\n",
1138 			    priv_data_len, mpa_hdr_len, len);
1139 		return -EOVERFLOW;
1140 	}
1141 
1142 	if (len > IRDMA_MAX_CM_BUF) {
1143 		irdma_debug(&cm_node->iwdev->rf->sc_dev,
1144 			    IRDMA_DEBUG_CM, "ietf buffer large len = %d\n", len);
1145 		return -EOVERFLOW;
1146 	}
1147 
1148 	switch (mpa_frame->rev) {
1149 	case IETF_MPA_V2:
1150 		mpa_hdr_len += IETF_RTR_MSG_SIZE;
1151 		ret = irdma_negotiate_mpa_v2_ird_ord(cm_node, buf);
1152 		if (ret)
1153 			return ret;
1154 		break;
1155 	case IETF_MPA_V1:
1156 	default:
1157 		break;
1158 	}
1159 
1160 	memcpy(cm_node->pdata_buf, buf + mpa_hdr_len, priv_data_len);
1161 	cm_node->pdata.size = priv_data_len;
1162 
1163 	if (mpa_frame->flags & IETF_MPA_FLAGS_REJECT)
1164 		*type = IRDMA_MPA_REQUEST_REJECT;
1165 
1166 	if (mpa_frame->flags & IETF_MPA_FLAGS_MARKERS)
1167 		cm_node->snd_mark_en = true;
1168 
1169 	return 0;
1170 }
1171 
1172 /**
1173  * irdma_schedule_cm_timer
1174  * @cm_node: connection's node
1175  * @sqbuf: buffer to send
1176  * @type: if it is send or close
1177  * @send_retrans: if rexmits to be done
1178  * @close_when_complete: is cm_node to be removed
1179  *
1180  * note - cm_node needs to be protected before calling this. Encase in:
1181  *		irdma_rem_ref_cm_node(cm_core, cm_node);
1182  *		irdma_schedule_cm_timer(...)
1183  *		atomic_inc(&cm_node->refcnt);
1184  */
1185 int
1186 irdma_schedule_cm_timer(struct irdma_cm_node *cm_node,
1187 			struct irdma_puda_buf *sqbuf,
1188 			enum irdma_timer_type type, int send_retrans,
1189 			int close_when_complete)
1190 {
1191 	struct irdma_sc_vsi *vsi = &cm_node->iwdev->vsi;
1192 	struct irdma_cm_core *cm_core = cm_node->cm_core;
1193 	struct irdma_timer_entry *new_send;
1194 	u32 was_timer_set;
1195 	unsigned long flags;
1196 
1197 	new_send = kzalloc(sizeof(*new_send), GFP_ATOMIC);
1198 	if (!new_send) {
1199 		if (type != IRDMA_TIMER_TYPE_CLOSE)
1200 			irdma_free_sqbuf(vsi, sqbuf);
1201 		return -ENOMEM;
1202 	}
1203 
1204 	new_send->retrycount = IRDMA_DEFAULT_RETRYS;
1205 	new_send->retranscount = IRDMA_DEFAULT_RETRANS;
1206 	new_send->sqbuf = sqbuf;
1207 	new_send->timetosend = jiffies;
1208 	new_send->type = type;
1209 	new_send->send_retrans = send_retrans;
1210 	new_send->close_when_complete = close_when_complete;
1211 
1212 	if (type == IRDMA_TIMER_TYPE_CLOSE) {
1213 		new_send->timetosend += (HZ / 10);
1214 		if (cm_node->close_entry) {
1215 			kfree(new_send);
1216 			irdma_debug(&cm_node->iwdev->rf->sc_dev,
1217 				    IRDMA_DEBUG_CM, "already close entry\n");
1218 			return -EINVAL;
1219 		}
1220 
1221 		cm_node->close_entry = new_send;
1222 	} else {		/* type == IRDMA_TIMER_TYPE_SEND */
1223 		spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
1224 		cm_node->send_entry = new_send;
1225 		atomic_inc(&cm_node->refcnt);
1226 		spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
1227 		new_send->timetosend = jiffies + IRDMA_RETRY_TIMEOUT;
1228 
1229 		atomic_inc(&sqbuf->refcount);
1230 		irdma_puda_send_buf(vsi->ilq, sqbuf);
1231 		if (!send_retrans) {
1232 			irdma_cleanup_retrans_entry(cm_node);
1233 			if (close_when_complete)
1234 				irdma_rem_ref_cm_node(cm_node);
1235 			return 0;
1236 		}
1237 	}
1238 
1239 	spin_lock_irqsave(&cm_core->ht_lock, flags);
1240 	was_timer_set = timer_pending(&cm_core->tcp_timer);
1241 
1242 	if (!was_timer_set) {
1243 		cm_core->tcp_timer.expires = new_send->timetosend;
1244 		add_timer(&cm_core->tcp_timer);
1245 	}
1246 	spin_unlock_irqrestore(&cm_core->ht_lock, flags);
1247 
1248 	return 0;
1249 }
1250 
1251 /**
1252  * irdma_retrans_expired - Could not rexmit the packet
1253  * @cm_node: connection's node
1254  */
1255 static void
1256 irdma_retrans_expired(struct irdma_cm_node *cm_node)
1257 {
1258 	enum irdma_cm_node_state state = cm_node->state;
1259 
1260 	cm_node->state = IRDMA_CM_STATE_CLOSED;
1261 	switch (state) {
1262 	case IRDMA_CM_STATE_SYN_RCVD:
1263 	case IRDMA_CM_STATE_CLOSING:
1264 		irdma_rem_ref_cm_node(cm_node);
1265 		break;
1266 	case IRDMA_CM_STATE_FIN_WAIT1:
1267 	case IRDMA_CM_STATE_LAST_ACK:
1268 		irdma_send_reset(cm_node);
1269 		break;
1270 	default:
1271 		atomic_inc(&cm_node->refcnt);
1272 		irdma_send_reset(cm_node);
1273 		irdma_create_event(cm_node, IRDMA_CM_EVENT_ABORTED);
1274 		break;
1275 	}
1276 }
1277 
1278 /**
1279  * irdma_handle_close_entry - for handling retry/timeouts
1280  * @cm_node: connection's node
1281  * @rem_node: flag for remove cm_node
1282  */
1283 static void
1284 irdma_handle_close_entry(struct irdma_cm_node *cm_node,
1285 			 u32 rem_node)
1286 {
1287 	struct irdma_timer_entry *close_entry = cm_node->close_entry;
1288 	struct irdma_qp *iwqp;
1289 	unsigned long flags;
1290 
1291 	if (!close_entry)
1292 		return;
1293 	iwqp = (struct irdma_qp *)close_entry->sqbuf;
1294 	if (iwqp) {
1295 		spin_lock_irqsave(&iwqp->lock, flags);
1296 		if (iwqp->cm_id) {
1297 			iwqp->hw_tcp_state = IRDMA_TCP_STATE_CLOSED;
1298 			iwqp->hw_iwarp_state = IRDMA_QP_STATE_ERROR;
1299 			iwqp->last_aeq = IRDMA_AE_RESET_SENT;
1300 			iwqp->ibqp_state = IB_QPS_ERR;
1301 			spin_unlock_irqrestore(&iwqp->lock, flags);
1302 			irdma_cm_disconn(iwqp);
1303 		} else {
1304 			spin_unlock_irqrestore(&iwqp->lock, flags);
1305 		}
1306 	} else if (rem_node) {
1307 		/* TIME_WAIT state */
1308 		irdma_rem_ref_cm_node(cm_node);
1309 	}
1310 
1311 	kfree(close_entry);
1312 	cm_node->close_entry = NULL;
1313 }
1314 
1315 /**
1316  * irdma_cm_timer_tick - system's timer expired callback
1317  * @t: Pointer to timer_list
1318  */
1319 static void
1320 irdma_cm_timer_tick(struct timer_list *t)
1321 {
1322 	unsigned long nexttimeout = jiffies + IRDMA_LONG_TIME;
1323 	struct irdma_cm_node *cm_node;
1324 	struct irdma_timer_entry *send_entry, *close_entry;
1325 	struct list_head *list_core_temp;
1326 	struct list_head *list_node;
1327 	struct irdma_cm_core *cm_core = from_timer(cm_core, t, tcp_timer);
1328 	struct irdma_sc_vsi *vsi;
1329 	u32 settimer = 0;
1330 	unsigned long timetosend;
1331 	unsigned long flags;
1332 	struct list_head timer_list;
1333 
1334 	INIT_LIST_HEAD(&timer_list);
1335 
1336 	rcu_read_lock();
1337 	irdma_timer_list_prep(cm_core, &timer_list);
1338 	rcu_read_unlock();
1339 
1340 	list_for_each_safe(list_node, list_core_temp, &timer_list) {
1341 		cm_node = container_of(list_node, struct irdma_cm_node,
1342 				       timer_entry);
1343 		close_entry = cm_node->close_entry;
1344 
1345 		if (close_entry) {
1346 			if (time_after(close_entry->timetosend, jiffies)) {
1347 				if (nexttimeout > close_entry->timetosend ||
1348 				    !settimer) {
1349 					nexttimeout = close_entry->timetosend;
1350 					settimer = 1;
1351 				}
1352 			} else {
1353 				irdma_handle_close_entry(cm_node, 1);
1354 			}
1355 		}
1356 
1357 		spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
1358 
1359 		send_entry = cm_node->send_entry;
1360 		if (!send_entry)
1361 			goto done;
1362 		if (time_after(send_entry->timetosend, jiffies)) {
1363 			if (cm_node->state != IRDMA_CM_STATE_OFFLOADED) {
1364 				if (nexttimeout > send_entry->timetosend ||
1365 				    !settimer) {
1366 					nexttimeout = send_entry->timetosend;
1367 					settimer = 1;
1368 				}
1369 			} else {
1370 				irdma_free_retrans_entry(cm_node);
1371 			}
1372 			goto done;
1373 		}
1374 
1375 		if (cm_node->state == IRDMA_CM_STATE_OFFLOADED ||
1376 		    cm_node->state == IRDMA_CM_STATE_CLOSED) {
1377 			irdma_free_retrans_entry(cm_node);
1378 			goto done;
1379 		}
1380 
1381 		if (!send_entry->retranscount || !send_entry->retrycount) {
1382 			irdma_free_retrans_entry(cm_node);
1383 
1384 			spin_unlock_irqrestore(&cm_node->retrans_list_lock,
1385 					       flags);
1386 			irdma_retrans_expired(cm_node);
1387 			cm_node->state = IRDMA_CM_STATE_CLOSED;
1388 			spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
1389 			goto done;
1390 		}
1391 		spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
1392 
1393 		vsi = &cm_node->iwdev->vsi;
1394 		if (!cm_node->ack_rcvd) {
1395 			atomic_inc(&send_entry->sqbuf->refcount);
1396 			irdma_puda_send_buf(vsi->ilq, send_entry->sqbuf);
1397 			cm_node->cm_core->stats_pkt_retrans++;
1398 		}
1399 
1400 		spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
1401 		if (send_entry->send_retrans) {
1402 			send_entry->retranscount--;
1403 			timetosend = (IRDMA_RETRY_TIMEOUT <<
1404 				      (IRDMA_DEFAULT_RETRANS -
1405 				       send_entry->retranscount));
1406 
1407 			send_entry->timetosend = jiffies +
1408 			    min(timetosend, IRDMA_MAX_TIMEOUT);
1409 			if (nexttimeout > send_entry->timetosend || !settimer) {
1410 				nexttimeout = send_entry->timetosend;
1411 				settimer = 1;
1412 			}
1413 		} else {
1414 			int close_when_complete;
1415 
1416 			close_when_complete = send_entry->close_when_complete;
1417 			irdma_free_retrans_entry(cm_node);
1418 			if (close_when_complete)
1419 				irdma_rem_ref_cm_node(cm_node);
1420 		}
1421 done:
1422 		spin_unlock_irqrestore(&cm_node->retrans_list_lock, flags);
1423 		irdma_rem_ref_cm_node(cm_node);
1424 	}
1425 
1426 	if (settimer) {
1427 		spin_lock_irqsave(&cm_core->ht_lock, flags);
1428 		if (!timer_pending(&cm_core->tcp_timer)) {
1429 			cm_core->tcp_timer.expires = nexttimeout;
1430 			add_timer(&cm_core->tcp_timer);
1431 		}
1432 		spin_unlock_irqrestore(&cm_core->ht_lock, flags);
1433 	}
1434 }
1435 
1436 /**
1437  * irdma_send_syn - send SYN packet
1438  * @cm_node: connection's node
1439  * @sendack: flag to set ACK bit or not
1440  */
1441 int
1442 irdma_send_syn(struct irdma_cm_node *cm_node, u32 sendack)
1443 {
1444 	struct irdma_puda_buf *sqbuf;
1445 	int flags = SET_SYN;
1446 	char optionsbuf[sizeof(struct option_mss) +
1447 			sizeof(struct option_windowscale) +
1448 			sizeof(struct option_base) + TCP_OPTIONS_PADDING];
1449 	struct irdma_kmem_info opts;
1450 	int optionssize = 0;
1451 	/* Sending MSS option */
1452 	union all_known_options *options;
1453 
1454 	opts.addr = optionsbuf;
1455 	if (!cm_node)
1456 		return -EINVAL;
1457 
1458 	options = (union all_known_options *)&optionsbuf[optionssize];
1459 	options->mss.optionnum = OPTION_NUM_MSS;
1460 	options->mss.len = sizeof(struct option_mss);
1461 	options->mss.mss = htons(cm_node->tcp_cntxt.mss);
1462 	optionssize += sizeof(struct option_mss);
1463 
1464 	options = (union all_known_options *)&optionsbuf[optionssize];
1465 	options->windowscale.optionnum = OPTION_NUM_WINDOW_SCALE;
1466 	options->windowscale.len = sizeof(struct option_windowscale);
1467 	options->windowscale.shiftcount = cm_node->tcp_cntxt.rcv_wscale;
1468 	optionssize += sizeof(struct option_windowscale);
1469 	options = (union all_known_options *)&optionsbuf[optionssize];
1470 	options->eol = OPTION_NUM_EOL;
1471 	optionssize += 1;
1472 
1473 	if (sendack)
1474 		flags |= SET_ACK;
1475 
1476 	opts.size = optionssize;
1477 
1478 	sqbuf = cm_node->cm_core->form_cm_frame(cm_node, &opts, NULL, NULL,
1479 						flags);
1480 	if (!sqbuf)
1481 		return -ENOMEM;
1482 
1483 	return irdma_schedule_cm_timer(cm_node, sqbuf, IRDMA_TIMER_TYPE_SEND, 1,
1484 				       0);
1485 }
1486 
1487 /**
1488  * irdma_send_ack - Send ACK packet
1489  * @cm_node: connection's node
1490  */
1491 void
1492 irdma_send_ack(struct irdma_cm_node *cm_node)
1493 {
1494 	struct irdma_puda_buf *sqbuf;
1495 	struct irdma_sc_vsi *vsi = &cm_node->iwdev->vsi;
1496 
1497 	sqbuf = cm_node->cm_core->form_cm_frame(cm_node, NULL, NULL, NULL,
1498 						SET_ACK);
1499 	if (sqbuf)
1500 		irdma_puda_send_buf(vsi->ilq, sqbuf);
1501 }
1502 
1503 /**
1504  * irdma_send_fin - Send FIN pkt
1505  * @cm_node: connection's node
1506  */
1507 static int
1508 irdma_send_fin(struct irdma_cm_node *cm_node)
1509 {
1510 	struct irdma_puda_buf *sqbuf;
1511 
1512 	sqbuf = cm_node->cm_core->form_cm_frame(cm_node, NULL, NULL, NULL,
1513 						SET_ACK | SET_FIN);
1514 	if (!sqbuf)
1515 		return -ENOMEM;
1516 
1517 	return irdma_schedule_cm_timer(cm_node, sqbuf, IRDMA_TIMER_TYPE_SEND, 1,
1518 				       0);
1519 }
1520 
1521 /**
1522  * irdma_find_listener - find a cm node listening on this addr-port pair
1523  * @cm_core: cm's core
1524  * @dst_addr: listener ip addr
1525  * @ipv4: flag indicating IPv4 when true
1526  * @dst_port: listener tcp port num
1527  * @vlan_id: virtual LAN ID
1528  * @listener_state: state to match with listen node's
1529  */
1530 static struct irdma_cm_listener *
1531 irdma_find_listener(struct irdma_cm_core *cm_core, u32 *dst_addr, bool ipv4, u16 dst_port,
1532 		    u16 vlan_id, enum irdma_cm_listener_state listener_state)
1533 {
1534 	struct irdma_cm_listener *listen_node;
1535 	static const u32 ip_zero[4] = {0, 0, 0, 0};
1536 	u32 listen_addr[4];
1537 	u16 listen_port;
1538 	unsigned long flags;
1539 
1540 	/* walk list and find cm_node associated with this session ID */
1541 	spin_lock_irqsave(&cm_core->listen_list_lock, flags);
1542 	list_for_each_entry(listen_node, &cm_core->listen_list, list) {
1543 		memcpy(listen_addr, listen_node->loc_addr, sizeof(listen_addr));
1544 		listen_port = listen_node->loc_port;
1545 		if (listen_node->ipv4 != ipv4 || listen_port != dst_port ||
1546 		    !(listener_state & listen_node->listener_state))
1547 			continue;
1548 		/* compare node pair, return node handle if a match */
1549 		if (!memcmp(listen_addr, ip_zero, sizeof(listen_addr)) ||
1550 		    (!memcmp(listen_addr, dst_addr, sizeof(listen_addr)) &&
1551 		     vlan_id == listen_node->vlan_id)) {
1552 			atomic_inc(&listen_node->refcnt);
1553 			spin_unlock_irqrestore(&cm_core->listen_list_lock,
1554 					       flags);
1555 			return listen_node;
1556 		}
1557 	}
1558 	spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
1559 
1560 	return NULL;
1561 }
1562 
1563 /**
1564  * irdma_del_multiple_qhash - Remove qhash and child listens
1565  * @iwdev: iWarp device
1566  * @cm_info: CM info for parent listen node
1567  * @cm_parent_listen_node: The parent listen node
1568  */
1569 static int
1570 irdma_del_multiple_qhash(struct irdma_device *iwdev,
1571 			 struct irdma_cm_info *cm_info,
1572 			 struct irdma_cm_listener *cm_parent_listen_node)
1573 {
1574 	struct irdma_cm_listener *child_listen_node;
1575 	struct list_head *pos, *tpos;
1576 	unsigned long flags;
1577 	int ret = -EINVAL;
1578 
1579 	spin_lock_irqsave(&iwdev->cm_core.listen_list_lock, flags);
1580 	list_for_each_safe(pos, tpos,
1581 			   &cm_parent_listen_node->child_listen_list) {
1582 		child_listen_node = list_entry(pos, struct irdma_cm_listener,
1583 					       child_listen_list);
1584 		if (child_listen_node->ipv4)
1585 			irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM,
1586 				    "removing child listen for IP=%pI4, port=%d, vlan=%d\n",
1587 				    child_listen_node->loc_addr,
1588 				    child_listen_node->loc_port,
1589 				    child_listen_node->vlan_id);
1590 		else
1591 			irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM,
1592 				    "removing child listen for IP=%pI6, port=%d, vlan=%d\n",
1593 				    child_listen_node->loc_addr,
1594 				    child_listen_node->loc_port,
1595 				    child_listen_node->vlan_id);
1596 		list_del(pos);
1597 		memcpy(cm_info->loc_addr, child_listen_node->loc_addr,
1598 		       sizeof(cm_info->loc_addr));
1599 		cm_info->vlan_id = child_listen_node->vlan_id;
1600 		if (child_listen_node->qhash_set) {
1601 			ret = irdma_manage_qhash(iwdev, cm_info,
1602 						 IRDMA_QHASH_TYPE_TCP_SYN,
1603 						 IRDMA_QHASH_MANAGE_TYPE_DELETE,
1604 						 NULL, false);
1605 			child_listen_node->qhash_set = false;
1606 		} else {
1607 			ret = 0;
1608 		}
1609 		irdma_debug(&iwdev->rf->sc_dev,
1610 			    IRDMA_DEBUG_CM, "Child listen node freed = %p\n",
1611 			    child_listen_node);
1612 		kfree(child_listen_node);
1613 		cm_parent_listen_node->cm_core->stats_listen_nodes_destroyed++;
1614 	}
1615 	spin_unlock_irqrestore(&iwdev->cm_core.listen_list_lock, flags);
1616 
1617 	return ret;
1618 }
1619 
1620 static u8 irdma_get_egress_vlan_prio(u32 *loc_addr, u8 prio, bool ipv4){
1621 	return prio;
1622 }
1623 
1624 /**
1625  * irdma_netdev_vlan_ipv6 - Gets the netdev and mac
1626  * @addr: local IPv6 address
1627  * @vlan_id: vlan id for the given IPv6 address
1628  * @mac: mac address for the given IPv6 address
1629  *
1630  * Returns the net_device of the IPv6 address and also sets the
1631  * vlan id and mac for that address.
1632  */
1633 if_t
1634 irdma_netdev_vlan_ipv6(u32 *addr, u16 *vlan_id, u8 *mac)
1635 {
1636 	if_t ip_dev = NULL;
1637 	struct in6_addr laddr6;
1638 	struct ifaddr *ifa;
1639 	u16 scope_id = 0;
1640 
1641 	irdma_copy_ip_htonl(laddr6.__u6_addr.__u6_addr32, addr);
1642 	if (vlan_id)
1643 		*vlan_id = 0xFFFF;	/* Match rdma_vlan_dev_vlan_id() */
1644 	if (mac)
1645 		eth_zero_addr(mac);
1646 
1647 	if (IN6_IS_SCOPE_LINKLOCAL(&laddr6) ||
1648 	    IN6_IS_ADDR_MC_INTFACELOCAL(&laddr6))
1649 		scope_id = ntohs(laddr6.__u6_addr.__u6_addr16[1]);
1650 
1651 	ip_dev = ip6_ifp_find(&init_net, laddr6, scope_id);
1652 	if (ip_dev) {
1653 		if (vlan_id)
1654 			*vlan_id = rdma_vlan_dev_vlan_id(ip_dev);
1655 		ifa = if_getifaddr(ip_dev);
1656 		if (ifa && ifa->ifa_addr && mac)
1657 			ether_addr_copy(mac, if_getlladdr(ip_dev));
1658 	}
1659 
1660 	return ip_dev;
1661 }
1662 
1663 /**
1664  * irdma_get_vlan_ipv4 - Returns the vlan_id for IPv4 address
1665  * @addr: local IPv4 address
1666  */
1667 u16
1668 irdma_get_vlan_ipv4(u32 *addr)
1669 {
1670 	if_t netdev;
1671 	u16 vlan_id = 0xFFFF;
1672 
1673 	netdev = ip_ifp_find(&init_net, htonl(addr[0]));
1674 	if (netdev) {
1675 		vlan_id = rdma_vlan_dev_vlan_id(netdev);
1676 		dev_put(netdev);
1677 	}
1678 
1679 	return vlan_id;
1680 }
1681 
/*
 * Argument bundle handed to irdma_add_mqh_ifa_cb() through the
 * interface-address iterator.
 */
struct irdma_add_mqh_cbs {
	struct irdma_device *iwdev;	/* device the qhashes are added on */
	struct irdma_cm_info *cm_info;	/* CM info, rewritten per address */
	struct irdma_cm_listener *cm_listen_node;	/* parent listen node */
};
1687 
1688 /**
1689  * irdma_add_mqh_ifa_cb - Adds multiple qhashes for IPV4/IPv6
1690  * @arg: Calback argument structure from irdma_add_mqh
1691  * @ifa: Current address to compute against
1692  * @count: Current cumulative output of all callbacks in this iteration
1693  *
1694  * Adds a qhash and a child listen node for a single IPv4/IPv6 address
1695  * on the adapter and adds the associated qhash filter
1696  */
1697 static u_int
1698 irdma_add_mqh_ifa_cb(void *arg, struct ifaddr *ifa, u_int count)
1699 {
1700 	struct irdma_add_mqh_cbs *cbs = arg;
1701 	struct irdma_cm_listener *child_listen_node;
1702 	struct irdma_cm_info *cm_info = cbs->cm_info;
1703 	struct irdma_device *iwdev = cbs->iwdev;
1704 	struct irdma_cm_listener *cm_parent_listen_node = cbs->cm_listen_node;
1705 	if_t ip_dev = ifa->ifa_ifp;
1706 	unsigned long flags;
1707 	int ret;
1708 
1709 	if (count)
1710 		return 0;
1711 
1712 	if (cm_info->ipv4)
1713 		irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM,
1714 			    "Allocating child CM Listener forIP=%pI4, vlan_id=%d, MAC=%pM\n",
1715 			    &ifa->ifa_addr,
1716 			    rdma_vlan_dev_vlan_id(ip_dev), if_getlladdr(ip_dev));
1717 	else
1718 		irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM,
1719 			    "IP=%pI6, vlan_id=%d, MAC=%pM\n",
1720 			    &((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr,
1721 			    rdma_vlan_dev_vlan_id(ip_dev),
1722 			    if_getlladdr(ip_dev));
1723 	child_listen_node = kzalloc(sizeof(*child_listen_node), GFP_KERNEL);
1724 	irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM,
1725 		    "Allocating child listener %p\n",
1726 		    child_listen_node);
1727 	if (!child_listen_node) {
1728 		irdma_debug(&iwdev->rf->sc_dev,
1729 			    IRDMA_DEBUG_CM,
1730 			    "listener memory allocation\n");
1731 		return -ENOMEM;
1732 	}
1733 
1734 	memcpy(child_listen_node, cm_parent_listen_node,
1735 	       sizeof(*child_listen_node));
1736 	cm_info->vlan_id = rdma_vlan_dev_vlan_id(ip_dev);
1737 	child_listen_node->vlan_id = cm_info->vlan_id;
1738 	if (cm_info->ipv4)
1739 		child_listen_node->loc_addr[0] =
1740 		    ntohl(((struct sockaddr_in *)ifa->ifa_addr)->sin_addr.s_addr);
1741 	else
1742 		irdma_copy_ip_ntohl(child_listen_node->loc_addr,
1743 				    ((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr.__u6_addr.__u6_addr32);
1744 	memcpy(cm_info->loc_addr, child_listen_node->loc_addr,
1745 	       sizeof(cm_info->loc_addr));
1746 	if (!iwdev->vsi.dscp_mode)
1747 		cm_info->user_pri =
1748 		    irdma_get_egress_vlan_prio(child_listen_node->loc_addr,
1749 					       cm_info->user_pri,
1750 					       false);
1751 	ret = irdma_manage_qhash(iwdev, cm_info,
1752 				 IRDMA_QHASH_TYPE_TCP_SYN,
1753 				 IRDMA_QHASH_MANAGE_TYPE_ADD,
1754 				 NULL, true);
1755 	if (ret) {
1756 		kfree(child_listen_node);
1757 		return ret;
1758 	}
1759 
1760 	child_listen_node->qhash_set = true;
1761 	spin_lock_irqsave(&iwdev->cm_core.listen_list_lock, flags);
1762 	list_add(&child_listen_node->child_listen_list,
1763 		 &cm_parent_listen_node->child_listen_list);
1764 	spin_unlock_irqrestore(&iwdev->cm_core.listen_list_lock, flags);
1765 	cm_parent_listen_node->cm_core->stats_listen_nodes_created++;
1766 
1767 	return 0;
1768 }
1769 
1770 /**
1771  * irdma_add_mqh - Adds multiple qhashes
1772  * @iwdev: iWarp device
1773  * @cm_info: CM info for parent listen node
1774  * @cm_listen_node: The parent listen node
1775  */
1776 static int
1777 irdma_add_mqh(struct irdma_device *iwdev,
1778 	      struct irdma_cm_info *cm_info,
1779 	      struct irdma_cm_listener *cm_listen_node)
1780 {
1781 	struct epoch_tracker et;
1782 	struct irdma_add_mqh_cbs cbs;
1783 	struct if_iter iter;
1784 	if_t ifp;
1785 	int err;
1786 
1787 	cbs.iwdev = iwdev;
1788 	cbs.cm_info = cm_info;
1789 	cbs.cm_listen_node = cm_listen_node;
1790 
1791 	VNET_ITERATOR_DECL(vnet_iter);
1792 
1793 	VNET_LIST_RLOCK();
1794 	NET_EPOCH_ENTER(et);
1795 	VNET_FOREACH(vnet_iter) {
1796 		CURVNET_SET_QUIET(vnet_iter);
1797 		for (ifp = if_iter_start(&iter); ifp != NULL; ifp = if_iter_next(&iter)) {
1798 			if (!(if_getflags(ifp) & IFF_UP))
1799 				continue;
1800 
1801 			if (((rdma_vlan_dev_vlan_id(ifp) >= VLAN_N_VID) ||
1802 			     (rdma_vlan_dev_real_dev(ifp) != iwdev->netdev)) &&
1803 			    ifp != iwdev->netdev)
1804 				continue;
1805 
1806 			if_addr_rlock(ifp);
1807 			if (cm_info->ipv4)
1808 				err = if_foreach_addr_type(ifp, AF_INET, irdma_add_mqh_ifa_cb, &cbs);
1809 			else
1810 				err = if_foreach_addr_type(ifp, AF_INET6, irdma_add_mqh_ifa_cb, &cbs);
1811 			if_addr_runlock(ifp);
1812 		}
1813 		if_iter_finish(&iter);
1814 		CURVNET_RESTORE();
1815 	}
1816 	NET_EPOCH_EXIT(et);
1817 	VNET_LIST_RUNLOCK();
1818 
1819 	return err;
1820 }
1821 
1822 /**
1823  * irdma_reset_list_prep - add connection nodes slated for reset to list
1824  * @cm_core: cm's core
1825  * @listener: pointer to listener node
1826  * @reset_list: a list to which cm_node will be selected
1827  */
1828 static void
1829 irdma_reset_list_prep(struct irdma_cm_core *cm_core,
1830 		      struct irdma_cm_listener *listener,
1831 		      struct list_head *reset_list)
1832 {
1833 	struct irdma_cm_node *cm_node;
1834 	int bkt;
1835 
1836 	HASH_FOR_EACH_RCU(cm_core->cm_hash_tbl, bkt, cm_node, list) {
1837 		if (cm_node->listener == listener &&
1838 		    !cm_node->accelerated &&
1839 		    atomic_inc_not_zero(&cm_node->refcnt))
1840 			list_add(&cm_node->reset_entry, reset_list);
1841 	}
1842 }
1843 
1844 /**
1845  * irdma_dec_refcnt_listen - delete listener and associated cm nodes
1846  * @cm_core: cm's core
1847  * @listener: pointer to listener node
1848  * @free_hanging_nodes: to free associated cm_nodes
1849  * @apbvt_del: flag to delete the apbvt
1850  */
1851 static int
1852 irdma_dec_refcnt_listen(struct irdma_cm_core *cm_core,
1853 			struct irdma_cm_listener *listener,
1854 			int free_hanging_nodes, bool apbvt_del)
1855 {
1856 	struct list_head *list_pos;
1857 	struct list_head *list_temp;
1858 	struct irdma_cm_node *cm_node;
1859 	struct list_head reset_list;
1860 	struct irdma_cm_info nfo;
1861 	enum irdma_cm_node_state old_state;
1862 	unsigned long flags;
1863 	int err;
1864 
1865 	/* free non-accelerated child nodes for this listener */
1866 	INIT_LIST_HEAD(&reset_list);
1867 	if (free_hanging_nodes) {
1868 		rcu_read_lock();
1869 		irdma_reset_list_prep(cm_core, listener, &reset_list);
1870 		rcu_read_unlock();
1871 	}
1872 
1873 	list_for_each_safe(list_pos, list_temp, &reset_list) {
1874 		cm_node = container_of(list_pos, struct irdma_cm_node,
1875 				       reset_entry);
1876 		if (cm_node->state >= IRDMA_CM_STATE_FIN_WAIT1) {
1877 			irdma_rem_ref_cm_node(cm_node);
1878 			continue;
1879 		}
1880 
1881 		irdma_cleanup_retrans_entry(cm_node);
1882 		err = irdma_send_reset(cm_node);
1883 		if (err) {
1884 			cm_node->state = IRDMA_CM_STATE_CLOSED;
1885 			irdma_debug(&cm_node->iwdev->rf->sc_dev,
1886 				    IRDMA_DEBUG_CM, "send reset failed\n");
1887 		} else {
1888 			old_state = cm_node->state;
1889 			cm_node->state = IRDMA_CM_STATE_LISTENER_DESTROYED;
1890 			if (old_state != IRDMA_CM_STATE_MPAREQ_RCVD)
1891 				irdma_rem_ref_cm_node(cm_node);
1892 		}
1893 	}
1894 
1895 	if (atomic_dec_and_test(&listener->refcnt)) {
1896 		spin_lock_irqsave(&cm_core->listen_list_lock, flags);
1897 		list_del(&listener->list);
1898 		spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
1899 
1900 		if (apbvt_del)
1901 			irdma_del_apbvt(listener->iwdev,
1902 					listener->apbvt_entry);
1903 		memcpy(nfo.loc_addr, listener->loc_addr, sizeof(nfo.loc_addr));
1904 		nfo.loc_port = listener->loc_port;
1905 		nfo.ipv4 = listener->ipv4;
1906 		nfo.vlan_id = listener->vlan_id;
1907 		nfo.user_pri = listener->user_pri;
1908 		nfo.qh_qpid = listener->iwdev->vsi.ilq->qp_id;
1909 
1910 		if (!list_empty(&listener->child_listen_list)) {
1911 			irdma_del_multiple_qhash(listener->iwdev, &nfo,
1912 						 listener);
1913 		} else {
1914 			if (listener->qhash_set)
1915 				irdma_manage_qhash(listener->iwdev,
1916 						   &nfo,
1917 						   IRDMA_QHASH_TYPE_TCP_SYN,
1918 						   IRDMA_QHASH_MANAGE_TYPE_DELETE,
1919 						   NULL, false);
1920 		}
1921 
1922 		cm_core->stats_listen_destroyed++;
1923 		cm_core->stats_listen_nodes_destroyed++;
1924 		irdma_debug(&listener->iwdev->rf->sc_dev, IRDMA_DEBUG_CM,
1925 			    "loc_port=0x%04x loc_addr=%pI4 cm_listen_node=%p cm_id=%p qhash_set=%d vlan_id=%d apbvt_del=%d\n",
1926 			    listener->loc_port, listener->loc_addr, listener,
1927 			    listener->cm_id, listener->qhash_set,
1928 			    listener->vlan_id, apbvt_del);
1929 		kfree(listener);
1930 		listener = NULL;
1931 		return 0;
1932 	}
1933 
1934 	return -EINVAL;
1935 }
1936 
1937 /**
1938  * irdma_cm_del_listen - delete a listener
1939  * @cm_core: cm's core
1940  * @listener: passive connection's listener
1941  * @apbvt_del: flag to delete apbvt
1942  */
1943 static int
1944 irdma_cm_del_listen(struct irdma_cm_core *cm_core,
1945 		    struct irdma_cm_listener *listener,
1946 		    bool apbvt_del)
1947 {
1948 	listener->listener_state = IRDMA_CM_LISTENER_PASSIVE_STATE;
1949 	listener->cm_id = NULL;
1950 
1951 	return irdma_dec_refcnt_listen(cm_core, listener, 1, apbvt_del);
1952 }
1953 
1954 /**
1955  * irdma_find_node - find a cm node that matches the reference cm node
1956  * @cm_core: cm's core
1957  * @rem_port: remote tcp port num
1958  * @rem_addr: remote ip addr
1959  * @loc_port: local tcp port num
1960  * @loc_addr: local ip addr
1961  * @vlan_id: local VLAN ID
1962  */
1963 struct irdma_cm_node *
1964 irdma_find_node(struct irdma_cm_core *cm_core,
1965 		u16 rem_port, u32 *rem_addr, u16 loc_port,
1966 		u32 *loc_addr, u16 vlan_id)
1967 {
1968 	struct irdma_cm_node *cm_node;
1969 	u32 key = (rem_port << 16) | loc_port;
1970 
1971 	rcu_read_lock();
1972 	HASH_FOR_EACH_POSSIBLE_RCU(cm_core->cm_hash_tbl, cm_node, list, key) {
1973 		if (cm_node->vlan_id == vlan_id &&
1974 		    cm_node->loc_port == loc_port && cm_node->rem_port == rem_port &&
1975 		    !memcmp(cm_node->loc_addr, loc_addr, sizeof(cm_node->loc_addr)) &&
1976 		    !memcmp(cm_node->rem_addr, rem_addr, sizeof(cm_node->rem_addr))) {
1977 			if (!atomic_inc_not_zero(&cm_node->refcnt))
1978 				goto exit;
1979 			rcu_read_unlock();
1980 			return cm_node;
1981 		}
1982 	}
1983 
1984 exit:
1985 	rcu_read_unlock();
1986 
1987 	/* no owner node */
1988 	return NULL;
1989 }
1990 
1991 /**
1992  * irdma_add_hte_node - add a cm node to the hash table
1993  * @cm_core: cm's core
1994  * @cm_node: connection's node
1995  */
1996 static void
1997 irdma_add_hte_node(struct irdma_cm_core *cm_core,
1998 		   struct irdma_cm_node *cm_node)
1999 {
2000 	unsigned long flags;
2001 	u32 key = (cm_node->rem_port << 16) | cm_node->loc_port;
2002 
2003 	spin_lock_irqsave(&cm_core->ht_lock, flags);
2004 	HASH_ADD_RCU(cm_core->cm_hash_tbl, &cm_node->list, key);
2005 	spin_unlock_irqrestore(&cm_core->ht_lock, flags);
2006 }
2007 
2008 /**
2009  * irdma_ipv4_is_lpb - check if loopback
2010  * @loc_addr: local addr to compare
2011  * @rem_addr: remote address
2012  */
2013 bool
2014 irdma_ipv4_is_lpb(u32 loc_addr, u32 rem_addr)
2015 {
2016 	return ipv4_is_loopback(htonl(rem_addr)) || (loc_addr == rem_addr);
2017 }
2018 
2019 /**
2020  * irdma_ipv6_is_lpb - check if loopback
2021  * @loc_addr: local addr to compare
2022  * @rem_addr: remote address
2023  */
2024 bool
2025 irdma_ipv6_is_lpb(u32 *loc_addr, u32 *rem_addr)
2026 {
2027 	struct in6_addr raddr6;
2028 
2029 	irdma_copy_ip_htonl(raddr6.__u6_addr.__u6_addr32, rem_addr);
2030 
2031 	return !memcmp(loc_addr, rem_addr, 16) || ipv6_addr_loopback(&raddr6);
2032 }
2033 
2034 /**
2035  * irdma_cm_create_ah - create a cm address handle
2036  * @cm_node: The connection manager node to create AH for
2037  * @wait: Provides option to wait for ah creation or not
2038  */
2039 static int
2040 irdma_cm_create_ah(struct irdma_cm_node *cm_node, bool wait)
2041 {
2042 	struct irdma_ah_info ah_info = {0};
2043 	struct irdma_device *iwdev = cm_node->iwdev;
2044 #ifdef VIMAGE
2045 	struct rdma_cm_id *rdma_id = (struct rdma_cm_id *)cm_node->cm_id->context;
2046 	struct vnet *vnet = rdma_id->route.addr.dev_addr.net;
2047 #endif
2048 
2049 	ether_addr_copy(ah_info.mac_addr, if_getlladdr(iwdev->netdev));
2050 
2051 	ah_info.hop_ttl = 0x40;
2052 	ah_info.tc_tos = cm_node->tos;
2053 	ah_info.vsi = &iwdev->vsi;
2054 
2055 	if (cm_node->ipv4) {
2056 		ah_info.ipv4_valid = true;
2057 		ah_info.dest_ip_addr[0] = cm_node->rem_addr[0];
2058 		ah_info.src_ip_addr[0] = cm_node->loc_addr[0];
2059 		CURVNET_SET_QUIET(vnet);
2060 		ah_info.do_lpbk = irdma_ipv4_is_lpb(ah_info.src_ip_addr[0],
2061 						    ah_info.dest_ip_addr[0]);
2062 		CURVNET_RESTORE();
2063 	} else {
2064 		memcpy(ah_info.dest_ip_addr, cm_node->rem_addr,
2065 		       sizeof(ah_info.dest_ip_addr));
2066 		memcpy(ah_info.src_ip_addr, cm_node->loc_addr,
2067 		       sizeof(ah_info.src_ip_addr));
2068 		ah_info.do_lpbk = irdma_ipv6_is_lpb(ah_info.src_ip_addr,
2069 						    ah_info.dest_ip_addr);
2070 	}
2071 
2072 	ah_info.vlan_tag = cm_node->vlan_id;
2073 	if (cm_node->vlan_id < VLAN_N_VID) {
2074 		ah_info.insert_vlan_tag = 1;
2075 		ah_info.vlan_tag |= cm_node->user_pri << VLAN_PRIO_SHIFT;
2076 	}
2077 
2078 	ah_info.dst_arpindex =
2079 	    irdma_arp_table(iwdev->rf, ah_info.dest_ip_addr,
2080 			    NULL, IRDMA_ARP_RESOLVE);
2081 
2082 	if (irdma_puda_create_ah(&iwdev->rf->sc_dev, &ah_info, wait,
2083 				 IRDMA_PUDA_RSRC_TYPE_ILQ, cm_node,
2084 				 &cm_node->ah))
2085 		return -ENOMEM;
2086 
2087 	return 0;
2088 }
2089 
2090 /**
2091  * irdma_cm_free_ah - free a cm address handle
2092  * @cm_node: The connection manager node to create AH for
2093  */
2094 static void
2095 irdma_cm_free_ah(struct irdma_cm_node *cm_node)
2096 {
2097 	struct irdma_device *iwdev = cm_node->iwdev;
2098 
2099 	irdma_puda_free_ah(&iwdev->rf->sc_dev, cm_node->ah);
2100 	cm_node->ah = NULL;
2101 }
2102 
2103 /**
2104  * irdma_make_cm_node - create a new instance of a cm node
2105  * @cm_core: cm's core
2106  * @iwdev: iwarp device structure
2107  * @cm_info: quad info for connection
2108  * @listener: passive connection's listener
2109  */
2110 static struct irdma_cm_node *
2111 irdma_make_cm_node(struct irdma_cm_core *cm_core, struct irdma_device *iwdev,
2112 		   struct irdma_cm_info *cm_info,
2113 		   struct irdma_cm_listener *listener)
2114 {
2115 	struct irdma_cm_node *cm_node;
2116 	int arpindex;
2117 	if_t netdev = iwdev->netdev;
2118 
2119 	/* create an hte and cm_node for this instance */
2120 	cm_node = kzalloc(sizeof(*cm_node), GFP_ATOMIC);
2121 	if (!cm_node)
2122 		return NULL;
2123 
2124 	/* set our node specific transport info */
2125 	cm_node->ipv4 = cm_info->ipv4;
2126 	cm_node->vlan_id = cm_info->vlan_id;
2127 	if (cm_node->vlan_id >= VLAN_N_VID && iwdev->dcb_vlan_mode)
2128 		cm_node->vlan_id = 0;
2129 	cm_node->tos = cm_info->tos;
2130 	cm_node->user_pri = cm_info->user_pri;
2131 	if (listener) {
2132 		if (listener->tos != cm_info->tos)
2133 			irdma_dev_warn(&iwdev->ibdev,
2134 				       "application TOS[%d] and remote client TOS[%d] mismatch\n",
2135 				       listener->tos, cm_info->tos);
2136 		if (iwdev->vsi.dscp_mode) {
2137 			cm_node->user_pri = listener->user_pri;
2138 		} else {
2139 			cm_node->tos = max(listener->tos, cm_info->tos);
2140 			cm_node->user_pri = rt_tos2priority(cm_node->tos);
2141 			cm_node->user_pri =
2142 			    irdma_get_egress_vlan_prio(cm_info->loc_addr,
2143 						       cm_node->user_pri,
2144 						       cm_info->ipv4);
2145 		}
2146 		irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_DCB,
2147 			    "listener: TOS:[%d] UP:[%d]\n",
2148 			    cm_node->tos,
2149 			    cm_node->user_pri);
2150 	}
2151 	memcpy(cm_node->loc_addr, cm_info->loc_addr, sizeof(cm_node->loc_addr));
2152 	memcpy(cm_node->rem_addr, cm_info->rem_addr, sizeof(cm_node->rem_addr));
2153 	cm_node->loc_port = cm_info->loc_port;
2154 	cm_node->rem_port = cm_info->rem_port;
2155 
2156 	cm_node->mpa_frame_rev = IRDMA_CM_DEFAULT_MPA_VER;
2157 	cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO;
2158 	cm_node->iwdev = iwdev;
2159 	cm_node->dev = &iwdev->rf->sc_dev;
2160 
2161 	cm_node->ird_size = cm_node->dev->hw_attrs.max_hw_ird;
2162 	cm_node->ord_size = cm_node->dev->hw_attrs.max_hw_ord;
2163 
2164 	cm_node->listener = listener;
2165 	cm_node->cm_id = cm_info->cm_id;
2166 	ether_addr_copy(cm_node->loc_mac, if_getlladdr(netdev));
2167 	spin_lock_init(&cm_node->retrans_list_lock);
2168 	cm_node->ack_rcvd = false;
2169 
2170 	init_completion(&cm_node->establish_comp);
2171 	atomic_set(&cm_node->refcnt, 1);
2172 	/* associate our parent CM core */
2173 	cm_node->cm_core = cm_core;
2174 	cm_node->tcp_cntxt.loc_id = IRDMA_CM_DEFAULT_LOCAL_ID;
2175 	cm_node->tcp_cntxt.rcv_wscale = iwdev->rcv_wscale;
2176 	cm_node->tcp_cntxt.rcv_wnd = iwdev->rcv_wnd >> cm_node->tcp_cntxt.rcv_wscale;
2177 	kc_set_loc_seq_num_mss(cm_node);
2178 
2179 	arpindex = irdma_resolve_neigh_lpb_chk(iwdev, cm_node, cm_info);
2180 	if (arpindex < 0)
2181 		goto err;
2182 
2183 	ether_addr_copy(cm_node->rem_mac, iwdev->rf->arp_table[arpindex].mac_addr);
2184 	irdma_add_hte_node(cm_core, cm_node);
2185 	cm_core->stats_nodes_created++;
2186 	return cm_node;
2187 
2188 err:
2189 	kfree(cm_node);
2190 
2191 	return NULL;
2192 }
2193 
2194 static void
2195 irdma_destroy_connection(struct irdma_cm_node *cm_node)
2196 {
2197 	struct irdma_cm_core *cm_core = cm_node->cm_core;
2198 	struct irdma_qp *iwqp;
2199 	struct irdma_cm_info nfo;
2200 
2201 	/* if the node is destroyed before connection was accelerated */
2202 	if (!cm_node->accelerated && cm_node->accept_pend) {
2203 		irdma_debug(&cm_node->iwdev->rf->sc_dev,
2204 			    IRDMA_DEBUG_CM, "node destroyed before established\n");
2205 		atomic_dec(&cm_node->listener->pend_accepts_cnt);
2206 	}
2207 	if (cm_node->close_entry)
2208 		irdma_handle_close_entry(cm_node, 0);
2209 	if (cm_node->listener) {
2210 		irdma_dec_refcnt_listen(cm_core, cm_node->listener, 0, true);
2211 	} else {
2212 		if (cm_node->apbvt_set) {
2213 			irdma_del_apbvt(cm_node->iwdev, cm_node->apbvt_entry);
2214 			cm_node->apbvt_set = 0;
2215 		}
2216 		irdma_get_addr_info(cm_node, &nfo);
2217 		if (cm_node->qhash_set) {
2218 			nfo.qh_qpid = cm_node->iwdev->vsi.ilq->qp_id;
2219 			irdma_manage_qhash(cm_node->iwdev, &nfo,
2220 					   IRDMA_QHASH_TYPE_TCP_ESTABLISHED,
2221 					   IRDMA_QHASH_MANAGE_TYPE_DELETE, NULL,
2222 					   false);
2223 			cm_node->qhash_set = 0;
2224 		}
2225 	}
2226 
2227 	iwqp = cm_node->iwqp;
2228 	if (iwqp) {
2229 		cm_node->cm_id->rem_ref(cm_node->cm_id);
2230 		cm_node->cm_id = NULL;
2231 		iwqp->cm_id = NULL;
2232 		irdma_qp_rem_ref(&iwqp->ibqp);
2233 		cm_node->iwqp = NULL;
2234 	} else if (cm_node->qhash_set) {
2235 		irdma_get_addr_info(cm_node, &nfo);
2236 		nfo.qh_qpid = cm_node->iwdev->vsi.ilq->qp_id;
2237 		irdma_manage_qhash(cm_node->iwdev, &nfo,
2238 				   IRDMA_QHASH_TYPE_TCP_ESTABLISHED,
2239 				   IRDMA_QHASH_MANAGE_TYPE_DELETE, NULL, false);
2240 		cm_node->qhash_set = 0;
2241 	}
2242 
2243 	cm_core->cm_free_ah(cm_node);
2244 }
2245 
2246 /**
2247  * irdma_rem_ref_cm_node - destroy an instance of a cm node
2248  * @cm_node: connection's node
2249  */
2250 void
2251 irdma_rem_ref_cm_node(struct irdma_cm_node *cm_node)
2252 {
2253 	struct irdma_cm_core *cm_core = cm_node->cm_core;
2254 	unsigned long flags;
2255 
2256 	spin_lock_irqsave(&cm_core->ht_lock, flags);
2257 
2258 	if (!atomic_dec_and_test(&cm_node->refcnt)) {
2259 		spin_unlock_irqrestore(&cm_core->ht_lock, flags);
2260 		return;
2261 	}
2262 	if (cm_node->iwqp) {
2263 		cm_node->iwqp->cm_node = NULL;
2264 		cm_node->iwqp->cm_id = NULL;
2265 	}
2266 	HASH_DEL_RCU(cm_core->cm_hash_tbl, &cm_node->list);
2267 	cm_node->cm_core->stats_nodes_destroyed++;
2268 
2269 	spin_unlock_irqrestore(&cm_core->ht_lock, flags);
2270 
2271 	irdma_destroy_connection(cm_node);
2272 
2273 	kfree_rcu(cm_node, rcu_head);
2274 }
2275 
/**
 * irdma_handle_fin_pkt - FIN packet received
 * @cm_node: connection's node
 *
 * Advances the connection state machine for a received FIN.  In every
 * handled state rcv_nxt is incremented by one to account for the FIN and
 * the retransmit entry is cleaned up; the reply (FIN, ACK, reset or none)
 * and the next state depend on the current state.
 */
static void
irdma_handle_fin_pkt(struct irdma_cm_node *cm_node)
{
	switch (cm_node->state) {
	case IRDMA_CM_STATE_SYN_RCVD:
	case IRDMA_CM_STATE_SYN_SENT:
	case IRDMA_CM_STATE_ESTABLISHED:
	case IRDMA_CM_STATE_MPAREJ_RCVD:
		/* peer closed first: answer with our own FIN, then wait for its ACK */
		cm_node->tcp_cntxt.rcv_nxt++;
		irdma_cleanup_retrans_entry(cm_node);
		cm_node->state = IRDMA_CM_STATE_LAST_ACK;
		irdma_send_fin(cm_node);
		break;
	case IRDMA_CM_STATE_MPAREQ_SENT:
		/* FIN while our MPA request is outstanding: report abort and reset */
		irdma_create_event(cm_node, IRDMA_CM_EVENT_ABORTED);
		cm_node->tcp_cntxt.rcv_nxt++;
		irdma_cleanup_retrans_entry(cm_node);
		cm_node->state = IRDMA_CM_STATE_CLOSED;
		/*
		 * NOTE(review): the extra reference is presumably released on
		 * the reset send path -- confirm against irdma_send_reset().
		 */
		atomic_inc(&cm_node->refcnt);
		irdma_send_reset(cm_node);
		break;
	case IRDMA_CM_STATE_FIN_WAIT1:
		cm_node->tcp_cntxt.rcv_nxt++;
		irdma_cleanup_retrans_entry(cm_node);
		cm_node->state = IRDMA_CM_STATE_CLOSING;
		irdma_send_ack(cm_node);
		/*
		 * Simultaneous close: wait for the peer's ACK.  Once it
		 * arrives nothing more is sent; the node is just removed.
		 */
		break;
	case IRDMA_CM_STATE_FIN_WAIT2:
		/* our FIN was already ACKed: ACK the peer's FIN and arm the close timer */
		cm_node->tcp_cntxt.rcv_nxt++;
		irdma_cleanup_retrans_entry(cm_node);
		cm_node->state = IRDMA_CM_STATE_TIME_WAIT;
		irdma_send_ack(cm_node);
		irdma_schedule_cm_timer(cm_node, NULL, IRDMA_TIMER_TYPE_CLOSE,
					1, 0);
		break;
	case IRDMA_CM_STATE_TIME_WAIT:
		/* FIN seen again in TIME_WAIT: close and drop a node reference */
		cm_node->tcp_cntxt.rcv_nxt++;
		irdma_cleanup_retrans_entry(cm_node);
		cm_node->state = IRDMA_CM_STATE_CLOSED;
		irdma_rem_ref_cm_node(cm_node);
		break;
	case IRDMA_CM_STATE_OFFLOADED:
	default:
		/* FIN is not handled in any other state; only log it */
		irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM,
			    "bad state node state = %d\n",
			    cm_node->state);
		break;
	}
}
2333 
/**
 * irdma_handle_rst_pkt - process received RST packet
 * @cm_node: connection's node
 * @rbuf: receive buffer (not referenced by this function)
 *
 * Logs the reset, cleans up the retransmit entry and then reacts according
 * to the node's current state.
 */
static void
irdma_handle_rst_pkt(struct irdma_cm_node *cm_node,
		     struct irdma_puda_buf *rbuf)
{
	irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM,
		    "caller: %pS cm_node=%p state=%d rem_port=0x%04x loc_port=0x%04x rem_addr=%pI4 loc_addr=%pI4\n",
		    __builtin_return_address(0), cm_node, cm_node->state,
		    cm_node->rem_port, cm_node->loc_port, cm_node->rem_addr,
		    cm_node->loc_addr);

	irdma_cleanup_retrans_entry(cm_node);
	switch (cm_node->state) {
	case IRDMA_CM_STATE_SYN_SENT:
	case IRDMA_CM_STATE_MPAREQ_SENT:
		/* active side: an MPA v2 attempt is retried once as MPA v1 */
		switch (cm_node->mpa_frame_rev) {
		case IETF_MPA_V2:
			/* Drop down to MPA_V1 */
			cm_node->mpa_frame_rev = IETF_MPA_V1;
			/* send a syn and goto syn sent state */
			cm_node->state = IRDMA_CM_STATE_SYN_SENT;
			if (irdma_send_syn(cm_node, 0))
				irdma_active_open_err(cm_node, false);
			break;
		case IETF_MPA_V1:
		default:
			/* already at v1 (or unknown revision): give up */
			irdma_active_open_err(cm_node, false);
			break;
		}
		break;
	case IRDMA_CM_STATE_MPAREQ_RCVD:
		/*
		 * Only bump passive_state here.  irdma_cm_reject() compares
		 * the incremented value against IRDMA_SEND_RESET_EVENT.
		 */
		atomic_inc(&cm_node->passive_state);
		break;
	case IRDMA_CM_STATE_ESTABLISHED:
	case IRDMA_CM_STATE_SYN_RCVD:
	case IRDMA_CM_STATE_LISTENING:
		irdma_passive_open_err(cm_node, false);
		break;
	case IRDMA_CM_STATE_OFFLOADED:
		irdma_active_open_err(cm_node, false);
		break;
	case IRDMA_CM_STATE_CLOSED:
		/* already closed: nothing to do */
		break;
	case IRDMA_CM_STATE_FIN_WAIT2:
	case IRDMA_CM_STATE_FIN_WAIT1:
	case IRDMA_CM_STATE_LAST_ACK:
	case IRDMA_CM_STATE_TIME_WAIT:
		/* reset during close: mark closed and drop a node reference */
		cm_node->state = IRDMA_CM_STATE_CLOSED;
		irdma_rem_ref_cm_node(cm_node);
		break;
	default:
		break;
	}
}
2392 
2393 /**
2394  * irdma_handle_rcv_mpa - Process a recv'd mpa buffer
2395  * @cm_node: connection's node
2396  * @rbuf: receive buffer
2397  */
2398 static void
2399 irdma_handle_rcv_mpa(struct irdma_cm_node *cm_node,
2400 		     struct irdma_puda_buf *rbuf)
2401 {
2402 	int err;
2403 	int datasize = rbuf->datalen;
2404 	u8 *dataloc = rbuf->data;
2405 
2406 	enum irdma_cm_event_type type = IRDMA_CM_EVENT_UNKNOWN;
2407 	u32 res_type;
2408 
2409 	err = irdma_parse_mpa(cm_node, dataloc, &res_type, datasize);
2410 	if (err) {
2411 		if (cm_node->state == IRDMA_CM_STATE_MPAREQ_SENT)
2412 			irdma_active_open_err(cm_node, true);
2413 		else
2414 			irdma_passive_open_err(cm_node, true);
2415 		return;
2416 	}
2417 
2418 	switch (cm_node->state) {
2419 	case IRDMA_CM_STATE_ESTABLISHED:
2420 		if (res_type == IRDMA_MPA_REQUEST_REJECT)
2421 			irdma_debug(&cm_node->iwdev->rf->sc_dev,
2422 				    IRDMA_DEBUG_CM, "state for reject\n");
2423 		cm_node->state = IRDMA_CM_STATE_MPAREQ_RCVD;
2424 		type = IRDMA_CM_EVENT_MPA_REQ;
2425 		irdma_send_ack(cm_node);	/* ACK received MPA request */
2426 		atomic_set(&cm_node->passive_state,
2427 			   IRDMA_PASSIVE_STATE_INDICATED);
2428 		break;
2429 	case IRDMA_CM_STATE_MPAREQ_SENT:
2430 		irdma_cleanup_retrans_entry(cm_node);
2431 		if (res_type == IRDMA_MPA_REQUEST_REJECT) {
2432 			type = IRDMA_CM_EVENT_MPA_REJECT;
2433 			cm_node->state = IRDMA_CM_STATE_MPAREJ_RCVD;
2434 		} else {
2435 			type = IRDMA_CM_EVENT_CONNECTED;
2436 			cm_node->state = IRDMA_CM_STATE_OFFLOADED;
2437 		}
2438 		irdma_send_ack(cm_node);
2439 		break;
2440 	default:
2441 		irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM,
2442 			    "wrong cm_node state =%d\n",
2443 			    cm_node->state);
2444 		break;
2445 	}
2446 	irdma_create_event(cm_node, type);
2447 }
2448 
2449 /**
2450  * irdma_check_syn - Check for error on received syn ack
2451  * @cm_node: connection's node
2452  * @tcph: pointer tcp header
2453  */
2454 static int
2455 irdma_check_syn(struct irdma_cm_node *cm_node, struct tcphdr *tcph)
2456 {
2457 	if (ntohl(tcph->th_ack) != cm_node->tcp_cntxt.loc_seq_num) {
2458 		irdma_active_open_err(cm_node, true);
2459 		return 1;
2460 	}
2461 
2462 	return 0;
2463 }
2464 
2465 /**
2466  * irdma_check_seq - check seq numbers if OK
2467  * @cm_node: connection's node
2468  * @tcph: pointer tcp header
2469  */
2470 static int
2471 irdma_check_seq(struct irdma_cm_node *cm_node, struct tcphdr *tcph)
2472 {
2473 	u32 seq;
2474 	u32 ack_seq;
2475 	u32 loc_seq_num = cm_node->tcp_cntxt.loc_seq_num;
2476 	u32 rcv_nxt = cm_node->tcp_cntxt.rcv_nxt;
2477 	u32 rcv_wnd;
2478 	int err = 0;
2479 
2480 	seq = ntohl(tcph->th_seq);
2481 	ack_seq = ntohl(tcph->th_ack);
2482 	rcv_wnd = cm_node->tcp_cntxt.rcv_wnd;
2483 	if (ack_seq != loc_seq_num ||
2484 	    !between(seq, rcv_nxt, (rcv_nxt + rcv_wnd)))
2485 		err = -1;
2486 	if (err)
2487 		irdma_debug(&cm_node->iwdev->rf->sc_dev,
2488 			    IRDMA_DEBUG_CM, "seq number err\n");
2489 
2490 	return err;
2491 }
2492 
2493 void
2494 irdma_add_conn_est_qh(struct irdma_cm_node *cm_node)
2495 {
2496 	struct irdma_cm_info nfo;
2497 
2498 	irdma_get_addr_info(cm_node, &nfo);
2499 	nfo.qh_qpid = cm_node->iwdev->vsi.ilq->qp_id;
2500 	irdma_manage_qhash(cm_node->iwdev, &nfo,
2501 			   IRDMA_QHASH_TYPE_TCP_ESTABLISHED,
2502 			   IRDMA_QHASH_MANAGE_TYPE_ADD,
2503 			   cm_node, false);
2504 	cm_node->qhash_set = true;
2505 }
2506 
/**
 * irdma_handle_syn_pkt - is for Passive node
 * @cm_node: connection's node
 * @rbuf: receive buffer
 *
 * Handles a received SYN (without ACK).  For a node in LISTENING state this
 * is the passive-open path: backlog check, TCP option parsing and address
 * handle creation, followed by the transition to SYN_RCVD.
 */
static void
irdma_handle_syn_pkt(struct irdma_cm_node *cm_node,
		     struct irdma_puda_buf *rbuf)
{
	struct tcphdr *tcph = (struct tcphdr *)rbuf->tcph;
	int err;
	u32 inc_sequence;
	int optionsize;

	/* th_off is in 32-bit words; the bytes beyond the fixed header are options */
	optionsize = (tcph->th_off << 2) - sizeof(struct tcphdr);
	inc_sequence = ntohl(tcph->th_seq);

	switch (cm_node->state) {
	case IRDMA_CM_STATE_SYN_SENT:
	case IRDMA_CM_STATE_MPAREQ_SENT:
		/* Rcvd syn on active open connection */
		irdma_active_open_err(cm_node, 1);
		break;
	case IRDMA_CM_STATE_LISTENING:
		/* Passive OPEN */
		if (atomic_read(&cm_node->listener->pend_accepts_cnt) >
		    cm_node->listener->backlog) {
			/* more pending accepts than the listener's backlog allows */
			cm_node->cm_core->stats_backlog_drops++;
			irdma_passive_open_err(cm_node, false);
			break;
		}
		err = irdma_handle_tcp_options(cm_node, tcph, optionsize, 1);
		if (err) {
			irdma_passive_open_err(cm_node, false);
			/* drop pkt */
			break;
		}
		/* called with wait == false */
		err = cm_node->cm_core->cm_create_ah(cm_node, false);
		if (err) {
			irdma_passive_open_err(cm_node, false);
			/* drop pkt */
			break;
		}
		/* next expected sequence number is the peer's SYN sequence + 1 */
		cm_node->tcp_cntxt.rcv_nxt = inc_sequence + 1;
		/* accept_pend and the counter are undone in irdma_destroy_connection() */
		cm_node->accept_pend = 1;
		atomic_inc(&cm_node->listener->pend_accepts_cnt);

		cm_node->state = IRDMA_CM_STATE_SYN_RCVD;
		break;
	case IRDMA_CM_STATE_CLOSED:
		irdma_cleanup_retrans_entry(cm_node);
		/*
		 * NOTE(review): the extra reference is presumably released on
		 * the reset send path -- confirm against irdma_send_reset().
		 */
		atomic_inc(&cm_node->refcnt);
		irdma_send_reset(cm_node);
		break;
	case IRDMA_CM_STATE_OFFLOADED:
	case IRDMA_CM_STATE_ESTABLISHED:
	case IRDMA_CM_STATE_FIN_WAIT1:
	case IRDMA_CM_STATE_FIN_WAIT2:
	case IRDMA_CM_STATE_MPAREQ_RCVD:
	case IRDMA_CM_STATE_LAST_ACK:
	case IRDMA_CM_STATE_CLOSING:
	case IRDMA_CM_STATE_UNKNOWN:
	default:
		/* a SYN in any other state is ignored */
		break;
	}
}
2573 
/**
 * irdma_handle_synack_pkt - Process SYN+ACK packet (active side)
 * @cm_node: connection's node
 * @rbuf: receive buffer
 *
 * For a node in SYN_SENT state this completes the TCP handshake: the ACK
 * number is validated, TCP options are parsed, the SYN+ACK is acknowledged
 * and the MPA request is sent.  In LISTENING and CLOSED states the packet
 * is answered with a reset; in MPAREQ_RCVD it is treated as a passive open
 * error.
 */
static void
irdma_handle_synack_pkt(struct irdma_cm_node *cm_node,
			struct irdma_puda_buf *rbuf)
{
	struct tcphdr *tcph = (struct tcphdr *)rbuf->tcph;
	int err;
	u32 inc_sequence;
	int optionsize;

	/* th_off is in 32-bit words; the bytes beyond the fixed header are options */
	optionsize = (tcph->th_off << 2) - sizeof(struct tcphdr);
	inc_sequence = ntohl(tcph->th_seq);
	switch (cm_node->state) {
	case IRDMA_CM_STATE_SYN_SENT:
		irdma_cleanup_retrans_entry(cm_node);
		/* active open */
		if (irdma_check_syn(cm_node, tcph)) {
			/* irdma_check_syn() has already run the active open error path */
			irdma_debug(&cm_node->iwdev->rf->sc_dev,
				    IRDMA_DEBUG_CM, "check syn fail\n");
			return;
		}
		cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->th_ack);
		/* setup options */
		err = irdma_handle_tcp_options(cm_node, tcph, optionsize, 0);
		if (err) {
			/* only logged; the state is left at SYN_SENT */
			irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM,
				    "cm_node=%p tcp_options failed\n",
				    cm_node);
			break;
		}
		irdma_cleanup_retrans_entry(cm_node);
		/* next expected sequence number is the peer's SYN sequence + 1 */
		cm_node->tcp_cntxt.rcv_nxt = inc_sequence + 1;
		irdma_send_ack(cm_node);	/* ACK  for the syn_ack */
		err = irdma_send_mpa_request(cm_node);
		if (err) {
			/* only logged; the state is left at SYN_SENT */
			irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM,
				    "cm_node=%p irdma_send_mpa_request failed\n",
				    cm_node);
			break;
		}
		cm_node->state = IRDMA_CM_STATE_MPAREQ_SENT;
		break;
	case IRDMA_CM_STATE_MPAREQ_RCVD:
		irdma_passive_open_err(cm_node, true);
		break;
	case IRDMA_CM_STATE_LISTENING:
		/* take the local sequence number from the peer's ACK field, then reset */
		cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->th_ack);
		irdma_cleanup_retrans_entry(cm_node);
		cm_node->state = IRDMA_CM_STATE_CLOSED;
		irdma_send_reset(cm_node);
		break;
	case IRDMA_CM_STATE_CLOSED:
		cm_node->tcp_cntxt.loc_seq_num = ntohl(tcph->th_ack);
		irdma_cleanup_retrans_entry(cm_node);
		/*
		 * NOTE(review): unlike the LISTENING case an extra reference
		 * is taken before the reset; presumably it is released on the
		 * send path -- confirm against irdma_send_reset().
		 */
		atomic_inc(&cm_node->refcnt);
		irdma_send_reset(cm_node);
		break;
	case IRDMA_CM_STATE_ESTABLISHED:
	case IRDMA_CM_STATE_FIN_WAIT1:
	case IRDMA_CM_STATE_FIN_WAIT2:
	case IRDMA_CM_STATE_LAST_ACK:
	case IRDMA_CM_STATE_OFFLOADED:
	case IRDMA_CM_STATE_CLOSING:
	case IRDMA_CM_STATE_UNKNOWN:
	case IRDMA_CM_STATE_MPAREQ_SENT:
	default:
		/* a SYN+ACK in any other state is ignored */
		break;
	}
}
2647 
/**
 * irdma_handle_ack_pkt - process packet with ACK
 * @cm_node: connection's node
 * @rbuf: receive buffer
 *
 * Validates the sequence numbers and then advances the state machine.  Any
 * payload carried by the packet in SYN_RCVD, ESTABLISHED or MPAREQ_SENT
 * state is handed to irdma_handle_rcv_mpa().
 *
 * Returns -EINVAL when the sequence check fails, the error from
 * irdma_handle_tcp_options() when option parsing fails in SYN_RCVD state,
 * and 0 otherwise.
 */
static int
irdma_handle_ack_pkt(struct irdma_cm_node *cm_node,
		     struct irdma_puda_buf *rbuf)
{
	struct tcphdr *tcph = (struct tcphdr *)rbuf->tcph;
	u32 inc_sequence;
	int ret;
	int optionsize;
	u32 datasize = rbuf->datalen;

	/* th_off is in 32-bit words; the bytes beyond the fixed header are options */
	optionsize = (tcph->th_off << 2) - sizeof(struct tcphdr);

	if (irdma_check_seq(cm_node, tcph))
		return -EINVAL;

	inc_sequence = ntohl(tcph->th_seq);
	switch (cm_node->state) {
	case IRDMA_CM_STATE_SYN_RCVD:
		/* ACK of our SYN+ACK: the passive handshake is complete */
		irdma_cleanup_retrans_entry(cm_node);
		ret = irdma_handle_tcp_options(cm_node, tcph, optionsize, 1);
		if (ret)
			return ret;
		cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->th_ack);
		cm_node->state = IRDMA_CM_STATE_ESTABLISHED;
		if (datasize) {
			/* payload present: advance rcv_nxt past it and parse it as MPA */
			cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
			irdma_handle_rcv_mpa(cm_node, rbuf);
		}
		break;
	case IRDMA_CM_STATE_ESTABLISHED:
		irdma_cleanup_retrans_entry(cm_node);
		if (datasize) {
			cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
			irdma_handle_rcv_mpa(cm_node, rbuf);
		}
		break;
	case IRDMA_CM_STATE_MPAREQ_SENT:
		cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->th_ack);
		if (datasize) {
			cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
			cm_node->ack_rcvd = false;
			irdma_handle_rcv_mpa(cm_node, rbuf);
		} else {
			/* bare ACK with no payload: only record that it arrived */
			cm_node->ack_rcvd = true;
		}
		break;
	case IRDMA_CM_STATE_LISTENING:
		/* unexpected ACK on a listening node: close and reset */
		irdma_cleanup_retrans_entry(cm_node);
		cm_node->state = IRDMA_CM_STATE_CLOSED;
		irdma_send_reset(cm_node);
		break;
	case IRDMA_CM_STATE_CLOSED:
		irdma_cleanup_retrans_entry(cm_node);
		/*
		 * NOTE(review): the extra reference is presumably released on
		 * the reset send path -- confirm against irdma_send_reset().
		 */
		atomic_inc(&cm_node->refcnt);
		irdma_send_reset(cm_node);
		break;
	case IRDMA_CM_STATE_LAST_ACK:
	case IRDMA_CM_STATE_CLOSING:
		/* final ACK of the close sequence: drop a node reference */
		irdma_cleanup_retrans_entry(cm_node);
		cm_node->state = IRDMA_CM_STATE_CLOSED;
		irdma_rem_ref_cm_node(cm_node);
		break;
	case IRDMA_CM_STATE_FIN_WAIT1:
		/* our FIN was ACKed; now wait for the peer's FIN */
		irdma_cleanup_retrans_entry(cm_node);
		cm_node->state = IRDMA_CM_STATE_FIN_WAIT2;
		break;
	case IRDMA_CM_STATE_SYN_SENT:
	case IRDMA_CM_STATE_FIN_WAIT2:
	case IRDMA_CM_STATE_OFFLOADED:
	case IRDMA_CM_STATE_MPAREQ_RCVD:
	case IRDMA_CM_STATE_UNKNOWN:
	default:
		/* no state change; only the retransmit entry is cleaned up */
		irdma_cleanup_retrans_entry(cm_node);
		break;
	}

	return 0;
}
2731 
2732 /**
2733  * irdma_process_pkt - process cm packet
2734  * @cm_node: connection's node
2735  * @rbuf: receive buffer
2736  */
2737 static void
2738 irdma_process_pkt(struct irdma_cm_node *cm_node,
2739 		  struct irdma_puda_buf *rbuf)
2740 {
2741 	enum irdma_tcpip_pkt_type pkt_type = IRDMA_PKT_TYPE_UNKNOWN;
2742 	struct tcphdr *tcph = (struct tcphdr *)rbuf->tcph;
2743 	u32 fin_set = 0;
2744 	int err;
2745 
2746 	if (tcph->th_flags & TH_RST) {
2747 		pkt_type = IRDMA_PKT_TYPE_RST;
2748 	} else if (tcph->th_flags & TH_SYN) {
2749 		pkt_type = IRDMA_PKT_TYPE_SYN;
2750 		if (tcph->th_flags & TH_ACK)
2751 			pkt_type = IRDMA_PKT_TYPE_SYNACK;
2752 	} else if (tcph->th_flags & TH_ACK) {
2753 		pkt_type = IRDMA_PKT_TYPE_ACK;
2754 	}
2755 	if (tcph->th_flags & TH_FIN)
2756 		fin_set = 1;
2757 
2758 	switch (pkt_type) {
2759 	case IRDMA_PKT_TYPE_SYN:
2760 		irdma_handle_syn_pkt(cm_node, rbuf);
2761 		break;
2762 	case IRDMA_PKT_TYPE_SYNACK:
2763 		irdma_handle_synack_pkt(cm_node, rbuf);
2764 		break;
2765 	case IRDMA_PKT_TYPE_ACK:
2766 		err = irdma_handle_ack_pkt(cm_node, rbuf);
2767 		if (fin_set && !err)
2768 			irdma_handle_fin_pkt(cm_node);
2769 		break;
2770 	case IRDMA_PKT_TYPE_RST:
2771 		irdma_handle_rst_pkt(cm_node, rbuf);
2772 		break;
2773 	default:
2774 		if (fin_set &&
2775 		    (!irdma_check_seq(cm_node, (struct tcphdr *)rbuf->tcph)))
2776 			irdma_handle_fin_pkt(cm_node);
2777 		break;
2778 	}
2779 }
2780 
2781 /**
2782  * irdma_make_listen_node - create a listen node with params
2783  * @cm_core: cm's core
2784  * @iwdev: iwarp device structure
2785  * @cm_info: quad info for connection
2786  */
2787 static struct irdma_cm_listener *
2788 irdma_make_listen_node(struct irdma_cm_core *cm_core,
2789 		       struct irdma_device *iwdev,
2790 		       struct irdma_cm_info *cm_info)
2791 {
2792 	struct irdma_cm_listener *listener;
2793 	unsigned long flags;
2794 
2795 	/* cannot have multiple matching listeners */
2796 	listener = irdma_find_listener(cm_core, cm_info->loc_addr, cm_info->ipv4,
2797 				       cm_info->loc_port, cm_info->vlan_id,
2798 				       IRDMA_CM_LISTENER_EITHER_STATE);
2799 	if (listener &&
2800 	    listener->listener_state == IRDMA_CM_LISTENER_ACTIVE_STATE) {
2801 		atomic_dec(&listener->refcnt);
2802 		return NULL;
2803 	}
2804 
2805 	if (!listener) {
2806 		/*
2807 		 * create a CM listen node 1/2 node to compare incoming traffic to
2808 		 */
2809 		listener = kzalloc(sizeof(*listener), GFP_KERNEL);
2810 		if (!listener)
2811 			return NULL;
2812 		cm_core->stats_listen_nodes_created++;
2813 		memcpy(listener->loc_addr, cm_info->loc_addr,
2814 		       sizeof(listener->loc_addr));
2815 		listener->loc_port = cm_info->loc_port;
2816 
2817 		INIT_LIST_HEAD(&listener->child_listen_list);
2818 
2819 		atomic_set(&listener->refcnt, 1);
2820 	} else {
2821 		listener->reused_node = 1;
2822 	}
2823 
2824 	listener->cm_id = cm_info->cm_id;
2825 	listener->ipv4 = cm_info->ipv4;
2826 	listener->vlan_id = cm_info->vlan_id;
2827 	atomic_set(&listener->pend_accepts_cnt, 0);
2828 	listener->cm_core = cm_core;
2829 	listener->iwdev = iwdev;
2830 
2831 	listener->backlog = cm_info->backlog;
2832 	listener->listener_state = IRDMA_CM_LISTENER_ACTIVE_STATE;
2833 
2834 	if (!listener->reused_node) {
2835 		spin_lock_irqsave(&cm_core->listen_list_lock, flags);
2836 		list_add(&listener->list, &cm_core->listen_list);
2837 		spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
2838 	}
2839 
2840 	return listener;
2841 }
2842 
2843 /**
2844  * irdma_create_cm_node - make a connection node with params
2845  * @cm_core: cm's core
2846  * @iwdev: iwarp device structure
2847  * @conn_param: connection parameters
2848  * @cm_info: quad info for connection
2849  * @caller_cm_node: pointer to cm_node structure to return
2850  */
2851 static int
2852 irdma_create_cm_node(struct irdma_cm_core *cm_core,
2853 		     struct irdma_device *iwdev,
2854 		     struct iw_cm_conn_param *conn_param,
2855 		     struct irdma_cm_info *cm_info,
2856 		     struct irdma_cm_node **caller_cm_node)
2857 {
2858 	struct irdma_cm_node *cm_node;
2859 	u16 private_data_len = conn_param->private_data_len;
2860 	const void *private_data = conn_param->private_data;
2861 
2862 	/* create a CM connection node */
2863 	cm_node = irdma_make_cm_node(cm_core, iwdev, cm_info, NULL);
2864 	if (!cm_node)
2865 		return -ENOMEM;
2866 
2867 	/* set our node side to client (active) side */
2868 	cm_node->tcp_cntxt.client = 1;
2869 	cm_node->tcp_cntxt.rcv_wscale = IRDMA_CM_DEFAULT_RCV_WND_SCALE;
2870 
2871 	irdma_record_ird_ord(cm_node, conn_param->ird, conn_param->ord);
2872 
2873 	cm_node->pdata.size = private_data_len;
2874 	cm_node->pdata.addr = cm_node->pdata_buf;
2875 
2876 	memcpy(cm_node->pdata_buf, private_data, private_data_len);
2877 	*caller_cm_node = cm_node;
2878 
2879 	return 0;
2880 }
2881 
2882 /**
2883  * irdma_cm_reject - reject and teardown a connection
2884  * @cm_node: connection's node
2885  * @pdata: ptr to private data for reject
2886  * @plen: size of private data
2887  */
2888 static int
2889 irdma_cm_reject(struct irdma_cm_node *cm_node, const void *pdata,
2890 		u8 plen)
2891 {
2892 	int ret;
2893 	int passive_state;
2894 
2895 	if (cm_node->tcp_cntxt.client)
2896 		return 0;
2897 
2898 	irdma_cleanup_retrans_entry(cm_node);
2899 
2900 	passive_state = atomic_add_return(1, &cm_node->passive_state);
2901 	if (passive_state == IRDMA_SEND_RESET_EVENT) {
2902 		cm_node->state = IRDMA_CM_STATE_CLOSED;
2903 		irdma_rem_ref_cm_node(cm_node);
2904 		return 0;
2905 	}
2906 
2907 	if (cm_node->state == IRDMA_CM_STATE_LISTENER_DESTROYED) {
2908 		irdma_rem_ref_cm_node(cm_node);
2909 		return 0;
2910 	}
2911 
2912 	ret = irdma_send_mpa_reject(cm_node, pdata, plen);
2913 	if (!ret)
2914 		return 0;
2915 
2916 	cm_node->state = IRDMA_CM_STATE_CLOSED;
2917 	if (irdma_send_reset(cm_node))
2918 		irdma_debug(&cm_node->iwdev->rf->sc_dev,
2919 			    IRDMA_DEBUG_CM, "send reset failed\n");
2920 
2921 	return ret;
2922 }
2923 
2924 /**
2925  * irdma_cm_close - close of cm connection
2926  * @cm_node: connection's node
2927  */
2928 static int
2929 irdma_cm_close(struct irdma_cm_node *cm_node)
2930 {
2931 	switch (cm_node->state) {
2932 	case IRDMA_CM_STATE_SYN_RCVD:
2933 	case IRDMA_CM_STATE_SYN_SENT:
2934 	case IRDMA_CM_STATE_ONE_SIDE_ESTABLISHED:
2935 	case IRDMA_CM_STATE_ESTABLISHED:
2936 	case IRDMA_CM_STATE_ACCEPTING:
2937 	case IRDMA_CM_STATE_MPAREQ_SENT:
2938 	case IRDMA_CM_STATE_MPAREQ_RCVD:
2939 		irdma_cleanup_retrans_entry(cm_node);
2940 		irdma_send_reset(cm_node);
2941 		break;
2942 	case IRDMA_CM_STATE_CLOSE_WAIT:
2943 		cm_node->state = IRDMA_CM_STATE_LAST_ACK;
2944 		irdma_send_fin(cm_node);
2945 		break;
2946 	case IRDMA_CM_STATE_FIN_WAIT1:
2947 	case IRDMA_CM_STATE_FIN_WAIT2:
2948 	case IRDMA_CM_STATE_LAST_ACK:
2949 	case IRDMA_CM_STATE_TIME_WAIT:
2950 	case IRDMA_CM_STATE_CLOSING:
2951 		return -EINVAL;
2952 	case IRDMA_CM_STATE_LISTENING:
2953 		irdma_cleanup_retrans_entry(cm_node);
2954 		irdma_send_reset(cm_node);
2955 		break;
2956 	case IRDMA_CM_STATE_MPAREJ_RCVD:
2957 	case IRDMA_CM_STATE_UNKNOWN:
2958 	case IRDMA_CM_STATE_INITED:
2959 	case IRDMA_CM_STATE_CLOSED:
2960 	case IRDMA_CM_STATE_LISTENER_DESTROYED:
2961 		irdma_rem_ref_cm_node(cm_node);
2962 		break;
2963 	case IRDMA_CM_STATE_OFFLOADED:
2964 		if (cm_node->send_entry)
2965 			irdma_debug(&cm_node->iwdev->rf->sc_dev,
2966 				    IRDMA_DEBUG_CM, "CM send_entry in OFFLOADED state\n");
2967 		irdma_rem_ref_cm_node(cm_node);
2968 		break;
2969 	}
2970 
2971 	return 0;
2972 }
2973 
2974 /**
2975  * irdma_receive_ilq - recv an ETHERNET packet, and process it
2976  * through CM
2977  * @vsi: VSI structure of dev
2978  * @rbuf: receive buffer
2979  */
2980 void
2981 irdma_receive_ilq(struct irdma_sc_vsi *vsi, struct irdma_puda_buf *rbuf)
2982 {
2983 	struct irdma_cm_node *cm_node;
2984 	struct irdma_cm_listener *listener;
2985 	struct ip *iph;
2986 	struct ip6_hdr *ip6h;
2987 	struct tcphdr *tcph;
2988 	struct irdma_cm_info cm_info = {0};
2989 	struct irdma_device *iwdev = vsi->back_vsi;
2990 	struct irdma_cm_core *cm_core = &iwdev->cm_core;
2991 	struct ether_vlan_header *ethh;
2992 	u16 vtag;
2993 
2994 	/* if vlan, then maclen = 18 else 14 */
2995 	iph = (struct ip *)rbuf->iph;
2996 	irdma_debug_buf(vsi->dev, IRDMA_DEBUG_ILQ, "RECEIVE ILQ BUFFER",
2997 			rbuf->mem.va, rbuf->totallen);
2998 	if (iwdev->rf->sc_dev.hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2) {
2999 		if (rbuf->vlan_valid) {
3000 			vtag = rbuf->vlan_id;
3001 			cm_info.user_pri = (vtag & EVL_PRI_MASK) >>
3002 			    VLAN_PRIO_SHIFT;
3003 			cm_info.vlan_id = vtag & EVL_VLID_MASK;
3004 		} else {
3005 			cm_info.vlan_id = 0xFFFF;
3006 		}
3007 	} else {
3008 		ethh = rbuf->mem.va;
3009 
3010 		if (ethh->evl_proto == htons(ETH_P_8021Q)) {
3011 			vtag = ntohs(ethh->evl_tag);
3012 			cm_info.user_pri = (vtag & EVL_PRI_MASK) >>
3013 			    VLAN_PRIO_SHIFT;
3014 			cm_info.vlan_id = vtag & EVL_VLID_MASK;
3015 			irdma_debug(&cm_core->iwdev->rf->sc_dev, IRDMA_DEBUG_CM,
3016 				    "vlan_id=%d\n",
3017 				    cm_info.vlan_id);
3018 		} else {
3019 			cm_info.vlan_id = 0xFFFF;
3020 		}
3021 	}
3022 	tcph = (struct tcphdr *)rbuf->tcph;
3023 
3024 	if (rbuf->ipv4) {
3025 		cm_info.loc_addr[0] = ntohl(iph->ip_dst.s_addr);
3026 		cm_info.rem_addr[0] = ntohl(iph->ip_src.s_addr);
3027 		cm_info.ipv4 = true;
3028 		cm_info.tos = iph->ip_tos;
3029 	} else {
3030 		ip6h = (struct ip6_hdr *)rbuf->iph;
3031 		irdma_copy_ip_ntohl(cm_info.loc_addr,
3032 				    ip6h->ip6_dst.__u6_addr.__u6_addr32);
3033 		irdma_copy_ip_ntohl(cm_info.rem_addr,
3034 				    ip6h->ip6_src.__u6_addr.__u6_addr32);
3035 		cm_info.ipv4 = false;
3036 		cm_info.tos = (ip6h->ip6_vfc << 4) | ip6h->ip6_flow;
3037 	}
3038 	cm_info.loc_port = ntohs(tcph->th_dport);
3039 	cm_info.rem_port = ntohs(tcph->th_sport);
3040 	cm_node = irdma_find_node(cm_core, cm_info.rem_port, cm_info.rem_addr,
3041 				  cm_info.loc_port, cm_info.loc_addr, cm_info.vlan_id);
3042 
3043 	if (!cm_node) {
3044 		/*
3045 		 * Only type of packet accepted are for the PASSIVE open (syn only)
3046 		 */
3047 		if (!(tcph->th_flags & TH_SYN) || tcph->th_flags & TH_ACK)
3048 			return;
3049 
3050 		listener = irdma_find_listener(cm_core,
3051 					       cm_info.loc_addr,
3052 					       cm_info.ipv4,
3053 					       cm_info.loc_port,
3054 					       cm_info.vlan_id,
3055 					       IRDMA_CM_LISTENER_ACTIVE_STATE);
3056 		if (!listener) {
3057 			cm_info.cm_id = NULL;
3058 			irdma_debug(&cm_core->iwdev->rf->sc_dev,
3059 				    IRDMA_DEBUG_CM, "no listener found\n");
3060 			return;
3061 		}
3062 
3063 		cm_info.cm_id = listener->cm_id;
3064 		cm_node = irdma_make_cm_node(cm_core, iwdev, &cm_info,
3065 					     listener);
3066 		if (!cm_node) {
3067 			irdma_debug(&cm_core->iwdev->rf->sc_dev,
3068 				    IRDMA_DEBUG_CM, "allocate node failed\n");
3069 			atomic_dec(&listener->refcnt);
3070 			return;
3071 		}
3072 
3073 		if (!(tcph->th_flags & (TH_RST | TH_FIN))) {
3074 			cm_node->state = IRDMA_CM_STATE_LISTENING;
3075 		} else {
3076 			irdma_rem_ref_cm_node(cm_node);
3077 			return;
3078 		}
3079 
3080 		atomic_inc(&cm_node->refcnt);
3081 	} else if (cm_node->state == IRDMA_CM_STATE_OFFLOADED) {
3082 		irdma_rem_ref_cm_node(cm_node);
3083 		return;
3084 	}
3085 
3086 	irdma_process_pkt(cm_node, rbuf);
3087 	irdma_rem_ref_cm_node(cm_node);
3088 }
3089 
3090 static int
3091 irdma_add_qh(struct irdma_cm_node *cm_node, bool active)
3092 {
3093 	if (!active)
3094 		irdma_add_conn_est_qh(cm_node);
3095 	return 0;
3096 }
3097 
/**
 * irdma_cm_free_ah_nop - cm_free_ah hook installed for IRDMA_GEN_1
 * @cm_node: connection's node (unused)
 */
static void
irdma_cm_free_ah_nop(struct irdma_cm_node *cm_node)
{
	/* intentionally empty */
}
3102 
3103 /**
3104  * irdma_setup_cm_core - setup top level instance of a cm core
3105  * @iwdev: iwarp device structure
3106  * @rdma_ver: HW version
3107  */
3108 int
3109 irdma_setup_cm_core(struct irdma_device *iwdev, u8 rdma_ver)
3110 {
3111 	struct irdma_cm_core *cm_core = &iwdev->cm_core;
3112 
3113 	cm_core->iwdev = iwdev;
3114 	cm_core->dev = &iwdev->rf->sc_dev;
3115 
3116 	/* Handles CM event work items send to Iwarp core */
3117 	cm_core->event_wq = alloc_ordered_workqueue("iwarp-event-wq", 0);
3118 	if (!cm_core->event_wq)
3119 		return -ENOMEM;
3120 
3121 	INIT_LIST_HEAD(&cm_core->listen_list);
3122 
3123 	timer_setup(&cm_core->tcp_timer, irdma_cm_timer_tick, 0);
3124 
3125 	spin_lock_init(&cm_core->ht_lock);
3126 	spin_lock_init(&cm_core->listen_list_lock);
3127 	spin_lock_init(&cm_core->apbvt_lock);
3128 	switch (rdma_ver) {
3129 	case IRDMA_GEN_1:
3130 		cm_core->form_cm_frame = irdma_form_uda_cm_frame;
3131 		cm_core->cm_create_ah = irdma_add_qh;
3132 		cm_core->cm_free_ah = irdma_cm_free_ah_nop;
3133 		break;
3134 	case IRDMA_GEN_2:
3135 	default:
3136 		cm_core->form_cm_frame = irdma_form_ah_cm_frame;
3137 		cm_core->cm_create_ah = irdma_cm_create_ah;
3138 		cm_core->cm_free_ah = irdma_cm_free_ah;
3139 	}
3140 
3141 	return 0;
3142 }
3143 
3144 /**
3145  * irdma_cleanup_cm_core - deallocate a top level instance of a
3146  * cm core
3147  * @cm_core: cm's core
3148  */
3149 void
3150 irdma_cleanup_cm_core(struct irdma_cm_core *cm_core)
3151 {
3152 	if (!cm_core)
3153 		return;
3154 
3155 	del_timer_sync(&cm_core->tcp_timer);
3156 
3157 	destroy_workqueue(cm_core->event_wq);
3158 	cm_core->dev->ws_reset(&cm_core->iwdev->vsi);
3159 }
3160 
3161 /**
3162  * irdma_init_tcp_ctx - setup qp context
3163  * @cm_node: connection's node
3164  * @tcp_info: offload info for tcp
3165  * @iwqp: associate qp for the connection
3166  */
3167 static void
3168 irdma_init_tcp_ctx(struct irdma_cm_node *cm_node,
3169 		   struct irdma_tcp_offload_info *tcp_info,
3170 		   struct irdma_qp *iwqp)
3171 {
3172 	tcp_info->ipv4 = cm_node->ipv4;
3173 	tcp_info->drop_ooo_seg = !iwqp->iwdev->iw_ooo;
3174 	tcp_info->wscale = true;
3175 	tcp_info->ignore_tcp_opt = true;
3176 	tcp_info->ignore_tcp_uns_opt = true;
3177 	tcp_info->no_nagle = false;
3178 
3179 	tcp_info->ttl = IRDMA_DEFAULT_TTL;
3180 	tcp_info->rtt_var = IRDMA_DEFAULT_RTT_VAR;
3181 	tcp_info->ss_thresh = IRDMA_DEFAULT_SS_THRESH;
3182 	tcp_info->rexmit_thresh = IRDMA_DEFAULT_REXMIT_THRESH;
3183 
3184 	tcp_info->tcp_state = IRDMA_TCP_STATE_ESTABLISHED;
3185 	tcp_info->snd_wscale = cm_node->tcp_cntxt.snd_wscale;
3186 	tcp_info->rcv_wscale = cm_node->tcp_cntxt.rcv_wscale;
3187 
3188 	tcp_info->snd_nxt = cm_node->tcp_cntxt.loc_seq_num;
3189 	tcp_info->snd_wnd = cm_node->tcp_cntxt.snd_wnd;
3190 	tcp_info->rcv_nxt = cm_node->tcp_cntxt.rcv_nxt;
3191 	tcp_info->snd_max = cm_node->tcp_cntxt.loc_seq_num;
3192 
3193 	tcp_info->snd_una = cm_node->tcp_cntxt.loc_seq_num;
3194 	tcp_info->cwnd = 2 * cm_node->tcp_cntxt.mss;
3195 	tcp_info->snd_wl1 = cm_node->tcp_cntxt.rcv_nxt;
3196 	tcp_info->snd_wl2 = cm_node->tcp_cntxt.loc_seq_num;
3197 	tcp_info->max_snd_window = cm_node->tcp_cntxt.max_snd_wnd;
3198 	tcp_info->rcv_wnd = cm_node->tcp_cntxt.rcv_wnd
3199 	    << cm_node->tcp_cntxt.rcv_wscale;
3200 
3201 	tcp_info->flow_label = 0;
3202 	tcp_info->snd_mss = (u32)cm_node->tcp_cntxt.mss;
3203 	tcp_info->tos = cm_node->tos;
3204 	if (cm_node->vlan_id < VLAN_N_VID) {
3205 		tcp_info->insert_vlan_tag = true;
3206 		tcp_info->vlan_tag = cm_node->vlan_id;
3207 		tcp_info->vlan_tag |= cm_node->user_pri << VLAN_PRIO_SHIFT;
3208 	}
3209 	tcp_info->src_port = cm_node->loc_port;
3210 	tcp_info->dst_port = cm_node->rem_port;
3211 	tcp_info->arp_idx = (u16)irdma_arp_table(iwqp->iwdev->rf,
3212 						  cm_node->rem_addr, NULL,
3213 						  IRDMA_ARP_RESOLVE);
3214 	if (cm_node->ipv4) {
3215 		tcp_info->dest_ip_addr[3] = cm_node->rem_addr[0];
3216 		tcp_info->local_ipaddr[3] = cm_node->loc_addr[0];
3217 	} else {
3218 		memcpy(tcp_info->dest_ip_addr, cm_node->rem_addr,
3219 		       sizeof(tcp_info->dest_ip_addr));
3220 		memcpy(tcp_info->local_ipaddr, cm_node->loc_addr,
3221 		       sizeof(tcp_info->local_ipaddr));
3222 	}
3223 }
3224 
3225 /**
3226  * irdma_cm_init_tsa_conn - setup qp for RTS
3227  * @iwqp: associate qp for the connection
3228  * @cm_node: connection's node
3229  */
3230 static void
3231 irdma_cm_init_tsa_conn(struct irdma_qp *iwqp,
3232 		       struct irdma_cm_node *cm_node)
3233 {
3234 	struct irdma_iwarp_offload_info *iwarp_info;
3235 	struct irdma_qp_host_ctx_info *ctx_info;
3236 
3237 	iwarp_info = &iwqp->iwarp_info;
3238 	ctx_info = &iwqp->ctx_info;
3239 
3240 	ctx_info->tcp_info = &iwqp->tcp_info;
3241 	ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id;
3242 	ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id;
3243 
3244 	iwarp_info->ord_size = cm_node->ord_size;
3245 	iwarp_info->ird_size = cm_node->ird_size;
3246 	iwarp_info->rd_en = true;
3247 	iwarp_info->rdmap_ver = 1;
3248 	iwarp_info->ddp_ver = 1;
3249 	iwarp_info->pd_id = iwqp->iwpd->sc_pd.pd_id;
3250 
3251 	ctx_info->tcp_info_valid = true;
3252 	ctx_info->iwarp_info_valid = true;
3253 	ctx_info->user_pri = cm_node->user_pri;
3254 
3255 	irdma_init_tcp_ctx(cm_node, &iwqp->tcp_info, iwqp);
3256 	if (cm_node->snd_mark_en) {
3257 		iwarp_info->snd_mark_en = true;
3258 		iwarp_info->snd_mark_offset = (iwqp->tcp_info.snd_nxt & SNDMARKER_SEQNMASK) +
3259 		    cm_node->lsmm_size;
3260 	}
3261 
3262 	cm_node->state = IRDMA_CM_STATE_OFFLOADED;
3263 	iwqp->tcp_info.tcp_state = IRDMA_TCP_STATE_ESTABLISHED;
3264 	iwqp->tcp_info.src_mac_addr_idx = iwqp->iwdev->mac_ip_table_idx;
3265 
3266 	if (cm_node->rcv_mark_en) {
3267 		iwarp_info->rcv_mark_en = true;
3268 		iwarp_info->align_hdrs = true;
3269 	}
3270 
3271 	irdma_sc_qp_setctx(&iwqp->sc_qp, iwqp->host_ctx.va, ctx_info);
3272 
3273 	/* once tcp_info is set, no need to do it again */
3274 	ctx_info->tcp_info_valid = false;
3275 	ctx_info->iwarp_info_valid = false;
3276 }
3277 
3278 /**
3279  * irdma_cm_disconn - when a connection is being closed
3280  * @iwqp: associated qp for the connection
3281  */
3282 void
3283 irdma_cm_disconn(struct irdma_qp *iwqp)
3284 {
3285 	struct irdma_device *iwdev = iwqp->iwdev;
3286 	struct disconn_work *work;
3287 	unsigned long flags;
3288 
3289 	work = kzalloc(sizeof(*work), GFP_ATOMIC);
3290 	if (!work)
3291 		return;
3292 
3293 	spin_lock_irqsave(&iwdev->rf->qptable_lock, flags);
3294 	if (!iwdev->rf->qp_table[iwqp->ibqp.qp_num]) {
3295 		spin_unlock_irqrestore(&iwdev->rf->qptable_lock, flags);
3296 		irdma_debug(&iwdev->rf->sc_dev,
3297 			    IRDMA_DEBUG_CM, "qp_id %d is already freed\n",
3298 			    iwqp->ibqp.qp_num);
3299 		kfree(work);
3300 		return;
3301 	}
3302 	irdma_qp_add_ref(&iwqp->ibqp);
3303 	spin_unlock_irqrestore(&iwdev->rf->qptable_lock, flags);
3304 
3305 	work->iwqp = iwqp;
3306 	INIT_WORK(&work->work, irdma_disconnect_worker);
3307 	queue_work(iwdev->cleanup_wq, &work->work);
3308 }
3309 
3310 /**
3311  * irdma_qp_disconnect - free qp and close cm
3312  * @iwqp: associate qp for the connection
3313  */
3314 static void
3315 irdma_qp_disconnect(struct irdma_qp *iwqp)
3316 {
3317 	struct irdma_device *iwdev = iwqp->iwdev;
3318 
3319 	iwqp->active_conn = 0;
3320 	/* close the CM node down if it is still active */
3321 	irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "Call close API\n");
3322 	irdma_cm_close(iwqp->cm_node);
3323 }
3324 
3325 /**
3326  * irdma_cm_disconn_true - called by worker thread to disconnect qp
3327  * @iwqp: associate qp for the connection
3328  */
3329 static void
3330 irdma_cm_disconn_true(struct irdma_qp *iwqp)
3331 {
3332 	struct iw_cm_id *cm_id;
3333 	struct irdma_device *iwdev;
3334 	struct irdma_sc_qp *qp = &iwqp->sc_qp;
3335 	u16 last_ae;
3336 	u8 original_hw_tcp_state;
3337 	u8 original_ibqp_state;
3338 	int disconn_status = 0;
3339 	int issue_disconn = 0;
3340 	int issue_close = 0;
3341 	int issue_flush = 0;
3342 	unsigned long flags;
3343 	int err;
3344 
3345 	iwdev = iwqp->iwdev;
3346 	spin_lock_irqsave(&iwqp->lock, flags);
3347 	if (rdma_protocol_roce(&iwdev->ibdev, 1)) {
3348 		struct ib_qp_attr attr;
3349 
3350 		if (iwqp->flush_issued || iwqp->sc_qp.qp_uk.destroy_pending) {
3351 			spin_unlock_irqrestore(&iwqp->lock, flags);
3352 			return;
3353 		}
3354 
3355 		spin_unlock_irqrestore(&iwqp->lock, flags);
3356 
3357 		attr.qp_state = IB_QPS_ERR;
3358 		irdma_modify_qp_roce(&iwqp->ibqp, &attr, IB_QP_STATE, NULL);
3359 		irdma_ib_qp_event(iwqp, qp->event_type);
3360 		return;
3361 	}
3362 
3363 	cm_id = iwqp->cm_id;
3364 	original_hw_tcp_state = iwqp->hw_tcp_state;
3365 	original_ibqp_state = iwqp->ibqp_state;
3366 	last_ae = iwqp->last_aeq;
3367 
3368 	if (qp->term_flags) {
3369 		issue_disconn = 1;
3370 		issue_close = 1;
3371 		iwqp->cm_id = NULL;
3372 		irdma_terminate_del_timer(qp);
3373 		if (!iwqp->flush_issued) {
3374 			iwqp->flush_issued = 1;
3375 			issue_flush = 1;
3376 		}
3377 	} else if ((original_hw_tcp_state == IRDMA_TCP_STATE_CLOSE_WAIT) ||
3378 		   ((original_ibqp_state == IB_QPS_RTS) &&
3379 		    (last_ae == IRDMA_AE_LLP_CONNECTION_RESET))) {
3380 		issue_disconn = 1;
3381 		if (last_ae == IRDMA_AE_LLP_CONNECTION_RESET)
3382 			disconn_status = -ECONNRESET;
3383 	}
3384 
3385 	if (original_hw_tcp_state == IRDMA_TCP_STATE_CLOSED ||
3386 	    original_hw_tcp_state == IRDMA_TCP_STATE_TIME_WAIT ||
3387 	    last_ae == IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE ||
3388 	    last_ae == IRDMA_AE_BAD_CLOSE ||
3389 	    last_ae == IRDMA_AE_LLP_CONNECTION_RESET || iwdev->rf->reset || !cm_id) {
3390 		issue_close = 1;
3391 		iwqp->cm_id = NULL;
3392 		qp->term_flags = 0;
3393 		if (!iwqp->flush_issued) {
3394 			iwqp->flush_issued = 1;
3395 			issue_flush = 1;
3396 		}
3397 	}
3398 
3399 	spin_unlock_irqrestore(&iwqp->lock, flags);
3400 	if (issue_flush && !iwqp->sc_qp.qp_uk.destroy_pending) {
3401 		irdma_flush_wqes(iwqp, IRDMA_FLUSH_SQ | IRDMA_FLUSH_RQ |
3402 				 IRDMA_FLUSH_WAIT);
3403 
3404 		if (qp->term_flags)
3405 			irdma_ib_qp_event(iwqp, qp->event_type);
3406 	}
3407 
3408 	if (!cm_id || !cm_id->event_handler)
3409 		return;
3410 
3411 	spin_lock_irqsave(&iwdev->cm_core.ht_lock, flags);
3412 	if (!iwqp->cm_node) {
3413 		spin_unlock_irqrestore(&iwdev->cm_core.ht_lock, flags);
3414 		return;
3415 	}
3416 	atomic_inc(&iwqp->cm_node->refcnt);
3417 
3418 	spin_unlock_irqrestore(&iwdev->cm_core.ht_lock, flags);
3419 
3420 	if (issue_disconn) {
3421 		err = irdma_send_cm_event(iwqp->cm_node, cm_id,
3422 					  IW_CM_EVENT_DISCONNECT,
3423 					  disconn_status);
3424 		if (err)
3425 			irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM,
3426 				    "disconnect event failed: - cm_id = %p\n",
3427 				    cm_id);
3428 	}
3429 	if (issue_close) {
3430 		cm_id->provider_data = iwqp;
3431 		err = irdma_send_cm_event(iwqp->cm_node, cm_id,
3432 					  IW_CM_EVENT_CLOSE, 0);
3433 		if (err)
3434 			irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM,
3435 				    "close event failed: - cm_id = %p\n",
3436 				    cm_id);
3437 		irdma_qp_disconnect(iwqp);
3438 	}
3439 	irdma_rem_ref_cm_node(iwqp->cm_node);
3440 }
3441 
3442 /**
3443  * irdma_disconnect_worker - worker for connection close
3444  * @work: points or disconn structure
3445  */
3446 static void
3447 irdma_disconnect_worker(struct work_struct *work)
3448 {
3449 	struct disconn_work *dwork = container_of(work, struct disconn_work, work);
3450 	struct irdma_qp *iwqp = dwork->iwqp;
3451 
3452 	kfree(dwork);
3453 	irdma_cm_disconn_true(iwqp);
3454 	irdma_qp_rem_ref(&iwqp->ibqp);
3455 }
3456 
3457 /**
3458  * irdma_free_lsmm_rsrc - free lsmm memory and deregister
3459  * @iwqp: associate qp for the connection
3460  */
3461 void
3462 irdma_free_lsmm_rsrc(struct irdma_qp *iwqp)
3463 {
3464 	struct irdma_device *iwdev;
3465 
3466 	iwdev = iwqp->iwdev;
3467 
3468 	if (iwqp->ietf_mem.va) {
3469 		if (iwqp->lsmm_mr)
3470 			kc_free_lsmm_dereg_mr(iwdev, iwqp);
3471 		irdma_free_dma_mem(iwdev->rf->sc_dev.hw,
3472 				   &iwqp->ietf_mem);
3473 		iwqp->ietf_mem.va = NULL;
3474 	}
3475 }
3476 
/**
 * irdma_accept - registered call for connection to be accepted
 * @cm_id: cm information for passive connection
 * @conn_param: accept parameters
 *
 * Builds the MPA reply (plus caller private data) in a DMA buffer,
 * registers it, posts it as the LSMM, programs the QP context from the
 * cm_node, moves the QP to RTS and reports IW_CM_EVENT_ESTABLISHED.
 *
 * Returns 0 on success.  On failure returns a negative errno after freeing
 * the LSMM resources and dropping a cm_node reference; -EINVAL is returned
 * directly when the QP lookup fails.
 */
int
irdma_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
{
	struct ib_qp *ibqp;
	struct irdma_qp *iwqp;
	struct irdma_device *iwdev;
	struct irdma_sc_dev *dev;
	struct irdma_cm_node *cm_node;
	struct ib_qp_attr attr = {0};
	int passive_state;
	struct ib_mr *ibmr;
	struct irdma_pd *iwpd;
	u16 buf_len = 0;
	struct irdma_kmem_info accept;
	u64 tagged_offset;
	int wait_ret;
	int ret = 0;

	ibqp = irdma_get_qp(cm_id->device, conn_param->qpn);
	if (!ibqp)
		return -EINVAL;

	iwqp = to_iwqp(ibqp);
	iwdev = iwqp->iwdev;
	dev = &iwdev->rf->sc_dev;
	cm_node = cm_id->provider_data;

	/* refresh address family and VLAN id from the local address */
	if (((struct sockaddr_in *)&cm_id->local_addr)->sin_family == AF_INET) {
		cm_node->ipv4 = true;
		cm_node->vlan_id = irdma_get_vlan_ipv4(cm_node->loc_addr);
	} else {
		cm_node->ipv4 = false;
		irdma_netdev_vlan_ipv6(cm_node->loc_addr, &cm_node->vlan_id,
				       NULL);
	}
	irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM, "Accept vlan_id=%d\n",
		    cm_node->vlan_id);

	if (cm_node->state == IRDMA_CM_STATE_LISTENER_DESTROYED) {
		ret = -EINVAL;
		goto error;
	}

	/* a reset event already recorded in passive_state aborts the accept */
	passive_state = atomic_add_return(1, &cm_node->passive_state);
	if (passive_state == IRDMA_SEND_RESET_EVENT) {
		ret = -ECONNRESET;
		goto error;
	}

	/* DMA buffer sized for the IETF/MPA header plus caller private data */
	buf_len = conn_param->private_data_len + IRDMA_MAX_IETF_SIZE;
	iwqp->ietf_mem.size = buf_len;
	iwqp->ietf_mem.va = irdma_allocate_dma_mem(dev->hw, &iwqp->ietf_mem,
						   iwqp->ietf_mem.size, 1);
	if (!iwqp->ietf_mem.va) {
		ret = -ENOMEM;
		goto error;
	}

	/* MPA reply frame first, private data appended right after it */
	cm_node->pdata.size = conn_param->private_data_len;
	accept.addr = iwqp->ietf_mem.va;
	accept.size = irdma_cm_build_mpa_frame(cm_node, &accept, MPA_KEY_REPLY);
	memcpy((u8 *)accept.addr + accept.size, conn_param->private_data,
	       conn_param->private_data_len);

	if (cm_node->dev->ws_add(iwqp->sc_qp.vsi, cm_node->user_pri)) {
		ret = -ENOMEM;
		goto error;
	}
	iwqp->sc_qp.user_pri = cm_node->user_pri;
	irdma_qp_add_qos(&iwqp->sc_qp);
	if (cm_node->dev->hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_2)
		iwdev->rf->check_fc(&iwdev->vsi, &iwqp->sc_qp);
	/* setup our first outgoing iWarp send WQE (the IETF frame response) */
	iwpd = iwqp->iwpd;
	tagged_offset = (uintptr_t)iwqp->ietf_mem.va;
	ibmr = irdma_reg_phys_mr(&iwpd->ibpd, iwqp->ietf_mem.pa, buf_len,
				 IB_ACCESS_LOCAL_WRITE, &tagged_offset);
	if (IS_ERR(ibmr)) {
		ret = -ENOMEM;
		goto error;
	}

	ibmr->pd = &iwpd->ibpd;
	ibmr->device = iwpd->ibpd.device;
	iwqp->lsmm_mr = ibmr;
	/* when the QP has a page, map it as the SQ base while posting the LSMM */
	if (iwqp->page)
		iwqp->sc_qp.qp_uk.sq_base = kmap_local_page(iwqp->page);

	cm_node->lsmm_size = accept.size + conn_param->private_data_len;
	irdma_sc_send_lsmm(&iwqp->sc_qp, iwqp->ietf_mem.va, cm_node->lsmm_size,
			   ibmr->lkey);

	if (iwqp->page)
		kunmap_local(iwqp->sc_qp.qp_uk.sq_base);

	/* cross-link cm_id, QP and cm_node, then take QP and cm_id references */
	iwqp->cm_id = cm_id;
	cm_node->cm_id = cm_id;

	cm_id->provider_data = iwqp;
	iwqp->active_conn = 0;
	iwqp->cm_node = cm_node;
	cm_node->iwqp = iwqp;
	irdma_cm_init_tsa_conn(iwqp, cm_node);
	irdma_qp_add_ref(&iwqp->ibqp);
	cm_id->add_ref(cm_id);

	attr.qp_state = IB_QPS_RTS;
	cm_node->qhash_set = false;
	cm_node->cm_core->cm_free_ah(cm_node);

	irdma_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL);
	/* on HW with the RTS_AE feature, wait for the RTS async event */
	if (dev->hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_RTS_AE) {
		wait_ret = wait_event_interruptible_timeout(iwqp->waitq,
							    iwqp->rts_ae_rcvd,
							    IRDMA_MAX_TIMEOUT);
		/* only a zero return (timeout) is treated as failure here */
		if (!wait_ret) {
			irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM,
				    "Slow Connection: cm_node=%p, loc_port=%d, rem_port=%d, cm_id=%p\n",
				    cm_node, cm_node->loc_port,
				    cm_node->rem_port, cm_node->cm_id);
			ret = -ECONNRESET;
			goto error;
		}
	}

	irdma_send_cm_event(cm_node, cm_id, IW_CM_EVENT_ESTABLISHED, 0);
	cm_node->accelerated = true;
	complete(&cm_node->establish_comp);

	/* this connection no longer counts against the listener's pending accepts */
	if (cm_node->accept_pend) {
		atomic_dec(&cm_node->listener->pend_accepts_cnt);
		cm_node->accept_pend = 0;
	}

	irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM,
		    "rem_port=0x%04x, loc_port=0x%04x rem_addr=%pI4 loc_addr=%pI4 cm_node=%p cm_id=%p qp_id = %d\n\n",
		    cm_node->rem_port, cm_node->loc_port, cm_node->rem_addr,
		    cm_node->loc_addr, cm_node, cm_id, ibqp->qp_num);
	cm_node->cm_core->stats_accepts++;

	return 0;
error:
	/* common failure path: release LSMM buffer/MR and drop a cm_node ref */
	irdma_free_lsmm_rsrc(iwqp);
	irdma_rem_ref_cm_node(cm_node);

	return ret;
}
3629 
3630 /**
3631  * irdma_reject - registered call for connection to be rejected
3632  * @cm_id: cm information for passive connection
3633  * @pdata: private data to be sent
3634  * @pdata_len: private data length
3635  */
3636 int
3637 irdma_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
3638 {
3639 	struct irdma_device *iwdev;
3640 	struct irdma_cm_node *cm_node;
3641 
3642 	cm_node = cm_id->provider_data;
3643 	cm_node->pdata.size = pdata_len;
3644 
3645 	iwdev = to_iwdev(cm_id->device);
3646 	if (!iwdev)
3647 		return -EINVAL;
3648 
3649 	cm_node->cm_core->stats_rejects++;
3650 
3651 	if (pdata_len + sizeof(struct ietf_mpa_v2) > IRDMA_MAX_CM_BUF)
3652 		return -EINVAL;
3653 
3654 	return irdma_cm_reject(cm_node, pdata, pdata_len);
3655 }
3656 
3657 /**
3658  * irdma_connect - registered call for connection to be established
3659  * @cm_id: cm information for passive connection
3660  * @conn_param: Information about the connection
3661  */
3662 int
3663 irdma_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3664 {
3665 	struct ib_qp *ibqp;
3666 	struct irdma_qp *iwqp;
3667 	struct irdma_device *iwdev;
3668 	struct irdma_cm_node *cm_node;
3669 	struct irdma_cm_info cm_info;
3670 	struct sockaddr_in *laddr;
3671 	struct sockaddr_in *raddr;
3672 	struct sockaddr_in6 *laddr6;
3673 	struct sockaddr_in6 *raddr6;
3674 	int ret = 0;
3675 
3676 	ibqp = irdma_get_qp(cm_id->device, conn_param->qpn);
3677 	if (!ibqp)
3678 		return -EINVAL;
3679 	iwqp = to_iwqp(ibqp);
3680 	if (!iwqp)
3681 		return -EINVAL;
3682 	iwdev = iwqp->iwdev;
3683 	if (!iwdev)
3684 		return -EINVAL;
3685 
3686 	laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
3687 	raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
3688 	laddr6 = (struct sockaddr_in6 *)&cm_id->m_local_addr;
3689 	raddr6 = (struct sockaddr_in6 *)&cm_id->m_remote_addr;
3690 
3691 	if (!(laddr->sin_port) || !(raddr->sin_port))
3692 		return -EINVAL;
3693 
3694 	iwqp->active_conn = 1;
3695 	iwqp->cm_id = NULL;
3696 	cm_id->provider_data = iwqp;
3697 
3698 	/* set up the connection params for the node */
3699 	if (cm_id->remote_addr.ss_family == AF_INET) {
3700 		if (iwdev->vsi.mtu < IRDMA_MIN_MTU_IPV4)
3701 			return -EINVAL;
3702 
3703 		cm_info.ipv4 = true;
3704 		memset(cm_info.loc_addr, 0, sizeof(cm_info.loc_addr));
3705 		memset(cm_info.rem_addr, 0, sizeof(cm_info.rem_addr));
3706 		cm_info.loc_addr[0] = ntohl(laddr->sin_addr.s_addr);
3707 		cm_info.rem_addr[0] = ntohl(raddr->sin_addr.s_addr);
3708 		cm_info.loc_port = ntohs(laddr->sin_port);
3709 		cm_info.rem_port = ntohs(raddr->sin_port);
3710 		cm_info.vlan_id = irdma_get_vlan_ipv4(cm_info.loc_addr);
3711 	} else {
3712 		if (iwdev->vsi.mtu < IRDMA_MIN_MTU_IPV6)
3713 			return -EINVAL;
3714 
3715 		cm_info.ipv4 = false;
3716 		irdma_copy_ip_ntohl(cm_info.loc_addr,
3717 				    laddr6->sin6_addr.__u6_addr.__u6_addr32);
3718 		irdma_copy_ip_ntohl(cm_info.rem_addr,
3719 				    raddr6->sin6_addr.__u6_addr.__u6_addr32);
3720 		cm_info.loc_port = ntohs(laddr6->sin6_port);
3721 		cm_info.rem_port = ntohs(raddr6->sin6_port);
3722 		irdma_netdev_vlan_ipv6(cm_info.loc_addr, &cm_info.vlan_id, NULL);
3723 	}
3724 	cm_info.cm_id = cm_id;
3725 	cm_info.qh_qpid = iwdev->vsi.ilq->qp_id;
3726 	cm_info.tos = cm_id->tos;
3727 	if (iwdev->vsi.dscp_mode) {
3728 		cm_info.user_pri =
3729 		    iwqp->sc_qp.vsi->dscp_map[irdma_tos2dscp(cm_info.tos)];
3730 	} else {
3731 		cm_info.user_pri = rt_tos2priority(cm_id->tos);
3732 		cm_info.user_pri = irdma_get_egress_vlan_prio(cm_info.loc_addr,
3733 							      cm_info.user_pri,
3734 							      cm_info.ipv4);
3735 	}
3736 
3737 	if (iwqp->sc_qp.dev->ws_add(iwqp->sc_qp.vsi, cm_info.user_pri))
3738 		return -ENOMEM;
3739 	iwqp->sc_qp.user_pri = cm_info.user_pri;
3740 	irdma_qp_add_qos(&iwqp->sc_qp);
3741 	if (iwdev->rf->sc_dev.hw_attrs.uk_attrs.hw_rev == IRDMA_GEN_2)
3742 		iwdev->rf->check_fc(&iwdev->vsi, &iwqp->sc_qp);
3743 	irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_DCB,
3744 		    "TOS:[%d] UP:[%d]\n", cm_id->tos,
3745 		    cm_info.user_pri);
3746 
3747 	ret = irdma_create_cm_node(&iwdev->cm_core, iwdev, conn_param, &cm_info,
3748 				   &cm_node);
3749 	if (ret)
3750 		return ret;
3751 	ret = cm_node->cm_core->cm_create_ah(cm_node, true);
3752 	if (ret)
3753 		goto err;
3754 	if (irdma_manage_qhash(iwdev, &cm_info,
3755 			       IRDMA_QHASH_TYPE_TCP_ESTABLISHED,
3756 			       IRDMA_QHASH_MANAGE_TYPE_ADD, NULL, true)) {
3757 		ret = -EINVAL;
3758 		goto err;
3759 	}
3760 	cm_node->qhash_set = true;
3761 
3762 	cm_node->apbvt_entry = irdma_add_apbvt(iwdev, cm_info.loc_port);
3763 	if (!cm_node->apbvt_entry) {
3764 		ret = -EINVAL;
3765 		goto err;
3766 	}
3767 
3768 	cm_node->apbvt_set = true;
3769 	iwqp->cm_node = cm_node;
3770 	cm_node->iwqp = iwqp;
3771 	iwqp->cm_id = cm_id;
3772 	irdma_qp_add_ref(&iwqp->ibqp);
3773 	cm_id->add_ref(cm_id);
3774 
3775 	if (cm_node->state != IRDMA_CM_STATE_OFFLOADED) {
3776 		cm_node->state = IRDMA_CM_STATE_SYN_SENT;
3777 		ret = irdma_send_syn(cm_node, 0);
3778 		if (ret)
3779 			goto err;
3780 	}
3781 
3782 	irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM,
3783 		    "rem_port=0x%04x, loc_port=0x%04x rem_addr=%pI4 loc_addr=%pI4 cm_node=%p cm_id=%p qp_id = %d\n\n",
3784 		    cm_node->rem_port, cm_node->loc_port, cm_node->rem_addr,
3785 		    cm_node->loc_addr, cm_node, cm_id, ibqp->qp_num);
3786 
3787 	return 0;
3788 
3789 err:
3790 	if (cm_info.ipv4)
3791 		irdma_debug(&iwdev->rf->sc_dev,
3792 			    IRDMA_DEBUG_CM, "connect() FAILED: dest addr=%pI4",
3793 			    cm_info.rem_addr);
3794 	else
3795 		irdma_debug(&iwdev->rf->sc_dev,
3796 			    IRDMA_DEBUG_CM, "connect() FAILED: dest addr=%pI6",
3797 			    cm_info.rem_addr);
3798 	irdma_rem_ref_cm_node(cm_node);
3799 	iwdev->cm_core.stats_connect_errs++;
3800 
3801 	return ret;
3802 }
3803 
3804 /**
3805  * irdma_create_listen - registered call creating listener
3806  * @cm_id: cm information for passive connection
3807  * @backlog: to max accept pending count
3808  */
3809 int
3810 irdma_create_listen(struct iw_cm_id *cm_id, int backlog)
3811 {
3812 	struct irdma_device *iwdev;
3813 	struct irdma_cm_listener *cm_listen_node;
3814 	struct irdma_cm_info cm_info = {0};
3815 	struct sockaddr_in *laddr;
3816 	struct sockaddr_in6 *laddr6;
3817 	bool wildcard = false;
3818 	int err;
3819 
3820 	iwdev = to_iwdev(cm_id->device);
3821 	if (!iwdev)
3822 		return -EINVAL;
3823 
3824 	laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
3825 	laddr6 = (struct sockaddr_in6 *)&cm_id->m_local_addr;
3826 	cm_info.qh_qpid = iwdev->vsi.ilq->qp_id;
3827 
3828 	if (laddr->sin_family == AF_INET) {
3829 		if (iwdev->vsi.mtu < IRDMA_MIN_MTU_IPV4)
3830 			return -EINVAL;
3831 
3832 		cm_info.ipv4 = true;
3833 		cm_info.loc_addr[0] = ntohl(laddr->sin_addr.s_addr);
3834 		cm_info.loc_port = ntohs(laddr->sin_port);
3835 
3836 		if (laddr->sin_addr.s_addr != htonl(INADDR_ANY)) {
3837 			cm_info.vlan_id = irdma_get_vlan_ipv4(cm_info.loc_addr);
3838 		} else {
3839 			cm_info.vlan_id = 0xFFFF;
3840 			wildcard = true;
3841 		}
3842 	} else {
3843 		if (iwdev->vsi.mtu < IRDMA_MIN_MTU_IPV6)
3844 			return -EINVAL;
3845 
3846 		cm_info.ipv4 = false;
3847 		irdma_copy_ip_ntohl(cm_info.loc_addr,
3848 				    laddr6->sin6_addr.__u6_addr.__u6_addr32);
3849 		cm_info.loc_port = ntohs(laddr6->sin6_port);
3850 		if (!IN6_IS_ADDR_UNSPECIFIED(&laddr6->sin6_addr)) {
3851 			irdma_netdev_vlan_ipv6(cm_info.loc_addr,
3852 					       &cm_info.vlan_id, NULL);
3853 		} else {
3854 			cm_info.vlan_id = 0xFFFF;
3855 			wildcard = true;
3856 		}
3857 	}
3858 
3859 	if (cm_info.vlan_id >= VLAN_N_VID && iwdev->dcb_vlan_mode)
3860 		cm_info.vlan_id = 0;
3861 	cm_info.backlog = backlog;
3862 	cm_info.cm_id = cm_id;
3863 
3864 	cm_listen_node = irdma_make_listen_node(&iwdev->cm_core, iwdev,
3865 						&cm_info);
3866 	if (!cm_listen_node) {
3867 		irdma_debug(&iwdev->rf->sc_dev,
3868 			    IRDMA_DEBUG_CM, "cm_listen_node == NULL\n");
3869 		return -ENOMEM;
3870 	}
3871 
3872 	cm_id->provider_data = cm_listen_node;
3873 
3874 	cm_listen_node->tos = cm_id->tos;
3875 	if (iwdev->vsi.dscp_mode)
3876 		cm_listen_node->user_pri =
3877 		    iwdev->vsi.dscp_map[irdma_tos2dscp(cm_id->tos)];
3878 	else
3879 		cm_listen_node->user_pri = rt_tos2priority(cm_id->tos);
3880 	cm_info.user_pri = cm_listen_node->user_pri;
3881 	if (!cm_listen_node->reused_node) {
3882 		if (wildcard) {
3883 			err = irdma_add_mqh(iwdev, &cm_info, cm_listen_node);
3884 			if (err)
3885 				goto error;
3886 		} else {
3887 			if (!iwdev->vsi.dscp_mode)
3888 				cm_info.user_pri = cm_listen_node->user_pri =
3889 				    irdma_get_egress_vlan_prio(cm_info.loc_addr,
3890 							       cm_info.user_pri,
3891 							       cm_info.ipv4);
3892 			err = irdma_manage_qhash(iwdev, &cm_info,
3893 						 IRDMA_QHASH_TYPE_TCP_SYN,
3894 						 IRDMA_QHASH_MANAGE_TYPE_ADD,
3895 						 NULL, true);
3896 			if (err)
3897 				goto error;
3898 
3899 			cm_listen_node->qhash_set = true;
3900 		}
3901 
3902 		cm_listen_node->apbvt_entry = irdma_add_apbvt(iwdev,
3903 							      cm_info.loc_port);
3904 		if (!cm_listen_node->apbvt_entry)
3905 			goto error;
3906 	}
3907 	cm_id->add_ref(cm_id);
3908 	cm_listen_node->cm_core->stats_listen_created++;
3909 	irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM,
3910 		    "loc_port=0x%04x loc_addr=%pI4 cm_listen_node=%p cm_id=%p qhash_set=%d vlan_id=%d\n",
3911 		    cm_listen_node->loc_port, cm_listen_node->loc_addr,
3912 		    cm_listen_node, cm_listen_node->cm_id,
3913 		    cm_listen_node->qhash_set, cm_listen_node->vlan_id);
3914 
3915 	return 0;
3916 
3917 error:
3918 
3919 	irdma_cm_del_listen(&iwdev->cm_core, cm_listen_node, false);
3920 
3921 	return -EINVAL;
3922 }
3923 
3924 /**
3925  * irdma_destroy_listen - registered call to destroy listener
3926  * @cm_id: cm information for passive connection
3927  */
3928 int
3929 irdma_destroy_listen(struct iw_cm_id *cm_id)
3930 {
3931 	struct irdma_device *iwdev;
3932 
3933 	iwdev = to_iwdev(cm_id->device);
3934 	if (cm_id->provider_data)
3935 		irdma_cm_del_listen(&iwdev->cm_core, cm_id->provider_data,
3936 				    true);
3937 	else
3938 		irdma_debug(&iwdev->rf->sc_dev,
3939 			    IRDMA_DEBUG_CM, "cm_id->provider_data was NULL\n");
3940 
3941 	cm_id->rem_ref(cm_id);
3942 
3943 	return 0;
3944 }
3945 
3946 /**
3947  * irdma_iw_teardown_list_prep - add conn nodes slated for tear
3948  * down to list
3949  * @cm_core: cm's core
3950  * @teardown_list: a list to which cm_node will be selected
3951  * @ipaddr: pointer to ip address
3952  * @nfo: pointer to cm_info structure instance
3953  * @disconnect_all: flag indicating disconnect all QPs
3954  */
3955 static void
3956 irdma_iw_teardown_list_prep(struct irdma_cm_core *cm_core,
3957 			    struct list_head *teardown_list,
3958 			    u32 *ipaddr,
3959 			    struct irdma_cm_info *nfo,
3960 			    bool disconnect_all)
3961 {
3962 	struct irdma_cm_node *cm_node;
3963 	int bkt;
3964 
3965 	HASH_FOR_EACH_RCU(cm_core->cm_hash_tbl, bkt, cm_node, list) {
3966 		if ((disconnect_all ||
3967 		     (nfo->vlan_id == cm_node->vlan_id &&
3968 		      !memcmp(cm_node->loc_addr, ipaddr, nfo->ipv4 ? 4 : 16))) &&
3969 		    atomic_inc_not_zero(&cm_node->refcnt))
3970 			list_add(&cm_node->teardown_entry, teardown_list);
3971 	}
3972 }
3973 
3974 static inline bool
3975 irdma_ip_vlan_match(u32 *ip1, u16 vlan_id1,
3976 		    bool check_vlan, u32 *ip2,
3977 		    u16 vlan_id2, bool ipv4)
3978 {
3979 	return (!check_vlan || vlan_id1 == vlan_id2) &&
3980 	    !memcmp(ip1, ip2, ipv4 ? 4 : 16);
3981 }
3982 
3983 /**
3984  * irdma_roce_teardown_list_prep - add conn nodes slated for
3985  * tear down to list
3986  * @iwdev: RDMA device
3987  * @teardown_list: a list to which cm_node will be selected
3988  * @ipaddr: pointer to ip address
3989  * @nfo: pointer to cm_info structure instance
3990  * @disconnect_all: flag indicating disconnect all QPs
3991  */
3992 static void
3993 irdma_roce_teardown_list_prep(struct irdma_device *iwdev,
3994 			      struct list_head *teardown_list,
3995 			      u32 *ipaddr,
3996 			      struct irdma_cm_info *nfo,
3997 			      bool disconnect_all)
3998 {
3999 	struct irdma_sc_vsi *vsi = &iwdev->vsi;
4000 	struct irdma_sc_qp *sc_qp;
4001 	struct list_head *list_node;
4002 	struct irdma_qp *qp;
4003 	unsigned long flags;
4004 	int i;
4005 
4006 	for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) {
4007 		mutex_lock(&vsi->qos[i].qos_mutex);
4008 		list_for_each(list_node, &vsi->qos[i].qplist) {
4009 			u32 qp_ip[4];
4010 
4011 			sc_qp = container_of(list_node, struct irdma_sc_qp,
4012 					     list);
4013 			if (sc_qp->qp_uk.qp_type != IRDMA_QP_TYPE_ROCE_RC)
4014 				continue;
4015 
4016 			qp = sc_qp->qp_uk.back_qp;
4017 			if (!disconnect_all) {
4018 				if (nfo->ipv4)
4019 					qp_ip[0] = qp->udp_info.local_ipaddr[3];
4020 				else
4021 					memcpy(qp_ip,
4022 					       &qp->udp_info.local_ipaddr[0],
4023 					       sizeof(qp_ip));
4024 			}
4025 
4026 			if (disconnect_all ||
4027 			    irdma_ip_vlan_match(qp_ip,
4028 						qp->udp_info.vlan_tag & EVL_VLID_MASK,
4029 						qp->udp_info.insert_vlan_tag,
4030 						ipaddr, nfo->vlan_id, nfo->ipv4)) {
4031 				spin_lock_irqsave(&iwdev->rf->qptable_lock, flags);
4032 				if (iwdev->rf->qp_table[sc_qp->qp_uk.qp_id]) {
4033 					irdma_qp_add_ref(&qp->ibqp);
4034 					list_add(&qp->teardown_entry, teardown_list);
4035 				}
4036 				spin_unlock_irqrestore(&iwdev->rf->qptable_lock, flags);
4037 			}
4038 		}
4039 		mutex_unlock(&vsi->qos[i].qos_mutex);
4040 	}
4041 }
4042 
4043 /**
4044  * irdma_cm_event_connected - handle connected active node
4045  * @event: the info for cm_node of connection
4046  */
4047 static void
4048 irdma_cm_event_connected(struct irdma_cm_event *event)
4049 {
4050 	struct irdma_qp *iwqp;
4051 	struct irdma_device *iwdev;
4052 	struct irdma_cm_node *cm_node;
4053 	struct irdma_sc_dev *dev;
4054 	struct ib_qp_attr attr = {0};
4055 	struct iw_cm_id *cm_id;
4056 	int status;
4057 	bool read0;
4058 	int wait_ret = 0;
4059 
4060 	cm_node = event->cm_node;
4061 	cm_id = cm_node->cm_id;
4062 	iwqp = cm_id->provider_data;
4063 	iwdev = iwqp->iwdev;
4064 	dev = &iwdev->rf->sc_dev;
4065 	if (iwqp->sc_qp.qp_uk.destroy_pending) {
4066 		status = -ETIMEDOUT;
4067 		goto error;
4068 	}
4069 
4070 	irdma_cm_init_tsa_conn(iwqp, cm_node);
4071 	read0 = (cm_node->send_rdma0_op == SEND_RDMA_READ_ZERO);
4072 	if (iwqp->page)
4073 		iwqp->sc_qp.qp_uk.sq_base = kmap_local_page(iwqp->page);
4074 	irdma_sc_send_rtt(&iwqp->sc_qp, read0);
4075 	if (iwqp->page)
4076 		kunmap_local(iwqp->sc_qp.qp_uk.sq_base);
4077 
4078 	attr.qp_state = IB_QPS_RTS;
4079 	cm_node->qhash_set = false;
4080 	irdma_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL);
4081 	if (dev->hw_attrs.uk_attrs.feature_flags & IRDMA_FEATURE_RTS_AE) {
4082 		wait_ret = wait_event_interruptible_timeout(iwqp->waitq,
4083 							    iwqp->rts_ae_rcvd,
4084 							    IRDMA_MAX_TIMEOUT);
4085 		if (!wait_ret)
4086 			irdma_debug(&iwdev->rf->sc_dev, IRDMA_DEBUG_CM,
4087 				    "Slow Connection: cm_node=%p, loc_port=%d, rem_port=%d, cm_id=%p\n",
4088 				    cm_node, cm_node->loc_port,
4089 				    cm_node->rem_port, cm_node->cm_id);
4090 	}
4091 
4092 	irdma_send_cm_event(cm_node, cm_id, IW_CM_EVENT_CONNECT_REPLY, 0);
4093 	cm_node->accelerated = true;
4094 	complete(&cm_node->establish_comp);
4095 	cm_node->cm_core->cm_free_ah(cm_node);
4096 	return;
4097 
4098 error:
4099 	iwqp->cm_id = NULL;
4100 	cm_id->provider_data = NULL;
4101 	irdma_send_cm_event(event->cm_node, cm_id, IW_CM_EVENT_CONNECT_REPLY,
4102 			    status);
4103 	irdma_rem_ref_cm_node(event->cm_node);
4104 }
4105 
4106 /**
4107  * irdma_cm_event_reset - handle reset
4108  * @event: the info for cm_node of connection
4109  */
4110 static void
4111 irdma_cm_event_reset(struct irdma_cm_event *event)
4112 {
4113 	struct irdma_cm_node *cm_node = event->cm_node;
4114 	struct iw_cm_id *cm_id = cm_node->cm_id;
4115 	struct irdma_qp *iwqp;
4116 
4117 	if (!cm_id)
4118 		return;
4119 
4120 	iwqp = cm_id->provider_data;
4121 	if (!iwqp)
4122 		return;
4123 
4124 	irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM,
4125 		    "reset event %p - cm_id = %p\n",
4126 		    event->cm_node, cm_id);
4127 	iwqp->cm_id = NULL;
4128 
4129 	irdma_send_cm_event(cm_node, cm_node->cm_id, IW_CM_EVENT_DISCONNECT,
4130 			    -ECONNRESET);
4131 	irdma_send_cm_event(cm_node, cm_node->cm_id, IW_CM_EVENT_CLOSE, 0);
4132 }
4133 
/**
 * irdma_cm_event_handler - send event to cm upper layer
 * @work: pointer of cm event info.
 *
 * Work handler for events queued by irdma_cm_post_event().  Dispatches
 * on the event type, then drops the cm_node reference taken when the
 * event was posted and frees the event.
 */
static void
irdma_cm_event_handler(struct work_struct *work)
{
	struct irdma_cm_event *event = container_of(work, struct irdma_cm_event, event_work);
	struct irdma_cm_node *cm_node;

	if (!event || !event->cm_node || !event->cm_node->cm_core) {
		/*
		 * This handler owns the event, so free it rather than leak
		 * it on the bail-out path (kfree() accepts NULL).
		 * NOTE(review): the cm_node reference taken at post time is
		 * not dropped here; irdma_rem_ref_cm_node() presumably needs
		 * a valid cm_core - confirm before adding it.
		 */
		kfree(event);
		return;
	}

	cm_node = event->cm_node;

	switch (event->type) {
	case IRDMA_CM_EVENT_MPA_REQ:
		irdma_send_cm_event(cm_node, cm_node->cm_id,
				    IW_CM_EVENT_CONNECT_REQUEST, 0);
		break;
	case IRDMA_CM_EVENT_RESET:
		irdma_cm_event_reset(event);
		break;
	case IRDMA_CM_EVENT_CONNECTED:
		/* Only meaningful for a node with a cm_id that is offloaded. */
		if (!cm_node->cm_id ||
		    cm_node->state != IRDMA_CM_STATE_OFFLOADED)
			break;
		irdma_cm_event_connected(event);
		break;
	case IRDMA_CM_EVENT_MPA_REJECT:
		/* Ignored once the node is offloaded. */
		if (!cm_node->cm_id ||
		    cm_node->state == IRDMA_CM_STATE_OFFLOADED)
			break;
		irdma_send_cm_event(cm_node, cm_node->cm_id,
				    IW_CM_EVENT_CONNECT_REPLY, -ECONNREFUSED);
		break;
	case IRDMA_CM_EVENT_ABORTED:
		/* Ignored once the node is offloaded. */
		if (!cm_node->cm_id ||
		    cm_node->state == IRDMA_CM_STATE_OFFLOADED)
			break;
		irdma_event_connect_error(event);
		break;
	default:
		irdma_debug(&cm_node->iwdev->rf->sc_dev, IRDMA_DEBUG_CM,
			    "bad event type = %d\n",
			    event->type);
		break;
	}

	/* Balance the reference taken in irdma_cm_post_event(). */
	irdma_rem_ref_cm_node(cm_node);
	kfree(event);
}
4186 
4187 /**
4188  * irdma_cm_post_event - queue event request for worker thread
4189  * @event: cm node's info for up event call
4190  */
4191 static void
4192 irdma_cm_post_event(struct irdma_cm_event *event)
4193 {
4194 	atomic_inc(&event->cm_node->refcnt);
4195 	INIT_WORK(&event->event_work, irdma_cm_event_handler);
4196 	queue_work(event->cm_node->cm_core->event_wq, &event->event_work);
4197 }
4198 
4199 /**
4200  * irdma_cm_teardown_connections - teardown QPs
4201  * @iwdev: device pointer
4202  * @ipaddr: Pointer to IPv4 or IPv6 address
4203  * @nfo: Connection info
4204  * @disconnect_all: flag indicating disconnect all QPs
4205  *
4206  * teardown QPs where source or destination addr matches ip addr
4207  */
4208 static void __unused
4209 irdma_cm_teardown_connections(struct irdma_device *iwdev,
4210 			      u32 *ipaddr,
4211 			      struct irdma_cm_info *nfo,
4212 			      bool disconnect_all)
4213 {
4214 	struct irdma_cm_core *cm_core = &iwdev->cm_core;
4215 	struct list_head *list_core_temp;
4216 	struct list_head *list_node;
4217 	struct irdma_cm_node *cm_node;
4218 	struct list_head teardown_list;
4219 	struct ib_qp_attr attr;
4220 	struct irdma_qp *qp;
4221 
4222 	INIT_LIST_HEAD(&teardown_list);
4223 
4224 	rcu_read_lock();
4225 	irdma_iw_teardown_list_prep(cm_core, &teardown_list, ipaddr, nfo, disconnect_all);
4226 	rcu_read_unlock();
4227 
4228 	attr.qp_state = IB_QPS_ERR;
4229 	list_for_each_safe(list_node, list_core_temp, &teardown_list) {
4230 		cm_node = container_of(list_node, struct irdma_cm_node,
4231 				       teardown_entry);
4232 		irdma_modify_qp(&cm_node->iwqp->ibqp, &attr, IB_QP_STATE, NULL);
4233 		if (iwdev->rf->reset)
4234 			irdma_cm_disconn(cm_node->iwqp);
4235 		irdma_rem_ref_cm_node(cm_node);
4236 	}
4237 
4238 	if (!rdma_protocol_roce(&iwdev->ibdev, 1))
4239 		return;
4240 
4241 	INIT_LIST_HEAD(&teardown_list);
4242 	irdma_roce_teardown_list_prep(iwdev, &teardown_list, ipaddr, nfo, disconnect_all);
4243 
4244 	list_for_each_safe(list_node, list_core_temp, &teardown_list) {
4245 		qp = container_of(list_node, struct irdma_qp, teardown_entry);
4246 		irdma_modify_qp_roce(&qp->ibqp, &attr, IB_QP_STATE, NULL);
4247 		irdma_ib_qp_event(qp, IRDMA_QP_EVENT_CATASTROPHIC);
4248 		irdma_qp_rem_ref(&qp->ibqp);
4249 	}
4250 }
4251