xref: /linux/drivers/infiniband/hw/cxgb4/cm.c (revision 95e9fd10f06cb5642028b6b851e32b8c8afb4571)
1 /*
2  * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *	  copyright notice, this list of conditions and the following
16  *	  disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *	  copyright notice, this list of conditions and the following
20  *	  disclaimer in the documentation and/or other materials
21  *	  provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 #include <linux/module.h>
33 #include <linux/list.h>
34 #include <linux/workqueue.h>
35 #include <linux/skbuff.h>
36 #include <linux/timer.h>
37 #include <linux/notifier.h>
38 #include <linux/inetdevice.h>
39 #include <linux/ip.h>
40 #include <linux/tcp.h>
41 
42 #include <net/neighbour.h>
43 #include <net/netevent.h>
44 #include <net/route.h>
45 
46 #include "iw_cxgb4.h"
47 
/*
 * Printable names for the endpoint states, used by the debug logging in
 * this file.  The table is indexed directly with the endpoint state value
 * (see state_set() and _c4iw_free_ep()), so the order of the strings must
 * stay in step with enum c4iw_ep_state.  The table is NULL terminated.
 */
static char *states[] = {
	"idle",
	"listen",
	"connecting",
	"mpa_wait_req",
	"mpa_req_sent",
	"mpa_req_rcvd",
	"mpa_rep_sent",
	"fpdu_mode",
	"aborting",
	"closing",
	"moribund",
	"dead",
	NULL,
};
63 
/* Delayed-ack mode programmed into every CPL_RX_DATA_ACK we send. */
static int dack_mode = 1;
module_param(dack_mode, int, 0644);
MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=1)");

/* Not static: also reported as ORD/IRD for MPA v1 connect requests. */
int c4iw_max_read_depth = 8;
module_param(c4iw_max_read_depth, int, 0644);
MODULE_PARM_DESC(c4iw_max_read_depth, "Per-connection max ORD/IRD (default=8)");

/* The next three knobs select TCP options requested in send_connect(). */
static int enable_tcp_timestamps;
module_param(enable_tcp_timestamps, int, 0644);
MODULE_PARM_DESC(enable_tcp_timestamps, "Enable tcp timestamps (default=0)");

static int enable_tcp_sack;
module_param(enable_tcp_sack, int, 0644);
MODULE_PARM_DESC(enable_tcp_sack, "Enable tcp SACK (default=0)");

static int enable_tcp_window_scaling = 1;
module_param(enable_tcp_window_scaling, int, 0644);
MODULE_PARM_DESC(enable_tcp_window_scaling,
		 "Enable tcp window scaling (default=1)");

/* Not static, so it is visible outside this file. */
int c4iw_debug;
module_param(c4iw_debug, int, 0644);
MODULE_PARM_DESC(c4iw_debug, "Enable debug logging (default=0)");

/* When set, the peer-to-peer model bits are advertised in MPA v2 params. */
static int peer2peer;
module_param(peer2peer, int, 0644);
MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=0)");

/* Selects which RTR flag (RDMA read or RDMA write) is advertised. */
static int p2p_type = FW_RI_INIT_P2PTYPE_READ_REQ;
module_param(p2p_type, int, 0644);
MODULE_PARM_DESC(p2p_type, "RDMAP opcode to use for the RTR message: "
			   "1=RDMA_READ 0=RDMA_WRITE (default 1)");

/* Timeout, in seconds, used by start_ep_timer() when arming ep->timer. */
static int ep_timeout_secs = 60;
module_param(ep_timeout_secs, int, 0644);
MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout "
				   "in seconds (default=60)");
102 
103 static int mpa_rev = 1;
104 module_param(mpa_rev, int, 0644);
105 MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, "
106 		"1 is RFC0544 spec compliant, 2 is IETF MPA Peer Connect Draft"
107 		" compliant (default=1)");
108 
/* When set, MPA_MARKERS is advertised in the MPA request/reply flags. */
static int markers_enabled;
module_param(markers_enabled, int, 0644);
MODULE_PARM_DESC(markers_enabled, "Enable MPA MARKERS (default(0)=disabled)");

/* When set, MPA_CRC is advertised in the MPA request flags. */
static int crc_enabled = 1;
module_param(crc_enabled, int, 0644);
MODULE_PARM_DESC(crc_enabled, "Enable MPA CRC (default(1)=enabled)");

/* Receive window; feeds the window scale and RCV_BUFSIZ in send_connect(). */
static int rcv_win = 256 * 1024;
module_param(rcv_win, int, 0644);
MODULE_PARM_DESC(rcv_win, "TCP receive window in bytes (default=256KB)");

/* Send window; passed to firmware as the SNDBUF flow parameter. */
static int snd_win = 128 * 1024;
module_param(snd_win, int, 0644);
MODULE_PARM_DESC(snd_win, "TCP send window in bytes (default=128KB)");
124 
/* Workqueue used by this file (its users are outside this excerpt). */
static struct workqueue_struct *workq;

/* Queue of received skbs (its users are outside this excerpt). */
static struct sk_buff_head rxq;

/* Forward declarations for helpers used before their definitions. */
static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp);
static void ep_timeout(unsigned long arg);
static void connect_reply_upcall(struct c4iw_ep *ep, int status);

/*
 * List head and lock for endpoint timeout handling.
 * NOTE(review): timeout_lock has no static initializer here; presumably it
 * is set up with spin_lock_init() during module init -- confirm.
 */
static LIST_HEAD(timeout_list);
static spinlock_t timeout_lock;
135 
136 static void start_ep_timer(struct c4iw_ep *ep)
137 {
138 	PDBG("%s ep %p\n", __func__, ep);
139 	if (timer_pending(&ep->timer)) {
140 		PDBG("%s stopped / restarted timer ep %p\n", __func__, ep);
141 		del_timer_sync(&ep->timer);
142 	} else
143 		c4iw_get_ep(&ep->com);
144 	ep->timer.expires = jiffies + ep_timeout_secs * HZ;
145 	ep->timer.data = (unsigned long)ep;
146 	ep->timer.function = ep_timeout;
147 	add_timer(&ep->timer);
148 }
149 
150 static void stop_ep_timer(struct c4iw_ep *ep)
151 {
152 	PDBG("%s ep %p\n", __func__, ep);
153 	if (!timer_pending(&ep->timer)) {
154 		printk(KERN_ERR "%s timer stopped when its not running! "
155 		       "ep %p state %u\n", __func__, ep, ep->com.state);
156 		WARN_ON(1);
157 		return;
158 	}
159 	del_timer_sync(&ep->timer);
160 	c4iw_put_ep(&ep->com);
161 }
162 
163 static int c4iw_l2t_send(struct c4iw_rdev *rdev, struct sk_buff *skb,
164 		  struct l2t_entry *l2e)
165 {
166 	int	error = 0;
167 
168 	if (c4iw_fatal_error(rdev)) {
169 		kfree_skb(skb);
170 		PDBG("%s - device in error state - dropping\n", __func__);
171 		return -EIO;
172 	}
173 	error = cxgb4_l2t_send(rdev->lldi.ports[0], skb, l2e);
174 	if (error < 0)
175 		kfree_skb(skb);
176 	return error < 0 ? error : 0;
177 }
178 
179 int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb)
180 {
181 	int	error = 0;
182 
183 	if (c4iw_fatal_error(rdev)) {
184 		kfree_skb(skb);
185 		PDBG("%s - device in error state - dropping\n", __func__);
186 		return -EIO;
187 	}
188 	error = cxgb4_ofld_send(rdev->lldi.ports[0], skb);
189 	if (error < 0)
190 		kfree_skb(skb);
191 	return error < 0 ? error : 0;
192 }
193 
194 static void release_tid(struct c4iw_rdev *rdev, u32 hwtid, struct sk_buff *skb)
195 {
196 	struct cpl_tid_release *req;
197 
198 	skb = get_skb(skb, sizeof *req, GFP_KERNEL);
199 	if (!skb)
200 		return;
201 	req = (struct cpl_tid_release *) skb_put(skb, sizeof(*req));
202 	INIT_TP_WR(req, hwtid);
203 	OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_TID_RELEASE, hwtid));
204 	set_wr_txq(skb, CPL_PRIORITY_SETUP, 0);
205 	c4iw_ofld_send(rdev, skb);
206 	return;
207 }
208 
209 static void set_emss(struct c4iw_ep *ep, u16 opt)
210 {
211 	ep->emss = ep->com.dev->rdev.lldi.mtus[GET_TCPOPT_MSS(opt)] - 40;
212 	ep->mss = ep->emss;
213 	if (GET_TCPOPT_TSTAMP(opt))
214 		ep->emss -= 12;
215 	if (ep->emss < 128)
216 		ep->emss = 128;
217 	PDBG("%s mss_idx %u mss %u emss=%u\n", __func__, GET_TCPOPT_MSS(opt),
218 	     ep->mss, ep->emss);
219 }
220 
221 static enum c4iw_ep_state state_read(struct c4iw_ep_common *epc)
222 {
223 	enum c4iw_ep_state state;
224 
225 	mutex_lock(&epc->mutex);
226 	state = epc->state;
227 	mutex_unlock(&epc->mutex);
228 	return state;
229 }
230 
231 static void __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
232 {
233 	epc->state = new;
234 }
235 
236 static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
237 {
238 	mutex_lock(&epc->mutex);
239 	PDBG("%s - %s -> %s\n", __func__, states[epc->state], states[new]);
240 	__state_set(epc, new);
241 	mutex_unlock(&epc->mutex);
242 	return;
243 }
244 
245 static void *alloc_ep(int size, gfp_t gfp)
246 {
247 	struct c4iw_ep_common *epc;
248 
249 	epc = kzalloc(size, gfp);
250 	if (epc) {
251 		kref_init(&epc->kref);
252 		mutex_init(&epc->mutex);
253 		c4iw_init_wr_wait(&epc->wr_wait);
254 	}
255 	PDBG("%s alloc ep %p\n", __func__, epc);
256 	return epc;
257 }
258 
259 void _c4iw_free_ep(struct kref *kref)
260 {
261 	struct c4iw_ep *ep;
262 
263 	ep = container_of(kref, struct c4iw_ep, com.kref);
264 	PDBG("%s ep %p state %s\n", __func__, ep, states[state_read(&ep->com)]);
265 	if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) {
266 		cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid);
267 		dst_release(ep->dst);
268 		cxgb4_l2t_release(ep->l2t);
269 	}
270 	kfree(ep);
271 }
272 
273 static void release_ep_resources(struct c4iw_ep *ep)
274 {
275 	set_bit(RELEASE_RESOURCES, &ep->com.flags);
276 	c4iw_put_ep(&ep->com);
277 }
278 
279 static int status2errno(int status)
280 {
281 	switch (status) {
282 	case CPL_ERR_NONE:
283 		return 0;
284 	case CPL_ERR_CONN_RESET:
285 		return -ECONNRESET;
286 	case CPL_ERR_ARP_MISS:
287 		return -EHOSTUNREACH;
288 	case CPL_ERR_CONN_TIMEDOUT:
289 		return -ETIMEDOUT;
290 	case CPL_ERR_TCAM_FULL:
291 		return -ENOMEM;
292 	case CPL_ERR_CONN_EXIST:
293 		return -EADDRINUSE;
294 	default:
295 		return -EIO;
296 	}
297 }
298 
299 /*
300  * Try and reuse skbs already allocated...
301  */
302 static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp)
303 {
304 	if (skb && !skb_is_nonlinear(skb) && !skb_cloned(skb)) {
305 		skb_trim(skb, 0);
306 		skb_get(skb);
307 		skb_reset_transport_header(skb);
308 	} else {
309 		skb = alloc_skb(len, gfp);
310 	}
311 	return skb;
312 }
313 
314 static struct rtable *find_route(struct c4iw_dev *dev, __be32 local_ip,
315 				 __be32 peer_ip, __be16 local_port,
316 				 __be16 peer_port, u8 tos)
317 {
318 	struct rtable *rt;
319 	struct flowi4 fl4;
320 
321 	rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip,
322 				   peer_port, local_port, IPPROTO_TCP,
323 				   tos, 0);
324 	if (IS_ERR(rt))
325 		return NULL;
326 	return rt;
327 }
328 
/*
 * ARP failure handler that simply drops the pending skb.
 * @handle is only used for the debug trace.
 */
static void arp_failure_discard(void *handle, struct sk_buff *skb)
{
	PDBG("%s c4iw_dev %p\n", __func__, handle);

	kfree_skb(skb);
}
334 
335 /*
336  * Handle an ARP failure for an active open.
337  */
338 static void act_open_req_arp_failure(void *handle, struct sk_buff *skb)
339 {
340 	printk(KERN_ERR MOD "ARP failure duing connect\n");
341 	kfree_skb(skb);
342 }
343 
344 /*
345  * Handle an ARP failure for a CPL_ABORT_REQ.  Change it into a no RST variant
346  * and send it along.
347  */
348 static void abort_arp_failure(void *handle, struct sk_buff *skb)
349 {
350 	struct c4iw_rdev *rdev = handle;
351 	struct cpl_abort_req *req = cplhdr(skb);
352 
353 	PDBG("%s rdev %p\n", __func__, rdev);
354 	req->cmd = CPL_ABORT_NO_RST;
355 	c4iw_ofld_send(rdev, skb);
356 }
357 
358 static void send_flowc(struct c4iw_ep *ep, struct sk_buff *skb)
359 {
360 	unsigned int flowclen = 80;
361 	struct fw_flowc_wr *flowc;
362 	int i;
363 
364 	skb = get_skb(skb, flowclen, GFP_KERNEL);
365 	flowc = (struct fw_flowc_wr *)__skb_put(skb, flowclen);
366 
367 	flowc->op_to_nparams = cpu_to_be32(FW_WR_OP(FW_FLOWC_WR) |
368 					   FW_FLOWC_WR_NPARAMS(8));
369 	flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16(DIV_ROUND_UP(flowclen,
370 					  16)) | FW_WR_FLOWID(ep->hwtid));
371 
372 	flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN;
373 	flowc->mnemval[0].val = cpu_to_be32(PCI_FUNC(ep->com.dev->rdev.lldi.pdev->devfn) << 8);
374 	flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH;
375 	flowc->mnemval[1].val = cpu_to_be32(ep->tx_chan);
376 	flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT;
377 	flowc->mnemval[2].val = cpu_to_be32(ep->tx_chan);
378 	flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID;
379 	flowc->mnemval[3].val = cpu_to_be32(ep->rss_qid);
380 	flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDNXT;
381 	flowc->mnemval[4].val = cpu_to_be32(ep->snd_seq);
382 	flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_RCVNXT;
383 	flowc->mnemval[5].val = cpu_to_be32(ep->rcv_seq);
384 	flowc->mnemval[6].mnemonic = FW_FLOWC_MNEM_SNDBUF;
385 	flowc->mnemval[6].val = cpu_to_be32(snd_win);
386 	flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS;
387 	flowc->mnemval[7].val = cpu_to_be32(ep->emss);
388 	/* Pad WR to 16 byte boundary */
389 	flowc->mnemval[8].mnemonic = 0;
390 	flowc->mnemval[8].val = 0;
391 	for (i = 0; i < 9; i++) {
392 		flowc->mnemval[i].r4[0] = 0;
393 		flowc->mnemval[i].r4[1] = 0;
394 		flowc->mnemval[i].r4[2] = 0;
395 	}
396 
397 	set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
398 	c4iw_ofld_send(&ep->com.dev->rdev, skb);
399 }
400 
401 static int send_halfclose(struct c4iw_ep *ep, gfp_t gfp)
402 {
403 	struct cpl_close_con_req *req;
404 	struct sk_buff *skb;
405 	int wrlen = roundup(sizeof *req, 16);
406 
407 	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
408 	skb = get_skb(NULL, wrlen, gfp);
409 	if (!skb) {
410 		printk(KERN_ERR MOD "%s - failed to alloc skb\n", __func__);
411 		return -ENOMEM;
412 	}
413 	set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
414 	t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
415 	req = (struct cpl_close_con_req *) skb_put(skb, wrlen);
416 	memset(req, 0, wrlen);
417 	INIT_TP_WR(req, ep->hwtid);
418 	OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_CLOSE_CON_REQ,
419 						    ep->hwtid));
420 	return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
421 }
422 
423 static int send_abort(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp)
424 {
425 	struct cpl_abort_req *req;
426 	int wrlen = roundup(sizeof *req, 16);
427 
428 	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
429 	skb = get_skb(skb, wrlen, gfp);
430 	if (!skb) {
431 		printk(KERN_ERR MOD "%s - failed to alloc skb.\n",
432 		       __func__);
433 		return -ENOMEM;
434 	}
435 	set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
436 	t4_set_arp_err_handler(skb, &ep->com.dev->rdev, abort_arp_failure);
437 	req = (struct cpl_abort_req *) skb_put(skb, wrlen);
438 	memset(req, 0, wrlen);
439 	INIT_TP_WR(req, ep->hwtid);
440 	OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid));
441 	req->cmd = CPL_ABORT_SEND_RST;
442 	return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
443 }
444 
445 static int send_connect(struct c4iw_ep *ep)
446 {
447 	struct cpl_act_open_req *req;
448 	struct sk_buff *skb;
449 	u64 opt0;
450 	u32 opt2;
451 	unsigned int mtu_idx;
452 	int wscale;
453 	int wrlen = roundup(sizeof *req, 16);
454 
455 	PDBG("%s ep %p atid %u\n", __func__, ep, ep->atid);
456 
457 	skb = get_skb(NULL, wrlen, GFP_KERNEL);
458 	if (!skb) {
459 		printk(KERN_ERR MOD "%s - failed to alloc skb.\n",
460 		       __func__);
461 		return -ENOMEM;
462 	}
463 	set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
464 
465 	cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx);
466 	wscale = compute_wscale(rcv_win);
467 	opt0 = KEEP_ALIVE(1) |
468 	       DELACK(1) |
469 	       WND_SCALE(wscale) |
470 	       MSS_IDX(mtu_idx) |
471 	       L2T_IDX(ep->l2t->idx) |
472 	       TX_CHAN(ep->tx_chan) |
473 	       SMAC_SEL(ep->smac_idx) |
474 	       DSCP(ep->tos) |
475 	       ULP_MODE(ULP_MODE_TCPDDP) |
476 	       RCV_BUFSIZ(rcv_win>>10);
477 	opt2 = RX_CHANNEL(0) |
478 	       RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid);
479 	if (enable_tcp_timestamps)
480 		opt2 |= TSTAMPS_EN(1);
481 	if (enable_tcp_sack)
482 		opt2 |= SACK_EN(1);
483 	if (wscale && enable_tcp_window_scaling)
484 		opt2 |= WND_SCALE_EN(1);
485 	t4_set_arp_err_handler(skb, NULL, act_open_req_arp_failure);
486 
487 	req = (struct cpl_act_open_req *) skb_put(skb, wrlen);
488 	INIT_TP_WR(req, 0);
489 	OPCODE_TID(req) = cpu_to_be32(
490 		MK_OPCODE_TID(CPL_ACT_OPEN_REQ, ((ep->rss_qid<<14)|ep->atid)));
491 	req->local_port = ep->com.local_addr.sin_port;
492 	req->peer_port = ep->com.remote_addr.sin_port;
493 	req->local_ip = ep->com.local_addr.sin_addr.s_addr;
494 	req->peer_ip = ep->com.remote_addr.sin_addr.s_addr;
495 	req->opt0 = cpu_to_be64(opt0);
496 	req->params = 0;
497 	req->opt2 = cpu_to_be32(opt2);
498 	return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
499 }
500 
501 static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
502 		u8 mpa_rev_to_use)
503 {
504 	int mpalen, wrlen;
505 	struct fw_ofld_tx_data_wr *req;
506 	struct mpa_message *mpa;
507 	struct mpa_v2_conn_params mpa_v2_params;
508 
509 	PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen);
510 
511 	BUG_ON(skb_cloned(skb));
512 
513 	mpalen = sizeof(*mpa) + ep->plen;
514 	if (mpa_rev_to_use == 2)
515 		mpalen += sizeof(struct mpa_v2_conn_params);
516 	wrlen = roundup(mpalen + sizeof *req, 16);
517 	skb = get_skb(skb, wrlen, GFP_KERNEL);
518 	if (!skb) {
519 		connect_reply_upcall(ep, -ENOMEM);
520 		return;
521 	}
522 	set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
523 
524 	req = (struct fw_ofld_tx_data_wr *)skb_put(skb, wrlen);
525 	memset(req, 0, wrlen);
526 	req->op_to_immdlen = cpu_to_be32(
527 		FW_WR_OP(FW_OFLD_TX_DATA_WR) |
528 		FW_WR_COMPL(1) |
529 		FW_WR_IMMDLEN(mpalen));
530 	req->flowid_len16 = cpu_to_be32(
531 		FW_WR_FLOWID(ep->hwtid) |
532 		FW_WR_LEN16(wrlen >> 4));
533 	req->plen = cpu_to_be32(mpalen);
534 	req->tunnel_to_proxy = cpu_to_be32(
535 		FW_OFLD_TX_DATA_WR_FLUSH(1) |
536 		FW_OFLD_TX_DATA_WR_SHOVE(1));
537 
538 	mpa = (struct mpa_message *)(req + 1);
539 	memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
540 	mpa->flags = (crc_enabled ? MPA_CRC : 0) |
541 		     (markers_enabled ? MPA_MARKERS : 0) |
542 		     (mpa_rev_to_use == 2 ? MPA_ENHANCED_RDMA_CONN : 0);
543 	mpa->private_data_size = htons(ep->plen);
544 	mpa->revision = mpa_rev_to_use;
545 	if (mpa_rev_to_use == 1) {
546 		ep->tried_with_mpa_v1 = 1;
547 		ep->retry_with_mpa_v1 = 0;
548 	}
549 
550 	if (mpa_rev_to_use == 2) {
551 		mpa->private_data_size = htons(ntohs(mpa->private_data_size) +
552 					       sizeof (struct mpa_v2_conn_params));
553 		mpa_v2_params.ird = htons((u16)ep->ird);
554 		mpa_v2_params.ord = htons((u16)ep->ord);
555 
556 		if (peer2peer) {
557 			mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
558 			if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE)
559 				mpa_v2_params.ord |=
560 					htons(MPA_V2_RDMA_WRITE_RTR);
561 			else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ)
562 				mpa_v2_params.ord |=
563 					htons(MPA_V2_RDMA_READ_RTR);
564 		}
565 		memcpy(mpa->private_data, &mpa_v2_params,
566 		       sizeof(struct mpa_v2_conn_params));
567 
568 		if (ep->plen)
569 			memcpy(mpa->private_data +
570 			       sizeof(struct mpa_v2_conn_params),
571 			       ep->mpa_pkt + sizeof(*mpa), ep->plen);
572 	} else
573 		if (ep->plen)
574 			memcpy(mpa->private_data,
575 					ep->mpa_pkt + sizeof(*mpa), ep->plen);
576 
577 	/*
578 	 * Reference the mpa skb.  This ensures the data area
579 	 * will remain in memory until the hw acks the tx.
580 	 * Function fw4_ack() will deref it.
581 	 */
582 	skb_get(skb);
583 	t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
584 	BUG_ON(ep->mpa_skb);
585 	ep->mpa_skb = skb;
586 	c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
587 	start_ep_timer(ep);
588 	state_set(&ep->com, MPA_REQ_SENT);
589 	ep->mpa_attr.initiator = 1;
590 	return;
591 }
592 
593 static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
594 {
595 	int mpalen, wrlen;
596 	struct fw_ofld_tx_data_wr *req;
597 	struct mpa_message *mpa;
598 	struct sk_buff *skb;
599 	struct mpa_v2_conn_params mpa_v2_params;
600 
601 	PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen);
602 
603 	mpalen = sizeof(*mpa) + plen;
604 	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn)
605 		mpalen += sizeof(struct mpa_v2_conn_params);
606 	wrlen = roundup(mpalen + sizeof *req, 16);
607 
608 	skb = get_skb(NULL, wrlen, GFP_KERNEL);
609 	if (!skb) {
610 		printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __func__);
611 		return -ENOMEM;
612 	}
613 	set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
614 
615 	req = (struct fw_ofld_tx_data_wr *)skb_put(skb, wrlen);
616 	memset(req, 0, wrlen);
617 	req->op_to_immdlen = cpu_to_be32(
618 		FW_WR_OP(FW_OFLD_TX_DATA_WR) |
619 		FW_WR_COMPL(1) |
620 		FW_WR_IMMDLEN(mpalen));
621 	req->flowid_len16 = cpu_to_be32(
622 		FW_WR_FLOWID(ep->hwtid) |
623 		FW_WR_LEN16(wrlen >> 4));
624 	req->plen = cpu_to_be32(mpalen);
625 	req->tunnel_to_proxy = cpu_to_be32(
626 		FW_OFLD_TX_DATA_WR_FLUSH(1) |
627 		FW_OFLD_TX_DATA_WR_SHOVE(1));
628 
629 	mpa = (struct mpa_message *)(req + 1);
630 	memset(mpa, 0, sizeof(*mpa));
631 	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
632 	mpa->flags = MPA_REJECT;
633 	mpa->revision = mpa_rev;
634 	mpa->private_data_size = htons(plen);
635 
636 	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
637 		mpa->flags |= MPA_ENHANCED_RDMA_CONN;
638 		mpa->private_data_size = htons(ntohs(mpa->private_data_size) +
639 					       sizeof (struct mpa_v2_conn_params));
640 		mpa_v2_params.ird = htons(((u16)ep->ird) |
641 					  (peer2peer ? MPA_V2_PEER2PEER_MODEL :
642 					   0));
643 		mpa_v2_params.ord = htons(((u16)ep->ord) | (peer2peer ?
644 					  (p2p_type ==
645 					   FW_RI_INIT_P2PTYPE_RDMA_WRITE ?
646 					   MPA_V2_RDMA_WRITE_RTR : p2p_type ==
647 					   FW_RI_INIT_P2PTYPE_READ_REQ ?
648 					   MPA_V2_RDMA_READ_RTR : 0) : 0));
649 		memcpy(mpa->private_data, &mpa_v2_params,
650 		       sizeof(struct mpa_v2_conn_params));
651 
652 		if (ep->plen)
653 			memcpy(mpa->private_data +
654 			       sizeof(struct mpa_v2_conn_params), pdata, plen);
655 	} else
656 		if (plen)
657 			memcpy(mpa->private_data, pdata, plen);
658 
659 	/*
660 	 * Reference the mpa skb again.  This ensures the data area
661 	 * will remain in memory until the hw acks the tx.
662 	 * Function fw4_ack() will deref it.
663 	 */
664 	skb_get(skb);
665 	set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
666 	t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
667 	BUG_ON(ep->mpa_skb);
668 	ep->mpa_skb = skb;
669 	return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
670 }
671 
672 static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
673 {
674 	int mpalen, wrlen;
675 	struct fw_ofld_tx_data_wr *req;
676 	struct mpa_message *mpa;
677 	struct sk_buff *skb;
678 	struct mpa_v2_conn_params mpa_v2_params;
679 
680 	PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen);
681 
682 	mpalen = sizeof(*mpa) + plen;
683 	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn)
684 		mpalen += sizeof(struct mpa_v2_conn_params);
685 	wrlen = roundup(mpalen + sizeof *req, 16);
686 
687 	skb = get_skb(NULL, wrlen, GFP_KERNEL);
688 	if (!skb) {
689 		printk(KERN_ERR MOD "%s - cannot alloc skb!\n", __func__);
690 		return -ENOMEM;
691 	}
692 	set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
693 
694 	req = (struct fw_ofld_tx_data_wr *) skb_put(skb, wrlen);
695 	memset(req, 0, wrlen);
696 	req->op_to_immdlen = cpu_to_be32(
697 		FW_WR_OP(FW_OFLD_TX_DATA_WR) |
698 		FW_WR_COMPL(1) |
699 		FW_WR_IMMDLEN(mpalen));
700 	req->flowid_len16 = cpu_to_be32(
701 		FW_WR_FLOWID(ep->hwtid) |
702 		FW_WR_LEN16(wrlen >> 4));
703 	req->plen = cpu_to_be32(mpalen);
704 	req->tunnel_to_proxy = cpu_to_be32(
705 		FW_OFLD_TX_DATA_WR_FLUSH(1) |
706 		FW_OFLD_TX_DATA_WR_SHOVE(1));
707 
708 	mpa = (struct mpa_message *)(req + 1);
709 	memset(mpa, 0, sizeof(*mpa));
710 	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
711 	mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) |
712 		     (markers_enabled ? MPA_MARKERS : 0);
713 	mpa->revision = ep->mpa_attr.version;
714 	mpa->private_data_size = htons(plen);
715 
716 	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
717 		mpa->flags |= MPA_ENHANCED_RDMA_CONN;
718 		mpa->private_data_size = htons(ntohs(mpa->private_data_size) +
719 					       sizeof (struct mpa_v2_conn_params));
720 		mpa_v2_params.ird = htons((u16)ep->ird);
721 		mpa_v2_params.ord = htons((u16)ep->ord);
722 		if (peer2peer && (ep->mpa_attr.p2p_type !=
723 					FW_RI_INIT_P2PTYPE_DISABLED)) {
724 			mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
725 
726 			if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE)
727 				mpa_v2_params.ord |=
728 					htons(MPA_V2_RDMA_WRITE_RTR);
729 			else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ)
730 				mpa_v2_params.ord |=
731 					htons(MPA_V2_RDMA_READ_RTR);
732 		}
733 
734 		memcpy(mpa->private_data, &mpa_v2_params,
735 		       sizeof(struct mpa_v2_conn_params));
736 
737 		if (ep->plen)
738 			memcpy(mpa->private_data +
739 			       sizeof(struct mpa_v2_conn_params), pdata, plen);
740 	} else
741 		if (plen)
742 			memcpy(mpa->private_data, pdata, plen);
743 
744 	/*
745 	 * Reference the mpa skb.  This ensures the data area
746 	 * will remain in memory until the hw acks the tx.
747 	 * Function fw4_ack() will deref it.
748 	 */
749 	skb_get(skb);
750 	t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
751 	ep->mpa_skb = skb;
752 	state_set(&ep->com, MPA_REP_SENT);
753 	return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
754 }
755 
756 static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb)
757 {
758 	struct c4iw_ep *ep;
759 	struct cpl_act_establish *req = cplhdr(skb);
760 	unsigned int tid = GET_TID(req);
761 	unsigned int atid = GET_TID_TID(ntohl(req->tos_atid));
762 	struct tid_info *t = dev->rdev.lldi.tids;
763 
764 	ep = lookup_atid(t, atid);
765 
766 	PDBG("%s ep %p tid %u snd_isn %u rcv_isn %u\n", __func__, ep, tid,
767 	     be32_to_cpu(req->snd_isn), be32_to_cpu(req->rcv_isn));
768 
769 	dst_confirm(ep->dst);
770 
771 	/* setup the hwtid for this connection */
772 	ep->hwtid = tid;
773 	cxgb4_insert_tid(t, ep, tid);
774 
775 	ep->snd_seq = be32_to_cpu(req->snd_isn);
776 	ep->rcv_seq = be32_to_cpu(req->rcv_isn);
777 
778 	set_emss(ep, ntohs(req->tcp_opt));
779 
780 	/* dealloc the atid */
781 	cxgb4_free_atid(t, atid);
782 
783 	/* start MPA negotiation */
784 	send_flowc(ep, NULL);
785 	if (ep->retry_with_mpa_v1)
786 		send_mpa_req(ep, skb, 1);
787 	else
788 		send_mpa_req(ep, skb, mpa_rev);
789 
790 	return 0;
791 }
792 
793 static void close_complete_upcall(struct c4iw_ep *ep)
794 {
795 	struct iw_cm_event event;
796 
797 	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
798 	memset(&event, 0, sizeof(event));
799 	event.event = IW_CM_EVENT_CLOSE;
800 	if (ep->com.cm_id) {
801 		PDBG("close complete delivered ep %p cm_id %p tid %u\n",
802 		     ep, ep->com.cm_id, ep->hwtid);
803 		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
804 		ep->com.cm_id->rem_ref(ep->com.cm_id);
805 		ep->com.cm_id = NULL;
806 		ep->com.qp = NULL;
807 	}
808 }
809 
810 static int abort_connection(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp)
811 {
812 	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
813 	close_complete_upcall(ep);
814 	state_set(&ep->com, ABORTING);
815 	return send_abort(ep, skb, gfp);
816 }
817 
818 static void peer_close_upcall(struct c4iw_ep *ep)
819 {
820 	struct iw_cm_event event;
821 
822 	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
823 	memset(&event, 0, sizeof(event));
824 	event.event = IW_CM_EVENT_DISCONNECT;
825 	if (ep->com.cm_id) {
826 		PDBG("peer close delivered ep %p cm_id %p tid %u\n",
827 		     ep, ep->com.cm_id, ep->hwtid);
828 		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
829 	}
830 }
831 
832 static void peer_abort_upcall(struct c4iw_ep *ep)
833 {
834 	struct iw_cm_event event;
835 
836 	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
837 	memset(&event, 0, sizeof(event));
838 	event.event = IW_CM_EVENT_CLOSE;
839 	event.status = -ECONNRESET;
840 	if (ep->com.cm_id) {
841 		PDBG("abort delivered ep %p cm_id %p tid %u\n", ep,
842 		     ep->com.cm_id, ep->hwtid);
843 		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
844 		ep->com.cm_id->rem_ref(ep->com.cm_id);
845 		ep->com.cm_id = NULL;
846 		ep->com.qp = NULL;
847 	}
848 }
849 
/*
 * Deliver IW_CM_EVENT_CONNECT_REPLY with @status to the endpoint's cm_id.
 *
 * For an accepted (0) or refused (-ECONNREFUSED) connection the peer's
 * private data from ep->mpa_pkt is attached.  Unless MPA v1 was tried,
 * the struct mpa_v2_conn_params that precedes it is skipped.
 *
 * On any negative @status the endpoint drops its cm_id reference and
 * forgets the cm_id and qp after the event has been delivered.
 */
static void connect_reply_upcall(struct c4iw_ep *ep, int status)
{
	struct iw_cm_event event;

	PDBG("%s ep %p tid %u status %d\n", __func__, ep, ep->hwtid, status);

	memset(&event, 0, sizeof(event));
	event.event = IW_CM_EVENT_CONNECT_REPLY;
	event.status = status;
	event.local_addr = ep->com.local_addr;
	event.remote_addr = ep->com.remote_addr;

	if (status == 0 || status == -ECONNREFUSED) {
		if (ep->tried_with_mpa_v1) {
			/* MPA v1: private data directly follows the header */
			event.private_data_len = ep->plen;
			event.private_data = ep->mpa_pkt +
				sizeof(struct mpa_message);
		} else {
			/* MPA v2: skip the leading connection parameters */
			event.private_data_len = ep->plen -
				sizeof(struct mpa_v2_conn_params);
			event.private_data = ep->mpa_pkt +
				sizeof(struct mpa_message) +
				sizeof(struct mpa_v2_conn_params);
		}
	}

	PDBG("%s ep %p tid %u status %d\n", __func__, ep,
	     ep->hwtid, status);
	ep->com.cm_id->event_handler(ep->com.cm_id, &event);

	if (status >= 0)
		return;

	/* The connect failed: this endpoint is done with its cm_id. */
	ep->com.cm_id->rem_ref(ep->com.cm_id);
	ep->com.cm_id = NULL;
	ep->com.qp = NULL;
}
887 
888 static void connect_request_upcall(struct c4iw_ep *ep)
889 {
890 	struct iw_cm_event event;
891 
892 	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
893 	memset(&event, 0, sizeof(event));
894 	event.event = IW_CM_EVENT_CONNECT_REQUEST;
895 	event.local_addr = ep->com.local_addr;
896 	event.remote_addr = ep->com.remote_addr;
897 	event.provider_data = ep;
898 	if (!ep->tried_with_mpa_v1) {
899 		/* this means MPA_v2 is used */
900 		event.ord = ep->ord;
901 		event.ird = ep->ird;
902 		event.private_data_len = ep->plen -
903 			sizeof(struct mpa_v2_conn_params);
904 		event.private_data = ep->mpa_pkt + sizeof(struct mpa_message) +
905 			sizeof(struct mpa_v2_conn_params);
906 	} else {
907 		/* this means MPA_v1 is used. Send max supported */
908 		event.ord = c4iw_max_read_depth;
909 		event.ird = c4iw_max_read_depth;
910 		event.private_data_len = ep->plen;
911 		event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
912 	}
913 	if (state_read(&ep->parent_ep->com) != DEAD) {
914 		c4iw_get_ep(&ep->com);
915 		ep->parent_ep->com.cm_id->event_handler(
916 						ep->parent_ep->com.cm_id,
917 						&event);
918 	}
919 	c4iw_put_ep(&ep->parent_ep->com);
920 	ep->parent_ep = NULL;
921 }
922 
923 static void established_upcall(struct c4iw_ep *ep)
924 {
925 	struct iw_cm_event event;
926 
927 	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
928 	memset(&event, 0, sizeof(event));
929 	event.event = IW_CM_EVENT_ESTABLISHED;
930 	event.ird = ep->ird;
931 	event.ord = ep->ord;
932 	if (ep->com.cm_id) {
933 		PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
934 		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
935 	}
936 }
937 
938 static int update_rx_credits(struct c4iw_ep *ep, u32 credits)
939 {
940 	struct cpl_rx_data_ack *req;
941 	struct sk_buff *skb;
942 	int wrlen = roundup(sizeof *req, 16);
943 
944 	PDBG("%s ep %p tid %u credits %u\n", __func__, ep, ep->hwtid, credits);
945 	skb = get_skb(NULL, wrlen, GFP_KERNEL);
946 	if (!skb) {
947 		printk(KERN_ERR MOD "update_rx_credits - cannot alloc skb!\n");
948 		return 0;
949 	}
950 
951 	req = (struct cpl_rx_data_ack *) skb_put(skb, wrlen);
952 	memset(req, 0, wrlen);
953 	INIT_TP_WR(req, ep->hwtid);
954 	OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK,
955 						    ep->hwtid));
956 	req->credit_dack = cpu_to_be32(credits | RX_FORCE_ACK(1) |
957 				       F_RX_DACK_CHANGE |
958 				       V_RX_DACK_MODE(dack_mode));
959 	set_wr_txq(skb, CPL_PRIORITY_ACK, ep->ctrlq_idx);
960 	c4iw_ofld_send(&ep->com.dev->rdev, skb);
961 	return credits;
962 }
963 
/*
 * Handle streaming data received while the endpoint is in MPA_REQ_SENT,
 * i.e. (part of) the peer's MPA start reply.
 *
 * The skb payload is appended to ep->mpa_pkt.  Once the complete message
 * (MPA header plus private data) has been accumulated and validated, the
 * endpoint is moved to FPDU_MODE, the MPA attributes are recorded in
 * ep->mpa_attr and the QP is moved to RTS through c4iw_modify_qp().
 * Validation failures and a failing RTS transition set the endpoint to
 * ABORTING and send an abort.  Either way connect_reply_upcall() is then
 * called with the resulting error code (0 on success).
 *
 * The function returns early, without any upcall, when the endpoint is no
 * longer in MPA_REQ_SENT or when more data is needed to complete the
 * message.
 */
static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
{
	struct mpa_message *mpa;
	struct mpa_v2_conn_params *mpa_v2_params;
	u16 plen;
	u16 resp_ird, resp_ord;
	u8 rtr_mismatch = 0, insuff_ird = 0;
	struct c4iw_qp_attributes attrs;
	enum c4iw_qp_attr_mask mask;
	int err;

	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);

	/*
	 * Stop mpa timer.  If it expired, then the state has
	 * changed and we bail since ep_timeout already aborted
	 * the connection.
	 *
	 * NOTE(review): the timer is stopped here but is not restarted on
	 * the "need more data" early returns below - confirm this is
	 * intended.
	 */
	stop_ep_timer(ep);
	if (state_read(&ep->com) != MPA_REQ_SENT)
		return;

	/*
	 * If we get more than the supported amount of private data
	 * then we must fail this connection.
	 */
	if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
		err = -EINVAL;
		goto err;
	}

	/*
	 * copy the new data into our accumulation buffer.
	 */
	skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
				  skb->len);
	ep->mpa_pkt_len += skb->len;

	/*
	 * if we don't even have the mpa message, then bail.
	 */
	if (ep->mpa_pkt_len < sizeof(*mpa))
		return;
	mpa = (struct mpa_message *) ep->mpa_pkt;

	/* Validate MPA header. */
	if (mpa->revision > mpa_rev) {
		printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d,"
		       " Received = %d\n", __func__, mpa_rev, mpa->revision);
		err = -EPROTO;
		goto err;
	}
	if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
		err = -EPROTO;
		goto err;
	}

	plen = ntohs(mpa->private_data_size);

	/*
	 * Fail if there's too much private data.
	 */
	if (plen > MPA_MAX_PRIVATE_DATA) {
		err = -EPROTO;
		goto err;
	}

	/*
	 * Fail if more data has been received than the header plus the
	 * advertised private data length accounts for.
	 */
	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
		err = -EPROTO;
		goto err;
	}

	ep->plen = (u8) plen;

	/*
	 * If we don't have all the pdata yet, then bail.
	 * We'll continue process when more data arrives.
	 */
	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
		return;

	if (mpa->flags & MPA_REJECT) {
		err = -ECONNREFUSED;
		goto err;
	}

	/*
	 * If we get here we have accumulated the entire mpa
	 * start reply message including private data. And
	 * the MPA header is valid.
	 */
	state_set(&ep->com, FPDU_MODE);
	/*
	 * '|' binds tighter than '?:', so CRC is enabled when either the
	 * peer's MPA_CRC flag or the local crc_enabled setting is non-zero.
	 */
	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
	ep->mpa_attr.recv_marker_enabled = markers_enabled;
	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
	ep->mpa_attr.version = mpa->revision;
	ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;

	if (mpa->revision == 2) {
		ep->mpa_attr.enhanced_rdma_conn =
			mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
		if (ep->mpa_attr.enhanced_rdma_conn) {
			/*
			 * The v2 connection parameters immediately follow
			 * the MPA header in the accumulated packet.
			 */
			mpa_v2_params = (struct mpa_v2_conn_params *)
				(ep->mpa_pkt + sizeof(*mpa));
			resp_ird = ntohs(mpa_v2_params->ird) &
				MPA_V2_IRD_ORD_MASK;
			resp_ord = ntohs(mpa_v2_params->ord) &
				MPA_V2_IRD_ORD_MASK;

			/*
			 * This is a double-check. Ideally, below checks are
			 * not required since ird/ord stuff has been taken
			 * care of in c4iw_accept_cr
			 */
			if ((ep->ird < resp_ord) || (ep->ord > resp_ird)) {
				err = -ENOMEM;
				ep->ird = resp_ord;
				ep->ord = resp_ird;
				insuff_ird = 1;
			}

			/*
			 * The peer-to-peer flag travels in the ird word and
			 * the requested RTR type in the ord word.
			 */
			if (ntohs(mpa_v2_params->ird) &
					MPA_V2_PEER2PEER_MODEL) {
				if (ntohs(mpa_v2_params->ord) &
						MPA_V2_RDMA_WRITE_RTR)
					ep->mpa_attr.p2p_type =
						FW_RI_INIT_P2PTYPE_RDMA_WRITE;
				else if (ntohs(mpa_v2_params->ord) &
						MPA_V2_RDMA_READ_RTR)
					ep->mpa_attr.p2p_type =
						FW_RI_INIT_P2PTYPE_READ_REQ;
			}
		}
	} else if (mpa->revision == 1)
		if (peer2peer)
			ep->mpa_attr.p2p_type = p2p_type;

	PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
	     "xmit_marker_enabled=%d, version=%d p2p_type=%d local-p2p_type = "
	     "%d\n", __func__, ep->mpa_attr.crc_enabled,
	     ep->mpa_attr.recv_marker_enabled,
	     ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
	     ep->mpa_attr.p2p_type, p2p_type);

	/*
	 * If responder's RTR does not match with that of initiator, assign
	 * FW_RI_INIT_P2PTYPE_DISABLED in mpa attributes so that RTR is not
	 * generated when moving QP to RTS state.
	 * A TERM message will be sent after QP has moved to RTS state
	 */
	if ((ep->mpa_attr.version == 2) && peer2peer &&
			(ep->mpa_attr.p2p_type != p2p_type)) {
		ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
		rtr_mismatch = 1;
	}

	attrs.mpa_attr = ep->mpa_attr;
	attrs.max_ird = ep->ird;
	attrs.max_ord = ep->ord;
	attrs.llp_stream_handle = ep;
	attrs.next_state = C4IW_QP_STATE_RTS;

	mask = C4IW_QP_ATTR_NEXT_STATE |
	    C4IW_QP_ATTR_LLP_STREAM_HANDLE | C4IW_QP_ATTR_MPA_ATTR |
	    C4IW_QP_ATTR_MAX_IRD | C4IW_QP_ATTR_MAX_ORD;

	/* bind QP and TID with INIT_WR */
	err = c4iw_modify_qp(ep->com.qp->rhp,
			     ep->com.qp, mask, &attrs, 1);
	if (err)
		goto err;

	/*
	 * If responder's RTR requirement did not match with what initiator
	 * supports, generate TERM message
	 */
	if (rtr_mismatch) {
		printk(KERN_ERR "%s: RTR mismatch, sending TERM\n", __func__);
		attrs.layer_etype = LAYER_MPA | DDP_LLP;
		attrs.ecode = MPA_NOMATCH_RTR;
		attrs.next_state = C4IW_QP_STATE_TERMINATE;
		/*
		 * The result of this transition is discarded: err is
		 * overwritten and -ENOMEM is always reported upwards.
		 */
		err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
				C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
		err = -ENOMEM;
		goto out;
	}

	/*
	 * Generate TERM if initiator IRD is not sufficient for responder
	 * provided ORD. Currently, we do the same behaviour even when
	 * responder provided IRD is also not sufficient as regards to
	 * initiator ORD.
	 */
	if (insuff_ird) {
		printk(KERN_ERR "%s: Insufficient IRD, sending TERM\n",
				__func__);
		attrs.layer_etype = LAYER_MPA | DDP_LLP;
		attrs.ecode = MPA_INSUFF_IRD;
		attrs.next_state = C4IW_QP_STATE_TERMINATE;
		/* As above, the modify result is replaced by -ENOMEM. */
		err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
				C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
		err = -ENOMEM;
		goto out;
	}
	goto out;
err:
	/* Failure before/at the RTS transition: abort the connection. */
	state_set(&ep->com, ABORTING);
	send_abort(ep, skb, GFP_KERNEL);
out:
	/* Report the outcome (err == 0 on success) to the upper layer. */
	connect_reply_upcall(ep, err);
	return;
}
1179 
/*
 * Handle streaming data received while the endpoint is in MPA_REQ_WAIT,
 * i.e. (part of) the peer's MPA start request.
 *
 * The skb payload is appended to ep->mpa_pkt.  Once the complete request
 * (MPA header plus private data) has been accumulated and validated, the
 * MPA attributes are recorded in ep->mpa_attr, the endpoint is moved to
 * MPA_REQ_RCVD and connect_request_upcall() is called.  Any validation
 * failure aborts the connection via abort_connection().
 *
 * Returns early when the endpoint is not in MPA_REQ_WAIT or when more
 * data is needed to complete the request.
 */
static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
{
	struct mpa_message *mpa;
	struct mpa_v2_conn_params *mpa_v2_params;
	u16 plen;

	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);

	if (state_read(&ep->com) != MPA_REQ_WAIT)
		return;

	/*
	 * If we get more than the supported amount of private data
	 * then we must fail this connection.
	 */
	if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
		stop_ep_timer(ep);
		abort_connection(ep, skb, GFP_KERNEL);
		return;
	}

	PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);

	/*
	 * Copy the new data into our accumulation buffer.
	 */
	skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
				  skb->len);
	ep->mpa_pkt_len += skb->len;

	/*
	 * If we don't even have the mpa message, then bail.
	 * We'll continue process when more data arrives.
	 */
	if (ep->mpa_pkt_len < sizeof(*mpa))
		return;

	PDBG("%s enter (%s line %u)\n", __func__, __FILE__, __LINE__);
	/*
	 * The timer is stopped as soon as a full MPA header is available.
	 * NOTE(review): it is not restarted if we return below while still
	 * waiting for private data - confirm this is intended.
	 */
	stop_ep_timer(ep);
	mpa = (struct mpa_message *) ep->mpa_pkt;

	/*
	 * Validate MPA Header.
	 */
	if (mpa->revision > mpa_rev) {
		printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d,"
		       " Received = %d\n", __func__, mpa_rev, mpa->revision);
		abort_connection(ep, skb, GFP_KERNEL);
		return;
	}

	if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) {
		abort_connection(ep, skb, GFP_KERNEL);
		return;
	}

	plen = ntohs(mpa->private_data_size);

	/*
	 * Fail if there's too much private data.
	 */
	if (plen > MPA_MAX_PRIVATE_DATA) {
		abort_connection(ep, skb, GFP_KERNEL);
		return;
	}

	/*
	 * Fail if more data has been received than the header plus the
	 * advertised private data length accounts for.
	 */
	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
		abort_connection(ep, skb, GFP_KERNEL);
		return;
	}
	ep->plen = (u8) plen;

	/*
	 * If we don't have all the pdata yet, then bail.
	 */
	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
		return;

	/*
	 * If we get here we have accumulated the entire mpa
	 * start request message including private data.
	 */
	ep->mpa_attr.initiator = 0;
	/*
	 * '|' binds tighter than '?:', so CRC is enabled when either the
	 * peer's MPA_CRC flag or the local crc_enabled setting is non-zero.
	 */
	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
	ep->mpa_attr.recv_marker_enabled = markers_enabled;
	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
	ep->mpa_attr.version = mpa->revision;
	/* Remember that the peer used MPA v1 for this request. */
	if (mpa->revision == 1)
		ep->tried_with_mpa_v1 = 1;
	ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;

	if (mpa->revision == 2) {
		ep->mpa_attr.enhanced_rdma_conn =
			mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
		if (ep->mpa_attr.enhanced_rdma_conn) {
			/*
			 * The v2 connection parameters immediately follow
			 * the MPA header; take ird/ord from the request.
			 */
			mpa_v2_params = (struct mpa_v2_conn_params *)
				(ep->mpa_pkt + sizeof(*mpa));
			ep->ird = ntohs(mpa_v2_params->ird) &
				MPA_V2_IRD_ORD_MASK;
			ep->ord = ntohs(mpa_v2_params->ord) &
				MPA_V2_IRD_ORD_MASK;
			/*
			 * Honour the requested RTR type only when the peer
			 * asked for peer-to-peer mode and it is enabled
			 * locally.
			 */
			if (ntohs(mpa_v2_params->ird) & MPA_V2_PEER2PEER_MODEL)
				if (peer2peer) {
					if (ntohs(mpa_v2_params->ord) &
							MPA_V2_RDMA_WRITE_RTR)
						ep->mpa_attr.p2p_type =
						FW_RI_INIT_P2PTYPE_RDMA_WRITE;
					else if (ntohs(mpa_v2_params->ord) &
							MPA_V2_RDMA_READ_RTR)
						ep->mpa_attr.p2p_type =
						FW_RI_INIT_P2PTYPE_READ_REQ;
				}
		}
	} else if (mpa->revision == 1)
		if (peer2peer)
			ep->mpa_attr.p2p_type = p2p_type;

	PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
	     "xmit_marker_enabled=%d, version=%d p2p_type=%d\n", __func__,
	     ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
	     ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
	     ep->mpa_attr.p2p_type);

	state_set(&ep->com, MPA_REQ_RCVD);

	/* drive upcall */
	connect_request_upcall(ep);
	return;
}
1312 
1313 static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
1314 {
1315 	struct c4iw_ep *ep;
1316 	struct cpl_rx_data *hdr = cplhdr(skb);
1317 	unsigned int dlen = ntohs(hdr->len);
1318 	unsigned int tid = GET_TID(hdr);
1319 	struct tid_info *t = dev->rdev.lldi.tids;
1320 
1321 	ep = lookup_tid(t, tid);
1322 	PDBG("%s ep %p tid %u dlen %u\n", __func__, ep, ep->hwtid, dlen);
1323 	skb_pull(skb, sizeof(*hdr));
1324 	skb_trim(skb, dlen);
1325 
1326 	ep->rcv_seq += dlen;
1327 	BUG_ON(ep->rcv_seq != (ntohl(hdr->seq) + dlen));
1328 
1329 	/* update RX credits */
1330 	update_rx_credits(ep, dlen);
1331 
1332 	switch (state_read(&ep->com)) {
1333 	case MPA_REQ_SENT:
1334 		process_mpa_reply(ep, skb);
1335 		break;
1336 	case MPA_REQ_WAIT:
1337 		process_mpa_request(ep, skb);
1338 		break;
1339 	case MPA_REP_SENT:
1340 		break;
1341 	default:
1342 		printk(KERN_ERR MOD "%s Unexpected streaming data."
1343 		       " ep %p state %d tid %u\n",
1344 		       __func__, ep, state_read(&ep->com), ep->hwtid);
1345 
1346 		/*
1347 		 * The ep will timeout and inform the ULP of the failure.
1348 		 * See ep_timeout().
1349 		 */
1350 		break;
1351 	}
1352 	return 0;
1353 }
1354 
1355 static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
1356 {
1357 	struct c4iw_ep *ep;
1358 	struct cpl_abort_rpl_rss *rpl = cplhdr(skb);
1359 	int release = 0;
1360 	unsigned int tid = GET_TID(rpl);
1361 	struct tid_info *t = dev->rdev.lldi.tids;
1362 
1363 	ep = lookup_tid(t, tid);
1364 	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
1365 	if (!ep) {
1366 		printk(KERN_WARNING MOD "Abort rpl to freed endpoint\n");
1367 		return 0;
1368 	}
1369 	mutex_lock(&ep->com.mutex);
1370 	switch (ep->com.state) {
1371 	case ABORTING:
1372 		__state_set(&ep->com, DEAD);
1373 		release = 1;
1374 		break;
1375 	default:
1376 		printk(KERN_ERR "%s ep %p state %d\n",
1377 		     __func__, ep, ep->com.state);
1378 		break;
1379 	}
1380 	mutex_unlock(&ep->com.mutex);
1381 
1382 	if (release)
1383 		release_ep_resources(ep);
1384 	return 0;
1385 }
1386 
1387 /*
1388  * Return whether a failed active open has allocated a TID
1389  */
1390 static inline int act_open_has_tid(int status)
1391 {
1392 	return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST &&
1393 	       status != CPL_ERR_ARP_MISS;
1394 }
1395 
1396 static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
1397 {
1398 	struct c4iw_ep *ep;
1399 	struct cpl_act_open_rpl *rpl = cplhdr(skb);
1400 	unsigned int atid = GET_TID_TID(GET_AOPEN_ATID(
1401 					ntohl(rpl->atid_status)));
1402 	struct tid_info *t = dev->rdev.lldi.tids;
1403 	int status = GET_AOPEN_STATUS(ntohl(rpl->atid_status));
1404 
1405 	ep = lookup_atid(t, atid);
1406 
1407 	PDBG("%s ep %p atid %u status %u errno %d\n", __func__, ep, atid,
1408 	     status, status2errno(status));
1409 
1410 	if (status == CPL_ERR_RTX_NEG_ADVICE) {
1411 		printk(KERN_WARNING MOD "Connection problems for atid %u\n",
1412 			atid);
1413 		return 0;
1414 	}
1415 
1416 	/*
1417 	 * Log interesting failures.
1418 	 */
1419 	switch (status) {
1420 	case CPL_ERR_CONN_RESET:
1421 	case CPL_ERR_CONN_TIMEDOUT:
1422 		break;
1423 	default:
1424 		printk(KERN_INFO MOD "Active open failure - "
1425 		       "atid %u status %u errno %d %pI4:%u->%pI4:%u\n",
1426 		       atid, status, status2errno(status),
1427 		       &ep->com.local_addr.sin_addr.s_addr,
1428 		       ntohs(ep->com.local_addr.sin_port),
1429 		       &ep->com.remote_addr.sin_addr.s_addr,
1430 		       ntohs(ep->com.remote_addr.sin_port));
1431 		break;
1432 	}
1433 
1434 	connect_reply_upcall(ep, status2errno(status));
1435 	state_set(&ep->com, DEAD);
1436 
1437 	if (status && act_open_has_tid(status))
1438 		cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, GET_TID(rpl));
1439 
1440 	cxgb4_free_atid(t, atid);
1441 	dst_release(ep->dst);
1442 	cxgb4_l2t_release(ep->l2t);
1443 	c4iw_put_ep(&ep->com);
1444 
1445 	return 0;
1446 }
1447 
1448 static int pass_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
1449 {
1450 	struct cpl_pass_open_rpl *rpl = cplhdr(skb);
1451 	struct tid_info *t = dev->rdev.lldi.tids;
1452 	unsigned int stid = GET_TID(rpl);
1453 	struct c4iw_listen_ep *ep = lookup_stid(t, stid);
1454 
1455 	if (!ep) {
1456 		printk(KERN_ERR MOD "stid %d lookup failure!\n", stid);
1457 		return 0;
1458 	}
1459 	PDBG("%s ep %p status %d error %d\n", __func__, ep,
1460 	     rpl->status, status2errno(rpl->status));
1461 	c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status));
1462 
1463 	return 0;
1464 }
1465 
1466 static int listen_stop(struct c4iw_listen_ep *ep)
1467 {
1468 	struct sk_buff *skb;
1469 	struct cpl_close_listsvr_req *req;
1470 
1471 	PDBG("%s ep %p\n", __func__, ep);
1472 	skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
1473 	if (!skb) {
1474 		printk(KERN_ERR MOD "%s - failed to alloc skb\n", __func__);
1475 		return -ENOMEM;
1476 	}
1477 	req = (struct cpl_close_listsvr_req *) skb_put(skb, sizeof(*req));
1478 	INIT_TP_WR(req, 0);
1479 	OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ,
1480 						    ep->stid));
1481 	req->reply_ctrl = cpu_to_be16(
1482 			  QUEUENO(ep->com.dev->rdev.lldi.rxq_ids[0]));
1483 	set_wr_txq(skb, CPL_PRIORITY_SETUP, 0);
1484 	return c4iw_ofld_send(&ep->com.dev->rdev, skb);
1485 }
1486 
1487 static int close_listsrv_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
1488 {
1489 	struct cpl_close_listsvr_rpl *rpl = cplhdr(skb);
1490 	struct tid_info *t = dev->rdev.lldi.tids;
1491 	unsigned int stid = GET_TID(rpl);
1492 	struct c4iw_listen_ep *ep = lookup_stid(t, stid);
1493 
1494 	PDBG("%s ep %p\n", __func__, ep);
1495 	c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status));
1496 	return 0;
1497 }
1498 
1499 static void accept_cr(struct c4iw_ep *ep, __be32 peer_ip, struct sk_buff *skb,
1500 		      struct cpl_pass_accept_req *req)
1501 {
1502 	struct cpl_pass_accept_rpl *rpl;
1503 	unsigned int mtu_idx;
1504 	u64 opt0;
1505 	u32 opt2;
1506 	int wscale;
1507 
1508 	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
1509 	BUG_ON(skb_cloned(skb));
1510 	skb_trim(skb, sizeof(*rpl));
1511 	skb_get(skb);
1512 	cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx);
1513 	wscale = compute_wscale(rcv_win);
1514 	opt0 = KEEP_ALIVE(1) |
1515 	       DELACK(1) |
1516 	       WND_SCALE(wscale) |
1517 	       MSS_IDX(mtu_idx) |
1518 	       L2T_IDX(ep->l2t->idx) |
1519 	       TX_CHAN(ep->tx_chan) |
1520 	       SMAC_SEL(ep->smac_idx) |
1521 	       DSCP(ep->tos) |
1522 	       ULP_MODE(ULP_MODE_TCPDDP) |
1523 	       RCV_BUFSIZ(rcv_win>>10);
1524 	opt2 = RX_CHANNEL(0) |
1525 	       RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid);
1526 
1527 	if (enable_tcp_timestamps && req->tcpopt.tstamp)
1528 		opt2 |= TSTAMPS_EN(1);
1529 	if (enable_tcp_sack && req->tcpopt.sack)
1530 		opt2 |= SACK_EN(1);
1531 	if (wscale && enable_tcp_window_scaling)
1532 		opt2 |= WND_SCALE_EN(1);
1533 
1534 	rpl = cplhdr(skb);
1535 	INIT_TP_WR(rpl, ep->hwtid);
1536 	OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
1537 				      ep->hwtid));
1538 	rpl->opt0 = cpu_to_be64(opt0);
1539 	rpl->opt2 = cpu_to_be32(opt2);
1540 	set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
1541 	c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
1542 
1543 	return;
1544 }
1545 
1546 static void reject_cr(struct c4iw_dev *dev, u32 hwtid, __be32 peer_ip,
1547 		      struct sk_buff *skb)
1548 {
1549 	PDBG("%s c4iw_dev %p tid %u peer_ip %x\n", __func__, dev, hwtid,
1550 	     peer_ip);
1551 	BUG_ON(skb_cloned(skb));
1552 	skb_trim(skb, sizeof(struct cpl_tid_release));
1553 	skb_get(skb);
1554 	release_tid(&dev->rdev, hwtid, skb);
1555 	return;
1556 }
1557 
1558 static void get_4tuple(struct cpl_pass_accept_req *req,
1559 		       __be32 *local_ip, __be32 *peer_ip,
1560 		       __be16 *local_port, __be16 *peer_port)
1561 {
1562 	int eth_len = G_ETH_HDR_LEN(be32_to_cpu(req->hdr_len));
1563 	int ip_len = G_IP_HDR_LEN(be32_to_cpu(req->hdr_len));
1564 	struct iphdr *ip = (struct iphdr *)((u8 *)(req + 1) + eth_len);
1565 	struct tcphdr *tcp = (struct tcphdr *)
1566 			     ((u8 *)(req + 1) + eth_len + ip_len);
1567 
1568 	PDBG("%s saddr 0x%x daddr 0x%x sport %u dport %u\n", __func__,
1569 	     ntohl(ip->saddr), ntohl(ip->daddr), ntohs(tcp->source),
1570 	     ntohs(tcp->dest));
1571 
1572 	*peer_ip = ip->saddr;
1573 	*local_ip = ip->daddr;
1574 	*peer_port = tcp->source;
1575 	*local_port = tcp->dest;
1576 
1577 	return;
1578 }
1579 
1580 static int import_ep(struct c4iw_ep *ep, __be32 peer_ip, struct dst_entry *dst,
1581 		     struct c4iw_dev *cdev, bool clear_mpa_v1)
1582 {
1583 	struct neighbour *n;
1584 	int err, step;
1585 
1586 	n = dst_neigh_lookup(dst, &peer_ip);
1587 	if (!n)
1588 		return -ENODEV;
1589 
1590 	rcu_read_lock();
1591 	err = -ENOMEM;
1592 	if (n->dev->flags & IFF_LOOPBACK) {
1593 		struct net_device *pdev;
1594 
1595 		pdev = ip_dev_find(&init_net, peer_ip);
1596 		if (!pdev) {
1597 			err = -ENODEV;
1598 			goto out;
1599 		}
1600 		ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
1601 					n, pdev, 0);
1602 		if (!ep->l2t)
1603 			goto out;
1604 		ep->mtu = pdev->mtu;
1605 		ep->tx_chan = cxgb4_port_chan(pdev);
1606 		ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1;
1607 		step = cdev->rdev.lldi.ntxq /
1608 			cdev->rdev.lldi.nchan;
1609 		ep->txq_idx = cxgb4_port_idx(pdev) * step;
1610 		step = cdev->rdev.lldi.nrxq /
1611 			cdev->rdev.lldi.nchan;
1612 		ep->ctrlq_idx = cxgb4_port_idx(pdev);
1613 		ep->rss_qid = cdev->rdev.lldi.rxq_ids[
1614 			cxgb4_port_idx(pdev) * step];
1615 		dev_put(pdev);
1616 	} else {
1617 		ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
1618 					n, n->dev, 0);
1619 		if (!ep->l2t)
1620 			goto out;
1621 		ep->mtu = dst_mtu(dst);
1622 		ep->tx_chan = cxgb4_port_chan(n->dev);
1623 		ep->smac_idx = (cxgb4_port_viid(n->dev) & 0x7F) << 1;
1624 		step = cdev->rdev.lldi.ntxq /
1625 			cdev->rdev.lldi.nchan;
1626 		ep->txq_idx = cxgb4_port_idx(n->dev) * step;
1627 		ep->ctrlq_idx = cxgb4_port_idx(n->dev);
1628 		step = cdev->rdev.lldi.nrxq /
1629 			cdev->rdev.lldi.nchan;
1630 		ep->rss_qid = cdev->rdev.lldi.rxq_ids[
1631 			cxgb4_port_idx(n->dev) * step];
1632 
1633 		if (clear_mpa_v1) {
1634 			ep->retry_with_mpa_v1 = 0;
1635 			ep->tried_with_mpa_v1 = 0;
1636 		}
1637 	}
1638 	err = 0;
1639 out:
1640 	rcu_read_unlock();
1641 
1642 	neigh_release(n);
1643 
1644 	return err;
1645 }
1646 
1647 static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
1648 {
1649 	struct c4iw_ep *child_ep, *parent_ep;
1650 	struct cpl_pass_accept_req *req = cplhdr(skb);
1651 	unsigned int stid = GET_POPEN_TID(ntohl(req->tos_stid));
1652 	struct tid_info *t = dev->rdev.lldi.tids;
1653 	unsigned int hwtid = GET_TID(req);
1654 	struct dst_entry *dst;
1655 	struct rtable *rt;
1656 	__be32 local_ip, peer_ip;
1657 	__be16 local_port, peer_port;
1658 	int err;
1659 
1660 	parent_ep = lookup_stid(t, stid);
1661 	PDBG("%s parent ep %p tid %u\n", __func__, parent_ep, hwtid);
1662 
1663 	get_4tuple(req, &local_ip, &peer_ip, &local_port, &peer_port);
1664 
1665 	if (state_read(&parent_ep->com) != LISTEN) {
1666 		printk(KERN_ERR "%s - listening ep not in LISTEN\n",
1667 		       __func__);
1668 		goto reject;
1669 	}
1670 
1671 	/* Find output route */
1672 	rt = find_route(dev, local_ip, peer_ip, local_port, peer_port,
1673 			GET_POPEN_TOS(ntohl(req->tos_stid)));
1674 	if (!rt) {
1675 		printk(KERN_ERR MOD "%s - failed to find dst entry!\n",
1676 		       __func__);
1677 		goto reject;
1678 	}
1679 	dst = &rt->dst;
1680 
1681 	child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL);
1682 	if (!child_ep) {
1683 		printk(KERN_ERR MOD "%s - failed to allocate ep entry!\n",
1684 		       __func__);
1685 		dst_release(dst);
1686 		goto reject;
1687 	}
1688 
1689 	err = import_ep(child_ep, peer_ip, dst, dev, false);
1690 	if (err) {
1691 		printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n",
1692 		       __func__);
1693 		dst_release(dst);
1694 		kfree(child_ep);
1695 		goto reject;
1696 	}
1697 
1698 	state_set(&child_ep->com, CONNECTING);
1699 	child_ep->com.dev = dev;
1700 	child_ep->com.cm_id = NULL;
1701 	child_ep->com.local_addr.sin_family = PF_INET;
1702 	child_ep->com.local_addr.sin_port = local_port;
1703 	child_ep->com.local_addr.sin_addr.s_addr = local_ip;
1704 	child_ep->com.remote_addr.sin_family = PF_INET;
1705 	child_ep->com.remote_addr.sin_port = peer_port;
1706 	child_ep->com.remote_addr.sin_addr.s_addr = peer_ip;
1707 	c4iw_get_ep(&parent_ep->com);
1708 	child_ep->parent_ep = parent_ep;
1709 	child_ep->tos = GET_POPEN_TOS(ntohl(req->tos_stid));
1710 	child_ep->dst = dst;
1711 	child_ep->hwtid = hwtid;
1712 
1713 	PDBG("%s tx_chan %u smac_idx %u rss_qid %u\n", __func__,
1714 	     child_ep->tx_chan, child_ep->smac_idx, child_ep->rss_qid);
1715 
1716 	init_timer(&child_ep->timer);
1717 	cxgb4_insert_tid(t, child_ep, hwtid);
1718 	accept_cr(child_ep, peer_ip, skb, req);
1719 	goto out;
1720 reject:
1721 	reject_cr(dev, hwtid, peer_ip, skb);
1722 out:
1723 	return 0;
1724 }
1725 
1726 static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb)
1727 {
1728 	struct c4iw_ep *ep;
1729 	struct cpl_pass_establish *req = cplhdr(skb);
1730 	struct tid_info *t = dev->rdev.lldi.tids;
1731 	unsigned int tid = GET_TID(req);
1732 
1733 	ep = lookup_tid(t, tid);
1734 	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
1735 	ep->snd_seq = be32_to_cpu(req->snd_isn);
1736 	ep->rcv_seq = be32_to_cpu(req->rcv_isn);
1737 
1738 	set_emss(ep, ntohs(req->tcp_opt));
1739 
1740 	dst_confirm(ep->dst);
1741 	state_set(&ep->com, MPA_REQ_WAIT);
1742 	start_ep_timer(ep);
1743 	send_flowc(ep, skb);
1744 
1745 	return 0;
1746 }
1747 
/*
 * Handle a CPL peer-close message (the peer closed its side of the
 * connection).
 *
 * The endpoint state machine is advanced under ep->com.mutex; depending
 * on the state this may notify the upper layer, wake up waiters on
 * wr_wait with -ECONNRESET, or move the QP to CLOSING/IDLE.  After the
 * mutex is dropped, c4iw_ep_disconnect() is called when 'disconnect' is
 * set and the endpoint resources are released when 'release' is set.
 *
 * Always returns 0.
 */
static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
{
	struct cpl_peer_close *hdr = cplhdr(skb);
	struct c4iw_ep *ep;
	struct c4iw_qp_attributes attrs;
	int disconnect = 1;
	int release = 0;
	struct tid_info *t = dev->rdev.lldi.tids;
	unsigned int tid = GET_TID(hdr);
	int ret;

	ep = lookup_tid(t, tid);
	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
	dst_confirm(ep->dst);

	mutex_lock(&ep->com.mutex);
	switch (ep->com.state) {
	case MPA_REQ_WAIT:
		__state_set(&ep->com, CLOSING);
		break;
	case MPA_REQ_SENT:
		/* Active side still waiting for the MPA reply: fail it. */
		__state_set(&ep->com, CLOSING);
		connect_reply_upcall(ep, -ECONNRESET);
		break;
	case MPA_REQ_RCVD:

		/*
		 * Move to CLOSING (not DEAD), but keep
		 * the reference on it until the ULP accepts or
		 * rejects the CR. Also wake up anyone waiting
		 * in rdma connection migration (see c4iw_accept_cr()).
		 */
		__state_set(&ep->com, CLOSING);
		PDBG("waking up ep %p tid %u\n", ep, ep->hwtid);
		c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
		break;
	case MPA_REP_SENT:
		__state_set(&ep->com, CLOSING);
		PDBG("waking up ep %p tid %u\n", ep, ep->hwtid);
		c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
		break;
	case FPDU_MODE:
		start_ep_timer(ep);
		__state_set(&ep->com, CLOSING);
		attrs.next_state = C4IW_QP_STATE_CLOSING;
		ret = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
				       C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
		/*
		 * NOTE(review): 'disconnect' is already 1 here, so the
		 * assignment below is redundant and disconnect also stays
		 * set when ret == -ECONNRESET - confirm this is intended.
		 */
		if (ret != -ECONNRESET) {
			peer_close_upcall(ep);
			disconnect = 1;
		}
		break;
	case ABORTING:
		disconnect = 0;
		break;
	case CLOSING:
		__state_set(&ep->com, MORIBUND);
		disconnect = 0;
		break;
	case MORIBUND:
		/* Close is now complete: finish off the endpoint. */
		stop_ep_timer(ep);
		if (ep->com.cm_id && ep->com.qp) {
			attrs.next_state = C4IW_QP_STATE_IDLE;
			c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
				       C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
		}
		close_complete_upcall(ep);
		__state_set(&ep->com, DEAD);
		release = 1;
		disconnect = 0;
		break;
	case DEAD:
		disconnect = 0;
		break;
	default:
		BUG_ON(1);
	}
	mutex_unlock(&ep->com.mutex);
	/* Both follow-up actions run without the endpoint mutex held. */
	if (disconnect)
		c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
	if (release)
		release_ep_resources(ep);
	return 0;
}
1832 
1833 /*
1834  * Returns whether an ABORT_REQ_RSS message is a negative advice.
1835  */
1836 static int is_neg_adv_abort(unsigned int status)
1837 {
1838 	return status == CPL_ERR_RTX_NEG_ADVICE ||
1839 	       status == CPL_ERR_PERSIST_NEG_ADVICE;
1840 }
1841 
/*
 * Re-initiate an active connection for an existing endpoint, using the
 * addresses stored in its cm_id.
 *
 * A new atid is allocated, a route and l2t entry towards the remote
 * address are set up, the endpoint is moved to CONNECTING and a connect
 * request is sent.
 *
 * Returns 0 when the connect request was sent.  On failure everything
 * acquired here is released again, the upper layer is notified with
 * -ECONNRESET via connect_reply_upcall(), an endpoint reference is
 * dropped, and a negative errno is returned (-ENOMEM, -EHOSTUNREACH, or
 * the error from import_ep()/send_connect()).
 */
static int c4iw_reconnect(struct c4iw_ep *ep)
{
	struct rtable *rt;
	int err = 0;

	PDBG("%s qp %p cm_id %p\n", __func__, ep->com.qp, ep->com.cm_id);
	init_timer(&ep->timer);

	/*
	 * Allocate an active TID to initiate a TCP connection.
	 */
	ep->atid = cxgb4_alloc_atid(ep->com.dev->rdev.lldi.tids, ep);
	if (ep->atid == -1) {
		printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__);
		err = -ENOMEM;
		goto fail2;
	}

	/* find a route */
	rt = find_route(ep->com.dev,
			ep->com.cm_id->local_addr.sin_addr.s_addr,
			ep->com.cm_id->remote_addr.sin_addr.s_addr,
			ep->com.cm_id->local_addr.sin_port,
			ep->com.cm_id->remote_addr.sin_port, 0);
	if (!rt) {
		printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
		err = -EHOSTUNREACH;
		goto fail3;
	}
	ep->dst = &rt->dst;

	/* clear_mpa_v1 is false: the MPA v1 retry flags are left as is. */
	err = import_ep(ep, ep->com.cm_id->remote_addr.sin_addr.s_addr,
			ep->dst, ep->com.dev, false);
	if (err) {
		printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
		goto fail4;
	}

	PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
	     __func__, ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid,
	     ep->l2t->idx);

	state_set(&ep->com, CONNECTING);
	ep->tos = 0;

	/* send connect request to rnic */
	err = send_connect(ep);
	if (!err)
		goto out;

	/* Unwind in reverse order of acquisition. */
	cxgb4_l2t_release(ep->l2t);
fail4:
	dst_release(ep->dst);
fail3:
	cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
fail2:
	/*
	 * remember to send notification to upper layer.
	 * We are in here so the upper layer is not aware that this is
	 * re-connect attempt and so, upper layer is still waiting for
	 * response of 1st connect request.
	 */
	connect_reply_upcall(ep, -ECONNRESET);
	c4iw_put_ep(&ep->com);
out:
	return err;
}
1909 
1910 static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
1911 {
1912 	struct cpl_abort_req_rss *req = cplhdr(skb);
1913 	struct c4iw_ep *ep;
1914 	struct cpl_abort_rpl *rpl;
1915 	struct sk_buff *rpl_skb;
1916 	struct c4iw_qp_attributes attrs;
1917 	int ret;
1918 	int release = 0;
1919 	struct tid_info *t = dev->rdev.lldi.tids;
1920 	unsigned int tid = GET_TID(req);
1921 
1922 	ep = lookup_tid(t, tid);
1923 	if (is_neg_adv_abort(req->status)) {
1924 		PDBG("%s neg_adv_abort ep %p tid %u\n", __func__, ep,
1925 		     ep->hwtid);
1926 		return 0;
1927 	}
1928 	PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid,
1929 	     ep->com.state);
1930 
1931 	/*
1932 	 * Wake up any threads in rdma_init() or rdma_fini().
1933 	 * However, this is not needed if com state is just
1934 	 * MPA_REQ_SENT
1935 	 */
1936 	if (ep->com.state != MPA_REQ_SENT)
1937 		c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
1938 
1939 	mutex_lock(&ep->com.mutex);
1940 	switch (ep->com.state) {
1941 	case CONNECTING:
1942 		break;
1943 	case MPA_REQ_WAIT:
1944 		stop_ep_timer(ep);
1945 		break;
1946 	case MPA_REQ_SENT:
1947 		stop_ep_timer(ep);
1948 		if (mpa_rev == 2 && ep->tried_with_mpa_v1)
1949 			connect_reply_upcall(ep, -ECONNRESET);
1950 		else {
1951 			/*
1952 			 * we just don't send notification upwards because we
1953 			 * want to retry with mpa_v1 without upper layers even
1954 			 * knowing it.
1955 			 *
1956 			 * do some housekeeping so as to re-initiate the
1957 			 * connection
1958 			 */
1959 			PDBG("%s: mpa_rev=%d. Retrying with mpav1\n", __func__,
1960 			     mpa_rev);
1961 			ep->retry_with_mpa_v1 = 1;
1962 		}
1963 		break;
1964 	case MPA_REP_SENT:
1965 		break;
1966 	case MPA_REQ_RCVD:
1967 		break;
1968 	case MORIBUND:
1969 	case CLOSING:
1970 		stop_ep_timer(ep);
1971 		/*FALLTHROUGH*/
1972 	case FPDU_MODE:
1973 		if (ep->com.cm_id && ep->com.qp) {
1974 			attrs.next_state = C4IW_QP_STATE_ERROR;
1975 			ret = c4iw_modify_qp(ep->com.qp->rhp,
1976 				     ep->com.qp, C4IW_QP_ATTR_NEXT_STATE,
1977 				     &attrs, 1);
1978 			if (ret)
1979 				printk(KERN_ERR MOD
1980 				       "%s - qp <- error failed!\n",
1981 				       __func__);
1982 		}
1983 		peer_abort_upcall(ep);
1984 		break;
1985 	case ABORTING:
1986 		break;
1987 	case DEAD:
1988 		PDBG("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__);
1989 		mutex_unlock(&ep->com.mutex);
1990 		return 0;
1991 	default:
1992 		BUG_ON(1);
1993 		break;
1994 	}
1995 	dst_confirm(ep->dst);
1996 	if (ep->com.state != ABORTING) {
1997 		__state_set(&ep->com, DEAD);
1998 		/* we don't release if we want to retry with mpa_v1 */
1999 		if (!ep->retry_with_mpa_v1)
2000 			release = 1;
2001 	}
2002 	mutex_unlock(&ep->com.mutex);
2003 
2004 	rpl_skb = get_skb(skb, sizeof(*rpl), GFP_KERNEL);
2005 	if (!rpl_skb) {
2006 		printk(KERN_ERR MOD "%s - cannot allocate skb!\n",
2007 		       __func__);
2008 		release = 1;
2009 		goto out;
2010 	}
2011 	set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
2012 	rpl = (struct cpl_abort_rpl *) skb_put(rpl_skb, sizeof(*rpl));
2013 	INIT_TP_WR(rpl, ep->hwtid);
2014 	OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid));
2015 	rpl->cmd = CPL_ABORT_NO_RST;
2016 	c4iw_ofld_send(&ep->com.dev->rdev, rpl_skb);
2017 out:
2018 	if (release)
2019 		release_ep_resources(ep);
2020 
2021 	/* retry with mpa-v1 */
2022 	if (ep && ep->retry_with_mpa_v1) {
2023 		cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid);
2024 		dst_release(ep->dst);
2025 		cxgb4_l2t_release(ep->l2t);
2026 		c4iw_reconnect(ep);
2027 	}
2028 
2029 	return 0;
2030 }
2031 
2032 static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
2033 {
2034 	struct c4iw_ep *ep;
2035 	struct c4iw_qp_attributes attrs;
2036 	struct cpl_close_con_rpl *rpl = cplhdr(skb);
2037 	int release = 0;
2038 	struct tid_info *t = dev->rdev.lldi.tids;
2039 	unsigned int tid = GET_TID(rpl);
2040 
2041 	ep = lookup_tid(t, tid);
2042 
2043 	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
2044 	BUG_ON(!ep);
2045 
2046 	/* The cm_id may be null if we failed to connect */
2047 	mutex_lock(&ep->com.mutex);
2048 	switch (ep->com.state) {
2049 	case CLOSING:
2050 		__state_set(&ep->com, MORIBUND);
2051 		break;
2052 	case MORIBUND:
2053 		stop_ep_timer(ep);
2054 		if ((ep->com.cm_id) && (ep->com.qp)) {
2055 			attrs.next_state = C4IW_QP_STATE_IDLE;
2056 			c4iw_modify_qp(ep->com.qp->rhp,
2057 					     ep->com.qp,
2058 					     C4IW_QP_ATTR_NEXT_STATE,
2059 					     &attrs, 1);
2060 		}
2061 		close_complete_upcall(ep);
2062 		__state_set(&ep->com, DEAD);
2063 		release = 1;
2064 		break;
2065 	case ABORTING:
2066 	case DEAD:
2067 		break;
2068 	default:
2069 		BUG_ON(1);
2070 		break;
2071 	}
2072 	mutex_unlock(&ep->com.mutex);
2073 	if (release)
2074 		release_ep_resources(ep);
2075 	return 0;
2076 }
2077 
2078 static int terminate(struct c4iw_dev *dev, struct sk_buff *skb)
2079 {
2080 	struct cpl_rdma_terminate *rpl = cplhdr(skb);
2081 	struct tid_info *t = dev->rdev.lldi.tids;
2082 	unsigned int tid = GET_TID(rpl);
2083 	struct c4iw_ep *ep;
2084 	struct c4iw_qp_attributes attrs;
2085 
2086 	ep = lookup_tid(t, tid);
2087 	BUG_ON(!ep);
2088 
2089 	if (ep && ep->com.qp) {
2090 		printk(KERN_WARNING MOD "TERM received tid %u qpid %u\n", tid,
2091 		       ep->com.qp->wq.sq.qid);
2092 		attrs.next_state = C4IW_QP_STATE_TERMINATE;
2093 		c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
2094 			       C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
2095 	} else
2096 		printk(KERN_WARNING MOD "TERM received tid %u no ep/qp\n", tid);
2097 
2098 	return 0;
2099 }
2100 
2101 /*
2102  * Upcall from the adapter indicating data has been transmitted.
2103  * For us its just the single MPA request or reply.  We can now free
2104  * the skb holding the mpa message.
2105  */
2106 static int fw4_ack(struct c4iw_dev *dev, struct sk_buff *skb)
2107 {
2108 	struct c4iw_ep *ep;
2109 	struct cpl_fw4_ack *hdr = cplhdr(skb);
2110 	u8 credits = hdr->credits;
2111 	unsigned int tid = GET_TID(hdr);
2112 	struct tid_info *t = dev->rdev.lldi.tids;
2113 
2114 
2115 	ep = lookup_tid(t, tid);
2116 	PDBG("%s ep %p tid %u credits %u\n", __func__, ep, ep->hwtid, credits);
2117 	if (credits == 0) {
2118 		PDBG("%s 0 credit ack ep %p tid %u state %u\n",
2119 		     __func__, ep, ep->hwtid, state_read(&ep->com));
2120 		return 0;
2121 	}
2122 
2123 	dst_confirm(ep->dst);
2124 	if (ep->mpa_skb) {
2125 		PDBG("%s last streaming msg ack ep %p tid %u state %u "
2126 		     "initiator %u freeing skb\n", __func__, ep, ep->hwtid,
2127 		     state_read(&ep->com), ep->mpa_attr.initiator ? 1 : 0);
2128 		kfree_skb(ep->mpa_skb);
2129 		ep->mpa_skb = NULL;
2130 	}
2131 	return 0;
2132 }
2133 
2134 int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
2135 {
2136 	int err;
2137 	struct c4iw_ep *ep = to_ep(cm_id);
2138 	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
2139 
2140 	if (state_read(&ep->com) == DEAD) {
2141 		c4iw_put_ep(&ep->com);
2142 		return -ECONNRESET;
2143 	}
2144 	BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
2145 	if (mpa_rev == 0)
2146 		abort_connection(ep, NULL, GFP_KERNEL);
2147 	else {
2148 		err = send_mpa_reject(ep, pdata, pdata_len);
2149 		err = c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
2150 	}
2151 	c4iw_put_ep(&ep->com);
2152 	return 0;
2153 }
2154 
2155 int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
2156 {
2157 	int err;
2158 	struct c4iw_qp_attributes attrs;
2159 	enum c4iw_qp_attr_mask mask;
2160 	struct c4iw_ep *ep = to_ep(cm_id);
2161 	struct c4iw_dev *h = to_c4iw_dev(cm_id->device);
2162 	struct c4iw_qp *qp = get_qhp(h, conn_param->qpn);
2163 
2164 	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
2165 	if (state_read(&ep->com) == DEAD) {
2166 		err = -ECONNRESET;
2167 		goto err;
2168 	}
2169 
2170 	BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
2171 	BUG_ON(!qp);
2172 
2173 	if ((conn_param->ord > c4iw_max_read_depth) ||
2174 	    (conn_param->ird > c4iw_max_read_depth)) {
2175 		abort_connection(ep, NULL, GFP_KERNEL);
2176 		err = -EINVAL;
2177 		goto err;
2178 	}
2179 
2180 	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
2181 		if (conn_param->ord > ep->ird) {
2182 			ep->ird = conn_param->ird;
2183 			ep->ord = conn_param->ord;
2184 			send_mpa_reject(ep, conn_param->private_data,
2185 					conn_param->private_data_len);
2186 			abort_connection(ep, NULL, GFP_KERNEL);
2187 			err = -ENOMEM;
2188 			goto err;
2189 		}
2190 		if (conn_param->ird > ep->ord) {
2191 			if (!ep->ord)
2192 				conn_param->ird = 1;
2193 			else {
2194 				abort_connection(ep, NULL, GFP_KERNEL);
2195 				err = -ENOMEM;
2196 				goto err;
2197 			}
2198 		}
2199 
2200 	}
2201 	ep->ird = conn_param->ird;
2202 	ep->ord = conn_param->ord;
2203 
2204 	if (ep->mpa_attr.version != 2)
2205 		if (peer2peer && ep->ird == 0)
2206 			ep->ird = 1;
2207 
2208 	PDBG("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord);
2209 
2210 	cm_id->add_ref(cm_id);
2211 	ep->com.cm_id = cm_id;
2212 	ep->com.qp = qp;
2213 
2214 	/* bind QP to EP and move to RTS */
2215 	attrs.mpa_attr = ep->mpa_attr;
2216 	attrs.max_ird = ep->ird;
2217 	attrs.max_ord = ep->ord;
2218 	attrs.llp_stream_handle = ep;
2219 	attrs.next_state = C4IW_QP_STATE_RTS;
2220 
2221 	/* bind QP and TID with INIT_WR */
2222 	mask = C4IW_QP_ATTR_NEXT_STATE |
2223 			     C4IW_QP_ATTR_LLP_STREAM_HANDLE |
2224 			     C4IW_QP_ATTR_MPA_ATTR |
2225 			     C4IW_QP_ATTR_MAX_IRD |
2226 			     C4IW_QP_ATTR_MAX_ORD;
2227 
2228 	err = c4iw_modify_qp(ep->com.qp->rhp,
2229 			     ep->com.qp, mask, &attrs, 1);
2230 	if (err)
2231 		goto err1;
2232 	err = send_mpa_reply(ep, conn_param->private_data,
2233 			     conn_param->private_data_len);
2234 	if (err)
2235 		goto err1;
2236 
2237 	state_set(&ep->com, FPDU_MODE);
2238 	established_upcall(ep);
2239 	c4iw_put_ep(&ep->com);
2240 	return 0;
2241 err1:
2242 	ep->com.cm_id = NULL;
2243 	ep->com.qp = NULL;
2244 	cm_id->rem_ref(cm_id);
2245 err:
2246 	c4iw_put_ep(&ep->com);
2247 	return err;
2248 }
2249 
2250 int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
2251 {
2252 	struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
2253 	struct c4iw_ep *ep;
2254 	struct rtable *rt;
2255 	int err = 0;
2256 
2257 	if ((conn_param->ord > c4iw_max_read_depth) ||
2258 	    (conn_param->ird > c4iw_max_read_depth)) {
2259 		err = -EINVAL;
2260 		goto out;
2261 	}
2262 	ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
2263 	if (!ep) {
2264 		printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__);
2265 		err = -ENOMEM;
2266 		goto out;
2267 	}
2268 	init_timer(&ep->timer);
2269 	ep->plen = conn_param->private_data_len;
2270 	if (ep->plen)
2271 		memcpy(ep->mpa_pkt + sizeof(struct mpa_message),
2272 		       conn_param->private_data, ep->plen);
2273 	ep->ird = conn_param->ird;
2274 	ep->ord = conn_param->ord;
2275 
2276 	if (peer2peer && ep->ord == 0)
2277 		ep->ord = 1;
2278 
2279 	cm_id->add_ref(cm_id);
2280 	ep->com.dev = dev;
2281 	ep->com.cm_id = cm_id;
2282 	ep->com.qp = get_qhp(dev, conn_param->qpn);
2283 	BUG_ON(!ep->com.qp);
2284 	PDBG("%s qpn 0x%x qp %p cm_id %p\n", __func__, conn_param->qpn,
2285 	     ep->com.qp, cm_id);
2286 
2287 	/*
2288 	 * Allocate an active TID to initiate a TCP connection.
2289 	 */
2290 	ep->atid = cxgb4_alloc_atid(dev->rdev.lldi.tids, ep);
2291 	if (ep->atid == -1) {
2292 		printk(KERN_ERR MOD "%s - cannot alloc atid.\n", __func__);
2293 		err = -ENOMEM;
2294 		goto fail2;
2295 	}
2296 
2297 	PDBG("%s saddr 0x%x sport 0x%x raddr 0x%x rport 0x%x\n", __func__,
2298 	     ntohl(cm_id->local_addr.sin_addr.s_addr),
2299 	     ntohs(cm_id->local_addr.sin_port),
2300 	     ntohl(cm_id->remote_addr.sin_addr.s_addr),
2301 	     ntohs(cm_id->remote_addr.sin_port));
2302 
2303 	/* find a route */
2304 	rt = find_route(dev,
2305 			cm_id->local_addr.sin_addr.s_addr,
2306 			cm_id->remote_addr.sin_addr.s_addr,
2307 			cm_id->local_addr.sin_port,
2308 			cm_id->remote_addr.sin_port, 0);
2309 	if (!rt) {
2310 		printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
2311 		err = -EHOSTUNREACH;
2312 		goto fail3;
2313 	}
2314 	ep->dst = &rt->dst;
2315 
2316 	err = import_ep(ep, cm_id->remote_addr.sin_addr.s_addr,
2317 			ep->dst, ep->com.dev, true);
2318 	if (err) {
2319 		printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
2320 		goto fail4;
2321 	}
2322 
2323 	PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
2324 		__func__, ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid,
2325 		ep->l2t->idx);
2326 
2327 	state_set(&ep->com, CONNECTING);
2328 	ep->tos = 0;
2329 	ep->com.local_addr = cm_id->local_addr;
2330 	ep->com.remote_addr = cm_id->remote_addr;
2331 
2332 	/* send connect request to rnic */
2333 	err = send_connect(ep);
2334 	if (!err)
2335 		goto out;
2336 
2337 	cxgb4_l2t_release(ep->l2t);
2338 fail4:
2339 	dst_release(ep->dst);
2340 fail3:
2341 	cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
2342 fail2:
2343 	cm_id->rem_ref(cm_id);
2344 	c4iw_put_ep(&ep->com);
2345 out:
2346 	return err;
2347 }
2348 
2349 int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
2350 {
2351 	int err = 0;
2352 	struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
2353 	struct c4iw_listen_ep *ep;
2354 
2355 
2356 	might_sleep();
2357 
2358 	ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
2359 	if (!ep) {
2360 		printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__);
2361 		err = -ENOMEM;
2362 		goto fail1;
2363 	}
2364 	PDBG("%s ep %p\n", __func__, ep);
2365 	cm_id->add_ref(cm_id);
2366 	ep->com.cm_id = cm_id;
2367 	ep->com.dev = dev;
2368 	ep->backlog = backlog;
2369 	ep->com.local_addr = cm_id->local_addr;
2370 
2371 	/*
2372 	 * Allocate a server TID.
2373 	 */
2374 	ep->stid = cxgb4_alloc_stid(dev->rdev.lldi.tids, PF_INET, ep);
2375 	if (ep->stid == -1) {
2376 		printk(KERN_ERR MOD "%s - cannot alloc stid.\n", __func__);
2377 		err = -ENOMEM;
2378 		goto fail2;
2379 	}
2380 
2381 	state_set(&ep->com, LISTEN);
2382 	c4iw_init_wr_wait(&ep->com.wr_wait);
2383 	err = cxgb4_create_server(ep->com.dev->rdev.lldi.ports[0], ep->stid,
2384 				  ep->com.local_addr.sin_addr.s_addr,
2385 				  ep->com.local_addr.sin_port,
2386 				  ep->com.dev->rdev.lldi.rxq_ids[0]);
2387 	if (err)
2388 		goto fail3;
2389 
2390 	/* wait for pass_open_rpl */
2391 	err = c4iw_wait_for_reply(&ep->com.dev->rdev, &ep->com.wr_wait, 0, 0,
2392 				  __func__);
2393 	if (!err) {
2394 		cm_id->provider_data = ep;
2395 		goto out;
2396 	}
2397 fail3:
2398 	cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, PF_INET);
2399 fail2:
2400 	cm_id->rem_ref(cm_id);
2401 	c4iw_put_ep(&ep->com);
2402 fail1:
2403 out:
2404 	return err;
2405 }
2406 
2407 int c4iw_destroy_listen(struct iw_cm_id *cm_id)
2408 {
2409 	int err;
2410 	struct c4iw_listen_ep *ep = to_listen_ep(cm_id);
2411 
2412 	PDBG("%s ep %p\n", __func__, ep);
2413 
2414 	might_sleep();
2415 	state_set(&ep->com, DEAD);
2416 	c4iw_init_wr_wait(&ep->com.wr_wait);
2417 	err = listen_stop(ep);
2418 	if (err)
2419 		goto done;
2420 	err = c4iw_wait_for_reply(&ep->com.dev->rdev, &ep->com.wr_wait, 0, 0,
2421 				  __func__);
2422 	cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, PF_INET);
2423 done:
2424 	cm_id->rem_ref(cm_id);
2425 	c4iw_put_ep(&ep->com);
2426 	return err;
2427 }
2428 
2429 int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
2430 {
2431 	int ret = 0;
2432 	int close = 0;
2433 	int fatal = 0;
2434 	struct c4iw_rdev *rdev;
2435 
2436 	mutex_lock(&ep->com.mutex);
2437 
2438 	PDBG("%s ep %p state %s, abrupt %d\n", __func__, ep,
2439 	     states[ep->com.state], abrupt);
2440 
2441 	rdev = &ep->com.dev->rdev;
2442 	if (c4iw_fatal_error(rdev)) {
2443 		fatal = 1;
2444 		close_complete_upcall(ep);
2445 		ep->com.state = DEAD;
2446 	}
2447 	switch (ep->com.state) {
2448 	case MPA_REQ_WAIT:
2449 	case MPA_REQ_SENT:
2450 	case MPA_REQ_RCVD:
2451 	case MPA_REP_SENT:
2452 	case FPDU_MODE:
2453 		close = 1;
2454 		if (abrupt)
2455 			ep->com.state = ABORTING;
2456 		else {
2457 			ep->com.state = CLOSING;
2458 			start_ep_timer(ep);
2459 		}
2460 		set_bit(CLOSE_SENT, &ep->com.flags);
2461 		break;
2462 	case CLOSING:
2463 		if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) {
2464 			close = 1;
2465 			if (abrupt) {
2466 				stop_ep_timer(ep);
2467 				ep->com.state = ABORTING;
2468 			} else
2469 				ep->com.state = MORIBUND;
2470 		}
2471 		break;
2472 	case MORIBUND:
2473 	case ABORTING:
2474 	case DEAD:
2475 		PDBG("%s ignoring disconnect ep %p state %u\n",
2476 		     __func__, ep, ep->com.state);
2477 		break;
2478 	default:
2479 		BUG();
2480 		break;
2481 	}
2482 
2483 	if (close) {
2484 		if (abrupt) {
2485 			close_complete_upcall(ep);
2486 			ret = send_abort(ep, NULL, gfp);
2487 		} else
2488 			ret = send_halfclose(ep, gfp);
2489 		if (ret)
2490 			fatal = 1;
2491 	}
2492 	mutex_unlock(&ep->com.mutex);
2493 	if (fatal)
2494 		release_ep_resources(ep);
2495 	return ret;
2496 }
2497 
2498 static int async_event(struct c4iw_dev *dev, struct sk_buff *skb)
2499 {
2500 	struct cpl_fw6_msg *rpl = cplhdr(skb);
2501 	c4iw_ev_dispatch(dev, (struct t4_cqe *)&rpl->data[0]);
2502 	return 0;
2503 }
2504 
2505 /*
2506  * These are the real handlers that are called from a
2507  * work queue.
2508  */
2509 static c4iw_handler_func work_handlers[NUM_CPL_CMDS] = {
2510 	[CPL_ACT_ESTABLISH] = act_establish,
2511 	[CPL_ACT_OPEN_RPL] = act_open_rpl,
2512 	[CPL_RX_DATA] = rx_data,
2513 	[CPL_ABORT_RPL_RSS] = abort_rpl,
2514 	[CPL_ABORT_RPL] = abort_rpl,
2515 	[CPL_PASS_OPEN_RPL] = pass_open_rpl,
2516 	[CPL_CLOSE_LISTSRV_RPL] = close_listsrv_rpl,
2517 	[CPL_PASS_ACCEPT_REQ] = pass_accept_req,
2518 	[CPL_PASS_ESTABLISH] = pass_establish,
2519 	[CPL_PEER_CLOSE] = peer_close,
2520 	[CPL_ABORT_REQ_RSS] = peer_abort,
2521 	[CPL_CLOSE_CON_RPL] = close_con_rpl,
2522 	[CPL_RDMA_TERMINATE] = terminate,
2523 	[CPL_FW4_ACK] = fw4_ack,
2524 	[CPL_FW6_MSG] = async_event
2525 };
2526 
2527 static void process_timeout(struct c4iw_ep *ep)
2528 {
2529 	struct c4iw_qp_attributes attrs;
2530 	int abort = 1;
2531 
2532 	mutex_lock(&ep->com.mutex);
2533 	PDBG("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid,
2534 	     ep->com.state);
2535 	switch (ep->com.state) {
2536 	case MPA_REQ_SENT:
2537 		__state_set(&ep->com, ABORTING);
2538 		connect_reply_upcall(ep, -ETIMEDOUT);
2539 		break;
2540 	case MPA_REQ_WAIT:
2541 		__state_set(&ep->com, ABORTING);
2542 		break;
2543 	case CLOSING:
2544 	case MORIBUND:
2545 		if (ep->com.cm_id && ep->com.qp) {
2546 			attrs.next_state = C4IW_QP_STATE_ERROR;
2547 			c4iw_modify_qp(ep->com.qp->rhp,
2548 				     ep->com.qp, C4IW_QP_ATTR_NEXT_STATE,
2549 				     &attrs, 1);
2550 		}
2551 		__state_set(&ep->com, ABORTING);
2552 		break;
2553 	default:
2554 		printk(KERN_ERR "%s unexpected state ep %p tid %u state %u\n",
2555 			__func__, ep, ep->hwtid, ep->com.state);
2556 		WARN_ON(1);
2557 		abort = 0;
2558 	}
2559 	mutex_unlock(&ep->com.mutex);
2560 	if (abort)
2561 		abort_connection(ep, NULL, GFP_KERNEL);
2562 	c4iw_put_ep(&ep->com);
2563 }
2564 
2565 static void process_timedout_eps(void)
2566 {
2567 	struct c4iw_ep *ep;
2568 
2569 	spin_lock_irq(&timeout_lock);
2570 	while (!list_empty(&timeout_list)) {
2571 		struct list_head *tmp;
2572 
2573 		tmp = timeout_list.next;
2574 		list_del(tmp);
2575 		spin_unlock_irq(&timeout_lock);
2576 		ep = list_entry(tmp, struct c4iw_ep, entry);
2577 		process_timeout(ep);
2578 		spin_lock_irq(&timeout_lock);
2579 	}
2580 	spin_unlock_irq(&timeout_lock);
2581 }
2582 
2583 static void process_work(struct work_struct *work)
2584 {
2585 	struct sk_buff *skb = NULL;
2586 	struct c4iw_dev *dev;
2587 	struct cpl_act_establish *rpl;
2588 	unsigned int opcode;
2589 	int ret;
2590 
2591 	while ((skb = skb_dequeue(&rxq))) {
2592 		rpl = cplhdr(skb);
2593 		dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *)));
2594 		opcode = rpl->ot.opcode;
2595 
2596 		BUG_ON(!work_handlers[opcode]);
2597 		ret = work_handlers[opcode](dev, skb);
2598 		if (!ret)
2599 			kfree_skb(skb);
2600 	}
2601 	process_timedout_eps();
2602 }
2603 
2604 static DECLARE_WORK(skb_work, process_work);
2605 
2606 static void ep_timeout(unsigned long arg)
2607 {
2608 	struct c4iw_ep *ep = (struct c4iw_ep *)arg;
2609 
2610 	spin_lock(&timeout_lock);
2611 	list_add_tail(&ep->entry, &timeout_list);
2612 	spin_unlock(&timeout_lock);
2613 	queue_work(workq, &skb_work);
2614 }
2615 
2616 /*
2617  * All the CM events are handled on a work queue to have a safe context.
2618  */
2619 static int sched(struct c4iw_dev *dev, struct sk_buff *skb)
2620 {
2621 
2622 	/*
2623 	 * Save dev in the skb->cb area.
2624 	 */
2625 	*((struct c4iw_dev **) (skb->cb + sizeof(void *))) = dev;
2626 
2627 	/*
2628 	 * Queue the skb and schedule the worker thread.
2629 	 */
2630 	skb_queue_tail(&rxq, skb);
2631 	queue_work(workq, &skb_work);
2632 	return 0;
2633 }
2634 
2635 static int set_tcb_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
2636 {
2637 	struct cpl_set_tcb_rpl *rpl = cplhdr(skb);
2638 
2639 	if (rpl->status != CPL_ERR_NONE) {
2640 		printk(KERN_ERR MOD "Unexpected SET_TCB_RPL status %u "
2641 		       "for tid %u\n", rpl->status, GET_TID(rpl));
2642 	}
2643 	kfree_skb(skb);
2644 	return 0;
2645 }
2646 
2647 static int fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb)
2648 {
2649 	struct cpl_fw6_msg *rpl = cplhdr(skb);
2650 	struct c4iw_wr_wait *wr_waitp;
2651 	int ret;
2652 
2653 	PDBG("%s type %u\n", __func__, rpl->type);
2654 
2655 	switch (rpl->type) {
2656 	case 1:
2657 		ret = (int)((be64_to_cpu(rpl->data[0]) >> 8) & 0xff);
2658 		wr_waitp = (struct c4iw_wr_wait *)(__force unsigned long) rpl->data[1];
2659 		PDBG("%s wr_waitp %p ret %u\n", __func__, wr_waitp, ret);
2660 		if (wr_waitp)
2661 			c4iw_wake_up(wr_waitp, ret ? -ret : 0);
2662 		kfree_skb(skb);
2663 		break;
2664 	case 2:
2665 		sched(dev, skb);
2666 		break;
2667 	default:
2668 		printk(KERN_ERR MOD "%s unexpected fw6 msg type %u\n", __func__,
2669 		       rpl->type);
2670 		kfree_skb(skb);
2671 		break;
2672 	}
2673 	return 0;
2674 }
2675 
2676 static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb)
2677 {
2678 	struct cpl_abort_req_rss *req = cplhdr(skb);
2679 	struct c4iw_ep *ep;
2680 	struct tid_info *t = dev->rdev.lldi.tids;
2681 	unsigned int tid = GET_TID(req);
2682 
2683 	ep = lookup_tid(t, tid);
2684 	if (!ep) {
2685 		printk(KERN_WARNING MOD
2686 		       "Abort on non-existent endpoint, tid %d\n", tid);
2687 		kfree_skb(skb);
2688 		return 0;
2689 	}
2690 	if (is_neg_adv_abort(req->status)) {
2691 		PDBG("%s neg_adv_abort ep %p tid %u\n", __func__, ep,
2692 		     ep->hwtid);
2693 		kfree_skb(skb);
2694 		return 0;
2695 	}
2696 	PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid,
2697 	     ep->com.state);
2698 
2699 	/*
2700 	 * Wake up any threads in rdma_init() or rdma_fini().
2701 	 */
2702 	c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
2703 	sched(dev, skb);
2704 	return 0;
2705 }
2706 
2707 /*
2708  * Most upcalls from the T4 Core go to sched() to
2709  * schedule the processing on a work queue.
2710  */
2711 c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS] = {
2712 	[CPL_ACT_ESTABLISH] = sched,
2713 	[CPL_ACT_OPEN_RPL] = sched,
2714 	[CPL_RX_DATA] = sched,
2715 	[CPL_ABORT_RPL_RSS] = sched,
2716 	[CPL_ABORT_RPL] = sched,
2717 	[CPL_PASS_OPEN_RPL] = sched,
2718 	[CPL_CLOSE_LISTSRV_RPL] = sched,
2719 	[CPL_PASS_ACCEPT_REQ] = sched,
2720 	[CPL_PASS_ESTABLISH] = sched,
2721 	[CPL_PEER_CLOSE] = sched,
2722 	[CPL_CLOSE_CON_RPL] = sched,
2723 	[CPL_ABORT_REQ_RSS] = peer_abort_intr,
2724 	[CPL_RDMA_TERMINATE] = sched,
2725 	[CPL_FW4_ACK] = sched,
2726 	[CPL_SET_TCB_RPL] = set_tcb_rpl,
2727 	[CPL_FW6_MSG] = fw6_msg
2728 };
2729 
2730 int __init c4iw_cm_init(void)
2731 {
2732 	spin_lock_init(&timeout_lock);
2733 	skb_queue_head_init(&rxq);
2734 
2735 	workq = create_singlethread_workqueue("iw_cxgb4");
2736 	if (!workq)
2737 		return -ENOMEM;
2738 
2739 	return 0;
2740 }
2741 
2742 void __exit c4iw_cm_term(void)
2743 {
2744 	WARN_ON(!list_empty(&timeout_list));
2745 	flush_workqueue(workq);
2746 	destroy_workqueue(workq);
2747 }
2748