xref: /freebsd/sys/dev/cxgbe/iw_cxgbe/cm.c (revision 5dae51da3da0cc94d17bd67b308fad304ebec7e0)
1 /*
2  * Copyright (c) 2009-2013, 2016 Chelsio, Inc. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *	  copyright notice, this list of conditions and the following
16  *	  disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *	  copyright notice, this list of conditions and the following
20  *	  disclaimer in the documentation and/or other materials
21  *	  provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include "opt_inet.h"
36 
37 #ifdef TCP_OFFLOAD
38 #include <sys/types.h>
39 #include <sys/malloc.h>
40 #include <sys/socket.h>
41 #include <sys/socketvar.h>
42 #include <sys/sockio.h>
43 #include <sys/taskqueue.h>
44 #include <netinet/in.h>
45 #include <net/route.h>
46 
47 #include <netinet/in_systm.h>
48 #include <netinet/in_pcb.h>
49 #include <netinet/ip.h>
50 #include <netinet/in_fib.h>
51 #include <netinet/ip_var.h>
52 #include <netinet/tcp_var.h>
53 #include <netinet/tcp.h>
54 #include <netinet/tcpip.h>
55 
56 #include <netinet/toecore.h>
57 
58 struct sge_iq;
59 struct rss_header;
60 struct cpl_set_tcb_rpl;
61 #include <linux/types.h>
62 #include "offload.h"
63 #include "tom/t4_tom.h"
64 
65 #define TOEPCB(so)  ((struct toepcb *)(so_sototcpcb((so))->t_toe))
66 
67 #include "iw_cxgbe.h"
68 #include <linux/module.h>
69 #include <linux/workqueue.h>
70 #include <linux/notifier.h>
71 #include <linux/inetdevice.h>
72 #include <linux/if_vlan.h>
73 #include <net/netevent.h>
74 
75 static spinlock_t req_lock;
76 static TAILQ_HEAD(c4iw_ep_list, c4iw_ep_common) req_list;
77 static struct work_struct c4iw_task;
78 static struct workqueue_struct *c4iw_taskq;
79 static LIST_HEAD(timeout_list);
80 static spinlock_t timeout_lock;
81 
82 static void process_req(struct work_struct *ctx);
83 static void start_ep_timer(struct c4iw_ep *ep);
84 static int stop_ep_timer(struct c4iw_ep *ep);
85 static int set_tcpinfo(struct c4iw_ep *ep);
86 static void process_timeout(struct c4iw_ep *ep);
87 static void process_timedout_eps(void);
88 static enum c4iw_ep_state state_read(struct c4iw_ep_common *epc);
89 static void __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state tostate);
90 static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state tostate);
91 static void *alloc_ep(int size, gfp_t flags);
92 void __free_ep(struct c4iw_ep_common *epc);
93 static int find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port,
94 		__be16 peer_port, u8 tos, struct nhop4_extended *pnh4);
95 static int close_socket(struct c4iw_ep_common *epc, int close);
96 static int shutdown_socket(struct c4iw_ep_common *epc);
97 static void abort_socket(struct c4iw_ep *ep);
98 static int send_mpa_req(struct c4iw_ep *ep);
99 static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen);
100 static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen);
101 static void close_complete_upcall(struct c4iw_ep *ep, int status);
102 static int send_abort(struct c4iw_ep *ep);
103 static void peer_close_upcall(struct c4iw_ep *ep);
104 static void peer_abort_upcall(struct c4iw_ep *ep);
105 static void connect_reply_upcall(struct c4iw_ep *ep, int status);
106 static int connect_request_upcall(struct c4iw_ep *ep);
107 static void established_upcall(struct c4iw_ep *ep);
108 static int process_mpa_reply(struct c4iw_ep *ep);
109 static int process_mpa_request(struct c4iw_ep *ep);
110 static void process_peer_close(struct c4iw_ep *ep);
111 static void process_conn_error(struct c4iw_ep *ep);
112 static void process_close_complete(struct c4iw_ep *ep);
113 static void ep_timeout(unsigned long arg);
114 static void init_sock(struct c4iw_ep_common *epc);
115 static void process_data(struct c4iw_ep *ep);
116 static void process_connected(struct c4iw_ep *ep);
117 static int c4iw_so_upcall(struct socket *so, void *arg, int waitflag);
118 static void process_socket_event(struct c4iw_ep *ep);
119 static void release_ep_resources(struct c4iw_ep *ep);
120 
/*
 * Tracing wrappers around start_ep_timer() and stop_ep_timer() that record
 * the calling function and line number.  STOP_EP_TIMER() is written as a
 * statement expression so that it evaluates to stop_ep_timer()'s return
 * value.
 */
#define START_EP_TIMER(ep)						\
	do {								\
		CTR3(KTR_IW_CXGBE, "start_ep_timer (%s:%d) ep %p",	\
		    __func__, __LINE__, (ep));				\
		start_ep_timer(ep);					\
	} while (0)

#define STOP_EP_TIMER(ep)						\
	({								\
		CTR3(KTR_IW_CXGBE, "stop_ep_timer (%s:%d) ep %p",	\
		    __func__, __LINE__, (ep));				\
		stop_ep_timer(ep);					\
	})
134 
#ifdef KTR
/*
 * Printable names for the endpoint states, indexed by the ep state value
 * (see the states[ep->com.state] uses in the trace calls below).  Only
 * built when KTR tracing is compiled in.  The table and the strings it
 * points at are read-only.
 */
static const char * const states[] = {
	"idle",
	"listen",
	"connecting",
	"mpa_wait_req",
	"mpa_req_sent",
	"mpa_req_rcvd",
	"mpa_rep_sent",
	"fpdu_mode",
	"aborting",
	"closing",
	"moribund",
	"dead",
	NULL,
};
#endif
152 
153 
154 static void deref_cm_id(struct c4iw_ep_common *epc)
155 {
156       epc->cm_id->rem_ref(epc->cm_id);
157       epc->cm_id = NULL;
158       set_bit(CM_ID_DEREFED, &epc->history);
159 }
160 
161 static void ref_cm_id(struct c4iw_ep_common *epc)
162 {
163       set_bit(CM_ID_REFED, &epc->history);
164       epc->cm_id->add_ref(epc->cm_id);
165 }
166 
167 static void deref_qp(struct c4iw_ep *ep)
168 {
169 	c4iw_qp_rem_ref(&ep->com.qp->ibqp);
170 	clear_bit(QP_REFERENCED, &ep->com.flags);
171 	set_bit(QP_DEREFED, &ep->com.history);
172 }
173 
174 static void ref_qp(struct c4iw_ep *ep)
175 {
176 	set_bit(QP_REFERENCED, &ep->com.flags);
177 	set_bit(QP_REFED, &ep->com.history);
178 	c4iw_qp_add_ref(&ep->com.qp->ibqp);
179 }
180 
181 static void process_timeout(struct c4iw_ep *ep)
182 {
183 	struct c4iw_qp_attributes attrs;
184 	int abort = 1;
185 
186 	mutex_lock(&ep->com.mutex);
187 	CTR4(KTR_IW_CXGBE, "%s ep :%p, tid:%u, state %d", __func__,
188 			ep, ep->hwtid, ep->com.state);
189 	set_bit(TIMEDOUT, &ep->com.history);
190 	switch (ep->com.state) {
191 	case MPA_REQ_SENT:
192 		connect_reply_upcall(ep, -ETIMEDOUT);
193 		break;
194 	case MPA_REQ_WAIT:
195 	case MPA_REQ_RCVD:
196 	case MPA_REP_SENT:
197 	case FPDU_MODE:
198 		break;
199 	case CLOSING:
200 	case MORIBUND:
201 		if (ep->com.cm_id && ep->com.qp) {
202 			attrs.next_state = C4IW_QP_STATE_ERROR;
203 			c4iw_modify_qp(ep->com.dev, ep->com.qp,
204 					C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
205 		}
206 		close_complete_upcall(ep, -ETIMEDOUT);
207 		break;
208 	case ABORTING:
209 	case DEAD:
210 		/*
211 		 * These states are expected if the ep timed out at the same
212 		 * time as another thread was calling stop_ep_timer().
213 		 * So we silently do nothing for these states.
214 		 */
215 		abort = 0;
216 		break;
217 	default:
218 		CTR4(KTR_IW_CXGBE, "%s unexpected state ep %p tid %u state %u\n"
219 				, __func__, ep, ep->hwtid, ep->com.state);
220 		abort = 0;
221 	}
222 	mutex_unlock(&ep->com.mutex);
223 	if (abort)
224 		c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
225 	c4iw_put_ep(&ep->com);
226 	return;
227 }
228 
229 static void process_timedout_eps(void)
230 {
231 	struct c4iw_ep *ep;
232 
233 	spin_lock(&timeout_lock);
234 	while (!list_empty(&timeout_list)) {
235 		struct list_head *tmp;
236 		tmp = timeout_list.next;
237 		list_del(tmp);
238 		tmp->next = tmp->prev = NULL;
239 		spin_unlock(&timeout_lock);
240 		ep = list_entry(tmp, struct c4iw_ep, entry);
241 		process_timeout(ep);
242 		spin_lock(&timeout_lock);
243 	}
244 	spin_unlock(&timeout_lock);
245 	return;
246 }
247 
248 static void
249 process_req(struct work_struct *ctx)
250 {
251 	struct c4iw_ep_common *epc;
252 
253 	process_timedout_eps();
254 	spin_lock(&req_lock);
255 	while (!TAILQ_EMPTY(&req_list)) {
256 		epc = TAILQ_FIRST(&req_list);
257 		TAILQ_REMOVE(&req_list, epc, entry);
258 		epc->entry.tqe_prev = NULL;
259 		spin_unlock(&req_lock);
260 		CTR3(KTR_IW_CXGBE, "%s so :%p, ep:%p", __func__,
261 				epc->so, epc);
262 		if (epc->so)
263 			process_socket_event((struct c4iw_ep *)epc);
264 		c4iw_put_ep(epc);
265 		process_timedout_eps();
266 		spin_lock(&req_lock);
267 	}
268 	spin_unlock(&req_lock);
269 }
270 
271 /*
272  * XXX: doesn't belong here in the iWARP driver.
273  * XXX: assumes that the connection was offloaded by cxgbe/t4_tom if TF_TOE is
274  *      set.  Is this a valid assumption for active open?
275  */
276 static int
277 set_tcpinfo(struct c4iw_ep *ep)
278 {
279 	struct socket *so = ep->com.so;
280 	struct inpcb *inp = sotoinpcb(so);
281 	struct tcpcb *tp;
282 	struct toepcb *toep;
283 	int rc = 0;
284 
285 	INP_WLOCK(inp);
286 	tp = intotcpcb(inp);
287 	if ((tp->t_flags & TF_TOE) == 0) {
288 		rc = EINVAL;
289 		log(LOG_ERR, "%s: connection not offloaded (so %p, ep %p)\n",
290 		    __func__, so, ep);
291 		goto done;
292 	}
293 	toep = TOEPCB(so);
294 
295 	ep->hwtid = toep->tid;
296 	ep->snd_seq = tp->snd_nxt;
297 	ep->rcv_seq = tp->rcv_nxt;
298 	ep->emss = max(tp->t_maxseg, 128);
299 done:
300 	INP_WUNLOCK(inp);
301 	return (rc);
302 
303 }
304 
305 static int
306 find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port,
307 		__be16 peer_port, u8 tos, struct nhop4_extended *pnh4)
308 {
309 	struct in_addr addr;
310 	int err;
311 
312 	CTR5(KTR_IW_CXGBE, "%s:frtB %x, %x, %d, %d", __func__, local_ip,
313 	    peer_ip, ntohs(local_port), ntohs(peer_port));
314 
315 	addr.s_addr = peer_ip;
316 	err = fib4_lookup_nh_ext(RT_DEFAULT_FIB, addr, NHR_REF, 0, pnh4);
317 
318 	CTR2(KTR_IW_CXGBE, "%s:frtE %d", __func__, err);
319 	return err;
320 }
321 
322 static int
323 close_socket(struct c4iw_ep_common *epc, int close)
324 {
325 	struct socket *so = epc->so;
326 	int rc;
327 
328 	CTR5(KTR_IW_CXGBE, "%s:csoB so %p, ep %p, state %s, tid %d", __func__,
329 			so, epc, states[epc->state],
330 			((struct c4iw_ep *)epc)->hwtid);
331 	mutex_lock(&epc->so_mutex);
332 	if ((so == NULL) || (so->so_count == 0)) {
333 		mutex_unlock(&epc->so_mutex);
334 		CTR5(KTR_IW_CXGBE, "%s:cso1 so %p, ep %p, state %s, tid %d",
335 				__func__, so, epc, states[epc->state],
336 				((struct c4iw_ep *)epc)->hwtid);
337 		return -EINVAL;
338 	}
339 
340 	SOCK_LOCK(so);
341 	soupcall_clear(so, SO_RCV);
342 	SOCK_UNLOCK(so);
343 
344 	if (close)
345                 rc = soclose(so);
346         else
347                 rc = soshutdown(so, SHUT_WR | SHUT_RD);
348 	epc->so = NULL;
349 
350 	mutex_unlock(&epc->so_mutex);
351 	return (rc);
352 }
353 
354 static int
355 shutdown_socket(struct c4iw_ep_common *epc)
356 {
357 
358 	struct socket *so = epc->so;
359 	int rc;
360 
361 	CTR5(KTR_IW_CXGBE, "%s:ssoB so %p, ep %p, state %s, tid %d", __func__,
362 			epc->so, epc, states[epc->state],
363 			((struct c4iw_ep *)epc)->hwtid);
364 	mutex_lock(&epc->so_mutex);
365 	if ((so == NULL) || (so->so_count == 0)) {
366 		mutex_unlock(&epc->so_mutex);
367 		CTR5(KTR_IW_CXGBE, "%s:sso1 so %p, ep %p, state %s, tid %d",
368 			__func__, epc->so, epc, states[epc->state],
369 			((struct c4iw_ep *)epc)->hwtid);
370 		return -EINVAL;
371 	}
372 	rc = soshutdown(so, SHUT_WR);
373 	mutex_unlock(&epc->so_mutex);
374 	return rc;
375 }
376 
377 static void
378 abort_socket(struct c4iw_ep *ep)
379 {
380 	struct sockopt sopt;
381 	int rc;
382 	struct linger l;
383 
384 	CTR5(KTR_IW_CXGBE, "%s ep %p so %p state %s tid %d", __func__, ep,
385 			ep->com.so, states[ep->com.state], ep->hwtid);
386 	mutex_lock(&ep->com.so_mutex);
387 	l.l_onoff = 1;
388 	l.l_linger = 0;
389 
390 	/* linger_time of 0 forces RST to be sent */
391 	sopt.sopt_dir = SOPT_SET;
392 	sopt.sopt_level = SOL_SOCKET;
393 	sopt.sopt_name = SO_LINGER;
394 	sopt.sopt_val = (caddr_t)&l;
395 	sopt.sopt_valsize = sizeof l;
396 	sopt.sopt_td = NULL;
397 	rc = sosetopt(ep->com.so, &sopt);
398 	if (rc) {
399 		log(LOG_ERR, "%s: can't set linger to 0, no RST! err %d\n",
400 		    __func__, rc);
401 	}
402 	mutex_unlock(&ep->com.so_mutex);
403 }
404 
405 static void
406 process_peer_close(struct c4iw_ep *ep)
407 {
408 	struct c4iw_qp_attributes attrs;
409 	int disconnect = 1;
410 	int release = 0;
411 
412 	CTR4(KTR_IW_CXGBE, "%s:ppcB ep %p so %p state %s", __func__, ep,
413 	    ep->com.so, states[ep->com.state]);
414 
415 	mutex_lock(&ep->com.mutex);
416 	switch (ep->com.state) {
417 
418 		case MPA_REQ_WAIT:
419 			CTR2(KTR_IW_CXGBE, "%s:ppc1 %p MPA_REQ_WAIT CLOSING",
420 			    __func__, ep);
421 			__state_set(&ep->com, CLOSING);
422 			break;
423 
424 		case MPA_REQ_SENT:
425 			CTR2(KTR_IW_CXGBE, "%s:ppc2 %p MPA_REQ_SENT CLOSING",
426 			    __func__, ep);
427 			__state_set(&ep->com, DEAD);
428 			connect_reply_upcall(ep, -ECONNABORTED);
429 
430 			disconnect = 0;
431 			STOP_EP_TIMER(ep);
432 			close_socket(&ep->com, 0);
433 			deref_cm_id(&ep->com);
434 			release = 1;
435 			break;
436 
437 		case MPA_REQ_RCVD:
438 
439 			/*
440 			 * We're gonna mark this puppy DEAD, but keep
441 			 * the reference on it until the ULP accepts or
442 			 * rejects the CR.
443 			 */
444 			CTR2(KTR_IW_CXGBE, "%s:ppc3 %p MPA_REQ_RCVD CLOSING",
445 			    __func__, ep);
446 			__state_set(&ep->com, CLOSING);
447 			c4iw_get_ep(&ep->com);
448 			break;
449 
450 		case MPA_REP_SENT:
451 			CTR2(KTR_IW_CXGBE, "%s:ppc4 %p MPA_REP_SENT CLOSING",
452 			    __func__, ep);
453 			__state_set(&ep->com, CLOSING);
454 			break;
455 
456 		case FPDU_MODE:
457 			CTR2(KTR_IW_CXGBE, "%s:ppc5 %p FPDU_MODE CLOSING",
458 			    __func__, ep);
459 			START_EP_TIMER(ep);
460 			__state_set(&ep->com, CLOSING);
461 			attrs.next_state = C4IW_QP_STATE_CLOSING;
462 			c4iw_modify_qp(ep->com.dev, ep->com.qp,
463 					C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
464 			peer_close_upcall(ep);
465 			break;
466 
467 		case ABORTING:
468 			CTR2(KTR_IW_CXGBE, "%s:ppc6 %p ABORTING (disconn)",
469 			    __func__, ep);
470 			disconnect = 0;
471 			break;
472 
473 		case CLOSING:
474 			CTR2(KTR_IW_CXGBE, "%s:ppc7 %p CLOSING MORIBUND",
475 			    __func__, ep);
476 			__state_set(&ep->com, MORIBUND);
477 			disconnect = 0;
478 			break;
479 
480 		case MORIBUND:
481 			CTR2(KTR_IW_CXGBE, "%s:ppc8 %p MORIBUND DEAD", __func__,
482 			    ep);
483 			STOP_EP_TIMER(ep);
484 			if (ep->com.cm_id && ep->com.qp) {
485 				attrs.next_state = C4IW_QP_STATE_IDLE;
486 				c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
487 						C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
488 			}
489 			close_socket(&ep->com, 0);
490 			close_complete_upcall(ep, 0);
491 			__state_set(&ep->com, DEAD);
492 			release = 1;
493 			disconnect = 0;
494 			break;
495 
496 		case DEAD:
497 			CTR2(KTR_IW_CXGBE, "%s:ppc9 %p DEAD (disconn)",
498 			    __func__, ep);
499 			disconnect = 0;
500 			break;
501 
502 		default:
503 			panic("%s: ep %p state %d", __func__, ep,
504 			    ep->com.state);
505 			break;
506 	}
507 
508 	mutex_unlock(&ep->com.mutex);
509 
510 	if (disconnect) {
511 
512 		CTR2(KTR_IW_CXGBE, "%s:ppca %p", __func__, ep);
513 		c4iw_ep_disconnect(ep, 0, M_NOWAIT);
514 	}
515 	if (release) {
516 
517 		CTR2(KTR_IW_CXGBE, "%s:ppcb %p", __func__, ep);
518 		c4iw_put_ep(&ep->com);
519 	}
520 	CTR2(KTR_IW_CXGBE, "%s:ppcE %p", __func__, ep);
521 	return;
522 }
523 
524 static void
525 process_conn_error(struct c4iw_ep *ep)
526 {
527 	struct c4iw_qp_attributes attrs;
528 	int ret;
529 	int state;
530 
531 	mutex_lock(&ep->com.mutex);
532 	state = ep->com.state;
533 	CTR5(KTR_IW_CXGBE, "%s:pceB ep %p so %p so->so_error %u state %s",
534 	    __func__, ep, ep->com.so, ep->com.so->so_error,
535 	    states[ep->com.state]);
536 
537 	switch (state) {
538 
539 		case MPA_REQ_WAIT:
540 			STOP_EP_TIMER(ep);
541 			break;
542 
543 		case MPA_REQ_SENT:
544 			STOP_EP_TIMER(ep);
545 			connect_reply_upcall(ep, -ECONNRESET);
546 			break;
547 
548 		case MPA_REP_SENT:
549 			ep->com.rpl_err = ECONNRESET;
550 			CTR1(KTR_IW_CXGBE, "waking up ep %p", ep);
551 			break;
552 
553 		case MPA_REQ_RCVD:
554 
555 			/*
556 			 * We're gonna mark this puppy DEAD, but keep
557 			 * the reference on it until the ULP accepts or
558 			 * rejects the CR.
559 			 */
560 			c4iw_get_ep(&ep->com);
561 			break;
562 
563 		case MORIBUND:
564 		case CLOSING:
565 			STOP_EP_TIMER(ep);
566 			/*FALLTHROUGH*/
567 		case FPDU_MODE:
568 
569 			if (ep->com.cm_id && ep->com.qp) {
570 
571 				attrs.next_state = C4IW_QP_STATE_ERROR;
572 				ret = c4iw_modify_qp(ep->com.qp->rhp,
573 					ep->com.qp, C4IW_QP_ATTR_NEXT_STATE,
574 					&attrs, 1);
575 				if (ret)
576 					log(LOG_ERR,
577 							"%s - qp <- error failed!\n",
578 							__func__);
579 			}
580 			peer_abort_upcall(ep);
581 			break;
582 
583 		case ABORTING:
584 			break;
585 
586 		case DEAD:
587 			CTR2(KTR_IW_CXGBE, "%s so_error %d IN DEAD STATE!!!!",
588 			    __func__, ep->com.so->so_error);
589 			mutex_unlock(&ep->com.mutex);
590 			return;
591 
592 		default:
593 			panic("%s: ep %p state %d", __func__, ep, state);
594 			break;
595 	}
596 
597 	if (state != ABORTING) {
598 		if (ep->parent_ep) {
599 			CTR2(KTR_IW_CXGBE, "%s:pce1 %p", __func__, ep);
600 			close_socket(&ep->com, 1);
601 		} else {
602 			CTR2(KTR_IW_CXGBE, "%s:pce2 %p", __func__, ep);
603 			close_socket(&ep->com, 0);
604 		}
605 
606 		__state_set(&ep->com, DEAD);
607 		c4iw_put_ep(&ep->com);
608 	}
609 	mutex_unlock(&ep->com.mutex);
610 	CTR2(KTR_IW_CXGBE, "%s:pceE %p", __func__, ep);
611 	return;
612 }
613 
614 static void
615 process_close_complete(struct c4iw_ep *ep)
616 {
617 	struct c4iw_qp_attributes attrs;
618 	int release = 0;
619 
620 	CTR4(KTR_IW_CXGBE, "%s:pccB ep %p so %p state %s", __func__, ep,
621 	    ep->com.so, states[ep->com.state]);
622 
623 	/* The cm_id may be null if we failed to connect */
624 	mutex_lock(&ep->com.mutex);
625 	set_bit(CLOSE_CON_RPL, &ep->com.history);
626 
627 	switch (ep->com.state) {
628 
629 		case CLOSING:
630 			CTR2(KTR_IW_CXGBE, "%s:pcc1 %p CLOSING MORIBUND",
631 			    __func__, ep);
632 			__state_set(&ep->com, MORIBUND);
633 			break;
634 
635 		case MORIBUND:
636 			CTR2(KTR_IW_CXGBE, "%s:pcc1 %p MORIBUND DEAD", __func__,
637 			    ep);
638 			STOP_EP_TIMER(ep);
639 
640 			if ((ep->com.cm_id) && (ep->com.qp)) {
641 
642 				CTR2(KTR_IW_CXGBE, "%s:pcc2 %p QP_STATE_IDLE",
643 				    __func__, ep);
644 				attrs.next_state = C4IW_QP_STATE_IDLE;
645 				c4iw_modify_qp(ep->com.dev,
646 						ep->com.qp,
647 						C4IW_QP_ATTR_NEXT_STATE,
648 						&attrs, 1);
649 			}
650 
651 			if (ep->parent_ep) {
652 
653 				CTR2(KTR_IW_CXGBE, "%s:pcc3 %p", __func__, ep);
654 				close_socket(&ep->com, 1);
655 			}
656 			else {
657 
658 				CTR2(KTR_IW_CXGBE, "%s:pcc4 %p", __func__, ep);
659 				close_socket(&ep->com, 0);
660 			}
661 			close_complete_upcall(ep, 0);
662 			__state_set(&ep->com, DEAD);
663 			release = 1;
664 			break;
665 
666 		case ABORTING:
667 			CTR2(KTR_IW_CXGBE, "%s:pcc5 %p ABORTING", __func__, ep);
668 			break;
669 
670 		case DEAD:
671 			CTR2(KTR_IW_CXGBE, "%s:pcc6 %p DEAD", __func__, ep);
672 			break;
673 		default:
674 			CTR2(KTR_IW_CXGBE, "%s:pcc7 %p unknown ep state",
675 					__func__, ep);
676 			panic("%s:pcc6 %p unknown ep state", __func__, ep);
677 			break;
678 	}
679 	mutex_unlock(&ep->com.mutex);
680 
681 	if (release) {
682 
683 		CTR2(KTR_IW_CXGBE, "%s:pcc8 %p", __func__, ep);
684 		c4iw_put_ep(&ep->com);
685 	}
686 	CTR2(KTR_IW_CXGBE, "%s:pccE %p", __func__, ep);
687 	return;
688 }
689 
690 static void
691 init_sock(struct c4iw_ep_common *epc)
692 {
693 	int rc;
694 	struct sockopt sopt;
695 	struct socket *so = epc->so;
696 	int on = 1;
697 
698 	mutex_lock(&epc->so_mutex);
699 	if ((so == NULL) || (so->so_count == 0)) {
700 		mutex_unlock(&epc->so_mutex);
701 		CTR5(KTR_IW_CXGBE, "%s:iso1 so %p, ep %p, state %s, tid %d",
702 			__func__, so, epc, states[epc->state],
703 			((struct c4iw_ep *)epc)->hwtid);
704 		return;
705 	}
706 	SOCK_LOCK(so);
707 	soupcall_set(so, SO_RCV, c4iw_so_upcall, epc);
708 	so->so_state |= SS_NBIO;
709 	SOCK_UNLOCK(so);
710 	sopt.sopt_dir = SOPT_SET;
711 	sopt.sopt_level = IPPROTO_TCP;
712 	sopt.sopt_name = TCP_NODELAY;
713 	sopt.sopt_val = (caddr_t)&on;
714 	sopt.sopt_valsize = sizeof on;
715 	sopt.sopt_td = NULL;
716 	rc = sosetopt(so, &sopt);
717 	if (rc) {
718 		log(LOG_ERR, "%s: can't set TCP_NODELAY on so %p (%d)\n",
719 		    __func__, so, rc);
720 	}
721 	mutex_unlock(&epc->so_mutex);
722 }
723 
724 static void
725 process_data(struct c4iw_ep *ep)
726 {
727 	struct sockaddr_in *local, *remote;
728 	int disconnect = 0;
729 
730 	CTR5(KTR_IW_CXGBE, "%s: so %p, ep %p, state %s, sbused %d", __func__,
731 	    ep->com.so, ep, states[ep->com.state], sbused(&ep->com.so->so_rcv));
732 
733 	switch (state_read(&ep->com)) {
734 	case MPA_REQ_SENT:
735 		disconnect = process_mpa_reply(ep);
736 		break;
737 	case MPA_REQ_WAIT:
738 		in_getsockaddr(ep->com.so, (struct sockaddr **)&local);
739 		in_getpeeraddr(ep->com.so, (struct sockaddr **)&remote);
740 		ep->com.local_addr = *local;
741 		ep->com.remote_addr = *remote;
742 		free(local, M_SONAME);
743 		free(remote, M_SONAME);
744 		disconnect = process_mpa_request(ep);
745 		break;
746 	default:
747 		if (sbused(&ep->com.so->so_rcv))
748 			log(LOG_ERR, "%s: Unexpected streaming data. ep %p, "
749 			    "state %d, so %p, so_state 0x%x, sbused %u\n",
750 			    __func__, ep, state_read(&ep->com), ep->com.so,
751 			    ep->com.so->so_state, sbused(&ep->com.so->so_rcv));
752 		break;
753 	}
754 	if (disconnect)
755 		c4iw_ep_disconnect(ep, disconnect == 2, GFP_KERNEL);
756 
757 }
758 
759 static void
760 process_connected(struct c4iw_ep *ep)
761 {
762 
763 	if ((ep->com.so->so_state & SS_ISCONNECTED) && !ep->com.so->so_error) {
764 		if (send_mpa_req(ep))
765 			goto err;
766 	}
767 	else {
768 		connect_reply_upcall(ep, -ep->com.so->so_error);
769 		goto err;
770 	}
771 	return;
772 err:
773 	close_socket(&ep->com, 0);
774 	state_set(&ep->com, DEAD);
775 	c4iw_put_ep(&ep->com);
776 	return;
777 }
778 
779 void
780 process_newconn(struct iw_cm_id *parent_cm_id, struct socket *child_so)
781 {
782 	struct c4iw_ep *child_ep;
783 	struct sockaddr_in *local;
784 	struct sockaddr_in *remote;
785 	struct c4iw_ep *parent_ep = parent_cm_id->provider_data;
786 	int ret = 0;
787 
788 	if (!child_so) {
789 		CTR4(KTR_IW_CXGBE,
790 		    "%s: parent so %p, parent ep %p, child so %p, invalid so",
791 		    __func__, parent_ep->com.so, parent_ep, child_so);
792 		log(LOG_ERR, "%s: invalid child socket\n", __func__);
793 		return;
794 	}
795 	child_ep = alloc_ep(sizeof(*child_ep), M_NOWAIT);
796 	if (!child_ep) {
797 		CTR3(KTR_IW_CXGBE, "%s: parent so %p, parent ep %p, ENOMEM",
798 		    __func__, parent_ep->com.so, parent_ep);
799 		log(LOG_ERR, "%s: failed to allocate ep entry\n", __func__);
800 		return;
801 	}
802 	SOCKBUF_LOCK(&child_so->so_rcv);
803 	soupcall_set(child_so, SO_RCV, c4iw_so_upcall, child_ep);
804 	SOCKBUF_UNLOCK(&child_so->so_rcv);
805 
806 	CTR5(KTR_IW_CXGBE,
807 	    "%s: parent so %p, parent ep %p, child so %p, child ep %p",
808 	     __func__, parent_ep->com.so, parent_ep, child_so, child_ep);
809 
810 	in_getsockaddr(child_so, (struct sockaddr **)&local);
811 	in_getpeeraddr(child_so, (struct sockaddr **)&remote);
812 
813 	child_ep->com.local_addr = *local;
814 	child_ep->com.remote_addr = *remote;
815 	child_ep->com.dev = parent_ep->com.dev;
816 	child_ep->com.so = child_so;
817 	child_ep->com.cm_id = NULL;
818 	child_ep->com.thread = parent_ep->com.thread;
819 	child_ep->parent_ep = parent_ep;
820 
821 	free(local, M_SONAME);
822 	free(remote, M_SONAME);
823 
824 	c4iw_get_ep(&parent_ep->com);
825 	init_timer(&child_ep->timer);
826 	state_set(&child_ep->com, MPA_REQ_WAIT);
827 	START_EP_TIMER(child_ep);
828 
829 	/* maybe the request has already been queued up on the socket... */
830 	ret = process_mpa_request(child_ep);
831 	if (ret == 2)
832 		/* ABORT */
833 		c4iw_ep_disconnect(child_ep, 1, GFP_KERNEL);
834 	else if (ret == 1)
835 		/* CLOSE */
836 		c4iw_ep_disconnect(child_ep, 0, GFP_KERNEL);
837 
838 	return;
839 }
840 
841 static int
842 c4iw_so_upcall(struct socket *so, void *arg, int waitflag)
843 {
844 	struct c4iw_ep *ep = arg;
845 
846 	spin_lock(&req_lock);
847 
848 	CTR6(KTR_IW_CXGBE,
849 	    "%s: so %p, so_state 0x%x, ep %p, ep_state %s, tqe_prev %p",
850 	    __func__, so, so->so_state, ep, states[ep->com.state],
851 	    ep->com.entry.tqe_prev);
852 
853 	if (ep && ep->com.so && !ep->com.entry.tqe_prev) {
854 		KASSERT(ep->com.so == so, ("%s: XXX review.", __func__));
855 		c4iw_get_ep(&ep->com);
856 		TAILQ_INSERT_TAIL(&req_list, &ep->com, entry);
857 		queue_work(c4iw_taskq, &c4iw_task);
858 	}
859 
860 	spin_unlock(&req_lock);
861 	return (SU_OK);
862 }
863 
864 static void
865 process_socket_event(struct c4iw_ep *ep)
866 {
867 	int state = state_read(&ep->com);
868 	struct socket *so = ep->com.so;
869 
870 	CTR6(KTR_IW_CXGBE, "process_socket_event: so %p, so_state 0x%x, "
871 	    "so_err %d, sb_state 0x%x, ep %p, ep_state %s", so, so->so_state,
872 	    so->so_error, so->so_rcv.sb_state, ep, states[state]);
873 
874 	if (state == CONNECTING) {
875 		process_connected(ep);
876 		return;
877 	}
878 
879 	if (state == LISTEN) {
880 		/* socket listening events are handled at IWCM */
881 		CTR3(KTR_IW_CXGBE, "%s Invalid ep state:%u, ep:%p", __func__,
882 			    ep->com.state, ep);
883 		BUG();
884 		return;
885 	}
886 
887 	/* connection error */
888 	if (so->so_error) {
889 		process_conn_error(ep);
890 		return;
891 	}
892 
893 	/* peer close */
894 	if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) && state <= CLOSING) {
895 		process_peer_close(ep);
896 		/*
897 		 * check whether socket disconnect event is pending before
898 		 * returning. Fallthrough if yes.
899 		 */
900 		if (!(so->so_state & SS_ISDISCONNECTED))
901 			return;
902 	}
903 
904 	/* close complete */
905 	if (so->so_state & SS_ISDISCONNECTED) {
906 		process_close_complete(ep);
907 		return;
908 	}
909 
910 	/* rx data */
911 	process_data(ep);
912 }
913 
914 SYSCTL_NODE(_hw, OID_AUTO, iw_cxgbe, CTLFLAG_RD, 0, "iw_cxgbe driver parameters");
915 
916 static int dack_mode = 0;
917 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, dack_mode, CTLFLAG_RWTUN, &dack_mode, 0,
918 		"Delayed ack mode (default = 0)");
919 
920 int c4iw_max_read_depth = 8;
921 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, c4iw_max_read_depth, CTLFLAG_RWTUN, &c4iw_max_read_depth, 0,
922 		"Per-connection max ORD/IRD (default = 8)");
923 
924 static int enable_tcp_timestamps;
925 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, enable_tcp_timestamps, CTLFLAG_RWTUN, &enable_tcp_timestamps, 0,
926 		"Enable tcp timestamps (default = 0)");
927 
928 static int enable_tcp_sack;
929 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, enable_tcp_sack, CTLFLAG_RWTUN, &enable_tcp_sack, 0,
930 		"Enable tcp SACK (default = 0)");
931 
932 static int enable_tcp_window_scaling = 1;
933 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, enable_tcp_window_scaling, CTLFLAG_RWTUN, &enable_tcp_window_scaling, 0,
934 		"Enable tcp window scaling (default = 1)");
935 
936 int c4iw_debug = 1;
937 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, c4iw_debug, CTLFLAG_RWTUN, &c4iw_debug, 0,
938 		"Enable debug logging (default = 0)");
939 
940 static int peer2peer = 1;
941 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, peer2peer, CTLFLAG_RWTUN, &peer2peer, 0,
942 		"Support peer2peer ULPs (default = 1)");
943 
944 static int p2p_type = FW_RI_INIT_P2PTYPE_READ_REQ;
945 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, p2p_type, CTLFLAG_RWTUN, &p2p_type, 0,
946 		"RDMAP opcode to use for the RTR message: 1 = RDMA_READ 0 = RDMA_WRITE (default 1)");
947 
948 static int ep_timeout_secs = 60;
949 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, ep_timeout_secs, CTLFLAG_RWTUN, &ep_timeout_secs, 0,
950 		"CM Endpoint operation timeout in seconds (default = 60)");
951 
952 static int mpa_rev = 1;
953 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, mpa_rev, CTLFLAG_RWTUN, &mpa_rev, 0,
954 		"MPA Revision, 0 supports amso1100, 1 is RFC5044 spec compliant, 2 is IETF MPA Peer Connect Draft compliant (default = 1)");
955 
956 static int markers_enabled;
957 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, markers_enabled, CTLFLAG_RWTUN, &markers_enabled, 0,
958 		"Enable MPA MARKERS (default(0) = disabled)");
959 
960 static int crc_enabled = 1;
961 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, crc_enabled, CTLFLAG_RWTUN, &crc_enabled, 0,
962 		"Enable MPA CRC (default(1) = enabled)");
963 
964 static int rcv_win = 256 * 1024;
965 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, rcv_win, CTLFLAG_RWTUN, &rcv_win, 0,
966 		"TCP receive window in bytes (default = 256KB)");
967 
968 static int snd_win = 128 * 1024;
969 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, snd_win, CTLFLAG_RWTUN, &snd_win, 0,
970 		"TCP send window in bytes (default = 128KB)");
971 
972 static void
973 start_ep_timer(struct c4iw_ep *ep)
974 {
975 
976 	if (timer_pending(&ep->timer)) {
977 		CTR2(KTR_IW_CXGBE, "%s: ep %p, already started", __func__, ep);
978 		printk(KERN_ERR "%s timer already started! ep %p\n", __func__,
979 		    ep);
980 		return;
981 	}
982 	clear_bit(TIMEOUT, &ep->com.flags);
983 	c4iw_get_ep(&ep->com);
984 	ep->timer.expires = jiffies + ep_timeout_secs * HZ;
985 	ep->timer.data = (unsigned long)ep;
986 	ep->timer.function = ep_timeout;
987 	add_timer(&ep->timer);
988 }
989 
990 static int
991 stop_ep_timer(struct c4iw_ep *ep)
992 {
993 
994 	del_timer_sync(&ep->timer);
995 	if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
996 		c4iw_put_ep(&ep->com);
997 		return 0;
998 	}
999 	return 1;
1000 }
1001 
1002 static enum
1003 c4iw_ep_state state_read(struct c4iw_ep_common *epc)
1004 {
1005 	enum c4iw_ep_state state;
1006 
1007 	mutex_lock(&epc->mutex);
1008 	state = epc->state;
1009 	mutex_unlock(&epc->mutex);
1010 
1011 	return (state);
1012 }
1013 
1014 static void
1015 __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
1016 {
1017 
1018 	epc->state = new;
1019 }
1020 
1021 static void
1022 state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
1023 {
1024 
1025 	mutex_lock(&epc->mutex);
1026 	__state_set(epc, new);
1027 	mutex_unlock(&epc->mutex);
1028 }
1029 
1030 static void *
1031 alloc_ep(int size, gfp_t gfp)
1032 {
1033 	struct c4iw_ep_common *epc;
1034 
1035 	epc = kzalloc(size, gfp);
1036 	if (epc == NULL)
1037 		return (NULL);
1038 
1039 	kref_init(&epc->kref);
1040 	mutex_init(&epc->mutex);
1041 	mutex_init(&epc->so_mutex);
1042 	c4iw_init_wr_wait(&epc->wr_wait);
1043 
1044 	return (epc);
1045 }
1046 
1047 void
1048 __free_ep(struct c4iw_ep_common *epc)
1049 {
1050 	CTR2(KTR_IW_CXGBE, "%s:feB %p", __func__, epc);
1051 	KASSERT(!epc->so, ("%s warning ep->so %p \n", __func__, epc->so));
1052 	KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list!\n", __func__, epc));
1053 	free(epc, M_DEVBUF);
1054 	CTR2(KTR_IW_CXGBE, "%s:feE %p", __func__, epc);
1055 }
1056 
1057 void _c4iw_free_ep(struct kref *kref)
1058 {
1059 	struct c4iw_ep *ep;
1060 	struct c4iw_ep_common *epc;
1061 
1062 	ep = container_of(kref, struct c4iw_ep, com.kref);
1063 	epc = &ep->com;
1064 	KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list",
1065 	    __func__, epc));
1066 	if (test_bit(QP_REFERENCED, &ep->com.flags))
1067 		deref_qp(ep);
1068 	kfree(ep);
1069 }
1070 
1071 static void release_ep_resources(struct c4iw_ep *ep)
1072 {
1073 	CTR2(KTR_IW_CXGBE, "%s:rerB %p", __func__, ep);
1074 	set_bit(RELEASE_RESOURCES, &ep->com.flags);
1075 	c4iw_put_ep(&ep->com);
1076 	CTR2(KTR_IW_CXGBE, "%s:rerE %p", __func__, ep);
1077 }
1078 
1079 static int
1080 send_mpa_req(struct c4iw_ep *ep)
1081 {
1082 	int mpalen;
1083 	struct mpa_message *mpa;
1084 	struct mpa_v2_conn_params mpa_v2_params;
1085 	struct mbuf *m;
1086 	char mpa_rev_to_use = mpa_rev;
1087 	int err = 0;
1088 
1089 	if (ep->retry_with_mpa_v1)
1090 		mpa_rev_to_use = 1;
1091 	mpalen = sizeof(*mpa) + ep->plen;
1092 	if (mpa_rev_to_use == 2)
1093 		mpalen += sizeof(struct mpa_v2_conn_params);
1094 
1095 	mpa = malloc(mpalen, M_CXGBE, M_NOWAIT);
1096 	if (mpa == NULL) {
1097 		err = -ENOMEM;
1098 		CTR3(KTR_IW_CXGBE, "%s:smr1 ep: %p , error: %d",
1099 				__func__, ep, err);
1100 		goto err;
1101 	}
1102 
1103 	memset(mpa, 0, mpalen);
1104 	memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
1105 	mpa->flags = (crc_enabled ? MPA_CRC : 0) |
1106 		(markers_enabled ? MPA_MARKERS : 0) |
1107 		(mpa_rev_to_use == 2 ? MPA_ENHANCED_RDMA_CONN : 0);
1108 	mpa->private_data_size = htons(ep->plen);
1109 	mpa->revision = mpa_rev_to_use;
1110 
1111 	if (mpa_rev_to_use == 1) {
1112 		ep->tried_with_mpa_v1 = 1;
1113 		ep->retry_with_mpa_v1 = 0;
1114 	}
1115 
1116 	if (mpa_rev_to_use == 2) {
1117 		mpa->private_data_size +=
1118 			htons(sizeof(struct mpa_v2_conn_params));
1119 		mpa_v2_params.ird = htons((u16)ep->ird);
1120 		mpa_v2_params.ord = htons((u16)ep->ord);
1121 
1122 		if (peer2peer) {
1123 			mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
1124 
1125 			if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE) {
1126 				mpa_v2_params.ord |=
1127 				    htons(MPA_V2_RDMA_WRITE_RTR);
1128 			} else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) {
1129 				mpa_v2_params.ord |=
1130 					htons(MPA_V2_RDMA_READ_RTR);
1131 			}
1132 		}
1133 		memcpy(mpa->private_data, &mpa_v2_params,
1134 			sizeof(struct mpa_v2_conn_params));
1135 
1136 		if (ep->plen) {
1137 
1138 			memcpy(mpa->private_data +
1139 				sizeof(struct mpa_v2_conn_params),
1140 				ep->mpa_pkt + sizeof(*mpa), ep->plen);
1141 		}
1142 	} else {
1143 
1144 		if (ep->plen)
1145 			memcpy(mpa->private_data,
1146 					ep->mpa_pkt + sizeof(*mpa), ep->plen);
1147 		CTR2(KTR_IW_CXGBE, "%s:smr7 %p", __func__, ep);
1148 	}
1149 
1150 	m = m_getm(NULL, mpalen, M_NOWAIT, MT_DATA);
1151 	if (m == NULL) {
1152 		err = -ENOMEM;
1153 		CTR3(KTR_IW_CXGBE, "%s:smr2 ep: %p , error: %d",
1154 				__func__, ep, err);
1155 		free(mpa, M_CXGBE);
1156 		goto err;
1157 	}
1158 	m_copyback(m, 0, mpalen, (void *)mpa);
1159 	free(mpa, M_CXGBE);
1160 
1161 	err = -sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT,
1162 			ep->com.thread);
1163 	if (err) {
1164 		CTR3(KTR_IW_CXGBE, "%s:smr3 ep: %p , error: %d",
1165 				__func__, ep, err);
1166 		goto err;
1167 	}
1168 
1169 	START_EP_TIMER(ep);
1170 	state_set(&ep->com, MPA_REQ_SENT);
1171 	ep->mpa_attr.initiator = 1;
1172 	CTR3(KTR_IW_CXGBE, "%s:smrE %p, error: %d", __func__, ep, err);
1173 	return 0;
1174 err:
1175 	connect_reply_upcall(ep, err);
1176 	CTR3(KTR_IW_CXGBE, "%s:smrE %p, error: %d", __func__, ep, err);
1177 	return err;
1178 }
1179 
1180 static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
1181 {
1182 	int mpalen ;
1183 	struct mpa_message *mpa;
1184 	struct mpa_v2_conn_params mpa_v2_params;
1185 	struct mbuf *m;
1186 	int err;
1187 
1188 	CTR4(KTR_IW_CXGBE, "%s:smrejB %p %u %d", __func__, ep, ep->hwtid,
1189 	    ep->plen);
1190 
1191 	mpalen = sizeof(*mpa) + plen;
1192 
1193 	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
1194 
1195 		mpalen += sizeof(struct mpa_v2_conn_params);
1196 		CTR4(KTR_IW_CXGBE, "%s:smrej1 %p %u %d", __func__, ep,
1197 		    ep->mpa_attr.version, mpalen);
1198 	}
1199 
1200 	mpa = malloc(mpalen, M_CXGBE, M_NOWAIT);
1201 	if (mpa == NULL)
1202 		return (-ENOMEM);
1203 
1204 	memset(mpa, 0, mpalen);
1205 	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
1206 	mpa->flags = MPA_REJECT;
1207 	mpa->revision = mpa_rev;
1208 	mpa->private_data_size = htons(plen);
1209 
1210 	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
1211 
1212 		mpa->flags |= MPA_ENHANCED_RDMA_CONN;
1213 		mpa->private_data_size +=
1214 			htons(sizeof(struct mpa_v2_conn_params));
1215 		mpa_v2_params.ird = htons(((u16)ep->ird) |
1216 				(peer2peer ? MPA_V2_PEER2PEER_MODEL :
1217 				 0));
1218 		mpa_v2_params.ord = htons(((u16)ep->ord) | (peer2peer ?
1219 					(p2p_type ==
1220 					 FW_RI_INIT_P2PTYPE_RDMA_WRITE ?
1221 					 MPA_V2_RDMA_WRITE_RTR : p2p_type ==
1222 					 FW_RI_INIT_P2PTYPE_READ_REQ ?
1223 					 MPA_V2_RDMA_READ_RTR : 0) : 0));
1224 		memcpy(mpa->private_data, &mpa_v2_params,
1225 				sizeof(struct mpa_v2_conn_params));
1226 
1227 		if (ep->plen)
1228 			memcpy(mpa->private_data +
1229 					sizeof(struct mpa_v2_conn_params), pdata, plen);
1230 		CTR5(KTR_IW_CXGBE, "%s:smrej3 %p %d %d %d", __func__, ep,
1231 		    mpa_v2_params.ird, mpa_v2_params.ord, ep->plen);
1232 	} else
1233 		if (plen)
1234 			memcpy(mpa->private_data, pdata, plen);
1235 
1236 	m = m_getm(NULL, mpalen, M_NOWAIT, MT_DATA);
1237 	if (m == NULL) {
1238 		free(mpa, M_CXGBE);
1239 		return (-ENOMEM);
1240 	}
1241 	m_copyback(m, 0, mpalen, (void *)mpa);
1242 	free(mpa, M_CXGBE);
1243 
1244 	err = -sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread);
1245 	if (!err)
1246 		ep->snd_seq += mpalen;
1247 	CTR4(KTR_IW_CXGBE, "%s:smrejE %p %u %d", __func__, ep, ep->hwtid, err);
1248 	return err;
1249 }
1250 
1251 static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
1252 {
1253 	int mpalen;
1254 	struct mpa_message *mpa;
1255 	struct mbuf *m;
1256 	struct mpa_v2_conn_params mpa_v2_params;
1257 	int err;
1258 
1259 	CTR2(KTR_IW_CXGBE, "%s:smrepB %p", __func__, ep);
1260 
1261 	mpalen = sizeof(*mpa) + plen;
1262 
1263 	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
1264 
1265 		CTR3(KTR_IW_CXGBE, "%s:smrep1 %p %d", __func__, ep,
1266 		    ep->mpa_attr.version);
1267 		mpalen += sizeof(struct mpa_v2_conn_params);
1268 	}
1269 
1270 	mpa = malloc(mpalen, M_CXGBE, M_NOWAIT);
1271 	if (mpa == NULL)
1272 		return (-ENOMEM);
1273 
1274 	memset(mpa, 0, sizeof(*mpa));
1275 	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
1276 	mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) |
1277 		(markers_enabled ? MPA_MARKERS : 0);
1278 	mpa->revision = ep->mpa_attr.version;
1279 	mpa->private_data_size = htons(plen);
1280 
1281 	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
1282 
1283 		mpa->flags |= MPA_ENHANCED_RDMA_CONN;
1284 		mpa->private_data_size +=
1285 			htons(sizeof(struct mpa_v2_conn_params));
1286 		mpa_v2_params.ird = htons((u16)ep->ird);
1287 		mpa_v2_params.ord = htons((u16)ep->ord);
1288 		CTR5(KTR_IW_CXGBE, "%s:smrep3 %p %d %d %d", __func__, ep,
1289 		    ep->mpa_attr.version, mpa_v2_params.ird, mpa_v2_params.ord);
1290 
1291 		if (peer2peer && (ep->mpa_attr.p2p_type !=
1292 			FW_RI_INIT_P2PTYPE_DISABLED)) {
1293 
1294 			mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
1295 
1296 			if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE) {
1297 
1298 				mpa_v2_params.ord |=
1299 					htons(MPA_V2_RDMA_WRITE_RTR);
1300 				CTR5(KTR_IW_CXGBE, "%s:smrep4 %p %d %d %d",
1301 				    __func__, ep, p2p_type, mpa_v2_params.ird,
1302 				    mpa_v2_params.ord);
1303 			}
1304 			else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) {
1305 
1306 				mpa_v2_params.ord |=
1307 					htons(MPA_V2_RDMA_READ_RTR);
1308 				CTR5(KTR_IW_CXGBE, "%s:smrep5 %p %d %d %d",
1309 				    __func__, ep, p2p_type, mpa_v2_params.ird,
1310 				    mpa_v2_params.ord);
1311 			}
1312 		}
1313 
1314 		memcpy(mpa->private_data, &mpa_v2_params,
1315 			sizeof(struct mpa_v2_conn_params));
1316 
1317 		if (ep->plen)
1318 			memcpy(mpa->private_data +
1319 				sizeof(struct mpa_v2_conn_params), pdata, plen);
1320 	} else
1321 		if (plen)
1322 			memcpy(mpa->private_data, pdata, plen);
1323 
1324 	m = m_getm(NULL, mpalen, M_NOWAIT, MT_DATA);
1325 	if (m == NULL) {
1326 		free(mpa, M_CXGBE);
1327 		return (-ENOMEM);
1328 	}
1329 	m_copyback(m, 0, mpalen, (void *)mpa);
1330 	free(mpa, M_CXGBE);
1331 
1332 
1333 	state_set(&ep->com, MPA_REP_SENT);
1334 	ep->snd_seq += mpalen;
1335 	err = -sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT,
1336 			ep->com.thread);
1337 	CTR3(KTR_IW_CXGBE, "%s:smrepE %p %d", __func__, ep, err);
1338 	return err;
1339 }
1340 
1341 
1342 
1343 static void close_complete_upcall(struct c4iw_ep *ep, int status)
1344 {
1345 	struct iw_cm_event event;
1346 
1347 	CTR2(KTR_IW_CXGBE, "%s:ccuB %p", __func__, ep);
1348 	memset(&event, 0, sizeof(event));
1349 	event.event = IW_CM_EVENT_CLOSE;
1350 	event.status = status;
1351 
1352 	if (ep->com.cm_id) {
1353 
1354 		CTR2(KTR_IW_CXGBE, "%s:ccu1 %1", __func__, ep);
1355 		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1356 		deref_cm_id(&ep->com);
1357 		set_bit(CLOSE_UPCALL, &ep->com.history);
1358 	}
1359 	CTR2(KTR_IW_CXGBE, "%s:ccuE %p", __func__, ep);
1360 }
1361 
1362 static int send_abort(struct c4iw_ep *ep)
1363 {
1364 	int err;
1365 
1366 	CTR2(KTR_IW_CXGBE, "%s:abB %p", __func__, ep);
1367 	abort_socket(ep);
1368 
1369 	/*
1370 	 * Since socket options were set as l_onoff=1 and l_linger=0 in in
1371 	 * abort_socket, invoking soclose here sends a RST (reset) to the peer.
1372 	 */
1373 	err = close_socket(&ep->com, 1);
1374 	set_bit(ABORT_CONN, &ep->com.history);
1375 	CTR2(KTR_IW_CXGBE, "%s:abE %p", __func__, ep);
1376 
1377 	/*
1378 	 * TBD: iw_cgbe driver should receive ABORT reply for every ABORT
1379 	 * request it has sent. But the current TOE driver is not propagating
1380 	 * this ABORT reply event (via do_abort_rpl) to iw_cxgbe. So as a work-
1381 	 * around de-refer 'ep' (which was refered before sending ABORT request)
1382 	 * here instead of doing it in abort_rpl() handler of iw_cxgbe driver.
1383 	 */
1384 	c4iw_put_ep(&ep->com);
1385 	return err;
1386 }
1387 
1388 static void peer_close_upcall(struct c4iw_ep *ep)
1389 {
1390 	struct iw_cm_event event;
1391 
1392 	CTR2(KTR_IW_CXGBE, "%s:pcuB %p", __func__, ep);
1393 	memset(&event, 0, sizeof(event));
1394 	event.event = IW_CM_EVENT_DISCONNECT;
1395 
1396 	if (ep->com.cm_id) {
1397 
1398 		CTR2(KTR_IW_CXGBE, "%s:pcu1 %p", __func__, ep);
1399 		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1400 		set_bit(DISCONN_UPCALL, &ep->com.history);
1401 	}
1402 	CTR2(KTR_IW_CXGBE, "%s:pcuE %p", __func__, ep);
1403 }
1404 
1405 static void peer_abort_upcall(struct c4iw_ep *ep)
1406 {
1407 	struct iw_cm_event event;
1408 
1409 	CTR2(KTR_IW_CXGBE, "%s:pauB %p", __func__, ep);
1410 	memset(&event, 0, sizeof(event));
1411 	event.event = IW_CM_EVENT_CLOSE;
1412 	event.status = -ECONNRESET;
1413 
1414 	if (ep->com.cm_id) {
1415 
1416 		CTR2(KTR_IW_CXGBE, "%s:pau1 %p", __func__, ep);
1417 		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1418 		deref_cm_id(&ep->com);
1419 		set_bit(ABORT_UPCALL, &ep->com.history);
1420 	}
1421 	CTR2(KTR_IW_CXGBE, "%s:pauE %p", __func__, ep);
1422 }
1423 
/*
 * Report IW_CM_EVENT_CONNECT_REPLY to the endpoint's cm_id.
 *
 * -ECONNABORTED and -EPIPE are reported to the handler as -ECONNRESET; any
 * other status is passed through unchanged.  Private data from ep->mpa_pkt
 * is attached only for status 0 or -ECONNREFUSED; when MPA v1 was not tried
 * the struct mpa_v2_conn_params in front of it is skipped.
 *
 * After the upcall, deref_cm_id() is called for every negative status
 * except -ECONNABORTED.
 */
static void connect_reply_upcall(struct c4iw_ep *ep, int status)
{
	struct iw_cm_event ev;

	CTR3(KTR_IW_CXGBE, "%s:cruB %p, status: %d", __func__, ep, status);

	memset(&ev, 0, sizeof(ev));
	ev.event = IW_CM_EVENT_CONNECT_REPLY;
	if (status == -ECONNABORTED || status == -EPIPE)
		ev.status = -ECONNRESET;
	else
		ev.status = status;
	ev.local_addr = ep->com.local_addr;
	ev.remote_addr = ep->com.remote_addr;

	if (status == 0 || status == -ECONNREFUSED) {
		if (ep->tried_with_mpa_v1) {
			CTR2(KTR_IW_CXGBE, "%s:cru2 %p", __func__, ep);
			/* MPA v1: private data directly follows the header. */
			ev.private_data_len = ep->plen;
			ev.private_data = ep->mpa_pkt +
				sizeof(struct mpa_message);
		} else {
			CTR2(KTR_IW_CXGBE, "%s:cru1 %p", __func__, ep);
			/* MPA v2: step over the v2 connection parameters. */
			ev.private_data_len = ep->plen -
				sizeof(struct mpa_v2_conn_params);
			ev.private_data = ep->mpa_pkt +
				sizeof(struct mpa_message) +
				sizeof(struct mpa_v2_conn_params);
		}
	}

	if (ep->com.cm_id != NULL) {
		CTR2(KTR_IW_CXGBE, "%s:cru3 %p", __func__, ep);
		set_bit(CONN_RPL_UPCALL, &ep->com.history);
		ep->com.cm_id->event_handler(ep->com.cm_id, &ev);
	}

	if (status == -ECONNABORTED) {
		CTR3(KTR_IW_CXGBE, "%s:cruE %p %d", __func__, ep, status);
		return;
	}

	if (status < 0) {
		CTR3(KTR_IW_CXGBE, "%s:cru4 %p %d", __func__, ep, status);
		deref_cm_id(&ep->com);
	}

	CTR2(KTR_IW_CXGBE, "%s:cruE %p", __func__, ep);
}
1478 
1479 static int connect_request_upcall(struct c4iw_ep *ep)
1480 {
1481 	struct iw_cm_event event;
1482 	int ret;
1483 
1484 	CTR3(KTR_IW_CXGBE, "%s: ep %p, mpa_v1 %d", __func__, ep,
1485 	    ep->tried_with_mpa_v1);
1486 
1487 	memset(&event, 0, sizeof(event));
1488 	event.event = IW_CM_EVENT_CONNECT_REQUEST;
1489 	event.local_addr = ep->com.local_addr;
1490 	event.remote_addr = ep->com.remote_addr;
1491 	event.provider_data = ep;
1492 	event.so = ep->com.so;
1493 
1494 	if (!ep->tried_with_mpa_v1) {
1495 		/* this means MPA_v2 is used */
1496 		event.ord = ep->ord;
1497 		event.ird = ep->ird;
1498 		event.private_data_len = ep->plen -
1499 			sizeof(struct mpa_v2_conn_params);
1500 		event.private_data = ep->mpa_pkt + sizeof(struct mpa_message) +
1501 			sizeof(struct mpa_v2_conn_params);
1502 	} else {
1503 
1504 		/* this means MPA_v1 is used. Send max supported */
1505 		event.ord = c4iw_max_read_depth;
1506 		event.ird = c4iw_max_read_depth;
1507 		event.private_data_len = ep->plen;
1508 		event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
1509 	}
1510 
1511 	c4iw_get_ep(&ep->com);
1512 	ret = ep->parent_ep->com.cm_id->event_handler(ep->parent_ep->com.cm_id,
1513 	    &event);
1514 	if(ret)
1515 		c4iw_put_ep(&ep->com);
1516 
1517 	set_bit(CONNREQ_UPCALL, &ep->com.history);
1518 	c4iw_put_ep(&ep->parent_ep->com);
1519 	return ret;
1520 }
1521 
1522 static void established_upcall(struct c4iw_ep *ep)
1523 {
1524 	struct iw_cm_event event;
1525 
1526 	CTR2(KTR_IW_CXGBE, "%s:euB %p", __func__, ep);
1527 	memset(&event, 0, sizeof(event));
1528 	event.event = IW_CM_EVENT_ESTABLISHED;
1529 	event.ird = ep->ird;
1530 	event.ord = ep->ord;
1531 
1532 	if (ep->com.cm_id) {
1533 
1534 		CTR2(KTR_IW_CXGBE, "%s:eu1 %p", __func__, ep);
1535 		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1536 		set_bit(ESTAB_UPCALL, &ep->com.history);
1537 	}
1538 	CTR2(KTR_IW_CXGBE, "%s:euE %p", __func__, ep);
1539 }
1540 
1541 
1542 /*
1543  * process_mpa_reply - process streaming mode MPA reply
1544  *
1545  * Returns:
1546  *
1547  * 0 upon success indicating a connect request was delivered to the ULP
1548  * or the mpa request is incomplete but valid so far.
1549  *
1550  * 1 if a failure requires the caller to close the connection.
1551  *
1552  * 2 if a failure requires the caller to abort the connection.
1553  */
1554 static int process_mpa_reply(struct c4iw_ep *ep)
1555 {
1556 	struct mpa_message *mpa;
1557 	struct mpa_v2_conn_params *mpa_v2_params;
1558 	u16 plen;
1559 	u16 resp_ird, resp_ord;
1560 	u8 rtr_mismatch = 0, insuff_ird = 0;
1561 	struct c4iw_qp_attributes attrs;
1562 	enum c4iw_qp_attr_mask mask;
1563 	int err;
1564 	struct mbuf *top, *m;
1565 	int flags = MSG_DONTWAIT;
1566 	struct uio uio;
1567 	int disconnect = 0;
1568 
1569 	CTR2(KTR_IW_CXGBE, "%s:pmrB %p", __func__, ep);
1570 
1571 	/*
1572 	 * Stop mpa timer.  If it expired, then
1573 	 * we ignore the MPA reply.  process_timeout()
1574 	 * will abort the connection.
1575 	 */
1576 	if (STOP_EP_TIMER(ep))
1577 		return 0;
1578 
1579 	uio.uio_resid = 1000000;
1580 	uio.uio_td = ep->com.thread;
1581 	err = soreceive(ep->com.so, NULL, &uio, &top, NULL, &flags);
1582 
1583 	if (err) {
1584 
1585 		if (err == EWOULDBLOCK) {
1586 
1587 			CTR2(KTR_IW_CXGBE, "%s:pmr1 %p", __func__, ep);
1588 			START_EP_TIMER(ep);
1589 			return 0;
1590 		}
1591 		err = -err;
1592 		CTR2(KTR_IW_CXGBE, "%s:pmr2 %p", __func__, ep);
1593 		goto err;
1594 	}
1595 
1596 	if (ep->com.so->so_rcv.sb_mb) {
1597 
1598 		CTR2(KTR_IW_CXGBE, "%s:pmr3 %p", __func__, ep);
1599 		printf("%s data after soreceive called! so %p sb_mb %p top %p\n",
1600 		       __func__, ep->com.so, ep->com.so->so_rcv.sb_mb, top);
1601 	}
1602 
1603 	m = top;
1604 
1605 	do {
1606 
1607 		CTR2(KTR_IW_CXGBE, "%s:pmr4 %p", __func__, ep);
1608 		/*
1609 		 * If we get more than the supported amount of private data
1610 		 * then we must fail this connection.
1611 		 */
1612 		if (ep->mpa_pkt_len + m->m_len > sizeof(ep->mpa_pkt)) {
1613 
1614 			CTR3(KTR_IW_CXGBE, "%s:pmr5 %p %d", __func__, ep,
1615 			    ep->mpa_pkt_len + m->m_len);
1616 			err = (-EINVAL);
1617 			goto err_stop_timer;
1618 		}
1619 
1620 		/*
1621 		 * copy the new data into our accumulation buffer.
1622 		 */
1623 		m_copydata(m, 0, m->m_len, &(ep->mpa_pkt[ep->mpa_pkt_len]));
1624 		ep->mpa_pkt_len += m->m_len;
1625 		if (!m->m_next)
1626 			m = m->m_nextpkt;
1627 		else
1628 			m = m->m_next;
1629 	} while (m);
1630 
1631 	m_freem(top);
1632 	/*
1633 	 * if we don't even have the mpa message, then bail.
1634 	 */
1635 	if (ep->mpa_pkt_len < sizeof(*mpa)) {
1636 		return 0;
1637 	}
1638 	mpa = (struct mpa_message *) ep->mpa_pkt;
1639 
1640 	/* Validate MPA header. */
1641 	if (mpa->revision > mpa_rev) {
1642 
1643 		CTR4(KTR_IW_CXGBE, "%s:pmr6 %p %d %d", __func__, ep,
1644 		    mpa->revision, mpa_rev);
1645 		printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d, "
1646 				" Received = %d\n", __func__, mpa_rev, mpa->revision);
1647 		err = -EPROTO;
1648 		goto err_stop_timer;
1649 	}
1650 
1651 	if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
1652 
1653 		CTR2(KTR_IW_CXGBE, "%s:pmr7 %p", __func__, ep);
1654 		err = -EPROTO;
1655 		goto err_stop_timer;
1656 	}
1657 
1658 	plen = ntohs(mpa->private_data_size);
1659 
1660 	/*
1661 	 * Fail if there's too much private data.
1662 	 */
1663 	if (plen > MPA_MAX_PRIVATE_DATA) {
1664 
1665 		CTR2(KTR_IW_CXGBE, "%s:pmr8 %p", __func__, ep);
1666 		err = -EPROTO;
1667 		goto err_stop_timer;
1668 	}
1669 
1670 	/*
1671 	 * If plen does not account for pkt size
1672 	 */
1673 	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
1674 
1675 		CTR2(KTR_IW_CXGBE, "%s:pmr9 %p", __func__, ep);
1676 		STOP_EP_TIMER(ep);
1677 		err = -EPROTO;
1678 		goto err_stop_timer;
1679 	}
1680 
1681 	ep->plen = (u8) plen;
1682 
1683 	/*
1684 	 * If we don't have all the pdata yet, then bail.
1685 	 * We'll continue process when more data arrives.
1686 	 */
1687 	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) {
1688 
1689 		CTR2(KTR_IW_CXGBE, "%s:pmra %p", __func__, ep);
1690 		return 0;
1691 	}
1692 
1693 	if (mpa->flags & MPA_REJECT) {
1694 
1695 		CTR2(KTR_IW_CXGBE, "%s:pmrb %p", __func__, ep);
1696 		err = -ECONNREFUSED;
1697 		goto err_stop_timer;
1698 	}
1699 
1700 	/*
1701 	 * If we get here we have accumulated the entire mpa
1702 	 * start reply message including private data. And
1703 	 * the MPA header is valid.
1704 	 */
1705 	state_set(&ep->com, FPDU_MODE);
1706 	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
1707 	ep->mpa_attr.recv_marker_enabled = markers_enabled;
1708 	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
1709 	ep->mpa_attr.version = mpa->revision;
1710 	ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
1711 
1712 	if (mpa->revision == 2) {
1713 
1714 		CTR2(KTR_IW_CXGBE, "%s:pmrc %p", __func__, ep);
1715 		ep->mpa_attr.enhanced_rdma_conn =
1716 			mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
1717 
1718 		if (ep->mpa_attr.enhanced_rdma_conn) {
1719 
1720 			CTR2(KTR_IW_CXGBE, "%s:pmrd %p", __func__, ep);
1721 			mpa_v2_params = (struct mpa_v2_conn_params *)
1722 				(ep->mpa_pkt + sizeof(*mpa));
1723 			resp_ird = ntohs(mpa_v2_params->ird) &
1724 				MPA_V2_IRD_ORD_MASK;
1725 			resp_ord = ntohs(mpa_v2_params->ord) &
1726 				MPA_V2_IRD_ORD_MASK;
1727 
1728 			/*
1729 			 * This is a double-check. Ideally, below checks are
1730 			 * not required since ird/ord stuff has been taken
1731 			 * care of in c4iw_accept_cr
1732 			 */
1733 			if ((ep->ird < resp_ord) || (ep->ord > resp_ird)) {
1734 
1735 				CTR2(KTR_IW_CXGBE, "%s:pmre %p", __func__, ep);
1736 				err = -ENOMEM;
1737 				ep->ird = resp_ord;
1738 				ep->ord = resp_ird;
1739 				insuff_ird = 1;
1740 			}
1741 
1742 			if (ntohs(mpa_v2_params->ird) &
1743 				MPA_V2_PEER2PEER_MODEL) {
1744 
1745 				CTR2(KTR_IW_CXGBE, "%s:pmrf %p", __func__, ep);
1746 				if (ntohs(mpa_v2_params->ord) &
1747 					MPA_V2_RDMA_WRITE_RTR) {
1748 
1749 					CTR2(KTR_IW_CXGBE, "%s:pmrg %p", __func__, ep);
1750 					ep->mpa_attr.p2p_type =
1751 						FW_RI_INIT_P2PTYPE_RDMA_WRITE;
1752 				}
1753 				else if (ntohs(mpa_v2_params->ord) &
1754 					MPA_V2_RDMA_READ_RTR) {
1755 
1756 					CTR2(KTR_IW_CXGBE, "%s:pmrh %p", __func__, ep);
1757 					ep->mpa_attr.p2p_type =
1758 						FW_RI_INIT_P2PTYPE_READ_REQ;
1759 				}
1760 			}
1761 		}
1762 	} else {
1763 
1764 		CTR2(KTR_IW_CXGBE, "%s:pmri %p", __func__, ep);
1765 
1766 		if (mpa->revision == 1) {
1767 
1768 			CTR2(KTR_IW_CXGBE, "%s:pmrj %p", __func__, ep);
1769 
1770 			if (peer2peer) {
1771 
1772 				CTR2(KTR_IW_CXGBE, "%s:pmrk %p", __func__, ep);
1773 				ep->mpa_attr.p2p_type = p2p_type;
1774 			}
1775 		}
1776 	}
1777 
1778 	if (set_tcpinfo(ep)) {
1779 
1780 		CTR2(KTR_IW_CXGBE, "%s:pmrl %p", __func__, ep);
1781 		printf("%s set_tcpinfo error\n", __func__);
1782 		err = -ECONNRESET;
1783 		goto err;
1784 	}
1785 
1786 	CTR6(KTR_IW_CXGBE, "%s - crc_enabled = %d, recv_marker_enabled = %d, "
1787 	    "xmit_marker_enabled = %d, version = %d p2p_type = %d", __func__,
1788 	    ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
1789 	    ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
1790 	    ep->mpa_attr.p2p_type);
1791 
1792 	/*
1793 	 * If responder's RTR does not match with that of initiator, assign
1794 	 * FW_RI_INIT_P2PTYPE_DISABLED in mpa attributes so that RTR is not
1795 	 * generated when moving QP to RTS state.
1796 	 * A TERM message will be sent after QP has moved to RTS state
1797 	 */
1798 	if ((ep->mpa_attr.version == 2) && peer2peer &&
1799 		(ep->mpa_attr.p2p_type != p2p_type)) {
1800 
1801 		CTR2(KTR_IW_CXGBE, "%s:pmrm %p", __func__, ep);
1802 		ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
1803 		rtr_mismatch = 1;
1804 	}
1805 
1806 
1807 	//ep->ofld_txq = TOEPCB(ep->com.so)->ofld_txq;
1808 	attrs.mpa_attr = ep->mpa_attr;
1809 	attrs.max_ird = ep->ird;
1810 	attrs.max_ord = ep->ord;
1811 	attrs.llp_stream_handle = ep;
1812 	attrs.next_state = C4IW_QP_STATE_RTS;
1813 
1814 	mask = C4IW_QP_ATTR_NEXT_STATE |
1815 		C4IW_QP_ATTR_LLP_STREAM_HANDLE | C4IW_QP_ATTR_MPA_ATTR |
1816 		C4IW_QP_ATTR_MAX_IRD | C4IW_QP_ATTR_MAX_ORD;
1817 
1818 	/* bind QP and TID with INIT_WR */
1819 	err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, mask, &attrs, 1);
1820 
1821 	if (err) {
1822 
1823 		CTR2(KTR_IW_CXGBE, "%s:pmrn %p", __func__, ep);
1824 		goto err;
1825 	}
1826 
1827 	/*
1828 	 * If responder's RTR requirement did not match with what initiator
1829 	 * supports, generate TERM message
1830 	 */
1831 	if (rtr_mismatch) {
1832 
1833 		CTR2(KTR_IW_CXGBE, "%s:pmro %p", __func__, ep);
1834 		printk(KERN_ERR "%s: RTR mismatch, sending TERM\n", __func__);
1835 		attrs.layer_etype = LAYER_MPA | DDP_LLP;
1836 		attrs.ecode = MPA_NOMATCH_RTR;
1837 		attrs.next_state = C4IW_QP_STATE_TERMINATE;
1838 		err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
1839 			C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
1840 		err = -ENOMEM;
1841 		disconnect = 1;
1842 		goto out;
1843 	}
1844 
1845 	/*
1846 	 * Generate TERM if initiator IRD is not sufficient for responder
1847 	 * provided ORD. Currently, we do the same behaviour even when
1848 	 * responder provided IRD is also not sufficient as regards to
1849 	 * initiator ORD.
1850 	 */
1851 	if (insuff_ird) {
1852 
1853 		CTR2(KTR_IW_CXGBE, "%s:pmrp %p", __func__, ep);
1854 		printk(KERN_ERR "%s: Insufficient IRD, sending TERM\n",
1855 				__func__);
1856 		attrs.layer_etype = LAYER_MPA | DDP_LLP;
1857 		attrs.ecode = MPA_INSUFF_IRD;
1858 		attrs.next_state = C4IW_QP_STATE_TERMINATE;
1859 		err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
1860 			C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
1861 		err = -ENOMEM;
1862 		disconnect = 1;
1863 		goto out;
1864 	}
1865 	goto out;
1866 err_stop_timer:
1867 	STOP_EP_TIMER(ep);
1868 err:
1869 	disconnect = 2;
1870 out:
1871 	connect_reply_upcall(ep, err);
1872 	CTR2(KTR_IW_CXGBE, "%s:pmrE %p", __func__, ep);
1873 	return disconnect;
1874 }
1875 
1876 /*
1877  * process_mpa_request - process streaming mode MPA request
1878  *
1879  * Returns:
1880  *
1881  * 0 upon success indicating a connect request was delivered to the ULP
1882  * or the mpa request is incomplete but valid so far.
1883  *
1884  * 1 if a failure requires the caller to close the connection.
1885  *
1886  * 2 if a failure requires the caller to abort the connection.
1887  */
1888 static int
1889 process_mpa_request(struct c4iw_ep *ep)
1890 {
1891 	struct mpa_message *mpa;
1892 	u16 plen;
1893 	int flags = MSG_DONTWAIT;
1894 	int rc;
1895 	struct iovec iov;
1896 	struct uio uio;
1897 	enum c4iw_ep_state state = state_read(&ep->com);
1898 
1899 	CTR3(KTR_IW_CXGBE, "%s: ep %p, state %s", __func__, ep, states[state]);
1900 
1901 	if (state != MPA_REQ_WAIT)
1902 		return 0;
1903 
1904 	iov.iov_base = &ep->mpa_pkt[ep->mpa_pkt_len];
1905 	iov.iov_len = sizeof(ep->mpa_pkt) - ep->mpa_pkt_len;
1906 	uio.uio_iov = &iov;
1907 	uio.uio_iovcnt = 1;
1908 	uio.uio_offset = 0;
1909 	uio.uio_resid = sizeof(ep->mpa_pkt) - ep->mpa_pkt_len;
1910 	uio.uio_segflg = UIO_SYSSPACE;
1911 	uio.uio_rw = UIO_READ;
1912 	uio.uio_td = NULL; /* uio.uio_td = ep->com.thread; */
1913 
1914 	rc = soreceive(ep->com.so, NULL, &uio, NULL, NULL, &flags);
1915 	if (rc == EAGAIN)
1916 		return 0;
1917 	else if (rc)
1918 		goto err_stop_timer;
1919 
1920 	KASSERT(uio.uio_offset > 0, ("%s: sorecieve on so %p read no data",
1921 	    __func__, ep->com.so));
1922 	ep->mpa_pkt_len += uio.uio_offset;
1923 
1924 	/*
1925 	 * If we get more than the supported amount of private data then we must
1926 	 * fail this connection.  XXX: check so_rcv->sb_cc, or peek with another
1927 	 * soreceive, or increase the size of mpa_pkt by 1 and abort if the last
1928 	 * byte is filled by the soreceive above.
1929 	 */
1930 
1931 	/* Don't even have the MPA message.  Wait for more data to arrive. */
1932 	if (ep->mpa_pkt_len < sizeof(*mpa))
1933 		return 0;
1934 	mpa = (struct mpa_message *) ep->mpa_pkt;
1935 
1936 	/*
1937 	 * Validate MPA Header.
1938 	 */
1939 	if (mpa->revision > mpa_rev) {
1940 		log(LOG_ERR, "%s: MPA version mismatch. Local = %d,"
1941 		    " Received = %d\n", __func__, mpa_rev, mpa->revision);
1942 		goto err_stop_timer;
1943 	}
1944 
1945 	if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key)))
1946 		goto err_stop_timer;
1947 
1948 	/*
1949 	 * Fail if there's too much private data.
1950 	 */
1951 	plen = ntohs(mpa->private_data_size);
1952 	if (plen > MPA_MAX_PRIVATE_DATA)
1953 		goto err_stop_timer;
1954 
1955 	/*
1956 	 * If plen does not account for pkt size
1957 	 */
1958 	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen))
1959 		goto err_stop_timer;
1960 
1961 	ep->plen = (u8) plen;
1962 
1963 	/*
1964 	 * If we don't have all the pdata yet, then bail.
1965 	 */
1966 	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
1967 		return 0;
1968 
1969 	/*
1970 	 * If we get here we have accumulated the entire mpa
1971 	 * start reply message including private data.
1972 	 */
1973 	ep->mpa_attr.initiator = 0;
1974 	ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
1975 	ep->mpa_attr.recv_marker_enabled = markers_enabled;
1976 	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
1977 	ep->mpa_attr.version = mpa->revision;
1978 	if (mpa->revision == 1)
1979 		ep->tried_with_mpa_v1 = 1;
1980 	ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
1981 
1982 	if (mpa->revision == 2) {
1983 		ep->mpa_attr.enhanced_rdma_conn =
1984 		    mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
1985 		if (ep->mpa_attr.enhanced_rdma_conn) {
1986 			struct mpa_v2_conn_params *mpa_v2_params;
1987 			u16 ird, ord;
1988 
1989 			mpa_v2_params = (void *)&ep->mpa_pkt[sizeof(*mpa)];
1990 			ird = ntohs(mpa_v2_params->ird);
1991 			ord = ntohs(mpa_v2_params->ord);
1992 
1993 			ep->ird = ird & MPA_V2_IRD_ORD_MASK;
1994 			ep->ord = ord & MPA_V2_IRD_ORD_MASK;
1995 			if (ird & MPA_V2_PEER2PEER_MODEL && peer2peer) {
1996 				if (ord & MPA_V2_RDMA_WRITE_RTR) {
1997 					ep->mpa_attr.p2p_type =
1998 					    FW_RI_INIT_P2PTYPE_RDMA_WRITE;
1999 				} else if (ord & MPA_V2_RDMA_READ_RTR) {
2000 					ep->mpa_attr.p2p_type =
2001 					    FW_RI_INIT_P2PTYPE_READ_REQ;
2002 				}
2003 			}
2004 		}
2005 	} else if (mpa->revision == 1 && peer2peer)
2006 		ep->mpa_attr.p2p_type = p2p_type;
2007 
2008 	if (set_tcpinfo(ep))
2009 		goto err_stop_timer;
2010 
2011 	CTR5(KTR_IW_CXGBE, "%s: crc_enabled = %d, recv_marker_enabled = %d, "
2012 	    "xmit_marker_enabled = %d, version = %d", __func__,
2013 	    ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
2014 	    ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
2015 
2016 	state_set(&ep->com, MPA_REQ_RCVD);
2017 	STOP_EP_TIMER(ep);
2018 
2019 	/* drive upcall */
2020 	mutex_lock(&ep->parent_ep->com.mutex);
2021 	if (ep->parent_ep->com.state != DEAD) {
2022 		if (connect_request_upcall(ep))
2023 			goto err_unlock_parent;
2024 	} else
2025 		goto err_unlock_parent;
2026 	mutex_unlock(&ep->parent_ep->com.mutex);
2027 	return 0;
2028 
2029 err_unlock_parent:
2030 	mutex_unlock(&ep->parent_ep->com.mutex);
2031 	goto err_out;
2032 err_stop_timer:
2033 	STOP_EP_TIMER(ep);
2034 err_out:
2035 	return 2;
2036 }
2037 
2038 /*
2039  * Upcall from the adapter indicating data has been transmitted.
2040  * For us its just the single MPA request or reply.  We can now free
2041  * the skb holding the mpa message.
2042  */
2043 int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
2044 {
2045 	int err;
2046 	struct c4iw_ep *ep = to_ep(cm_id);
2047 	CTR2(KTR_IW_CXGBE, "%s:crcB %p", __func__, ep);
2048 	int abort = 0;
2049 
2050 	if ((state_read(&ep->com) == DEAD) ||
2051 			(state_read(&ep->com) != MPA_REQ_RCVD)) {
2052 
2053 		CTR2(KTR_IW_CXGBE, "%s:crc1 %p", __func__, ep);
2054 		c4iw_put_ep(&ep->com);
2055 		return -ECONNRESET;
2056 	}
2057 	set_bit(ULP_REJECT, &ep->com.history);
2058 
2059 	if (mpa_rev == 0) {
2060 
2061 		CTR2(KTR_IW_CXGBE, "%s:crc2 %p", __func__, ep);
2062 		abort = 1;
2063 	}
2064 	else {
2065 
2066 		CTR2(KTR_IW_CXGBE, "%s:crc3 %p", __func__, ep);
2067 		abort = send_mpa_reject(ep, pdata, pdata_len);
2068 	}
2069 	stop_ep_timer(ep);
2070 	err = c4iw_ep_disconnect(ep, abort != 0, GFP_KERNEL);
2071 	c4iw_put_ep(&ep->com);
2072 	CTR3(KTR_IW_CXGBE, "%s:crc4 %p, err: %d", __func__, ep, err);
2073 	return 0;
2074 }
2075 
2076 int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
2077 {
2078 	int err;
2079 	struct c4iw_qp_attributes attrs;
2080 	enum c4iw_qp_attr_mask mask;
2081 	struct c4iw_ep *ep = to_ep(cm_id);
2082 	struct c4iw_dev *h = to_c4iw_dev(cm_id->device);
2083 	struct c4iw_qp *qp = get_qhp(h, conn_param->qpn);
2084 	int abort = 0;
2085 
2086 	CTR2(KTR_IW_CXGBE, "%s:cacB %p", __func__, ep);
2087 
2088 	if (state_read(&ep->com) == DEAD) {
2089 
2090 		CTR2(KTR_IW_CXGBE, "%s:cac1 %p", __func__, ep);
2091 		err = -ECONNRESET;
2092 		goto err_out;
2093 	}
2094 
2095 	BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
2096 	BUG_ON(!qp);
2097 
2098 	set_bit(ULP_ACCEPT, &ep->com.history);
2099 
2100 	if ((conn_param->ord > c4iw_max_read_depth) ||
2101 		(conn_param->ird > c4iw_max_read_depth)) {
2102 
2103 		CTR2(KTR_IW_CXGBE, "%s:cac2 %p", __func__, ep);
2104 		err = -EINVAL;
2105 		goto err_abort;
2106 	}
2107 
2108 	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
2109 
2110 		CTR2(KTR_IW_CXGBE, "%s:cac3 %p", __func__, ep);
2111 
2112 		if (conn_param->ord > ep->ird) {
2113 
2114 			CTR2(KTR_IW_CXGBE, "%s:cac4 %p", __func__, ep);
2115 			ep->ird = conn_param->ird;
2116 			ep->ord = conn_param->ord;
2117 			send_mpa_reject(ep, conn_param->private_data,
2118 					conn_param->private_data_len);
2119 			err = -ENOMEM;
2120 			goto err_abort;
2121 		}
2122 
2123 		if (conn_param->ird > ep->ord) {
2124 
2125 			CTR2(KTR_IW_CXGBE, "%s:cac5 %p", __func__, ep);
2126 
2127 			if (!ep->ord) {
2128 
2129 				CTR2(KTR_IW_CXGBE, "%s:cac6 %p", __func__, ep);
2130 				conn_param->ird = 1;
2131 			}
2132 			else {
2133 				CTR2(KTR_IW_CXGBE, "%s:cac7 %p", __func__, ep);
2134 				err = -ENOMEM;
2135 				goto err_abort;
2136 			}
2137 		}
2138 
2139 	}
2140 	ep->ird = conn_param->ird;
2141 	ep->ord = conn_param->ord;
2142 
2143 	if (ep->mpa_attr.version != 2) {
2144 
2145 		CTR2(KTR_IW_CXGBE, "%s:cac8 %p", __func__, ep);
2146 
2147 		if (peer2peer && ep->ird == 0) {
2148 
2149 			CTR2(KTR_IW_CXGBE, "%s:cac9 %p", __func__, ep);
2150 			ep->ird = 1;
2151 		}
2152 	}
2153 
2154 
2155 	ep->com.cm_id = cm_id;
2156 	ref_cm_id(&ep->com);
2157 	ep->com.qp = qp;
2158 	ref_qp(ep);
2159 	//ep->ofld_txq = TOEPCB(ep->com.so)->ofld_txq;
2160 
2161 	/* bind QP to EP and move to RTS */
2162 	attrs.mpa_attr = ep->mpa_attr;
2163 	attrs.max_ird = ep->ird;
2164 	attrs.max_ord = ep->ord;
2165 	attrs.llp_stream_handle = ep;
2166 	attrs.next_state = C4IW_QP_STATE_RTS;
2167 
2168 	/* bind QP and TID with INIT_WR */
2169 	mask = C4IW_QP_ATTR_NEXT_STATE |
2170 		C4IW_QP_ATTR_LLP_STREAM_HANDLE |
2171 		C4IW_QP_ATTR_MPA_ATTR |
2172 		C4IW_QP_ATTR_MAX_IRD |
2173 		C4IW_QP_ATTR_MAX_ORD;
2174 
2175 	err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, mask, &attrs, 1);
2176 
2177 	if (err) {
2178 
2179 		CTR2(KTR_IW_CXGBE, "%s:caca %p", __func__, ep);
2180 		goto err_defef_cm_id;
2181 	}
2182 	err = send_mpa_reply(ep, conn_param->private_data,
2183 			conn_param->private_data_len);
2184 
2185 	if (err) {
2186 
2187 		CTR2(KTR_IW_CXGBE, "%s:caca %p", __func__, ep);
2188 		goto err_defef_cm_id;
2189 	}
2190 
2191 	state_set(&ep->com, FPDU_MODE);
2192 	established_upcall(ep);
2193 	c4iw_put_ep(&ep->com);
2194 	CTR2(KTR_IW_CXGBE, "%s:cacE %p", __func__, ep);
2195 	return 0;
2196 err_defef_cm_id:
2197 	deref_cm_id(&ep->com);
2198 err_abort:
2199 	abort = 1;
2200 err_out:
2201 	if (abort)
2202 		c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
2203 	c4iw_put_ep(&ep->com);
2204 	CTR2(KTR_IW_CXGBE, "%s:cacE err %p", __func__, ep);
2205 	return err;
2206 }
2207 
2208 
2209 
2210 int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
2211 {
2212 	int err = 0;
2213 	struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
2214 	struct c4iw_ep *ep = NULL;
2215 	struct nhop4_extended nh4;
2216 	struct toedev *tdev;
2217 
2218 	CTR2(KTR_IW_CXGBE, "%s:ccB %p", __func__, cm_id);
2219 
2220 	if ((conn_param->ord > c4iw_max_read_depth) ||
2221 		(conn_param->ird > c4iw_max_read_depth)) {
2222 
2223 		CTR2(KTR_IW_CXGBE, "%s:cc1 %p", __func__, cm_id);
2224 		err = -EINVAL;
2225 		goto out;
2226 	}
2227 	ep = alloc_ep(sizeof(*ep), M_NOWAIT);
2228 
2229 	if (!ep) {
2230 
2231 		CTR2(KTR_IW_CXGBE, "%s:cc2 %p", __func__, cm_id);
2232 		printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__);
2233 		err = -ENOMEM;
2234 		goto out;
2235 	}
2236 	init_timer(&ep->timer);
2237 	ep->plen = conn_param->private_data_len;
2238 
2239 	if (ep->plen) {
2240 
2241 		CTR2(KTR_IW_CXGBE, "%s:cc3 %p", __func__, ep);
2242 		memcpy(ep->mpa_pkt + sizeof(struct mpa_message),
2243 				conn_param->private_data, ep->plen);
2244 	}
2245 	ep->ird = conn_param->ird;
2246 	ep->ord = conn_param->ord;
2247 
2248 	if (peer2peer && ep->ord == 0) {
2249 
2250 		CTR2(KTR_IW_CXGBE, "%s:cc4 %p", __func__, ep);
2251 		ep->ord = 1;
2252 	}
2253 
2254 	ep->com.dev = dev;
2255 	ep->com.cm_id = cm_id;
2256 	ref_cm_id(&ep->com);
2257 	ep->com.qp = get_qhp(dev, conn_param->qpn);
2258 
2259 	if (!ep->com.qp) {
2260 
2261 		CTR2(KTR_IW_CXGBE, "%s:cc5 %p", __func__, ep);
2262 		err = -EINVAL;
2263 		goto fail2;
2264 	}
2265 	ref_qp(ep);
2266 	ep->com.thread = curthread;
2267 	ep->com.so = cm_id->so;
2268 
2269 	init_sock(&ep->com);
2270 
2271 	/* find a route */
2272 	err = find_route(
2273 		cm_id->local_addr.sin_addr.s_addr,
2274 		cm_id->remote_addr.sin_addr.s_addr,
2275 		cm_id->local_addr.sin_port,
2276 		cm_id->remote_addr.sin_port, 0, &nh4);
2277 
2278 	if (err) {
2279 
2280 		CTR2(KTR_IW_CXGBE, "%s:cc7 %p", __func__, ep);
2281 		printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
2282 		err = -EHOSTUNREACH;
2283 		goto fail2;
2284 	}
2285 
2286 	if (!(nh4.nh_ifp->if_capenable & IFCAP_TOE)) {
2287 
2288 		CTR2(KTR_IW_CXGBE, "%s:cc8 %p", __func__, ep);
2289 		printf("%s - interface not TOE capable.\n", __func__);
2290 		close_socket(&ep->com, 0);
2291 		err = -ENOPROTOOPT;
2292 		goto fail3;
2293 	}
2294 	tdev = TOEDEV(nh4.nh_ifp);
2295 
2296 	if (tdev == NULL) {
2297 
2298 		CTR2(KTR_IW_CXGBE, "%s:cc9 %p", __func__, ep);
2299 		printf("%s - No toedev for interface.\n", __func__);
2300 		goto fail3;
2301 	}
2302 	fib4_free_nh_ext(RT_DEFAULT_FIB, &nh4);
2303 
2304 	state_set(&ep->com, CONNECTING);
2305 	ep->tos = 0;
2306 	ep->com.local_addr = cm_id->local_addr;
2307 	ep->com.remote_addr = cm_id->remote_addr;
2308 	err = soconnect(ep->com.so, (struct sockaddr *)&ep->com.remote_addr,
2309 		ep->com.thread);
2310 
2311 	if (!err) {
2312 		CTR2(KTR_IW_CXGBE, "%s:cca %p", __func__, ep);
2313 		goto out;
2314 	} else {
2315 		close_socket(&ep->com, 0);
2316 		goto fail2;
2317 	}
2318 
2319 fail3:
2320 	CTR2(KTR_IW_CXGBE, "%s:ccb %p", __func__, ep);
2321 	fib4_free_nh_ext(RT_DEFAULT_FIB, &nh4);
2322 fail2:
2323 	deref_cm_id(&ep->com);
2324 	c4iw_put_ep(&ep->com);
2325 out:
2326 	CTR2(KTR_IW_CXGBE, "%s:ccE %p", __func__, ep);
2327 	return err;
2328 }
2329 
2330 /*
2331  * iwcm->create_listen_ep.  Returns -errno on failure.
2332  */
2333 int
2334 c4iw_create_listen_ep(struct iw_cm_id *cm_id, int backlog)
2335 {
2336 	int rc;
2337 	struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
2338 	struct c4iw_listen_ep *ep;
2339 	struct socket *so = cm_id->so;
2340 
2341 	ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
2342 	CTR5(KTR_IW_CXGBE, "%s: cm_id %p, lso %p, ep %p, inp %p", __func__,
2343 	    cm_id, so, ep, so->so_pcb);
2344 	if (ep == NULL) {
2345 		log(LOG_ERR, "%s: failed to alloc memory for endpoint\n",
2346 		    __func__);
2347 		rc = ENOMEM;
2348 		goto failed;
2349 	}
2350 
2351 	ep->com.cm_id = cm_id;
2352 	ref_cm_id(&ep->com);
2353 	ep->com.dev = dev;
2354 	ep->backlog = backlog;
2355 	ep->com.local_addr = cm_id->local_addr;
2356 	ep->com.thread = curthread;
2357 	state_set(&ep->com, LISTEN);
2358 	ep->com.so = so;
2359 
2360 	cm_id->provider_data = ep;
2361 	return (0);
2362 
2363 failed:
2364 	CTR3(KTR_IW_CXGBE, "%s: cm_id %p, FAILED (%d)", __func__, cm_id, rc);
2365 	return (-rc);
2366 }
2367 
2368 void
2369 c4iw_destroy_listen_ep(struct iw_cm_id *cm_id)
2370 {
2371 	struct c4iw_listen_ep *ep = to_listen_ep(cm_id);
2372 
2373 	CTR4(KTR_IW_CXGBE, "%s: cm_id %p, so %p, state %s", __func__, cm_id,
2374 	    cm_id->so, states[ep->com.state]);
2375 
2376 	state_set(&ep->com, DEAD);
2377 	deref_cm_id(&ep->com);
2378 	c4iw_put_ep(&ep->com);
2379 
2380 	return;
2381 }
2382 
2383 int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
2384 {
2385 	int ret = 0;
2386 	int close = 0;
2387 	int fatal = 0;
2388 	struct c4iw_rdev *rdev;
2389 
2390 	mutex_lock(&ep->com.mutex);
2391 
2392 	CTR2(KTR_IW_CXGBE, "%s:cedB %p", __func__, ep);
2393 
2394 	rdev = &ep->com.dev->rdev;
2395 
2396 	if (c4iw_fatal_error(rdev)) {
2397 
2398 		CTR2(KTR_IW_CXGBE, "%s:ced1 %p", __func__, ep);
2399 		fatal = 1;
2400 		close_complete_upcall(ep, -ECONNRESET);
2401 		ep->com.state = DEAD;
2402 	}
2403 	CTR3(KTR_IW_CXGBE, "%s:ced2 %p %s", __func__, ep,
2404 	    states[ep->com.state]);
2405 
2406 	switch (ep->com.state) {
2407 
2408 		case MPA_REQ_WAIT:
2409 		case MPA_REQ_SENT:
2410 		case MPA_REQ_RCVD:
2411 		case MPA_REP_SENT:
2412 		case FPDU_MODE:
2413 			close = 1;
2414 			if (abrupt)
2415 				ep->com.state = ABORTING;
2416 			else {
2417 				ep->com.state = CLOSING;
2418 				START_EP_TIMER(ep);
2419 			}
2420 			set_bit(CLOSE_SENT, &ep->com.flags);
2421 			break;
2422 
2423 		case CLOSING:
2424 
2425 			if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) {
2426 
2427 				close = 1;
2428 				if (abrupt) {
2429 					STOP_EP_TIMER(ep);
2430 					ep->com.state = ABORTING;
2431 				} else
2432 					ep->com.state = MORIBUND;
2433 			}
2434 			break;
2435 
2436 		case MORIBUND:
2437 		case ABORTING:
2438 		case DEAD:
2439 			CTR3(KTR_IW_CXGBE,
2440 			    "%s ignoring disconnect ep %p state %u", __func__,
2441 			    ep, ep->com.state);
2442 			break;
2443 
2444 		default:
2445 			BUG();
2446 			break;
2447 	}
2448 
2449 	mutex_unlock(&ep->com.mutex);
2450 
2451 	if (close) {
2452 
2453 		CTR2(KTR_IW_CXGBE, "%s:ced3 %p", __func__, ep);
2454 
2455 		if (abrupt) {
2456 
2457 			CTR2(KTR_IW_CXGBE, "%s:ced4 %p", __func__, ep);
2458 			set_bit(EP_DISC_ABORT, &ep->com.history);
2459 			close_complete_upcall(ep, -ECONNRESET);
2460 			ret = send_abort(ep);
2461 		} else {
2462 
2463 			CTR2(KTR_IW_CXGBE, "%s:ced5 %p", __func__, ep);
2464 			set_bit(EP_DISC_CLOSE, &ep->com.history);
2465 
2466 			if (!ep->parent_ep)
2467 				__state_set(&ep->com, MORIBUND);
2468 			ret = shutdown_socket(&ep->com);
2469 		}
2470 
2471 		if (ret) {
2472 
2473 			fatal = 1;
2474 		}
2475 	}
2476 
2477 	if (fatal) {
2478 		set_bit(EP_DISC_FAIL, &ep->com.history);
2479 		if (!abrupt) {
2480 			STOP_EP_TIMER(ep);
2481 			close_complete_upcall(ep, -EIO);
2482 		}
2483 		if (ep->com.qp) {
2484 			struct c4iw_qp_attributes attrs;
2485 
2486 			attrs.next_state = C4IW_QP_STATE_ERROR;
2487 			ret = c4iw_modify_qp(ep->com.dev, ep->com.qp,
2488 						C4IW_QP_ATTR_NEXT_STATE,
2489 						&attrs, 1);
2490 			if (ret) {
2491 				CTR2(KTR_IW_CXGBE, "%s:ced7 %p", __func__, ep);
2492 				printf("%s - qp <- error failed!\n", __func__);
2493 			}
2494 		}
2495 		release_ep_resources(ep);
2496 		ep->com.state = DEAD;
2497 		CTR2(KTR_IW_CXGBE, "%s:ced6 %p", __func__, ep);
2498 	}
2499 	CTR2(KTR_IW_CXGBE, "%s:cedE %p", __func__, ep);
2500 	return ret;
2501 }
2502 
2503 #ifdef C4IW_EP_REDIRECT
2504 int c4iw_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new,
2505 		struct l2t_entry *l2t)
2506 {
2507 	struct c4iw_ep *ep = ctx;
2508 
2509 	if (ep->dst != old)
2510 		return 0;
2511 
2512 	PDBG("%s ep %p redirect to dst %p l2t %p\n", __func__, ep, new,
2513 			l2t);
2514 	dst_hold(new);
2515 	cxgb4_l2t_release(ep->l2t);
2516 	ep->l2t = l2t;
2517 	dst_release(old);
2518 	ep->dst = new;
2519 	return 1;
2520 }
2521 #endif
2522 
2523 
2524 
2525 static void ep_timeout(unsigned long arg)
2526 {
2527 	struct c4iw_ep *ep = (struct c4iw_ep *)arg;
2528 	int kickit = 0;
2529 
2530 	CTR2(KTR_IW_CXGBE, "%s:etB %p", __func__, ep);
2531 	spin_lock(&timeout_lock);
2532 
2533 	if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
2534 
2535 		/*
2536 		 * Only insert if it is not already on the list.
2537 		 */
2538 		if (!ep->entry.next) {
2539 			list_add_tail(&ep->entry, &timeout_list);
2540 			kickit = 1;
2541 		}
2542 	}
2543 	spin_unlock(&timeout_lock);
2544 
2545 	if (kickit) {
2546 
2547 		CTR2(KTR_IW_CXGBE, "%s:et1 %p", __func__, ep);
2548 		queue_work(c4iw_taskq, &c4iw_task);
2549 	}
2550 	CTR2(KTR_IW_CXGBE, "%s:etE %p", __func__, ep);
2551 }
2552 
2553 static int fw6_wr_rpl(struct adapter *sc, const __be64 *rpl)
2554 {
2555 	uint64_t val = be64toh(*rpl);
2556 	int ret;
2557 	struct c4iw_wr_wait *wr_waitp;
2558 
2559 	ret = (int)((val >> 8) & 0xff);
2560 	wr_waitp = (struct c4iw_wr_wait *)rpl[1];
2561 	CTR3(KTR_IW_CXGBE, "%s wr_waitp %p ret %u", __func__, wr_waitp, ret);
2562 	if (wr_waitp)
2563 		c4iw_wake_up(wr_waitp, ret ? -ret : 0);
2564 
2565 	return (0);
2566 }
2567 
2568 static int fw6_cqe_handler(struct adapter *sc, const __be64 *rpl)
2569 {
2570 	struct t4_cqe cqe =*(const struct t4_cqe *)(&rpl[0]);
2571 
2572 	CTR2(KTR_IW_CXGBE, "%s rpl %p", __func__, rpl);
2573 	c4iw_ev_dispatch(sc->iwarp_softc, &cqe);
2574 
2575 	return (0);
2576 }
2577 
2578 static int terminate(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
2579 {
2580 	struct adapter *sc = iq->adapter;
2581 	const struct cpl_rdma_terminate *cpl = mtod(m, const void *);
2582 	unsigned int tid = GET_TID(cpl);
2583 	struct c4iw_qp_attributes attrs;
2584 	struct toepcb *toep = lookup_tid(sc, tid);
2585 	struct socket *so;
2586 	struct c4iw_ep *ep;
2587 
2588 	INP_WLOCK(toep->inp);
2589 	so = inp_inpcbtosocket(toep->inp);
2590 	ep = so->so_rcv.sb_upcallarg;
2591 	INP_WUNLOCK(toep->inp);
2592 
2593 	CTR2(KTR_IW_CXGBE, "%s:tB %p %d", __func__, ep);
2594 
2595 	if (ep && ep->com.qp) {
2596 
2597 		printk(KERN_WARNING MOD "TERM received tid %u qpid %u\n", tid,
2598 				ep->com.qp->wq.sq.qid);
2599 		attrs.next_state = C4IW_QP_STATE_TERMINATE;
2600 		c4iw_modify_qp(ep->com.dev, ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, &attrs,
2601 				1);
2602 	} else
2603 		printk(KERN_WARNING MOD "TERM received tid %u no ep/qp\n", tid);
2604 	CTR2(KTR_IW_CXGBE, "%s:tE %p %d", __func__, ep);
2605 
2606 	return 0;
2607 }
2608 
2609 int __init c4iw_cm_init(void)
2610 {
2611 
2612 	t4_register_cpl_handler(CPL_RDMA_TERMINATE, terminate);
2613 	t4_register_fw_msg_handler(FW6_TYPE_WR_RPL, fw6_wr_rpl);
2614 	t4_register_fw_msg_handler(FW6_TYPE_CQE, fw6_cqe_handler);
2615 	t4_register_an_handler(c4iw_ev_handler);
2616 
2617 	TAILQ_INIT(&req_list);
2618 	spin_lock_init(&req_lock);
2619 	INIT_LIST_HEAD(&timeout_list);
2620 	spin_lock_init(&timeout_lock);
2621 
2622 	INIT_WORK(&c4iw_task, process_req);
2623 
2624 	c4iw_taskq = create_singlethread_workqueue("iw_cxgbe");
2625 	if (!c4iw_taskq)
2626 		return -ENOMEM;
2627 
2628 	return 0;
2629 }
2630 
2631 void __exit c4iw_cm_term(void)
2632 {
2633 	WARN_ON(!TAILQ_EMPTY(&req_list));
2634 	WARN_ON(!list_empty(&timeout_list));
2635 	flush_workqueue(c4iw_taskq);
2636 	destroy_workqueue(c4iw_taskq);
2637 
2638 	t4_register_cpl_handler(CPL_RDMA_TERMINATE, NULL);
2639 	t4_register_fw_msg_handler(FW6_TYPE_WR_RPL, NULL);
2640 	t4_register_fw_msg_handler(FW6_TYPE_CQE, NULL);
2641 	t4_register_an_handler(NULL);
2642 }
2643 #endif
2644