/*
 * Copyright (c) 2009-2013, 2016 Chelsio, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"

#ifdef TCP_OFFLOAD
#include <sys/types.h>
#include <sys/malloc.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sockio.h>
#include <sys/taskqueue.h>
#include <netinet/in.h>
#include <net/route.h>

#include <netinet/in_systm.h>
#include <netinet/in_pcb.h>
#include <netinet/ip.h>
#include <netinet/in_fib.h>
#include <netinet/ip_var.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp.h>
#include <netinet/tcpip.h>

#include <netinet/toecore.h>

struct sge_iq;
struct rss_header;
#include <linux/types.h>
#include "offload.h"
#include "tom/t4_tom.h"

#define TOEPCB(so)  ((struct toepcb *)(so_sototcpcb((so))->t_toe))

#include "iw_cxgbe.h"
#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/notifier.h>
#include <linux/inetdevice.h>
#include <linux/if_vlan.h>
#include <net/netevent.h>

static spinlock_t req_lock;
static TAILQ_HEAD(c4iw_ep_list, c4iw_ep_common) req_list;
static struct work_struct c4iw_task;
static struct workqueue_struct *c4iw_taskq;
static LIST_HEAD(timeout_list);
static spinlock_t timeout_lock;

static void process_req(struct work_struct *ctx);
static void start_ep_timer(struct c4iw_ep *ep);
static void stop_ep_timer(struct c4iw_ep *ep);
static int set_tcpinfo(struct c4iw_ep *ep);
static enum c4iw_ep_state state_read(struct c4iw_ep_common *epc);
static void __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state tostate);
static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state tostate);
static void *alloc_ep(int size, gfp_t flags);
void __free_ep(struct c4iw_ep_common *epc);
static int find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port,
    __be16 peer_port, u8 tos, struct nhop4_extended *pnh4);
static int close_socket(struct c4iw_ep_common *epc, int close);
static int shutdown_socket(struct c4iw_ep_common *epc);
static void abort_socket(struct c4iw_ep *ep);
static void send_mpa_req(struct c4iw_ep *ep);
static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen);
static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen);
static void close_complete_upcall(struct c4iw_ep *ep, int status);
static int abort_connection(struct c4iw_ep *ep);
static void peer_close_upcall(struct c4iw_ep *ep);
static void peer_abort_upcall(struct c4iw_ep *ep);
static void connect_reply_upcall(struct c4iw_ep *ep, int status);
static int connect_request_upcall(struct c4iw_ep *ep);
static void established_upcall(struct c4iw_ep *ep);
static void process_mpa_reply(struct c4iw_ep *ep);
static void process_mpa_request(struct c4iw_ep *ep);
static void process_peer_close(struct c4iw_ep *ep);
static void process_conn_error(struct c4iw_ep *ep);
static void process_close_complete(struct c4iw_ep *ep);
static void ep_timeout(unsigned long arg);
static void init_sock(struct c4iw_ep_common *epc);
static void process_data(struct c4iw_ep *ep);
static void process_connected(struct c4iw_ep *ep);
static int c4iw_so_upcall(struct socket *so, void *arg, int waitflag);
static void process_socket_event(struct c4iw_ep *ep);
static void release_ep_resources(struct c4iw_ep *ep);

#define START_EP_TIMER(ep) \
    do { \
            CTR3(KTR_IW_CXGBE, "start_ep_timer (%s:%d) ep %p", \
                __func__, __LINE__, (ep)); \
            start_ep_timer(ep); \
    } while (0)

#define STOP_EP_TIMER(ep) \
    do { \
            CTR3(KTR_IW_CXGBE, "stop_ep_timer (%s:%d) ep %p", \
                __func__, __LINE__, (ep)); \
            stop_ep_timer(ep); \
    } while (0)

#ifdef KTR
static char *states[] = {
        "idle",
        "listen",
        "connecting",
        "mpa_wait_req",
        "mpa_req_sent",
        "mpa_req_rcvd",
        "mpa_rep_sent",
        "fpdu_mode",
        "aborting",
        "closing",
        "moribund",
        "dead",
        NULL,
};
#endif

static void
process_req(struct work_struct *ctx)
{
        struct c4iw_ep_common *epc;

        spin_lock(&req_lock);
        while (!TAILQ_EMPTY(&req_list)) {
                epc = TAILQ_FIRST(&req_list);
                TAILQ_REMOVE(&req_list, epc, entry);
                epc->entry.tqe_prev = NULL;
                spin_unlock(&req_lock);
                if (epc->so)
                        process_socket_event((struct c4iw_ep *)epc);
                c4iw_put_ep(epc);
                spin_lock(&req_lock);
        }
        spin_unlock(&req_lock);
}

/*
 * XXX: doesn't belong here in the iWARP driver.
 * XXX: assumes that the connection was offloaded by cxgbe/t4_tom if TF_TOE is
 * set. Is this a valid assumption for active open?
 */
static int
set_tcpinfo(struct c4iw_ep *ep)
{
        struct socket *so = ep->com.so;
        struct inpcb *inp = sotoinpcb(so);
        struct tcpcb *tp;
        struct toepcb *toep;
        int rc = 0;

        INP_WLOCK(inp);
        tp = intotcpcb(inp);
        if ((tp->t_flags & TF_TOE) == 0) {
                rc = EINVAL;
                log(LOG_ERR, "%s: connection not offloaded (so %p, ep %p)\n",
                    __func__, so, ep);
                goto done;
        }
        toep = TOEPCB(so);

        ep->hwtid = toep->tid;
        ep->snd_seq = tp->snd_nxt;
        ep->rcv_seq = tp->rcv_nxt;
        ep->emss = max(tp->t_maxseg, 128);
done:
        INP_WUNLOCK(inp);
        return (rc);

}

static int
find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port,
    __be16 peer_port, u8 tos, struct nhop4_extended *pnh4)
{
        struct in_addr addr;
        int err;

        CTR5(KTR_IW_CXGBE, "%s:frtB %x, %x, %d, %d", __func__, local_ip,
            peer_ip, ntohs(local_port), ntohs(peer_port));

        addr.s_addr = peer_ip;
        err = fib4_lookup_nh_ext(RT_DEFAULT_FIB, addr, NHR_REF, 0, pnh4);

        CTR2(KTR_IW_CXGBE, "%s:frtE %d", __func__, err);
        return err;
}

static int
close_socket(struct c4iw_ep_common *epc, int close)
{
        struct socket *so = epc->so;
        int rc;

        CTR4(KTR_IW_CXGBE, "%s: so %p, ep %p, state %s", __func__, so, epc,
            states[epc->state]);

        SOCK_LOCK(so);
        soupcall_clear(so, SO_RCV);
        SOCK_UNLOCK(so);

        if (close)
                rc = soclose(so);
        else
                rc = soshutdown(so, SHUT_WR | SHUT_RD);
        epc->so = NULL;

        return (rc);
}

static int
shutdown_socket(struct c4iw_ep_common *epc)
{

        CTR4(KTR_IW_CXGBE, "%s: so %p, ep %p, state %s", __func__, epc->so, epc,
            states[epc->state]);

        return (soshutdown(epc->so, SHUT_WR));
}

static void
abort_socket(struct c4iw_ep *ep)
{
        struct sockopt sopt;
        int rc;
        struct linger l;

        CTR4(KTR_IW_CXGBE, "%s ep %p so %p state %s", __func__, ep, ep->com.so,
            states[ep->com.state]);

        l.l_onoff = 1;
        l.l_linger = 0;

        /* linger_time of 0 forces RST to be sent */
        sopt.sopt_dir = SOPT_SET;
        sopt.sopt_level = SOL_SOCKET;
        sopt.sopt_name = SO_LINGER;
        sopt.sopt_val = (caddr_t)&l;
        sopt.sopt_valsize = sizeof l;
        sopt.sopt_td = NULL;
        rc = sosetopt(ep->com.so, &sopt);
        if (rc) {
                log(LOG_ERR, "%s: can't set linger to 0, no RST! err %d\n",
                    __func__, rc);
        }
}

static void
process_peer_close(struct c4iw_ep *ep)
{
        struct c4iw_qp_attributes attrs;
        int disconnect = 1;
        int release = 0;

        CTR4(KTR_IW_CXGBE, "%s:ppcB ep %p so %p state %s", __func__, ep,
            ep->com.so, states[ep->com.state]);

        mutex_lock(&ep->com.mutex);
        switch (ep->com.state) {

        case MPA_REQ_WAIT:
                CTR2(KTR_IW_CXGBE, "%s:ppc1 %p MPA_REQ_WAIT CLOSING",
                    __func__, ep);
                __state_set(&ep->com, CLOSING);
                break;

        case MPA_REQ_SENT:
                CTR2(KTR_IW_CXGBE, "%s:ppc2 %p MPA_REQ_SENT CLOSING",
                    __func__, ep);
                __state_set(&ep->com, DEAD);
                connect_reply_upcall(ep, -ECONNABORTED);

                disconnect = 0;
                STOP_EP_TIMER(ep);
                close_socket(&ep->com, 0);
                ep->com.cm_id->rem_ref(ep->com.cm_id);
                ep->com.cm_id = NULL;
                ep->com.qp = NULL;
                release = 1;
                break;

        case MPA_REQ_RCVD:

                /*
                 * We're gonna mark this puppy DEAD, but keep
                 * the reference on it until the ULP accepts or
                 * rejects the CR.
319 */ 320 CTR2(KTR_IW_CXGBE, "%s:ppc3 %p MPA_REQ_RCVD CLOSING", 321 __func__, ep); 322 __state_set(&ep->com, CLOSING); 323 c4iw_get_ep(&ep->com); 324 break; 325 326 case MPA_REP_SENT: 327 CTR2(KTR_IW_CXGBE, "%s:ppc4 %p MPA_REP_SENT CLOSING", 328 __func__, ep); 329 __state_set(&ep->com, CLOSING); 330 break; 331 332 case FPDU_MODE: 333 CTR2(KTR_IW_CXGBE, "%s:ppc5 %p FPDU_MODE CLOSING", 334 __func__, ep); 335 START_EP_TIMER(ep); 336 __state_set(&ep->com, CLOSING); 337 attrs.next_state = C4IW_QP_STATE_CLOSING; 338 c4iw_modify_qp(ep->com.dev, ep->com.qp, 339 C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); 340 peer_close_upcall(ep); 341 break; 342 343 case ABORTING: 344 CTR2(KTR_IW_CXGBE, "%s:ppc6 %p ABORTING (disconn)", 345 __func__, ep); 346 disconnect = 0; 347 break; 348 349 case CLOSING: 350 CTR2(KTR_IW_CXGBE, "%s:ppc7 %p CLOSING MORIBUND", 351 __func__, ep); 352 __state_set(&ep->com, MORIBUND); 353 disconnect = 0; 354 break; 355 356 case MORIBUND: 357 CTR2(KTR_IW_CXGBE, "%s:ppc8 %p MORIBUND DEAD", __func__, 358 ep); 359 STOP_EP_TIMER(ep); 360 if (ep->com.cm_id && ep->com.qp) { 361 attrs.next_state = C4IW_QP_STATE_IDLE; 362 c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, 363 C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); 364 } 365 close_socket(&ep->com, 0); 366 close_complete_upcall(ep, 0); 367 __state_set(&ep->com, DEAD); 368 release = 1; 369 disconnect = 0; 370 break; 371 372 case DEAD: 373 CTR2(KTR_IW_CXGBE, "%s:ppc9 %p DEAD (disconn)", 374 __func__, ep); 375 disconnect = 0; 376 break; 377 378 default: 379 panic("%s: ep %p state %d", __func__, ep, 380 ep->com.state); 381 break; 382 } 383 384 mutex_unlock(&ep->com.mutex); 385 386 if (disconnect) { 387 388 CTR2(KTR_IW_CXGBE, "%s:ppca %p", __func__, ep); 389 c4iw_ep_disconnect(ep, 0, M_NOWAIT); 390 } 391 if (release) { 392 393 CTR2(KTR_IW_CXGBE, "%s:ppcb %p", __func__, ep); 394 c4iw_put_ep(&ep->com); 395 } 396 CTR2(KTR_IW_CXGBE, "%s:ppcE %p", __func__, ep); 397 return; 398 } 399 400 static void 401 process_conn_error(struct c4iw_ep *ep) 402 { 403 struct c4iw_qp_attributes attrs; 404 int ret; 405 int state; 406 407 state = state_read(&ep->com); 408 CTR5(KTR_IW_CXGBE, "%s:pceB ep %p so %p so->so_error %u state %s", 409 __func__, ep, ep->com.so, ep->com.so->so_error, 410 states[ep->com.state]); 411 412 switch (state) { 413 414 case MPA_REQ_WAIT: 415 STOP_EP_TIMER(ep); 416 break; 417 418 case MPA_REQ_SENT: 419 STOP_EP_TIMER(ep); 420 connect_reply_upcall(ep, -ECONNRESET); 421 break; 422 423 case MPA_REP_SENT: 424 ep->com.rpl_err = ECONNRESET; 425 CTR1(KTR_IW_CXGBE, "waking up ep %p", ep); 426 break; 427 428 case MPA_REQ_RCVD: 429 430 /* 431 * We're gonna mark this puppy DEAD, but keep 432 * the reference on it until the ULP accepts or 433 * rejects the CR. 
434 */ 435 c4iw_get_ep(&ep->com); 436 break; 437 438 case MORIBUND: 439 case CLOSING: 440 STOP_EP_TIMER(ep); 441 /*FALLTHROUGH*/ 442 case FPDU_MODE: 443 444 if (ep->com.cm_id && ep->com.qp) { 445 446 attrs.next_state = C4IW_QP_STATE_ERROR; 447 ret = c4iw_modify_qp(ep->com.qp->rhp, 448 ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, 449 &attrs, 1); 450 if (ret) 451 log(LOG_ERR, 452 "%s - qp <- error failed!\n", 453 __func__); 454 } 455 peer_abort_upcall(ep); 456 break; 457 458 case ABORTING: 459 break; 460 461 case DEAD: 462 CTR2(KTR_IW_CXGBE, "%s so_error %d IN DEAD STATE!!!!", 463 __func__, ep->com.so->so_error); 464 return; 465 466 default: 467 panic("%s: ep %p state %d", __func__, ep, state); 468 break; 469 } 470 471 if (state != ABORTING) { 472 473 CTR2(KTR_IW_CXGBE, "%s:pce1 %p", __func__, ep); 474 close_socket(&ep->com, 0); 475 state_set(&ep->com, DEAD); 476 c4iw_put_ep(&ep->com); 477 } 478 CTR2(KTR_IW_CXGBE, "%s:pceE %p", __func__, ep); 479 return; 480 } 481 482 static void 483 process_close_complete(struct c4iw_ep *ep) 484 { 485 struct c4iw_qp_attributes attrs; 486 int release = 0; 487 488 CTR4(KTR_IW_CXGBE, "%s:pccB ep %p so %p state %s", __func__, ep, 489 ep->com.so, states[ep->com.state]); 490 491 /* The cm_id may be null if we failed to connect */ 492 mutex_lock(&ep->com.mutex); 493 494 switch (ep->com.state) { 495 496 case CLOSING: 497 CTR2(KTR_IW_CXGBE, "%s:pcc1 %p CLOSING MORIBUND", 498 __func__, ep); 499 __state_set(&ep->com, MORIBUND); 500 break; 501 502 case MORIBUND: 503 CTR2(KTR_IW_CXGBE, "%s:pcc1 %p MORIBUND DEAD", __func__, 504 ep); 505 STOP_EP_TIMER(ep); 506 507 if ((ep->com.cm_id) && (ep->com.qp)) { 508 509 CTR2(KTR_IW_CXGBE, "%s:pcc2 %p QP_STATE_IDLE", 510 __func__, ep); 511 attrs.next_state = C4IW_QP_STATE_IDLE; 512 c4iw_modify_qp(ep->com.dev, 513 ep->com.qp, 514 C4IW_QP_ATTR_NEXT_STATE, 515 &attrs, 1); 516 } 517 518 if (ep->parent_ep) { 519 520 CTR2(KTR_IW_CXGBE, "%s:pcc3 %p", __func__, ep); 521 close_socket(&ep->com, 1); 522 } 523 else { 524 525 CTR2(KTR_IW_CXGBE, "%s:pcc4 %p", __func__, ep); 526 close_socket(&ep->com, 0); 527 } 528 close_complete_upcall(ep, 0); 529 __state_set(&ep->com, DEAD); 530 release = 1; 531 break; 532 533 case ABORTING: 534 CTR2(KTR_IW_CXGBE, "%s:pcc5 %p ABORTING", __func__, ep); 535 break; 536 537 case DEAD: 538 default: 539 CTR2(KTR_IW_CXGBE, "%s:pcc6 %p DEAD", __func__, ep); 540 panic("%s:pcc6 %p DEAD", __func__, ep); 541 break; 542 } 543 mutex_unlock(&ep->com.mutex); 544 545 if (release) { 546 547 CTR2(KTR_IW_CXGBE, "%s:pcc7 %p", __func__, ep); 548 c4iw_put_ep(&ep->com); 549 } 550 CTR2(KTR_IW_CXGBE, "%s:pccE %p", __func__, ep); 551 return; 552 } 553 554 static void 555 init_sock(struct c4iw_ep_common *epc) 556 { 557 int rc; 558 struct sockopt sopt; 559 struct socket *so = epc->so; 560 int on = 1; 561 562 SOCK_LOCK(so); 563 soupcall_set(so, SO_RCV, c4iw_so_upcall, epc); 564 so->so_state |= SS_NBIO; 565 SOCK_UNLOCK(so); 566 sopt.sopt_dir = SOPT_SET; 567 sopt.sopt_level = IPPROTO_TCP; 568 sopt.sopt_name = TCP_NODELAY; 569 sopt.sopt_val = (caddr_t)&on; 570 sopt.sopt_valsize = sizeof on; 571 sopt.sopt_td = NULL; 572 rc = sosetopt(so, &sopt); 573 if (rc) { 574 log(LOG_ERR, "%s: can't set TCP_NODELAY on so %p (%d)\n", 575 __func__, so, rc); 576 } 577 } 578 579 static void 580 process_data(struct c4iw_ep *ep) 581 { 582 struct sockaddr_in *local, *remote; 583 584 CTR5(KTR_IW_CXGBE, "%s: so %p, ep %p, state %s, sbused %d", __func__, 585 ep->com.so, ep, states[ep->com.state], sbused(&ep->com.so->so_rcv)); 586 587 switch (state_read(&ep->com)) { 588 case 
MPA_REQ_SENT: 589 process_mpa_reply(ep); 590 break; 591 case MPA_REQ_WAIT: 592 in_getsockaddr(ep->com.so, (struct sockaddr **)&local); 593 in_getpeeraddr(ep->com.so, (struct sockaddr **)&remote); 594 ep->com.local_addr = *local; 595 ep->com.remote_addr = *remote; 596 free(local, M_SONAME); 597 free(remote, M_SONAME); 598 process_mpa_request(ep); 599 break; 600 default: 601 if (sbused(&ep->com.so->so_rcv)) 602 log(LOG_ERR, "%s: Unexpected streaming data. ep %p, " 603 "state %d, so %p, so_state 0x%x, sbused %u\n", 604 __func__, ep, state_read(&ep->com), ep->com.so, 605 ep->com.so->so_state, sbused(&ep->com.so->so_rcv)); 606 break; 607 } 608 } 609 610 static void 611 process_connected(struct c4iw_ep *ep) 612 { 613 614 if ((ep->com.so->so_state & SS_ISCONNECTED) && !ep->com.so->so_error) 615 send_mpa_req(ep); 616 else { 617 connect_reply_upcall(ep, -ep->com.so->so_error); 618 close_socket(&ep->com, 0); 619 state_set(&ep->com, DEAD); 620 c4iw_put_ep(&ep->com); 621 } 622 } 623 624 void 625 process_newconn(struct iw_cm_id *parent_cm_id, struct socket *child_so) 626 { 627 struct c4iw_ep *child_ep; 628 struct sockaddr_in *local; 629 struct sockaddr_in *remote; 630 struct c4iw_ep *parent_ep = parent_cm_id->provider_data; 631 632 if (!child_so) { 633 CTR4(KTR_IW_CXGBE, 634 "%s: parent so %p, parent ep %p, child so %p, invalid so", 635 __func__, parent_ep->com.so, parent_ep, child_so); 636 log(LOG_ERR, "%s: invalid child socket\n", __func__); 637 return; 638 } 639 child_ep = alloc_ep(sizeof(*child_ep), M_NOWAIT); 640 if (!child_ep) { 641 CTR3(KTR_IW_CXGBE, "%s: parent so %p, parent ep %p, ENOMEM", 642 __func__, parent_ep->com.so, parent_ep); 643 log(LOG_ERR, "%s: failed to allocate ep entry\n", __func__); 644 return; 645 } 646 SOCKBUF_LOCK(&child_so->so_rcv); 647 soupcall_set(child_so, SO_RCV, c4iw_so_upcall, child_ep); 648 SOCKBUF_UNLOCK(&child_so->so_rcv); 649 650 CTR5(KTR_IW_CXGBE, 651 "%s: parent so %p, parent ep %p, child so %p, child ep %p", 652 __func__, parent_ep->com.so, parent_ep, child_so, child_ep); 653 654 in_getsockaddr(child_so, (struct sockaddr **)&local); 655 in_getpeeraddr(child_so, (struct sockaddr **)&remote); 656 657 child_ep->com.local_addr = *local; 658 child_ep->com.remote_addr = *remote; 659 child_ep->com.dev = parent_ep->com.dev; 660 child_ep->com.so = child_so; 661 child_ep->com.cm_id = NULL; 662 child_ep->com.thread = parent_ep->com.thread; 663 child_ep->parent_ep = parent_ep; 664 665 free(local, M_SONAME); 666 free(remote, M_SONAME); 667 668 c4iw_get_ep(&parent_ep->com); 669 init_timer(&child_ep->timer); 670 state_set(&child_ep->com, MPA_REQ_WAIT); 671 START_EP_TIMER(child_ep); 672 673 /* maybe the request has already been queued up on the socket... 
 */
        process_mpa_request(child_ep);
        return;
}

static int
c4iw_so_upcall(struct socket *so, void *arg, int waitflag)
{
        struct c4iw_ep *ep = arg;

        spin_lock(&req_lock);

        CTR6(KTR_IW_CXGBE,
            "%s: so %p, so_state 0x%x, ep %p, ep_state %s, tqe_prev %p",
            __func__, so, so->so_state, ep, states[ep->com.state],
            ep->com.entry.tqe_prev);

        if (ep && ep->com.so && !ep->com.entry.tqe_prev) {
                KASSERT(ep->com.so == so, ("%s: XXX review.", __func__));
                c4iw_get_ep(&ep->com);
                TAILQ_INSERT_TAIL(&req_list, &ep->com, entry);
                queue_work(c4iw_taskq, &c4iw_task);
        }

        spin_unlock(&req_lock);
        return (SU_OK);
}

static void
process_socket_event(struct c4iw_ep *ep)
{
        int state = state_read(&ep->com);
        struct socket *so = ep->com.so;

        CTR6(KTR_IW_CXGBE, "process_socket_event: so %p, so_state 0x%x, "
            "so_err %d, sb_state 0x%x, ep %p, ep_state %s", so, so->so_state,
            so->so_error, so->so_rcv.sb_state, ep, states[state]);

        if (state == CONNECTING) {
                process_connected(ep);
                return;
        }

        if (state == LISTEN) {
                /* socket listening events are handled at IWCM */
                CTR3(KTR_IW_CXGBE, "%s Invalid ep state:%u, ep:%p", __func__,
                    ep->com.state, ep);
                BUG();
                return;
        }

        /* connection error */
        if (so->so_error) {
                process_conn_error(ep);
                return;
        }

        /* peer close */
        if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) && state < CLOSING) {
                process_peer_close(ep);
                return;
        }

        /* close complete */
        if (so->so_state & SS_ISDISCONNECTED) {
                process_close_complete(ep);
                return;
        }

        /* rx data */
        process_data(ep);
}

SYSCTL_NODE(_hw, OID_AUTO, iw_cxgbe, CTLFLAG_RD, 0, "iw_cxgbe driver parameters");

int db_delay_usecs = 1;
SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, db_delay_usecs, CTLFLAG_RWTUN, &db_delay_usecs, 0,
    "Usecs to delay awaiting db fifo to drain");

static int dack_mode = 1;
SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, dack_mode, CTLFLAG_RWTUN, &dack_mode, 0,
    "Delayed ack mode (default = 1)");

int c4iw_max_read_depth = 8;
SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, c4iw_max_read_depth, CTLFLAG_RWTUN, &c4iw_max_read_depth, 0,
    "Per-connection max ORD/IRD (default = 8)");

static int enable_tcp_timestamps;
SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, enable_tcp_timestamps, CTLFLAG_RWTUN, &enable_tcp_timestamps, 0,
    "Enable tcp timestamps (default = 0)");

static int enable_tcp_sack;
SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, enable_tcp_sack, CTLFLAG_RWTUN, &enable_tcp_sack, 0,
    "Enable tcp SACK (default = 0)");

static int enable_tcp_window_scaling = 1;
SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, enable_tcp_window_scaling, CTLFLAG_RWTUN, &enable_tcp_window_scaling, 0,
    "Enable tcp window scaling (default = 1)");

int c4iw_debug = 1;
SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, c4iw_debug, CTLFLAG_RWTUN, &c4iw_debug, 0,
    "Enable debug logging (default = 1)");

static int peer2peer;
SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, peer2peer, CTLFLAG_RWTUN, &peer2peer, 0,
    "Support peer2peer ULPs (default = 0)");

static int p2p_type = FW_RI_INIT_P2PTYPE_READ_REQ;
SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, p2p_type, CTLFLAG_RWTUN, &p2p_type, 0,
    "RDMAP opcode to use for the RTR message: 1 = RDMA_READ 0 = RDMA_WRITE (default 1)");

static int ep_timeout_secs = 60;
SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, ep_timeout_secs, CTLFLAG_RWTUN,
    &ep_timeout_secs, 0,
    "CM Endpoint operation timeout in seconds (default = 60)");

static int mpa_rev = 1;
SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, mpa_rev, CTLFLAG_RWTUN, &mpa_rev, 0,
    "MPA Revision, 0 supports amso1100, 1 is RFC5044 spec compliant, 2 is IETF MPA Peer Connect Draft compliant (default = 1)");

static int markers_enabled;
SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, markers_enabled, CTLFLAG_RWTUN, &markers_enabled, 0,
    "Enable MPA MARKERS (default(0) = disabled)");

static int crc_enabled = 1;
SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, crc_enabled, CTLFLAG_RWTUN, &crc_enabled, 0,
    "Enable MPA CRC (default(1) = enabled)");

static int rcv_win = 256 * 1024;
SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, rcv_win, CTLFLAG_RWTUN, &rcv_win, 0,
    "TCP receive window in bytes (default = 256KB)");

static int snd_win = 128 * 1024;
SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, snd_win, CTLFLAG_RWTUN, &snd_win, 0,
    "TCP send window in bytes (default = 128KB)");

int db_fc_threshold = 2000;
SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, db_fc_threshold, CTLFLAG_RWTUN, &db_fc_threshold, 0,
    "QP count/threshold that triggers automatic");

static void
start_ep_timer(struct c4iw_ep *ep)
{

        if (timer_pending(&ep->timer)) {
                CTR2(KTR_IW_CXGBE, "%s: ep %p, already started", __func__, ep);
                printk(KERN_ERR "%s timer already started! ep %p\n", __func__,
                    ep);
                return;
        }
        clear_bit(TIMEOUT, &ep->com.flags);
        c4iw_get_ep(&ep->com);
        ep->timer.expires = jiffies + ep_timeout_secs * HZ;
        ep->timer.data = (unsigned long)ep;
        ep->timer.function = ep_timeout;
        add_timer(&ep->timer);
}

static void
stop_ep_timer(struct c4iw_ep *ep)
{

        del_timer_sync(&ep->timer);
        if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
                c4iw_put_ep(&ep->com);
        }
}

static enum c4iw_ep_state
state_read(struct c4iw_ep_common *epc)
{
        enum c4iw_ep_state state;

        mutex_lock(&epc->mutex);
        state = epc->state;
        mutex_unlock(&epc->mutex);

        return (state);
}

static void
__state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
{

        epc->state = new;
}

static void
state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
{

        mutex_lock(&epc->mutex);
        __state_set(epc, new);
        mutex_unlock(&epc->mutex);
}

static void *
alloc_ep(int size, gfp_t gfp)
{
        struct c4iw_ep_common *epc;

        epc = kzalloc(size, gfp);
        if (epc == NULL)
                return (NULL);

        kref_init(&epc->kref);
        mutex_init(&epc->mutex);
        c4iw_init_wr_wait(&epc->wr_wait);

        return (epc);
}

void
__free_ep(struct c4iw_ep_common *epc)
{
        CTR2(KTR_IW_CXGBE, "%s:feB %p", __func__, epc);
        KASSERT(!epc->so, ("%s warning ep->so %p \n", __func__, epc->so));
        KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list!\n", __func__, epc));
        free(epc, M_DEVBUF);
        CTR2(KTR_IW_CXGBE, "%s:feE %p", __func__, epc);
}

void _c4iw_free_ep(struct kref *kref)
{
        struct c4iw_ep *ep;
        struct c4iw_ep_common *epc;

        ep = container_of(kref, struct c4iw_ep, com.kref);
        epc = &ep->com;
        KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list",
            __func__, epc));
        kfree(ep);
}

static void release_ep_resources(struct c4iw_ep *ep)
{
        CTR2(KTR_IW_CXGBE, "%s:rerB %p", __func__, ep);
        set_bit(RELEASE_RESOURCES, &ep->com.flags);
c4iw_put_ep(&ep->com); 911 CTR2(KTR_IW_CXGBE, "%s:rerE %p", __func__, ep); 912 } 913 914 static void 915 send_mpa_req(struct c4iw_ep *ep) 916 { 917 int mpalen; 918 struct mpa_message *mpa; 919 struct mpa_v2_conn_params mpa_v2_params; 920 struct mbuf *m; 921 char mpa_rev_to_use = mpa_rev; 922 int err; 923 924 if (ep->retry_with_mpa_v1) 925 mpa_rev_to_use = 1; 926 mpalen = sizeof(*mpa) + ep->plen; 927 if (mpa_rev_to_use == 2) 928 mpalen += sizeof(struct mpa_v2_conn_params); 929 930 mpa = malloc(mpalen, M_CXGBE, M_NOWAIT); 931 if (mpa == NULL) { 932 failed: 933 connect_reply_upcall(ep, -ENOMEM); 934 return; 935 } 936 937 memset(mpa, 0, mpalen); 938 memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key)); 939 mpa->flags = (crc_enabled ? MPA_CRC : 0) | 940 (markers_enabled ? MPA_MARKERS : 0) | 941 (mpa_rev_to_use == 2 ? MPA_ENHANCED_RDMA_CONN : 0); 942 mpa->private_data_size = htons(ep->plen); 943 mpa->revision = mpa_rev_to_use; 944 945 if (mpa_rev_to_use == 1) { 946 ep->tried_with_mpa_v1 = 1; 947 ep->retry_with_mpa_v1 = 0; 948 } 949 950 if (mpa_rev_to_use == 2) { 951 mpa->private_data_size += 952 htons(sizeof(struct mpa_v2_conn_params)); 953 mpa_v2_params.ird = htons((u16)ep->ird); 954 mpa_v2_params.ord = htons((u16)ep->ord); 955 956 if (peer2peer) { 957 mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL); 958 959 if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE) { 960 mpa_v2_params.ord |= 961 htons(MPA_V2_RDMA_WRITE_RTR); 962 } else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) { 963 mpa_v2_params.ord |= 964 htons(MPA_V2_RDMA_READ_RTR); 965 } 966 } 967 memcpy(mpa->private_data, &mpa_v2_params, 968 sizeof(struct mpa_v2_conn_params)); 969 970 if (ep->plen) { 971 972 memcpy(mpa->private_data + 973 sizeof(struct mpa_v2_conn_params), 974 ep->mpa_pkt + sizeof(*mpa), ep->plen); 975 } 976 } else { 977 978 if (ep->plen) 979 memcpy(mpa->private_data, 980 ep->mpa_pkt + sizeof(*mpa), ep->plen); 981 CTR2(KTR_IW_CXGBE, "%s:smr7 %p", __func__, ep); 982 } 983 984 m = m_getm(NULL, mpalen, M_NOWAIT, MT_DATA); 985 if (m == NULL) { 986 free(mpa, M_CXGBE); 987 goto failed; 988 } 989 m_copyback(m, 0, mpalen, (void *)mpa); 990 free(mpa, M_CXGBE); 991 992 err = sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, 993 ep->com.thread); 994 if (err) 995 goto failed; 996 997 START_EP_TIMER(ep); 998 state_set(&ep->com, MPA_REQ_SENT); 999 ep->mpa_attr.initiator = 1; 1000 } 1001 1002 static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen) 1003 { 1004 int mpalen ; 1005 struct mpa_message *mpa; 1006 struct mpa_v2_conn_params mpa_v2_params; 1007 struct mbuf *m; 1008 int err; 1009 1010 CTR4(KTR_IW_CXGBE, "%s:smrejB %p %u %d", __func__, ep, ep->hwtid, 1011 ep->plen); 1012 1013 mpalen = sizeof(*mpa) + plen; 1014 1015 if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { 1016 1017 mpalen += sizeof(struct mpa_v2_conn_params); 1018 CTR4(KTR_IW_CXGBE, "%s:smrej1 %p %u %d", __func__, ep, 1019 ep->mpa_attr.version, mpalen); 1020 } 1021 1022 mpa = malloc(mpalen, M_CXGBE, M_NOWAIT); 1023 if (mpa == NULL) 1024 return (-ENOMEM); 1025 1026 memset(mpa, 0, mpalen); 1027 memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); 1028 mpa->flags = MPA_REJECT; 1029 mpa->revision = mpa_rev; 1030 mpa->private_data_size = htons(plen); 1031 1032 if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { 1033 1034 mpa->flags |= MPA_ENHANCED_RDMA_CONN; 1035 mpa->private_data_size += 1036 htons(sizeof(struct mpa_v2_conn_params)); 1037 mpa_v2_params.ird = htons(((u16)ep->ird) | 1038 (peer2peer ? 
MPA_V2_PEER2PEER_MODEL : 1039 0)); 1040 mpa_v2_params.ord = htons(((u16)ep->ord) | (peer2peer ? 1041 (p2p_type == 1042 FW_RI_INIT_P2PTYPE_RDMA_WRITE ? 1043 MPA_V2_RDMA_WRITE_RTR : p2p_type == 1044 FW_RI_INIT_P2PTYPE_READ_REQ ? 1045 MPA_V2_RDMA_READ_RTR : 0) : 0)); 1046 memcpy(mpa->private_data, &mpa_v2_params, 1047 sizeof(struct mpa_v2_conn_params)); 1048 1049 if (ep->plen) 1050 memcpy(mpa->private_data + 1051 sizeof(struct mpa_v2_conn_params), pdata, plen); 1052 CTR5(KTR_IW_CXGBE, "%s:smrej3 %p %d %d %d", __func__, ep, 1053 mpa_v2_params.ird, mpa_v2_params.ord, ep->plen); 1054 } else 1055 if (plen) 1056 memcpy(mpa->private_data, pdata, plen); 1057 1058 m = m_getm(NULL, mpalen, M_NOWAIT, MT_DATA); 1059 if (m == NULL) { 1060 free(mpa, M_CXGBE); 1061 return (-ENOMEM); 1062 } 1063 m_copyback(m, 0, mpalen, (void *)mpa); 1064 free(mpa, M_CXGBE); 1065 1066 err = -sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread); 1067 if (!err) 1068 ep->snd_seq += mpalen; 1069 CTR4(KTR_IW_CXGBE, "%s:smrejE %p %u %d", __func__, ep, ep->hwtid, err); 1070 return err; 1071 } 1072 1073 static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen) 1074 { 1075 int mpalen; 1076 struct mpa_message *mpa; 1077 struct mbuf *m; 1078 struct mpa_v2_conn_params mpa_v2_params; 1079 int err; 1080 1081 CTR2(KTR_IW_CXGBE, "%s:smrepB %p", __func__, ep); 1082 1083 mpalen = sizeof(*mpa) + plen; 1084 1085 if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { 1086 1087 CTR3(KTR_IW_CXGBE, "%s:smrep1 %p %d", __func__, ep, 1088 ep->mpa_attr.version); 1089 mpalen += sizeof(struct mpa_v2_conn_params); 1090 } 1091 1092 mpa = malloc(mpalen, M_CXGBE, M_NOWAIT); 1093 if (mpa == NULL) 1094 return (-ENOMEM); 1095 1096 memset(mpa, 0, sizeof(*mpa)); 1097 memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); 1098 mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) | 1099 (markers_enabled ? 
MPA_MARKERS : 0); 1100 mpa->revision = ep->mpa_attr.version; 1101 mpa->private_data_size = htons(plen); 1102 1103 if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { 1104 1105 mpa->flags |= MPA_ENHANCED_RDMA_CONN; 1106 mpa->private_data_size += 1107 htons(sizeof(struct mpa_v2_conn_params)); 1108 mpa_v2_params.ird = htons((u16)ep->ird); 1109 mpa_v2_params.ord = htons((u16)ep->ord); 1110 CTR5(KTR_IW_CXGBE, "%s:smrep3 %p %d %d %d", __func__, ep, 1111 ep->mpa_attr.version, mpa_v2_params.ird, mpa_v2_params.ord); 1112 1113 if (peer2peer && (ep->mpa_attr.p2p_type != 1114 FW_RI_INIT_P2PTYPE_DISABLED)) { 1115 1116 mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL); 1117 1118 if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE) { 1119 1120 mpa_v2_params.ord |= 1121 htons(MPA_V2_RDMA_WRITE_RTR); 1122 CTR5(KTR_IW_CXGBE, "%s:smrep4 %p %d %d %d", 1123 __func__, ep, p2p_type, mpa_v2_params.ird, 1124 mpa_v2_params.ord); 1125 } 1126 else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) { 1127 1128 mpa_v2_params.ord |= 1129 htons(MPA_V2_RDMA_READ_RTR); 1130 CTR5(KTR_IW_CXGBE, "%s:smrep5 %p %d %d %d", 1131 __func__, ep, p2p_type, mpa_v2_params.ird, 1132 mpa_v2_params.ord); 1133 } 1134 } 1135 1136 memcpy(mpa->private_data, &mpa_v2_params, 1137 sizeof(struct mpa_v2_conn_params)); 1138 1139 if (ep->plen) 1140 memcpy(mpa->private_data + 1141 sizeof(struct mpa_v2_conn_params), pdata, plen); 1142 } else 1143 if (plen) 1144 memcpy(mpa->private_data, pdata, plen); 1145 1146 m = m_getm(NULL, mpalen, M_NOWAIT, MT_DATA); 1147 if (m == NULL) { 1148 free(mpa, M_CXGBE); 1149 return (-ENOMEM); 1150 } 1151 m_copyback(m, 0, mpalen, (void *)mpa); 1152 free(mpa, M_CXGBE); 1153 1154 1155 state_set(&ep->com, MPA_REP_SENT); 1156 ep->snd_seq += mpalen; 1157 err = -sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, 1158 ep->com.thread); 1159 CTR3(KTR_IW_CXGBE, "%s:smrepE %p %d", __func__, ep, err); 1160 return err; 1161 } 1162 1163 1164 1165 static void close_complete_upcall(struct c4iw_ep *ep, int status) 1166 { 1167 struct iw_cm_event event; 1168 1169 CTR2(KTR_IW_CXGBE, "%s:ccuB %p", __func__, ep); 1170 memset(&event, 0, sizeof(event)); 1171 event.event = IW_CM_EVENT_CLOSE; 1172 event.status = status; 1173 1174 if (ep->com.cm_id) { 1175 1176 CTR2(KTR_IW_CXGBE, "%s:ccu1 %1", __func__, ep); 1177 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 1178 ep->com.cm_id->rem_ref(ep->com.cm_id); 1179 ep->com.cm_id = NULL; 1180 ep->com.qp = NULL; 1181 set_bit(CLOSE_UPCALL, &ep->com.history); 1182 } 1183 CTR2(KTR_IW_CXGBE, "%s:ccuE %p", __func__, ep); 1184 } 1185 1186 static int abort_connection(struct c4iw_ep *ep) 1187 { 1188 int err; 1189 1190 CTR2(KTR_IW_CXGBE, "%s:abB %p", __func__, ep); 1191 state_set(&ep->com, ABORTING); 1192 abort_socket(ep); 1193 err = close_socket(&ep->com, 0); 1194 set_bit(ABORT_CONN, &ep->com.history); 1195 CTR2(KTR_IW_CXGBE, "%s:abE %p", __func__, ep); 1196 return err; 1197 } 1198 1199 static void peer_close_upcall(struct c4iw_ep *ep) 1200 { 1201 struct iw_cm_event event; 1202 1203 CTR2(KTR_IW_CXGBE, "%s:pcuB %p", __func__, ep); 1204 memset(&event, 0, sizeof(event)); 1205 event.event = IW_CM_EVENT_DISCONNECT; 1206 1207 if (ep->com.cm_id) { 1208 1209 CTR2(KTR_IW_CXGBE, "%s:pcu1 %p", __func__, ep); 1210 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 1211 set_bit(DISCONN_UPCALL, &ep->com.history); 1212 } 1213 CTR2(KTR_IW_CXGBE, "%s:pcuE %p", __func__, ep); 1214 } 1215 1216 static void peer_abort_upcall(struct c4iw_ep *ep) 1217 { 1218 struct iw_cm_event event; 1219 1220 CTR2(KTR_IW_CXGBE, "%s:pauB 
%p", __func__, ep); 1221 memset(&event, 0, sizeof(event)); 1222 event.event = IW_CM_EVENT_CLOSE; 1223 event.status = -ECONNRESET; 1224 1225 if (ep->com.cm_id) { 1226 1227 CTR2(KTR_IW_CXGBE, "%s:pau1 %p", __func__, ep); 1228 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 1229 ep->com.cm_id->rem_ref(ep->com.cm_id); 1230 ep->com.cm_id = NULL; 1231 ep->com.qp = NULL; 1232 set_bit(ABORT_UPCALL, &ep->com.history); 1233 } 1234 CTR2(KTR_IW_CXGBE, "%s:pauE %p", __func__, ep); 1235 } 1236 1237 static void connect_reply_upcall(struct c4iw_ep *ep, int status) 1238 { 1239 struct iw_cm_event event; 1240 1241 CTR3(KTR_IW_CXGBE, "%s:cruB %p", __func__, ep, status); 1242 memset(&event, 0, sizeof(event)); 1243 event.event = IW_CM_EVENT_CONNECT_REPLY; 1244 event.status = (status ==-ECONNABORTED)?-ECONNRESET: status; 1245 event.local_addr = ep->com.local_addr; 1246 event.remote_addr = ep->com.remote_addr; 1247 1248 if ((status == 0) || (status == -ECONNREFUSED)) { 1249 1250 if (!ep->tried_with_mpa_v1) { 1251 1252 CTR2(KTR_IW_CXGBE, "%s:cru1 %p", __func__, ep); 1253 /* this means MPA_v2 is used */ 1254 event.private_data_len = ep->plen - 1255 sizeof(struct mpa_v2_conn_params); 1256 event.private_data = ep->mpa_pkt + 1257 sizeof(struct mpa_message) + 1258 sizeof(struct mpa_v2_conn_params); 1259 } else { 1260 1261 CTR2(KTR_IW_CXGBE, "%s:cru2 %p", __func__, ep); 1262 /* this means MPA_v1 is used */ 1263 event.private_data_len = ep->plen; 1264 event.private_data = ep->mpa_pkt + 1265 sizeof(struct mpa_message); 1266 } 1267 } 1268 1269 if (ep->com.cm_id) { 1270 1271 CTR2(KTR_IW_CXGBE, "%s:cru3 %p", __func__, ep); 1272 set_bit(CONN_RPL_UPCALL, &ep->com.history); 1273 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 1274 } 1275 1276 if(status == -ECONNABORTED) { 1277 1278 CTR3(KTR_IW_CXGBE, "%s:cruE %p %d", __func__, ep, status); 1279 return; 1280 } 1281 1282 if (status < 0) { 1283 1284 CTR3(KTR_IW_CXGBE, "%s:cru4 %p %d", __func__, ep, status); 1285 ep->com.cm_id->rem_ref(ep->com.cm_id); 1286 ep->com.cm_id = NULL; 1287 ep->com.qp = NULL; 1288 } 1289 1290 CTR2(KTR_IW_CXGBE, "%s:cruE %p", __func__, ep); 1291 } 1292 1293 static int connect_request_upcall(struct c4iw_ep *ep) 1294 { 1295 struct iw_cm_event event; 1296 int ret; 1297 1298 CTR3(KTR_IW_CXGBE, "%s: ep %p, mpa_v1 %d", __func__, ep, 1299 ep->tried_with_mpa_v1); 1300 1301 memset(&event, 0, sizeof(event)); 1302 event.event = IW_CM_EVENT_CONNECT_REQUEST; 1303 event.local_addr = ep->com.local_addr; 1304 event.remote_addr = ep->com.remote_addr; 1305 event.provider_data = ep; 1306 event.so = ep->com.so; 1307 1308 if (!ep->tried_with_mpa_v1) { 1309 /* this means MPA_v2 is used */ 1310 event.ord = ep->ord; 1311 event.ird = ep->ird; 1312 event.private_data_len = ep->plen - 1313 sizeof(struct mpa_v2_conn_params); 1314 event.private_data = ep->mpa_pkt + sizeof(struct mpa_message) + 1315 sizeof(struct mpa_v2_conn_params); 1316 } else { 1317 1318 /* this means MPA_v1 is used. 
Send max supported */ 1319 event.ord = c4iw_max_read_depth; 1320 event.ird = c4iw_max_read_depth; 1321 event.private_data_len = ep->plen; 1322 event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); 1323 } 1324 1325 c4iw_get_ep(&ep->com); 1326 ret = ep->parent_ep->com.cm_id->event_handler(ep->parent_ep->com.cm_id, 1327 &event); 1328 if(ret) 1329 c4iw_put_ep(&ep->com); 1330 1331 set_bit(CONNREQ_UPCALL, &ep->com.history); 1332 c4iw_put_ep(&ep->parent_ep->com); 1333 return ret; 1334 } 1335 1336 static void established_upcall(struct c4iw_ep *ep) 1337 { 1338 struct iw_cm_event event; 1339 1340 CTR2(KTR_IW_CXGBE, "%s:euB %p", __func__, ep); 1341 memset(&event, 0, sizeof(event)); 1342 event.event = IW_CM_EVENT_ESTABLISHED; 1343 event.ird = ep->ird; 1344 event.ord = ep->ord; 1345 1346 if (ep->com.cm_id) { 1347 1348 CTR2(KTR_IW_CXGBE, "%s:eu1 %p", __func__, ep); 1349 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 1350 set_bit(ESTAB_UPCALL, &ep->com.history); 1351 } 1352 CTR2(KTR_IW_CXGBE, "%s:euE %p", __func__, ep); 1353 } 1354 1355 1356 1357 static void process_mpa_reply(struct c4iw_ep *ep) 1358 { 1359 struct mpa_message *mpa; 1360 struct mpa_v2_conn_params *mpa_v2_params; 1361 u16 plen; 1362 u16 resp_ird, resp_ord; 1363 u8 rtr_mismatch = 0, insuff_ird = 0; 1364 struct c4iw_qp_attributes attrs; 1365 enum c4iw_qp_attr_mask mask; 1366 int err; 1367 struct mbuf *top, *m; 1368 int flags = MSG_DONTWAIT; 1369 struct uio uio; 1370 1371 CTR2(KTR_IW_CXGBE, "%s:pmrB %p", __func__, ep); 1372 1373 /* 1374 * Stop mpa timer. If it expired, then the state has 1375 * changed and we bail since ep_timeout already aborted 1376 * the connection. 1377 */ 1378 STOP_EP_TIMER(ep); 1379 if (state_read(&ep->com) != MPA_REQ_SENT) 1380 return; 1381 1382 uio.uio_resid = 1000000; 1383 uio.uio_td = ep->com.thread; 1384 err = soreceive(ep->com.so, NULL, &uio, &top, NULL, &flags); 1385 1386 if (err) { 1387 1388 if (err == EWOULDBLOCK) { 1389 1390 CTR2(KTR_IW_CXGBE, "%s:pmr1 %p", __func__, ep); 1391 START_EP_TIMER(ep); 1392 return; 1393 } 1394 err = -err; 1395 CTR2(KTR_IW_CXGBE, "%s:pmr2 %p", __func__, ep); 1396 goto err; 1397 } 1398 1399 if (ep->com.so->so_rcv.sb_mb) { 1400 1401 CTR2(KTR_IW_CXGBE, "%s:pmr3 %p", __func__, ep); 1402 printf("%s data after soreceive called! so %p sb_mb %p top %p\n", 1403 __func__, ep->com.so, ep->com.so->so_rcv.sb_mb, top); 1404 } 1405 1406 m = top; 1407 1408 do { 1409 1410 CTR2(KTR_IW_CXGBE, "%s:pmr4 %p", __func__, ep); 1411 /* 1412 * If we get more than the supported amount of private data 1413 * then we must fail this connection. 1414 */ 1415 if (ep->mpa_pkt_len + m->m_len > sizeof(ep->mpa_pkt)) { 1416 1417 CTR3(KTR_IW_CXGBE, "%s:pmr5 %p %d", __func__, ep, 1418 ep->mpa_pkt_len + m->m_len); 1419 err = (-EINVAL); 1420 goto err; 1421 } 1422 1423 /* 1424 * copy the new data into our accumulation buffer. 1425 */ 1426 m_copydata(m, 0, m->m_len, &(ep->mpa_pkt[ep->mpa_pkt_len])); 1427 ep->mpa_pkt_len += m->m_len; 1428 if (!m->m_next) 1429 m = m->m_nextpkt; 1430 else 1431 m = m->m_next; 1432 } while (m); 1433 1434 m_freem(top); 1435 /* 1436 * if we don't even have the mpa message, then bail. 1437 */ 1438 if (ep->mpa_pkt_len < sizeof(*mpa)) 1439 return; 1440 mpa = (struct mpa_message *) ep->mpa_pkt; 1441 1442 /* Validate MPA header. */ 1443 if (mpa->revision > mpa_rev) { 1444 1445 CTR4(KTR_IW_CXGBE, "%s:pmr6 %p %d %d", __func__, ep, 1446 mpa->revision, mpa_rev); 1447 printk(KERN_ERR MOD "%s MPA version mismatch. 
Local = %d, " 1448 " Received = %d\n", __func__, mpa_rev, mpa->revision); 1449 err = -EPROTO; 1450 goto err; 1451 } 1452 1453 if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) { 1454 1455 CTR2(KTR_IW_CXGBE, "%s:pmr7 %p", __func__, ep); 1456 err = -EPROTO; 1457 goto err; 1458 } 1459 1460 plen = ntohs(mpa->private_data_size); 1461 1462 /* 1463 * Fail if there's too much private data. 1464 */ 1465 if (plen > MPA_MAX_PRIVATE_DATA) { 1466 1467 CTR2(KTR_IW_CXGBE, "%s:pmr8 %p", __func__, ep); 1468 err = -EPROTO; 1469 goto err; 1470 } 1471 1472 /* 1473 * If plen does not account for pkt size 1474 */ 1475 if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) { 1476 1477 CTR2(KTR_IW_CXGBE, "%s:pmr9 %p", __func__, ep); 1478 err = -EPROTO; 1479 goto err; 1480 } 1481 1482 ep->plen = (u8) plen; 1483 1484 /* 1485 * If we don't have all the pdata yet, then bail. 1486 * We'll continue process when more data arrives. 1487 */ 1488 if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) { 1489 1490 CTR2(KTR_IW_CXGBE, "%s:pmra %p", __func__, ep); 1491 return; 1492 } 1493 1494 if (mpa->flags & MPA_REJECT) { 1495 1496 CTR2(KTR_IW_CXGBE, "%s:pmrb %p", __func__, ep); 1497 err = -ECONNREFUSED; 1498 goto err; 1499 } 1500 1501 /* 1502 * If we get here we have accumulated the entire mpa 1503 * start reply message including private data. And 1504 * the MPA header is valid. 1505 */ 1506 state_set(&ep->com, FPDU_MODE); 1507 ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; 1508 ep->mpa_attr.recv_marker_enabled = markers_enabled; 1509 ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; 1510 ep->mpa_attr.version = mpa->revision; 1511 ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED; 1512 1513 if (mpa->revision == 2) { 1514 1515 CTR2(KTR_IW_CXGBE, "%s:pmrc %p", __func__, ep); 1516 ep->mpa_attr.enhanced_rdma_conn = 1517 mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0; 1518 1519 if (ep->mpa_attr.enhanced_rdma_conn) { 1520 1521 CTR2(KTR_IW_CXGBE, "%s:pmrd %p", __func__, ep); 1522 mpa_v2_params = (struct mpa_v2_conn_params *) 1523 (ep->mpa_pkt + sizeof(*mpa)); 1524 resp_ird = ntohs(mpa_v2_params->ird) & 1525 MPA_V2_IRD_ORD_MASK; 1526 resp_ord = ntohs(mpa_v2_params->ord) & 1527 MPA_V2_IRD_ORD_MASK; 1528 1529 /* 1530 * This is a double-check. 
Ideally, below checks are 1531 * not required since ird/ord stuff has been taken 1532 * care of in c4iw_accept_cr 1533 */ 1534 if ((ep->ird < resp_ord) || (ep->ord > resp_ird)) { 1535 1536 CTR2(KTR_IW_CXGBE, "%s:pmre %p", __func__, ep); 1537 err = -ENOMEM; 1538 ep->ird = resp_ord; 1539 ep->ord = resp_ird; 1540 insuff_ird = 1; 1541 } 1542 1543 if (ntohs(mpa_v2_params->ird) & 1544 MPA_V2_PEER2PEER_MODEL) { 1545 1546 CTR2(KTR_IW_CXGBE, "%s:pmrf %p", __func__, ep); 1547 if (ntohs(mpa_v2_params->ord) & 1548 MPA_V2_RDMA_WRITE_RTR) { 1549 1550 CTR2(KTR_IW_CXGBE, "%s:pmrg %p", __func__, ep); 1551 ep->mpa_attr.p2p_type = 1552 FW_RI_INIT_P2PTYPE_RDMA_WRITE; 1553 } 1554 else if (ntohs(mpa_v2_params->ord) & 1555 MPA_V2_RDMA_READ_RTR) { 1556 1557 CTR2(KTR_IW_CXGBE, "%s:pmrh %p", __func__, ep); 1558 ep->mpa_attr.p2p_type = 1559 FW_RI_INIT_P2PTYPE_READ_REQ; 1560 } 1561 } 1562 } 1563 } else { 1564 1565 CTR2(KTR_IW_CXGBE, "%s:pmri %p", __func__, ep); 1566 1567 if (mpa->revision == 1) { 1568 1569 CTR2(KTR_IW_CXGBE, "%s:pmrj %p", __func__, ep); 1570 1571 if (peer2peer) { 1572 1573 CTR2(KTR_IW_CXGBE, "%s:pmrk %p", __func__, ep); 1574 ep->mpa_attr.p2p_type = p2p_type; 1575 } 1576 } 1577 } 1578 1579 if (set_tcpinfo(ep)) { 1580 1581 CTR2(KTR_IW_CXGBE, "%s:pmrl %p", __func__, ep); 1582 printf("%s set_tcpinfo error\n", __func__); 1583 goto err; 1584 } 1585 1586 CTR6(KTR_IW_CXGBE, "%s - crc_enabled = %d, recv_marker_enabled = %d, " 1587 "xmit_marker_enabled = %d, version = %d p2p_type = %d", __func__, 1588 ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled, 1589 ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version, 1590 ep->mpa_attr.p2p_type); 1591 1592 /* 1593 * If responder's RTR does not match with that of initiator, assign 1594 * FW_RI_INIT_P2PTYPE_DISABLED in mpa attributes so that RTR is not 1595 * generated when moving QP to RTS state. 1596 * A TERM message will be sent after QP has moved to RTS state 1597 */ 1598 if ((ep->mpa_attr.version == 2) && peer2peer && 1599 (ep->mpa_attr.p2p_type != p2p_type)) { 1600 1601 CTR2(KTR_IW_CXGBE, "%s:pmrm %p", __func__, ep); 1602 ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED; 1603 rtr_mismatch = 1; 1604 } 1605 1606 1607 //ep->ofld_txq = TOEPCB(ep->com.so)->ofld_txq; 1608 attrs.mpa_attr = ep->mpa_attr; 1609 attrs.max_ird = ep->ird; 1610 attrs.max_ord = ep->ord; 1611 attrs.llp_stream_handle = ep; 1612 attrs.next_state = C4IW_QP_STATE_RTS; 1613 1614 mask = C4IW_QP_ATTR_NEXT_STATE | 1615 C4IW_QP_ATTR_LLP_STREAM_HANDLE | C4IW_QP_ATTR_MPA_ATTR | 1616 C4IW_QP_ATTR_MAX_IRD | C4IW_QP_ATTR_MAX_ORD; 1617 1618 /* bind QP and TID with INIT_WR */ 1619 err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, mask, &attrs, 1); 1620 1621 if (err) { 1622 1623 CTR2(KTR_IW_CXGBE, "%s:pmrn %p", __func__, ep); 1624 goto err; 1625 } 1626 1627 /* 1628 * If responder's RTR requirement did not match with what initiator 1629 * supports, generate TERM message 1630 */ 1631 if (rtr_mismatch) { 1632 1633 CTR2(KTR_IW_CXGBE, "%s:pmro %p", __func__, ep); 1634 printk(KERN_ERR "%s: RTR mismatch, sending TERM\n", __func__); 1635 attrs.layer_etype = LAYER_MPA | DDP_LLP; 1636 attrs.ecode = MPA_NOMATCH_RTR; 1637 attrs.next_state = C4IW_QP_STATE_TERMINATE; 1638 err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, 1639 C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); 1640 err = -ENOMEM; 1641 goto out; 1642 } 1643 1644 /* 1645 * Generate TERM if initiator IRD is not sufficient for responder 1646 * provided ORD. 
Currently, we do the same behaviour even when 1647 * responder provided IRD is also not sufficient as regards to 1648 * initiator ORD. 1649 */ 1650 if (insuff_ird) { 1651 1652 CTR2(KTR_IW_CXGBE, "%s:pmrp %p", __func__, ep); 1653 printk(KERN_ERR "%s: Insufficient IRD, sending TERM\n", 1654 __func__); 1655 attrs.layer_etype = LAYER_MPA | DDP_LLP; 1656 attrs.ecode = MPA_INSUFF_IRD; 1657 attrs.next_state = C4IW_QP_STATE_TERMINATE; 1658 err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, 1659 C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); 1660 err = -ENOMEM; 1661 goto out; 1662 } 1663 goto out; 1664 err: 1665 state_set(&ep->com, ABORTING); 1666 abort_connection(ep); 1667 out: 1668 connect_reply_upcall(ep, err); 1669 CTR2(KTR_IW_CXGBE, "%s:pmrE %p", __func__, ep); 1670 return; 1671 } 1672 1673 static void 1674 process_mpa_request(struct c4iw_ep *ep) 1675 { 1676 struct mpa_message *mpa; 1677 u16 plen; 1678 int flags = MSG_DONTWAIT; 1679 int rc; 1680 struct iovec iov; 1681 struct uio uio; 1682 enum c4iw_ep_state state = state_read(&ep->com); 1683 1684 CTR3(KTR_IW_CXGBE, "%s: ep %p, state %s", __func__, ep, states[state]); 1685 1686 if (state != MPA_REQ_WAIT) 1687 return; 1688 1689 iov.iov_base = &ep->mpa_pkt[ep->mpa_pkt_len]; 1690 iov.iov_len = sizeof(ep->mpa_pkt) - ep->mpa_pkt_len; 1691 uio.uio_iov = &iov; 1692 uio.uio_iovcnt = 1; 1693 uio.uio_offset = 0; 1694 uio.uio_resid = sizeof(ep->mpa_pkt) - ep->mpa_pkt_len; 1695 uio.uio_segflg = UIO_SYSSPACE; 1696 uio.uio_rw = UIO_READ; 1697 uio.uio_td = NULL; /* uio.uio_td = ep->com.thread; */ 1698 1699 rc = soreceive(ep->com.so, NULL, &uio, NULL, NULL, &flags); 1700 if (rc == EAGAIN) 1701 return; 1702 else if (rc) { 1703 abort: 1704 STOP_EP_TIMER(ep); 1705 abort_connection(ep); 1706 return; 1707 } 1708 KASSERT(uio.uio_offset > 0, ("%s: sorecieve on so %p read no data", 1709 __func__, ep->com.so)); 1710 ep->mpa_pkt_len += uio.uio_offset; 1711 1712 /* 1713 * If we get more than the supported amount of private data then we must 1714 * fail this connection. XXX: check so_rcv->sb_cc, or peek with another 1715 * soreceive, or increase the size of mpa_pkt by 1 and abort if the last 1716 * byte is filled by the soreceive above. 1717 */ 1718 1719 /* Don't even have the MPA message. Wait for more data to arrive. */ 1720 if (ep->mpa_pkt_len < sizeof(*mpa)) 1721 return; 1722 mpa = (struct mpa_message *) ep->mpa_pkt; 1723 1724 /* 1725 * Validate MPA Header. 1726 */ 1727 if (mpa->revision > mpa_rev) { 1728 log(LOG_ERR, "%s: MPA version mismatch. Local = %d," 1729 " Received = %d\n", __func__, mpa_rev, mpa->revision); 1730 goto abort; 1731 } 1732 1733 if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) 1734 goto abort; 1735 1736 /* 1737 * Fail if there's too much private data. 1738 */ 1739 plen = ntohs(mpa->private_data_size); 1740 if (plen > MPA_MAX_PRIVATE_DATA) 1741 goto abort; 1742 1743 /* 1744 * If plen does not account for pkt size 1745 */ 1746 if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) 1747 goto abort; 1748 1749 ep->plen = (u8) plen; 1750 1751 /* 1752 * If we don't have all the pdata yet, then bail. 1753 */ 1754 if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) 1755 return; 1756 1757 /* 1758 * If we get here we have accumulated the entire mpa 1759 * start reply message including private data. 1760 */ 1761 ep->mpa_attr.initiator = 0; 1762 ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; 1763 ep->mpa_attr.recv_marker_enabled = markers_enabled; 1764 ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 
            1 : 0;
        ep->mpa_attr.version = mpa->revision;
        if (mpa->revision == 1)
                ep->tried_with_mpa_v1 = 1;
        ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;

        if (mpa->revision == 2) {
                ep->mpa_attr.enhanced_rdma_conn =
                    mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
                if (ep->mpa_attr.enhanced_rdma_conn) {
                        struct mpa_v2_conn_params *mpa_v2_params;
                        u16 ird, ord;

                        mpa_v2_params = (void *)&ep->mpa_pkt[sizeof(*mpa)];
                        ird = ntohs(mpa_v2_params->ird);
                        ord = ntohs(mpa_v2_params->ord);

                        ep->ird = ird & MPA_V2_IRD_ORD_MASK;
                        ep->ord = ord & MPA_V2_IRD_ORD_MASK;
                        if (ird & MPA_V2_PEER2PEER_MODEL && peer2peer) {
                                if (ord & MPA_V2_RDMA_WRITE_RTR) {
                                        ep->mpa_attr.p2p_type =
                                            FW_RI_INIT_P2PTYPE_RDMA_WRITE;
                                } else if (ord & MPA_V2_RDMA_READ_RTR) {
                                        ep->mpa_attr.p2p_type =
                                            FW_RI_INIT_P2PTYPE_READ_REQ;
                                }
                        }
                }
        } else if (mpa->revision == 1 && peer2peer)
                ep->mpa_attr.p2p_type = p2p_type;

        if (set_tcpinfo(ep))
                goto abort;

        CTR5(KTR_IW_CXGBE, "%s: crc_enabled = %d, recv_marker_enabled = %d, "
            "xmit_marker_enabled = %d, version = %d", __func__,
            ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
            ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);

        state_set(&ep->com, MPA_REQ_RCVD);
        STOP_EP_TIMER(ep);

        /* drive upcall */
        mutex_lock(&ep->parent_ep->com.mutex);
        if (ep->parent_ep->com.state != DEAD) {
                if (connect_request_upcall(ep)) {
                        abort_connection(ep);
                }
        } else
                abort_connection(ep);
        mutex_unlock(&ep->parent_ep->com.mutex);
}

/*
 * Upcall from the adapter indicating data has been transmitted.
 * For us it's just the single MPA request or reply.  We can now free
 * the skb holding the mpa message.
1822 */ 1823 int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) 1824 { 1825 int err; 1826 struct c4iw_ep *ep = to_ep(cm_id); 1827 CTR2(KTR_IW_CXGBE, "%s:crcB %p", __func__, ep); 1828 1829 if (state_read(&ep->com) == DEAD) { 1830 1831 CTR2(KTR_IW_CXGBE, "%s:crc1 %p", __func__, ep); 1832 c4iw_put_ep(&ep->com); 1833 return -ECONNRESET; 1834 } 1835 set_bit(ULP_REJECT, &ep->com.history); 1836 BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); 1837 1838 if (mpa_rev == 0) { 1839 1840 CTR2(KTR_IW_CXGBE, "%s:crc2 %p", __func__, ep); 1841 abort_connection(ep); 1842 } 1843 else { 1844 1845 CTR2(KTR_IW_CXGBE, "%s:crc3 %p", __func__, ep); 1846 err = send_mpa_reject(ep, pdata, pdata_len); 1847 err = soshutdown(ep->com.so, 3); 1848 } 1849 c4iw_put_ep(&ep->com); 1850 CTR2(KTR_IW_CXGBE, "%s:crc4 %p", __func__, ep); 1851 return 0; 1852 } 1853 1854 int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) 1855 { 1856 int err; 1857 struct c4iw_qp_attributes attrs; 1858 enum c4iw_qp_attr_mask mask; 1859 struct c4iw_ep *ep = to_ep(cm_id); 1860 struct c4iw_dev *h = to_c4iw_dev(cm_id->device); 1861 struct c4iw_qp *qp = get_qhp(h, conn_param->qpn); 1862 1863 CTR2(KTR_IW_CXGBE, "%s:cacB %p", __func__, ep); 1864 1865 if (state_read(&ep->com) == DEAD) { 1866 1867 CTR2(KTR_IW_CXGBE, "%s:cac1 %p", __func__, ep); 1868 err = -ECONNRESET; 1869 goto err; 1870 } 1871 1872 BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); 1873 BUG_ON(!qp); 1874 1875 set_bit(ULP_ACCEPT, &ep->com.history); 1876 1877 if ((conn_param->ord > c4iw_max_read_depth) || 1878 (conn_param->ird > c4iw_max_read_depth)) { 1879 1880 CTR2(KTR_IW_CXGBE, "%s:cac2 %p", __func__, ep); 1881 abort_connection(ep); 1882 err = -EINVAL; 1883 goto err; 1884 } 1885 1886 if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { 1887 1888 CTR2(KTR_IW_CXGBE, "%s:cac3 %p", __func__, ep); 1889 1890 if (conn_param->ord > ep->ird) { 1891 1892 CTR2(KTR_IW_CXGBE, "%s:cac4 %p", __func__, ep); 1893 ep->ird = conn_param->ird; 1894 ep->ord = conn_param->ord; 1895 send_mpa_reject(ep, conn_param->private_data, 1896 conn_param->private_data_len); 1897 abort_connection(ep); 1898 err = -ENOMEM; 1899 goto err; 1900 } 1901 1902 if (conn_param->ird > ep->ord) { 1903 1904 CTR2(KTR_IW_CXGBE, "%s:cac5 %p", __func__, ep); 1905 1906 if (!ep->ord) { 1907 1908 CTR2(KTR_IW_CXGBE, "%s:cac6 %p", __func__, ep); 1909 conn_param->ird = 1; 1910 } 1911 else { 1912 CTR2(KTR_IW_CXGBE, "%s:cac7 %p", __func__, ep); 1913 abort_connection(ep); 1914 err = -ENOMEM; 1915 goto err; 1916 } 1917 } 1918 1919 } 1920 ep->ird = conn_param->ird; 1921 ep->ord = conn_param->ord; 1922 1923 if (ep->mpa_attr.version != 2) { 1924 1925 CTR2(KTR_IW_CXGBE, "%s:cac8 %p", __func__, ep); 1926 1927 if (peer2peer && ep->ird == 0) { 1928 1929 CTR2(KTR_IW_CXGBE, "%s:cac9 %p", __func__, ep); 1930 ep->ird = 1; 1931 } 1932 } 1933 1934 1935 cm_id->add_ref(cm_id); 1936 ep->com.cm_id = cm_id; 1937 ep->com.qp = qp; 1938 //ep->ofld_txq = TOEPCB(ep->com.so)->ofld_txq; 1939 1940 /* bind QP to EP and move to RTS */ 1941 attrs.mpa_attr = ep->mpa_attr; 1942 attrs.max_ird = ep->ird; 1943 attrs.max_ord = ep->ord; 1944 attrs.llp_stream_handle = ep; 1945 attrs.next_state = C4IW_QP_STATE_RTS; 1946 1947 /* bind QP and TID with INIT_WR */ 1948 mask = C4IW_QP_ATTR_NEXT_STATE | 1949 C4IW_QP_ATTR_LLP_STREAM_HANDLE | 1950 C4IW_QP_ATTR_MPA_ATTR | 1951 C4IW_QP_ATTR_MAX_IRD | 1952 C4IW_QP_ATTR_MAX_ORD; 1953 1954 err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, mask, &attrs, 1); 1955 1956 if (err) { 1957 1958 
CTR2(KTR_IW_CXGBE, "%s:caca %p", __func__, ep); 1959 goto err1; 1960 } 1961 err = send_mpa_reply(ep, conn_param->private_data, 1962 conn_param->private_data_len); 1963 1964 if (err) { 1965 1966 CTR2(KTR_IW_CXGBE, "%s:caca %p", __func__, ep); 1967 goto err1; 1968 } 1969 1970 state_set(&ep->com, FPDU_MODE); 1971 established_upcall(ep); 1972 c4iw_put_ep(&ep->com); 1973 CTR2(KTR_IW_CXGBE, "%s:cacE %p", __func__, ep); 1974 return 0; 1975 err1: 1976 ep->com.cm_id = NULL; 1977 ep->com.qp = NULL; 1978 cm_id->rem_ref(cm_id); 1979 err: 1980 c4iw_put_ep(&ep->com); 1981 CTR2(KTR_IW_CXGBE, "%s:cacE err %p", __func__, ep); 1982 return err; 1983 } 1984 1985 1986 1987 int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) 1988 { 1989 int err = 0; 1990 struct c4iw_dev *dev = to_c4iw_dev(cm_id->device); 1991 struct c4iw_ep *ep = NULL; 1992 struct nhop4_extended nh4; 1993 struct toedev *tdev; 1994 1995 CTR2(KTR_IW_CXGBE, "%s:ccB %p", __func__, cm_id); 1996 1997 if ((conn_param->ord > c4iw_max_read_depth) || 1998 (conn_param->ird > c4iw_max_read_depth)) { 1999 2000 CTR2(KTR_IW_CXGBE, "%s:cc1 %p", __func__, cm_id); 2001 err = -EINVAL; 2002 goto out; 2003 } 2004 ep = alloc_ep(sizeof(*ep), M_NOWAIT); 2005 2006 if (!ep) { 2007 2008 CTR2(KTR_IW_CXGBE, "%s:cc2 %p", __func__, cm_id); 2009 printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__); 2010 err = -ENOMEM; 2011 goto out; 2012 } 2013 init_timer(&ep->timer); 2014 ep->plen = conn_param->private_data_len; 2015 2016 if (ep->plen) { 2017 2018 CTR2(KTR_IW_CXGBE, "%s:cc3 %p", __func__, ep); 2019 memcpy(ep->mpa_pkt + sizeof(struct mpa_message), 2020 conn_param->private_data, ep->plen); 2021 } 2022 ep->ird = conn_param->ird; 2023 ep->ord = conn_param->ord; 2024 2025 if (peer2peer && ep->ord == 0) { 2026 2027 CTR2(KTR_IW_CXGBE, "%s:cc4 %p", __func__, ep); 2028 ep->ord = 1; 2029 } 2030 2031 cm_id->add_ref(cm_id); 2032 ep->com.dev = dev; 2033 ep->com.cm_id = cm_id; 2034 ep->com.qp = get_qhp(dev, conn_param->qpn); 2035 2036 if (!ep->com.qp) { 2037 2038 CTR2(KTR_IW_CXGBE, "%s:cc5 %p", __func__, ep); 2039 err = -EINVAL; 2040 goto fail2; 2041 } 2042 ep->com.thread = curthread; 2043 ep->com.so = cm_id->so; 2044 2045 init_sock(&ep->com); 2046 2047 /* find a route */ 2048 err = find_route( 2049 cm_id->local_addr.sin_addr.s_addr, 2050 cm_id->remote_addr.sin_addr.s_addr, 2051 cm_id->local_addr.sin_port, 2052 cm_id->remote_addr.sin_port, 0, &nh4); 2053 2054 if (err) { 2055 2056 CTR2(KTR_IW_CXGBE, "%s:cc7 %p", __func__, ep); 2057 printk(KERN_ERR MOD "%s - cannot find route.\n", __func__); 2058 err = -EHOSTUNREACH; 2059 goto fail2; 2060 } 2061 2062 if (!(nh4.nh_ifp->if_capenable & IFCAP_TOE)) { 2063 2064 CTR2(KTR_IW_CXGBE, "%s:cc8 %p", __func__, ep); 2065 printf("%s - interface not TOE capable.\n", __func__); 2066 close_socket(&ep->com, 0); 2067 err = -ENOPROTOOPT; 2068 goto fail3; 2069 } 2070 tdev = TOEDEV(nh4.nh_ifp); 2071 2072 if (tdev == NULL) { 2073 2074 CTR2(KTR_IW_CXGBE, "%s:cc9 %p", __func__, ep); 2075 printf("%s - No toedev for interface.\n", __func__); 2076 goto fail3; 2077 } 2078 fib4_free_nh_ext(RT_DEFAULT_FIB, &nh4); 2079 2080 state_set(&ep->com, CONNECTING); 2081 ep->tos = 0; 2082 ep->com.local_addr = cm_id->local_addr; 2083 ep->com.remote_addr = cm_id->remote_addr; 2084 err = soconnect(ep->com.so, (struct sockaddr *)&ep->com.remote_addr, 2085 ep->com.thread); 2086 2087 if (!err) { 2088 CTR2(KTR_IW_CXGBE, "%s:cca %p", __func__, ep); 2089 goto out; 2090 } else { 2091 close_socket(&ep->com, 0); 2092 goto fail2; 2093 } 2094 2095 fail3: 2096 
fail3:
        CTR2(KTR_IW_CXGBE, "%s:ccb %p", __func__, ep);
        fib4_free_nh_ext(RT_DEFAULT_FIB, &nh4);
fail2:
        cm_id->rem_ref(cm_id);
        c4iw_put_ep(&ep->com);
out:
        CTR2(KTR_IW_CXGBE, "%s:ccE %p", __func__, ep);
        return err;
}

/*
 * iwcm->create_listen_ep. Returns -errno on failure.
 */
int
c4iw_create_listen_ep(struct iw_cm_id *cm_id, int backlog)
{
        int rc;
        struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
        struct c4iw_listen_ep *ep;
        struct socket *so = cm_id->so;

        ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
        CTR5(KTR_IW_CXGBE, "%s: cm_id %p, lso %p, ep %p, inp %p", __func__,
            cm_id, so, ep, so->so_pcb);
        if (ep == NULL) {
                log(LOG_ERR, "%s: failed to alloc memory for endpoint\n",
                    __func__);
                rc = ENOMEM;
                goto failed;
        }

        cm_id->add_ref(cm_id);
        ep->com.cm_id = cm_id;
        ep->com.dev = dev;
        ep->backlog = backlog;
        ep->com.local_addr = cm_id->local_addr;
        ep->com.thread = curthread;
        state_set(&ep->com, LISTEN);
        ep->com.so = so;

        cm_id->provider_data = ep;
        return (0);

failed:
        CTR3(KTR_IW_CXGBE, "%s: cm_id %p, FAILED (%d)", __func__, cm_id, rc);
        return (-rc);
}

void
c4iw_destroy_listen_ep(struct iw_cm_id *cm_id)
{
        struct c4iw_listen_ep *ep = to_listen_ep(cm_id);

        CTR4(KTR_IW_CXGBE, "%s: cm_id %p, so %p, state %s", __func__, cm_id,
            cm_id->so, states[ep->com.state]);

        state_set(&ep->com, DEAD);
        cm_id->rem_ref(cm_id);
        c4iw_put_ep(&ep->com);

        return;
}

int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
{
        int ret = 0;
        int close = 0;
        int fatal = 0;
        struct c4iw_rdev *rdev;

        mutex_lock(&ep->com.mutex);

        CTR2(KTR_IW_CXGBE, "%s:cedB %p", __func__, ep);

        rdev = &ep->com.dev->rdev;

        if (c4iw_fatal_error(rdev)) {

                CTR2(KTR_IW_CXGBE, "%s:ced1 %p", __func__, ep);
                fatal = 1;
                close_complete_upcall(ep, -ECONNRESET);
                ep->com.state = DEAD;
        }
        CTR3(KTR_IW_CXGBE, "%s:ced2 %p %s", __func__, ep,
            states[ep->com.state]);

        switch (ep->com.state) {

        case MPA_REQ_WAIT:
        case MPA_REQ_SENT:
        case MPA_REQ_RCVD:
        case MPA_REP_SENT:
        case FPDU_MODE:
                close = 1;
                if (abrupt)
                        ep->com.state = ABORTING;
                else {
                        ep->com.state = CLOSING;
                        START_EP_TIMER(ep);
                }
                set_bit(CLOSE_SENT, &ep->com.flags);
                break;

        case CLOSING:

                if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) {

                        close = 1;
                        if (abrupt) {
                                STOP_EP_TIMER(ep);
                                ep->com.state = ABORTING;
                        } else
                                ep->com.state = MORIBUND;
                }
                break;

        case MORIBUND:
        case ABORTING:
        case DEAD:
                CTR3(KTR_IW_CXGBE,
                    "%s ignoring disconnect ep %p state %u", __func__,
                    ep, ep->com.state);
                break;

        default:
                BUG();
                break;
        }

        mutex_unlock(&ep->com.mutex);

        if (close) {

                CTR2(KTR_IW_CXGBE, "%s:ced3 %p", __func__, ep);

                if (abrupt) {

                        CTR2(KTR_IW_CXGBE, "%s:ced4 %p", __func__, ep);
                        set_bit(EP_DISC_ABORT, &ep->com.history);
                        ret = abort_connection(ep);
                } else {

                        CTR2(KTR_IW_CXGBE, "%s:ced5 %p", __func__, ep);
                        set_bit(EP_DISC_CLOSE, &ep->com.history);

                        if (!ep->parent_ep)
                                __state_set(&ep->com, MORIBUND);
                        ret = shutdown_socket(&ep->com);
                }

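                /*
                 * A failed abort/shutdown is treated as fatal: the endpoint's
                 * resources are released below instead of waiting for further
                 * socket events.
                 */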
                if (ret) {

                        fatal = 1;
                }
        }

        if (fatal) {

                release_ep_resources(ep);
                CTR2(KTR_IW_CXGBE, "%s:ced6 %p", __func__, ep);
        }
        CTR2(KTR_IW_CXGBE, "%s:cedE %p", __func__, ep);
        return ret;
}

#ifdef C4IW_EP_REDIRECT
int c4iw_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new,
    struct l2t_entry *l2t)
{
        struct c4iw_ep *ep = ctx;

        if (ep->dst != old)
                return 0;

        PDBG("%s ep %p redirect to dst %p l2t %p\n", __func__, ep, new,
            l2t);
        dst_hold(new);
        cxgb4_l2t_release(ep->l2t);
        ep->l2t = l2t;
        dst_release(old);
        ep->dst = new;
        return 1;
}
#endif

static void ep_timeout(unsigned long arg)
{
        struct c4iw_ep *ep = (struct c4iw_ep *)arg;
        int kickit = 0;

        CTR2(KTR_IW_CXGBE, "%s:etB %p", __func__, ep);
        spin_lock(&timeout_lock);

        if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {

                list_add_tail(&ep->entry, &timeout_list);
                kickit = 1;
        }
        spin_unlock(&timeout_lock);

        if (kickit) {

                CTR2(KTR_IW_CXGBE, "%s:et1 %p", __func__, ep);
                queue_work(c4iw_taskq, &c4iw_task);
        }
        CTR2(KTR_IW_CXGBE, "%s:etE %p", __func__, ep);
}

static int fw6_wr_rpl(struct adapter *sc, const __be64 *rpl)
{
        uint64_t val = be64toh(*rpl);
        int ret;
        struct c4iw_wr_wait *wr_waitp;

        ret = (int)((val >> 8) & 0xff);
        wr_waitp = (struct c4iw_wr_wait *)rpl[1];
        CTR3(KTR_IW_CXGBE, "%s wr_waitp %p ret %u", __func__, wr_waitp, ret);
        if (wr_waitp)
                c4iw_wake_up(wr_waitp, ret ? -ret : 0);

        return (0);
}

static int fw6_cqe_handler(struct adapter *sc, const __be64 *rpl)
{
        struct t4_cqe cqe = *(const struct t4_cqe *)(&rpl[0]);

        CTR2(KTR_IW_CXGBE, "%s rpl %p", __func__, rpl);
        c4iw_ev_dispatch(sc->iwarp_softc, &cqe);

        return (0);
}

static int terminate(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
        struct adapter *sc = iq->adapter;
        const struct cpl_rdma_terminate *cpl = mtod(m, const void *);
        unsigned int tid = GET_TID(cpl);
        struct c4iw_qp_attributes attrs;
        struct toepcb *toep = lookup_tid(sc, tid);
        struct socket *so;
        struct c4iw_ep *ep;

        INP_WLOCK(toep->inp);
        so = inp_inpcbtosocket(toep->inp);
        ep = so->so_rcv.sb_upcallarg;
        INP_WUNLOCK(toep->inp);

        CTR3(KTR_IW_CXGBE, "%s:tB %p %d", __func__, ep, tid);

        if (ep && ep->com.qp) {

                printk(KERN_WARNING MOD "TERM received tid %u qpid %u\n", tid,
                    ep->com.qp->wq.sq.qid);
                attrs.next_state = C4IW_QP_STATE_TERMINATE;
                c4iw_modify_qp(ep->com.dev, ep->com.qp,
                    C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
        } else
                printk(KERN_WARNING MOD "TERM received tid %u no ep/qp\n", tid);
        CTR3(KTR_IW_CXGBE, "%s:tE %p %d", __func__, ep, tid);

        return 0;
}

void
c4iw_cm_init_cpl(struct adapter *sc)
{

        t4_register_cpl_handler(sc, CPL_RDMA_TERMINATE, terminate);
        t4_register_fw_msg_handler(sc, FW6_TYPE_WR_RPL, fw6_wr_rpl);
        t4_register_fw_msg_handler(sc, FW6_TYPE_CQE, fw6_cqe_handler);
        t4_register_an_handler(sc, c4iw_ev_handler);
}

void
c4iw_cm_term_cpl(struct adapter *sc)
{

        t4_register_cpl_handler(sc, CPL_RDMA_TERMINATE, NULL);
        t4_register_fw_msg_handler(sc, FW6_TYPE_WR_RPL, NULL);
        t4_register_fw_msg_handler(sc, FW6_TYPE_CQE, NULL);
}

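/*
 * Module-wide CM setup/teardown.  A sketch of the expected call order,
 * assuming the usual attach/detach path elsewhere in the driver (not
 * shown in this file):
 *
 *      c4iw_cm_init();         module load: req/timeout lists, taskq
 *      c4iw_cm_init_cpl(sc);   per adapter: hook CPL/FW6 handlers
 *      ...
 *      c4iw_cm_term_cpl(sc);   per adapter: unhook handlers
 *      c4iw_cm_term();         module unload: drain and destroy taskq
 */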
int __init c4iw_cm_init(void)
{

        TAILQ_INIT(&req_list);
        spin_lock_init(&req_lock);
        INIT_LIST_HEAD(&timeout_list);
        spin_lock_init(&timeout_lock);

        INIT_WORK(&c4iw_task, process_req);

        c4iw_taskq = create_singlethread_workqueue("iw_cxgbe");
        if (!c4iw_taskq)
                return -ENOMEM;

        return 0;
}

void __exit c4iw_cm_term(void)
{
        WARN_ON(!TAILQ_EMPTY(&req_list));
        WARN_ON(!list_empty(&timeout_list));
        flush_workqueue(c4iw_taskq);
        destroy_workqueue(c4iw_taskq);
}
#endif