/*
 * Copyright (c) 2009-2013 Chelsio, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above
 * copyright notice, this list of conditions and the following
 * disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following
 * disclaimer in the documentation and/or other materials
 * provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
31 */ 32 #include <sys/cdefs.h> 33 __FBSDID("$FreeBSD$"); 34 35 #include "opt_inet.h" 36 37 #ifdef TCP_OFFLOAD 38 #include <sys/types.h> 39 #include <sys/malloc.h> 40 #include <sys/socket.h> 41 #include <sys/socketvar.h> 42 #include <sys/sockio.h> 43 #include <sys/taskqueue.h> 44 #include <netinet/in.h> 45 #include <net/route.h> 46 47 #include <netinet/in_systm.h> 48 #include <netinet/in_pcb.h> 49 #include <netinet/ip.h> 50 #include <netinet/ip_var.h> 51 #include <netinet/tcp_var.h> 52 #include <netinet/tcp.h> 53 #include <netinet/tcpip.h> 54 55 #include <netinet/toecore.h> 56 57 struct sge_iq; 58 struct rss_header; 59 #include <linux/types.h> 60 #include "offload.h" 61 #include "tom/t4_tom.h" 62 63 #define TOEPCB(so) ((struct toepcb *)(so_sototcpcb((so))->t_toe)) 64 65 #include "iw_cxgbe.h" 66 #include <linux/module.h> 67 #include <linux/workqueue.h> 68 #include <linux/notifier.h> 69 #include <linux/inetdevice.h> 70 #include <linux/if_vlan.h> 71 #include <net/netevent.h> 72 73 static spinlock_t req_lock; 74 static TAILQ_HEAD(c4iw_ep_list, c4iw_ep_common) req_list; 75 static struct work_struct c4iw_task; 76 static struct workqueue_struct *c4iw_taskq; 77 static LIST_HEAD(timeout_list); 78 static spinlock_t timeout_lock; 79 80 static void process_req(struct work_struct *ctx); 81 static void start_ep_timer(struct c4iw_ep *ep); 82 static void stop_ep_timer(struct c4iw_ep *ep); 83 static int set_tcpinfo(struct c4iw_ep *ep); 84 static enum c4iw_ep_state state_read(struct c4iw_ep_common *epc); 85 static void __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state tostate); 86 static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state tostate); 87 static void *alloc_ep(int size, gfp_t flags); 88 void __free_ep(struct c4iw_ep_common *epc); 89 static struct rtentry * find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port, 90 __be16 peer_port, u8 tos); 91 static int close_socket(struct c4iw_ep_common *epc, int close); 92 static int shutdown_socket(struct 
c4iw_ep_common *epc); 93 static void abort_socket(struct c4iw_ep *ep); 94 static void send_mpa_req(struct c4iw_ep *ep); 95 static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen); 96 static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen); 97 static void close_complete_upcall(struct c4iw_ep *ep, int status); 98 static int abort_connection(struct c4iw_ep *ep); 99 static void peer_close_upcall(struct c4iw_ep *ep); 100 static void peer_abort_upcall(struct c4iw_ep *ep); 101 static void connect_reply_upcall(struct c4iw_ep *ep, int status); 102 static int connect_request_upcall(struct c4iw_ep *ep); 103 static void established_upcall(struct c4iw_ep *ep); 104 static void process_mpa_reply(struct c4iw_ep *ep); 105 static void process_mpa_request(struct c4iw_ep *ep); 106 static void process_peer_close(struct c4iw_ep *ep); 107 static void process_conn_error(struct c4iw_ep *ep); 108 static void process_close_complete(struct c4iw_ep *ep); 109 static void ep_timeout(unsigned long arg); 110 static void init_sock(struct c4iw_ep_common *epc); 111 static void process_data(struct c4iw_ep *ep); 112 static void process_connected(struct c4iw_ep *ep); 113 static struct socket * dequeue_socket(struct socket *head, struct sockaddr_in **remote, struct c4iw_ep *child_ep); 114 static void process_newconn(struct c4iw_ep *parent_ep); 115 static int c4iw_so_upcall(struct socket *so, void *arg, int waitflag); 116 static void process_socket_event(struct c4iw_ep *ep); 117 static void release_ep_resources(struct c4iw_ep *ep); 118 119 #define START_EP_TIMER(ep) \ 120 do { \ 121 CTR3(KTR_IW_CXGBE, "start_ep_timer (%s:%d) ep %p", \ 122 __func__, __LINE__, (ep)); \ 123 start_ep_timer(ep); \ 124 } while (0) 125 126 #define STOP_EP_TIMER(ep) \ 127 do { \ 128 CTR3(KTR_IW_CXGBE, "stop_ep_timer (%s:%d) ep %p", \ 129 __func__, __LINE__, (ep)); \ 130 stop_ep_timer(ep); \ 131 } while (0) 132 133 #ifdef KTR 134 static char *states[] = { 135 "idle", 136 "listen", 137 
"connecting", 138 "mpa_wait_req", 139 "mpa_req_sent", 140 "mpa_req_rcvd", 141 "mpa_rep_sent", 142 "fpdu_mode", 143 "aborting", 144 "closing", 145 "moribund", 146 "dead", 147 NULL, 148 }; 149 #endif 150 151 static void 152 process_req(struct work_struct *ctx) 153 { 154 struct c4iw_ep_common *epc; 155 156 spin_lock(&req_lock); 157 while (!TAILQ_EMPTY(&req_list)) { 158 epc = TAILQ_FIRST(&req_list); 159 TAILQ_REMOVE(&req_list, epc, entry); 160 epc->entry.tqe_prev = NULL; 161 spin_unlock(&req_lock); 162 if (epc->so) 163 process_socket_event((struct c4iw_ep *)epc); 164 c4iw_put_ep(epc); 165 spin_lock(&req_lock); 166 } 167 spin_unlock(&req_lock); 168 } 169 170 /* 171 * XXX: doesn't belong here in the iWARP driver. 172 * XXX: assumes that the connection was offloaded by cxgbe/t4_tom if TF_TOE is 173 * set. Is this a valid assumption for active open? 174 */ 175 static int 176 set_tcpinfo(struct c4iw_ep *ep) 177 { 178 struct socket *so = ep->com.so; 179 struct inpcb *inp = sotoinpcb(so); 180 struct tcpcb *tp; 181 struct toepcb *toep; 182 int rc = 0; 183 184 INP_WLOCK(inp); 185 tp = intotcpcb(inp); 186 if ((tp->t_flags & TF_TOE) == 0) { 187 rc = EINVAL; 188 log(LOG_ERR, "%s: connection not offloaded (so %p, ep %p)\n", 189 __func__, so, ep); 190 goto done; 191 } 192 toep = TOEPCB(so); 193 194 ep->hwtid = toep->tid; 195 ep->snd_seq = tp->snd_nxt; 196 ep->rcv_seq = tp->rcv_nxt; 197 ep->emss = max(tp->t_maxseg, 128); 198 done: 199 INP_WUNLOCK(inp); 200 return (rc); 201 202 } 203 204 static struct rtentry * 205 find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port, 206 __be16 peer_port, u8 tos) 207 { 208 struct route iproute; 209 struct sockaddr_in *dst = (struct sockaddr_in *)&iproute.ro_dst; 210 211 CTR5(KTR_IW_CXGBE, "%s:frtB %x, %x, %d, %d", __func__, local_ip, 212 peer_ip, ntohs(local_port), ntohs(peer_port)); 213 bzero(&iproute, sizeof iproute); 214 dst->sin_family = AF_INET; 215 dst->sin_len = sizeof *dst; 216 dst->sin_addr.s_addr = peer_ip; 217 218 
rtalloc(&iproute); 219 CTR2(KTR_IW_CXGBE, "%s:frtE %p", __func__, (uint64_t)iproute.ro_rt); 220 return iproute.ro_rt; 221 } 222 223 static int 224 close_socket(struct c4iw_ep_common *epc, int close) 225 { 226 struct socket *so = epc->so; 227 int rc; 228 229 CTR4(KTR_IW_CXGBE, "%s: so %p, ep %p, state %s", __func__, epc, so, 230 states[epc->state]); 231 232 SOCK_LOCK(so); 233 soupcall_clear(so, SO_RCV); 234 SOCK_UNLOCK(so); 235 236 if (close) 237 rc = soclose(so); 238 else 239 rc = soshutdown(so, SHUT_WR | SHUT_RD); 240 epc->so = NULL; 241 242 return (rc); 243 } 244 245 static int 246 shutdown_socket(struct c4iw_ep_common *epc) 247 { 248 249 CTR4(KTR_IW_CXGBE, "%s: so %p, ep %p, state %s", __func__, epc->so, epc, 250 states[epc->state]); 251 252 return (soshutdown(epc->so, SHUT_WR)); 253 } 254 255 static void 256 abort_socket(struct c4iw_ep *ep) 257 { 258 struct sockopt sopt; 259 int rc; 260 struct linger l; 261 262 CTR4(KTR_IW_CXGBE, "%s ep %p so %p state %s", __func__, ep, ep->com.so, 263 states[ep->com.state]); 264 265 l.l_onoff = 1; 266 l.l_linger = 0; 267 268 /* linger_time of 0 forces RST to be sent */ 269 sopt.sopt_dir = SOPT_SET; 270 sopt.sopt_level = SOL_SOCKET; 271 sopt.sopt_name = SO_LINGER; 272 sopt.sopt_val = (caddr_t)&l; 273 sopt.sopt_valsize = sizeof l; 274 sopt.sopt_td = NULL; 275 rc = sosetopt(ep->com.so, &sopt); 276 if (rc) { 277 log(LOG_ERR, "%s: can't set linger to 0, no RST! 
err %d\n", 278 __func__, rc); 279 } 280 } 281 282 static void 283 process_peer_close(struct c4iw_ep *ep) 284 { 285 struct c4iw_qp_attributes attrs; 286 int disconnect = 1; 287 int release = 0; 288 289 CTR4(KTR_IW_CXGBE, "%s:ppcB ep %p so %p state %s", __func__, ep, 290 ep->com.so, states[ep->com.state]); 291 292 mutex_lock(&ep->com.mutex); 293 switch (ep->com.state) { 294 295 case MPA_REQ_WAIT: 296 CTR2(KTR_IW_CXGBE, "%s:ppc1 %p MPA_REQ_WAIT CLOSING", 297 __func__, ep); 298 __state_set(&ep->com, CLOSING); 299 break; 300 301 case MPA_REQ_SENT: 302 CTR2(KTR_IW_CXGBE, "%s:ppc2 %p MPA_REQ_SENT CLOSING", 303 __func__, ep); 304 __state_set(&ep->com, DEAD); 305 connect_reply_upcall(ep, -ECONNABORTED); 306 307 disconnect = 0; 308 STOP_EP_TIMER(ep); 309 close_socket(&ep->com, 0); 310 ep->com.cm_id->rem_ref(ep->com.cm_id); 311 ep->com.cm_id = NULL; 312 ep->com.qp = NULL; 313 release = 1; 314 break; 315 316 case MPA_REQ_RCVD: 317 318 /* 319 * We're gonna mark this puppy DEAD, but keep 320 * the reference on it until the ULP accepts or 321 * rejects the CR. 
322 */ 323 CTR2(KTR_IW_CXGBE, "%s:ppc3 %p MPA_REQ_RCVD CLOSING", 324 __func__, ep); 325 __state_set(&ep->com, CLOSING); 326 c4iw_get_ep(&ep->com); 327 break; 328 329 case MPA_REP_SENT: 330 CTR2(KTR_IW_CXGBE, "%s:ppc4 %p MPA_REP_SENT CLOSING", 331 __func__, ep); 332 __state_set(&ep->com, CLOSING); 333 break; 334 335 case FPDU_MODE: 336 CTR2(KTR_IW_CXGBE, "%s:ppc5 %p FPDU_MODE CLOSING", 337 __func__, ep); 338 START_EP_TIMER(ep); 339 __state_set(&ep->com, CLOSING); 340 attrs.next_state = C4IW_QP_STATE_CLOSING; 341 c4iw_modify_qp(ep->com.dev, ep->com.qp, 342 C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); 343 peer_close_upcall(ep); 344 break; 345 346 case ABORTING: 347 CTR2(KTR_IW_CXGBE, "%s:ppc6 %p ABORTING (disconn)", 348 __func__, ep); 349 disconnect = 0; 350 break; 351 352 case CLOSING: 353 CTR2(KTR_IW_CXGBE, "%s:ppc7 %p CLOSING MORIBUND", 354 __func__, ep); 355 __state_set(&ep->com, MORIBUND); 356 disconnect = 0; 357 break; 358 359 case MORIBUND: 360 CTR2(KTR_IW_CXGBE, "%s:ppc8 %p MORIBUND DEAD", __func__, 361 ep); 362 STOP_EP_TIMER(ep); 363 if (ep->com.cm_id && ep->com.qp) { 364 attrs.next_state = C4IW_QP_STATE_IDLE; 365 c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, 366 C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); 367 } 368 close_socket(&ep->com, 0); 369 close_complete_upcall(ep, 0); 370 __state_set(&ep->com, DEAD); 371 release = 1; 372 disconnect = 0; 373 break; 374 375 case DEAD: 376 CTR2(KTR_IW_CXGBE, "%s:ppc9 %p DEAD (disconn)", 377 __func__, ep); 378 disconnect = 0; 379 break; 380 381 default: 382 panic("%s: ep %p state %d", __func__, ep, 383 ep->com.state); 384 break; 385 } 386 387 mutex_unlock(&ep->com.mutex); 388 389 if (disconnect) { 390 391 CTR2(KTR_IW_CXGBE, "%s:ppca %p", __func__, ep); 392 c4iw_ep_disconnect(ep, 0, M_NOWAIT); 393 } 394 if (release) { 395 396 CTR2(KTR_IW_CXGBE, "%s:ppcb %p", __func__, ep); 397 c4iw_put_ep(&ep->com); 398 } 399 CTR2(KTR_IW_CXGBE, "%s:ppcE %p", __func__, ep); 400 return; 401 } 402 403 static void 404 process_conn_error(struct c4iw_ep *ep) 405 
{ 406 struct c4iw_qp_attributes attrs; 407 int ret; 408 int state; 409 410 state = state_read(&ep->com); 411 CTR5(KTR_IW_CXGBE, "%s:pceB ep %p so %p so->so_error %u state %s", 412 __func__, ep, ep->com.so, ep->com.so->so_error, 413 states[ep->com.state]); 414 415 switch (state) { 416 417 case MPA_REQ_WAIT: 418 STOP_EP_TIMER(ep); 419 break; 420 421 case MPA_REQ_SENT: 422 STOP_EP_TIMER(ep); 423 connect_reply_upcall(ep, -ECONNRESET); 424 break; 425 426 case MPA_REP_SENT: 427 ep->com.rpl_err = ECONNRESET; 428 CTR1(KTR_IW_CXGBE, "waking up ep %p", ep); 429 break; 430 431 case MPA_REQ_RCVD: 432 433 /* 434 * We're gonna mark this puppy DEAD, but keep 435 * the reference on it until the ULP accepts or 436 * rejects the CR. 437 */ 438 c4iw_get_ep(&ep->com); 439 break; 440 441 case MORIBUND: 442 case CLOSING: 443 STOP_EP_TIMER(ep); 444 /*FALLTHROUGH*/ 445 case FPDU_MODE: 446 447 if (ep->com.cm_id && ep->com.qp) { 448 449 attrs.next_state = C4IW_QP_STATE_ERROR; 450 ret = c4iw_modify_qp(ep->com.qp->rhp, 451 ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, 452 &attrs, 1); 453 if (ret) 454 log(LOG_ERR, 455 "%s - qp <- error failed!\n", 456 __func__); 457 } 458 peer_abort_upcall(ep); 459 break; 460 461 case ABORTING: 462 break; 463 464 case DEAD: 465 CTR2(KTR_IW_CXGBE, "%s so_error %d IN DEAD STATE!!!!", 466 __func__, ep->com.so->so_error); 467 return; 468 469 default: 470 panic("%s: ep %p state %d", __func__, ep, state); 471 break; 472 } 473 474 if (state != ABORTING) { 475 476 CTR2(KTR_IW_CXGBE, "%s:pce1 %p", __func__, ep); 477 close_socket(&ep->com, 1); 478 state_set(&ep->com, DEAD); 479 c4iw_put_ep(&ep->com); 480 } 481 CTR2(KTR_IW_CXGBE, "%s:pceE %p", __func__, ep); 482 return; 483 } 484 485 static void 486 process_close_complete(struct c4iw_ep *ep) 487 { 488 struct c4iw_qp_attributes attrs; 489 int release = 0; 490 491 CTR4(KTR_IW_CXGBE, "%s:pccB ep %p so %p state %s", __func__, ep, 492 ep->com.so, states[ep->com.state]); 493 494 /* The cm_id may be null if we failed to connect */ 495 
mutex_lock(&ep->com.mutex); 496 497 switch (ep->com.state) { 498 499 case CLOSING: 500 CTR2(KTR_IW_CXGBE, "%s:pcc1 %p CLOSING MORIBUND", 501 __func__, ep); 502 __state_set(&ep->com, MORIBUND); 503 break; 504 505 case MORIBUND: 506 CTR2(KTR_IW_CXGBE, "%s:pcc1 %p MORIBUND DEAD", __func__, 507 ep); 508 STOP_EP_TIMER(ep); 509 510 if ((ep->com.cm_id) && (ep->com.qp)) { 511 512 CTR2(KTR_IW_CXGBE, "%s:pcc2 %p QP_STATE_IDLE", 513 __func__, ep); 514 attrs.next_state = C4IW_QP_STATE_IDLE; 515 c4iw_modify_qp(ep->com.dev, 516 ep->com.qp, 517 C4IW_QP_ATTR_NEXT_STATE, 518 &attrs, 1); 519 } 520 521 if (ep->parent_ep) { 522 523 CTR2(KTR_IW_CXGBE, "%s:pcc3 %p", __func__, ep); 524 close_socket(&ep->com, 1); 525 } 526 else { 527 528 CTR2(KTR_IW_CXGBE, "%s:pcc4 %p", __func__, ep); 529 close_socket(&ep->com, 0); 530 } 531 close_complete_upcall(ep, 0); 532 __state_set(&ep->com, DEAD); 533 release = 1; 534 break; 535 536 case ABORTING: 537 CTR2(KTR_IW_CXGBE, "%s:pcc5 %p ABORTING", __func__, ep); 538 break; 539 540 case DEAD: 541 default: 542 CTR2(KTR_IW_CXGBE, "%s:pcc6 %p DEAD", __func__, ep); 543 panic("%s:pcc6 %p DEAD", __func__, ep); 544 break; 545 } 546 mutex_unlock(&ep->com.mutex); 547 548 if (release) { 549 550 CTR2(KTR_IW_CXGBE, "%s:pcc7 %p", __func__, ep); 551 c4iw_put_ep(&ep->com); 552 } 553 CTR2(KTR_IW_CXGBE, "%s:pccE %p", __func__, ep); 554 return; 555 } 556 557 static void 558 init_sock(struct c4iw_ep_common *epc) 559 { 560 int rc; 561 struct sockopt sopt; 562 struct socket *so = epc->so; 563 int on = 1; 564 565 SOCK_LOCK(so); 566 soupcall_set(so, SO_RCV, c4iw_so_upcall, epc); 567 so->so_state |= SS_NBIO; 568 SOCK_UNLOCK(so); 569 sopt.sopt_dir = SOPT_SET; 570 sopt.sopt_level = IPPROTO_TCP; 571 sopt.sopt_name = TCP_NODELAY; 572 sopt.sopt_val = (caddr_t)&on; 573 sopt.sopt_valsize = sizeof on; 574 sopt.sopt_td = NULL; 575 rc = sosetopt(so, &sopt); 576 if (rc) { 577 log(LOG_ERR, "%s: can't set TCP_NODELAY on so %p (%d)\n", 578 __func__, so, rc); 579 } 580 } 581 582 static void 
583 process_data(struct c4iw_ep *ep) 584 { 585 struct sockaddr_in *local, *remote; 586 587 CTR5(KTR_IW_CXGBE, "%s: so %p, ep %p, state %s, sbused %d", __func__, 588 ep->com.so, ep, states[ep->com.state], sbused(&ep->com.so->so_rcv)); 589 590 switch (state_read(&ep->com)) { 591 case MPA_REQ_SENT: 592 process_mpa_reply(ep); 593 break; 594 case MPA_REQ_WAIT: 595 in_getsockaddr(ep->com.so, (struct sockaddr **)&local); 596 in_getpeeraddr(ep->com.so, (struct sockaddr **)&remote); 597 ep->com.local_addr = *local; 598 ep->com.remote_addr = *remote; 599 free(local, M_SONAME); 600 free(remote, M_SONAME); 601 process_mpa_request(ep); 602 break; 603 default: 604 if (sbused(&ep->com.so->so_rcv)) 605 log(LOG_ERR, "%s: Unexpected streaming data. ep %p, " 606 "state %d, so %p, so_state 0x%x, sbused %u\n", 607 __func__, ep, state_read(&ep->com), ep->com.so, 608 ep->com.so->so_state, sbused(&ep->com.so->so_rcv)); 609 break; 610 } 611 } 612 613 static void 614 process_connected(struct c4iw_ep *ep) 615 { 616 617 if ((ep->com.so->so_state & SS_ISCONNECTED) && !ep->com.so->so_error) 618 send_mpa_req(ep); 619 else { 620 connect_reply_upcall(ep, -ep->com.so->so_error); 621 close_socket(&ep->com, 0); 622 state_set(&ep->com, DEAD); 623 c4iw_put_ep(&ep->com); 624 } 625 } 626 627 static struct socket * 628 dequeue_socket(struct socket *head, struct sockaddr_in **remote, 629 struct c4iw_ep *child_ep) 630 { 631 struct socket *so; 632 633 ACCEPT_LOCK(); 634 so = TAILQ_FIRST(&head->so_comp); 635 if (!so) { 636 ACCEPT_UNLOCK(); 637 return (NULL); 638 } 639 TAILQ_REMOVE(&head->so_comp, so, so_list); 640 head->so_qlen--; 641 SOCK_LOCK(so); 642 so->so_qstate &= ~SQ_COMP; 643 so->so_head = NULL; 644 soref(so); 645 soupcall_set(so, SO_RCV, c4iw_so_upcall, child_ep); 646 so->so_state |= SS_NBIO; 647 SOCK_UNLOCK(so); 648 ACCEPT_UNLOCK(); 649 soaccept(so, (struct sockaddr **)remote); 650 651 return (so); 652 } 653 654 static void 655 process_newconn(struct c4iw_ep *parent_ep) 656 { 657 struct socket 
*child_so; 658 struct c4iw_ep *child_ep; 659 struct sockaddr_in *remote; 660 661 child_ep = alloc_ep(sizeof(*child_ep), M_NOWAIT); 662 if (!child_ep) { 663 CTR3(KTR_IW_CXGBE, "%s: parent so %p, parent ep %p, ENOMEM", 664 __func__, parent_ep->com.so, parent_ep); 665 log(LOG_ERR, "%s: failed to allocate ep entry\n", __func__); 666 return; 667 } 668 669 child_so = dequeue_socket(parent_ep->com.so, &remote, child_ep); 670 if (!child_so) { 671 CTR4(KTR_IW_CXGBE, 672 "%s: parent so %p, parent ep %p, child ep %p, dequeue err", 673 __func__, parent_ep->com.so, parent_ep, child_ep); 674 log(LOG_ERR, "%s: failed to dequeue child socket\n", __func__); 675 __free_ep(&child_ep->com); 676 return; 677 678 } 679 680 CTR5(KTR_IW_CXGBE, 681 "%s: parent so %p, parent ep %p, child so %p, child ep %p", 682 __func__, parent_ep->com.so, parent_ep, child_so, child_ep); 683 684 child_ep->com.local_addr = parent_ep->com.local_addr; 685 child_ep->com.remote_addr = *remote; 686 child_ep->com.dev = parent_ep->com.dev; 687 child_ep->com.so = child_so; 688 child_ep->com.cm_id = NULL; 689 child_ep->com.thread = parent_ep->com.thread; 690 child_ep->parent_ep = parent_ep; 691 692 free(remote, M_SONAME); 693 c4iw_get_ep(&parent_ep->com); 694 child_ep->parent_ep = parent_ep; 695 init_timer(&child_ep->timer); 696 state_set(&child_ep->com, MPA_REQ_WAIT); 697 START_EP_TIMER(child_ep); 698 699 /* maybe the request has already been queued up on the socket... 
*/ 700 process_mpa_request(child_ep); 701 } 702 703 static int 704 c4iw_so_upcall(struct socket *so, void *arg, int waitflag) 705 { 706 struct c4iw_ep *ep = arg; 707 708 spin_lock(&req_lock); 709 710 CTR6(KTR_IW_CXGBE, 711 "%s: so %p, so_state 0x%x, ep %p, ep_state %s, tqe_prev %p", 712 __func__, so, so->so_state, ep, states[ep->com.state], 713 ep->com.entry.tqe_prev); 714 715 if (ep && ep->com.so && !ep->com.entry.tqe_prev) { 716 KASSERT(ep->com.so == so, ("%s: XXX review.", __func__)); 717 c4iw_get_ep(&ep->com); 718 TAILQ_INSERT_TAIL(&req_list, &ep->com, entry); 719 queue_work(c4iw_taskq, &c4iw_task); 720 } 721 722 spin_unlock(&req_lock); 723 return (SU_OK); 724 } 725 726 static void 727 process_socket_event(struct c4iw_ep *ep) 728 { 729 int state = state_read(&ep->com); 730 struct socket *so = ep->com.so; 731 732 CTR6(KTR_IW_CXGBE, "process_socket_event: so %p, so_state 0x%x, " 733 "so_err %d, sb_state 0x%x, ep %p, ep_state %s", so, so->so_state, 734 so->so_error, so->so_rcv.sb_state, ep, states[state]); 735 736 if (state == CONNECTING) { 737 process_connected(ep); 738 return; 739 } 740 741 if (state == LISTEN) { 742 process_newconn(ep); 743 return; 744 } 745 746 /* connection error */ 747 if (so->so_error) { 748 process_conn_error(ep); 749 return; 750 } 751 752 /* peer close */ 753 if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) && state < CLOSING) { 754 process_peer_close(ep); 755 return; 756 } 757 758 /* close complete */ 759 if (so->so_state & SS_ISDISCONNECTED) { 760 process_close_complete(ep); 761 return; 762 } 763 764 /* rx data */ 765 process_data(ep); 766 } 767 768 SYSCTL_NODE(_hw, OID_AUTO, iw_cxgbe, CTLFLAG_RD, 0, "iw_cxgbe driver parameters"); 769 770 int db_delay_usecs = 1; 771 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, db_delay_usecs, CTLFLAG_RWTUN, &db_delay_usecs, 0, 772 "Usecs to delay awaiting db fifo to drain"); 773 774 static int dack_mode = 1; 775 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, dack_mode, CTLFLAG_RWTUN, &dack_mode, 0, 776 "Delayed ack mode (default 
= 1)"); 777 778 int c4iw_max_read_depth = 8; 779 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, c4iw_max_read_depth, CTLFLAG_RWTUN, &c4iw_max_read_depth, 0, 780 "Per-connection max ORD/IRD (default = 8)"); 781 782 static int enable_tcp_timestamps; 783 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, enable_tcp_timestamps, CTLFLAG_RWTUN, &enable_tcp_timestamps, 0, 784 "Enable tcp timestamps (default = 0)"); 785 786 static int enable_tcp_sack; 787 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, enable_tcp_sack, CTLFLAG_RWTUN, &enable_tcp_sack, 0, 788 "Enable tcp SACK (default = 0)"); 789 790 static int enable_tcp_window_scaling = 1; 791 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, enable_tcp_window_scaling, CTLFLAG_RWTUN, &enable_tcp_window_scaling, 0, 792 "Enable tcp window scaling (default = 1)"); 793 794 int c4iw_debug = 1; 795 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, c4iw_debug, CTLFLAG_RWTUN, &c4iw_debug, 0, 796 "Enable debug logging (default = 0)"); 797 798 static int peer2peer; 799 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, peer2peer, CTLFLAG_RWTUN, &peer2peer, 0, 800 "Support peer2peer ULPs (default = 0)"); 801 802 static int p2p_type = FW_RI_INIT_P2PTYPE_READ_REQ; 803 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, p2p_type, CTLFLAG_RWTUN, &p2p_type, 0, 804 "RDMAP opcode to use for the RTR message: 1 = RDMA_READ 0 = RDMA_WRITE (default 1)"); 805 806 static int ep_timeout_secs = 60; 807 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, ep_timeout_secs, CTLFLAG_RWTUN, &ep_timeout_secs, 0, 808 "CM Endpoint operation timeout in seconds (default = 60)"); 809 810 static int mpa_rev = 1; 811 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, mpa_rev, CTLFLAG_RWTUN, &mpa_rev, 0, 812 "MPA Revision, 0 supports amso1100, 1 is RFC5044 spec compliant, 2 is IETF MPA Peer Connect Draft compliant (default = 1)"); 813 814 static int markers_enabled; 815 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, markers_enabled, CTLFLAG_RWTUN, &markers_enabled, 0, 816 "Enable MPA MARKERS (default(0) = disabled)"); 817 818 static int crc_enabled = 1; 819 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, crc_enabled, 
CTLFLAG_RWTUN, &crc_enabled, 0, 820 "Enable MPA CRC (default(1) = enabled)"); 821 822 static int rcv_win = 256 * 1024; 823 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, rcv_win, CTLFLAG_RWTUN, &rcv_win, 0, 824 "TCP receive window in bytes (default = 256KB)"); 825 826 static int snd_win = 128 * 1024; 827 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, snd_win, CTLFLAG_RWTUN, &snd_win, 0, 828 "TCP send window in bytes (default = 128KB)"); 829 830 int db_fc_threshold = 2000; 831 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, db_fc_threshold, CTLFLAG_RWTUN, &db_fc_threshold, 0, 832 "QP count/threshold that triggers automatic"); 833 834 static void 835 start_ep_timer(struct c4iw_ep *ep) 836 { 837 838 if (timer_pending(&ep->timer)) { 839 CTR2(KTR_IW_CXGBE, "%s: ep %p, already started", __func__, ep); 840 printk(KERN_ERR "%s timer already started! ep %p\n", __func__, 841 ep); 842 return; 843 } 844 clear_bit(TIMEOUT, &ep->com.flags); 845 c4iw_get_ep(&ep->com); 846 ep->timer.expires = jiffies + ep_timeout_secs * HZ; 847 ep->timer.data = (unsigned long)ep; 848 ep->timer.function = ep_timeout; 849 add_timer(&ep->timer); 850 } 851 852 static void 853 stop_ep_timer(struct c4iw_ep *ep) 854 { 855 856 del_timer_sync(&ep->timer); 857 if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) { 858 c4iw_put_ep(&ep->com); 859 } 860 } 861 862 static enum 863 c4iw_ep_state state_read(struct c4iw_ep_common *epc) 864 { 865 enum c4iw_ep_state state; 866 867 mutex_lock(&epc->mutex); 868 state = epc->state; 869 mutex_unlock(&epc->mutex); 870 871 return (state); 872 } 873 874 static void 875 __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new) 876 { 877 878 epc->state = new; 879 } 880 881 static void 882 state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new) 883 { 884 885 mutex_lock(&epc->mutex); 886 __state_set(epc, new); 887 mutex_unlock(&epc->mutex); 888 } 889 890 static void * 891 alloc_ep(int size, gfp_t gfp) 892 { 893 struct c4iw_ep_common *epc; 894 895 epc = kzalloc(size, gfp); 896 if (epc == NULL) 897 return 
(NULL); 898 899 kref_init(&epc->kref); 900 mutex_init(&epc->mutex); 901 c4iw_init_wr_wait(&epc->wr_wait); 902 903 return (epc); 904 } 905 906 void 907 __free_ep(struct c4iw_ep_common *epc) 908 { 909 CTR2(KTR_IW_CXGBE, "%s:feB %p", __func__, epc); 910 KASSERT(!epc->so, ("%s warning ep->so %p \n", __func__, epc->so)); 911 KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list!\n", __func__, epc)); 912 free(epc, M_DEVBUF); 913 CTR2(KTR_IW_CXGBE, "%s:feE %p", __func__, epc); 914 } 915 916 void _c4iw_free_ep(struct kref *kref) 917 { 918 struct c4iw_ep *ep; 919 struct c4iw_ep_common *epc; 920 921 ep = container_of(kref, struct c4iw_ep, com.kref); 922 epc = &ep->com; 923 KASSERT(!epc->so, ("%s ep->so %p", __func__, epc->so)); 924 KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list", 925 __func__, epc)); 926 kfree(ep); 927 } 928 929 static void release_ep_resources(struct c4iw_ep *ep) 930 { 931 CTR2(KTR_IW_CXGBE, "%s:rerB %p", __func__, ep); 932 set_bit(RELEASE_RESOURCES, &ep->com.flags); 933 c4iw_put_ep(&ep->com); 934 CTR2(KTR_IW_CXGBE, "%s:rerE %p", __func__, ep); 935 } 936 937 static void 938 send_mpa_req(struct c4iw_ep *ep) 939 { 940 int mpalen; 941 struct mpa_message *mpa; 942 struct mpa_v2_conn_params mpa_v2_params; 943 struct mbuf *m; 944 char mpa_rev_to_use = mpa_rev; 945 int err; 946 947 if (ep->retry_with_mpa_v1) 948 mpa_rev_to_use = 1; 949 mpalen = sizeof(*mpa) + ep->plen; 950 if (mpa_rev_to_use == 2) 951 mpalen += sizeof(struct mpa_v2_conn_params); 952 953 mpa = malloc(mpalen, M_CXGBE, M_NOWAIT); 954 if (mpa == NULL) { 955 failed: 956 connect_reply_upcall(ep, -ENOMEM); 957 return; 958 } 959 960 memset(mpa, 0, mpalen); 961 memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key)); 962 mpa->flags = (crc_enabled ? MPA_CRC : 0) | 963 (markers_enabled ? MPA_MARKERS : 0) | 964 (mpa_rev_to_use == 2 ? 
MPA_ENHANCED_RDMA_CONN : 0); 965 mpa->private_data_size = htons(ep->plen); 966 mpa->revision = mpa_rev_to_use; 967 968 if (mpa_rev_to_use == 1) { 969 ep->tried_with_mpa_v1 = 1; 970 ep->retry_with_mpa_v1 = 0; 971 } 972 973 if (mpa_rev_to_use == 2) { 974 mpa->private_data_size += 975 htons(sizeof(struct mpa_v2_conn_params)); 976 mpa_v2_params.ird = htons((u16)ep->ird); 977 mpa_v2_params.ord = htons((u16)ep->ord); 978 979 if (peer2peer) { 980 mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL); 981 982 if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE) { 983 mpa_v2_params.ord |= 984 htons(MPA_V2_RDMA_WRITE_RTR); 985 } else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) { 986 mpa_v2_params.ord |= 987 htons(MPA_V2_RDMA_READ_RTR); 988 } 989 } 990 memcpy(mpa->private_data, &mpa_v2_params, 991 sizeof(struct mpa_v2_conn_params)); 992 993 if (ep->plen) { 994 995 memcpy(mpa->private_data + 996 sizeof(struct mpa_v2_conn_params), 997 ep->mpa_pkt + sizeof(*mpa), ep->plen); 998 } 999 } else { 1000 1001 if (ep->plen) 1002 memcpy(mpa->private_data, 1003 ep->mpa_pkt + sizeof(*mpa), ep->plen); 1004 CTR2(KTR_IW_CXGBE, "%s:smr7 %p", __func__, ep); 1005 } 1006 1007 m = m_getm(NULL, mpalen, M_NOWAIT, MT_DATA); 1008 if (m == NULL) { 1009 free(mpa, M_CXGBE); 1010 goto failed; 1011 } 1012 m_copyback(m, 0, mpalen, (void *)mpa); 1013 free(mpa, M_CXGBE); 1014 1015 err = sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, 1016 ep->com.thread); 1017 if (err) 1018 goto failed; 1019 1020 START_EP_TIMER(ep); 1021 state_set(&ep->com, MPA_REQ_SENT); 1022 ep->mpa_attr.initiator = 1; 1023 } 1024 1025 static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen) 1026 { 1027 int mpalen ; 1028 struct mpa_message *mpa; 1029 struct mpa_v2_conn_params mpa_v2_params; 1030 struct mbuf *m; 1031 int err; 1032 1033 CTR4(KTR_IW_CXGBE, "%s:smrejB %p %u %d", __func__, ep, ep->hwtid, 1034 ep->plen); 1035 1036 mpalen = sizeof(*mpa) + plen; 1037 1038 if (ep->mpa_attr.version == 2 && 
ep->mpa_attr.enhanced_rdma_conn) { 1039 1040 mpalen += sizeof(struct mpa_v2_conn_params); 1041 CTR4(KTR_IW_CXGBE, "%s:smrej1 %p %u %d", __func__, ep, 1042 ep->mpa_attr.version, mpalen); 1043 } 1044 1045 mpa = malloc(mpalen, M_CXGBE, M_NOWAIT); 1046 if (mpa == NULL) 1047 return (-ENOMEM); 1048 1049 memset(mpa, 0, mpalen); 1050 memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); 1051 mpa->flags = MPA_REJECT; 1052 mpa->revision = mpa_rev; 1053 mpa->private_data_size = htons(plen); 1054 1055 if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { 1056 1057 mpa->flags |= MPA_ENHANCED_RDMA_CONN; 1058 mpa->private_data_size += 1059 htons(sizeof(struct mpa_v2_conn_params)); 1060 mpa_v2_params.ird = htons(((u16)ep->ird) | 1061 (peer2peer ? MPA_V2_PEER2PEER_MODEL : 1062 0)); 1063 mpa_v2_params.ord = htons(((u16)ep->ord) | (peer2peer ? 1064 (p2p_type == 1065 FW_RI_INIT_P2PTYPE_RDMA_WRITE ? 1066 MPA_V2_RDMA_WRITE_RTR : p2p_type == 1067 FW_RI_INIT_P2PTYPE_READ_REQ ? 1068 MPA_V2_RDMA_READ_RTR : 0) : 0)); 1069 memcpy(mpa->private_data, &mpa_v2_params, 1070 sizeof(struct mpa_v2_conn_params)); 1071 1072 if (ep->plen) 1073 memcpy(mpa->private_data + 1074 sizeof(struct mpa_v2_conn_params), pdata, plen); 1075 CTR5(KTR_IW_CXGBE, "%s:smrej3 %p %d %d %d", __func__, ep, 1076 mpa_v2_params.ird, mpa_v2_params.ord, ep->plen); 1077 } else 1078 if (plen) 1079 memcpy(mpa->private_data, pdata, plen); 1080 1081 m = m_getm(NULL, mpalen, M_NOWAIT, MT_DATA); 1082 if (m == NULL) { 1083 free(mpa, M_CXGBE); 1084 return (-ENOMEM); 1085 } 1086 m_copyback(m, 0, mpalen, (void *)mpa); 1087 free(mpa, M_CXGBE); 1088 1089 err = -sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread); 1090 if (!err) 1091 ep->snd_seq += mpalen; 1092 CTR4(KTR_IW_CXGBE, "%s:smrejE %p %u %d", __func__, ep, ep->hwtid, err); 1093 return err; 1094 } 1095 1096 static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen) 1097 { 1098 int mpalen; 1099 struct mpa_message *mpa; 1100 struct mbuf *m; 1101 
struct mpa_v2_conn_params mpa_v2_params; 1102 int err; 1103 1104 CTR2(KTR_IW_CXGBE, "%s:smrepB %p", __func__, ep); 1105 1106 mpalen = sizeof(*mpa) + plen; 1107 1108 if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { 1109 1110 CTR3(KTR_IW_CXGBE, "%s:smrep1 %p %d", __func__, ep, 1111 ep->mpa_attr.version); 1112 mpalen += sizeof(struct mpa_v2_conn_params); 1113 } 1114 1115 mpa = malloc(mpalen, M_CXGBE, M_NOWAIT); 1116 if (mpa == NULL) 1117 return (-ENOMEM); 1118 1119 memset(mpa, 0, sizeof(*mpa)); 1120 memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); 1121 mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) | 1122 (markers_enabled ? MPA_MARKERS : 0); 1123 mpa->revision = ep->mpa_attr.version; 1124 mpa->private_data_size = htons(plen); 1125 1126 if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { 1127 1128 mpa->flags |= MPA_ENHANCED_RDMA_CONN; 1129 mpa->private_data_size += 1130 htons(sizeof(struct mpa_v2_conn_params)); 1131 mpa_v2_params.ird = htons((u16)ep->ird); 1132 mpa_v2_params.ord = htons((u16)ep->ord); 1133 CTR5(KTR_IW_CXGBE, "%s:smrep3 %p %d %d %d", __func__, ep, 1134 ep->mpa_attr.version, mpa_v2_params.ird, mpa_v2_params.ord); 1135 1136 if (peer2peer && (ep->mpa_attr.p2p_type != 1137 FW_RI_INIT_P2PTYPE_DISABLED)) { 1138 1139 mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL); 1140 1141 if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE) { 1142 1143 mpa_v2_params.ord |= 1144 htons(MPA_V2_RDMA_WRITE_RTR); 1145 CTR5(KTR_IW_CXGBE, "%s:smrep4 %p %d %d %d", 1146 __func__, ep, p2p_type, mpa_v2_params.ird, 1147 mpa_v2_params.ord); 1148 } 1149 else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) { 1150 1151 mpa_v2_params.ord |= 1152 htons(MPA_V2_RDMA_READ_RTR); 1153 CTR5(KTR_IW_CXGBE, "%s:smrep5 %p %d %d %d", 1154 __func__, ep, p2p_type, mpa_v2_params.ird, 1155 mpa_v2_params.ord); 1156 } 1157 } 1158 1159 memcpy(mpa->private_data, &mpa_v2_params, 1160 sizeof(struct mpa_v2_conn_params)); 1161 1162 if (ep->plen) 1163 memcpy(mpa->private_data + 
1164 sizeof(struct mpa_v2_conn_params), pdata, plen); 1165 } else 1166 if (plen) 1167 memcpy(mpa->private_data, pdata, plen); 1168 1169 m = m_getm(NULL, mpalen, M_NOWAIT, MT_DATA); 1170 if (m == NULL) { 1171 free(mpa, M_CXGBE); 1172 return (-ENOMEM); 1173 } 1174 m_copyback(m, 0, mpalen, (void *)mpa); 1175 free(mpa, M_CXGBE); 1176 1177 1178 state_set(&ep->com, MPA_REP_SENT); 1179 ep->snd_seq += mpalen; 1180 err = -sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, 1181 ep->com.thread); 1182 CTR3(KTR_IW_CXGBE, "%s:smrepE %p %d", __func__, ep, err); 1183 return err; 1184 } 1185 1186 1187 1188 static void close_complete_upcall(struct c4iw_ep *ep, int status) 1189 { 1190 struct iw_cm_event event; 1191 1192 CTR2(KTR_IW_CXGBE, "%s:ccuB %p", __func__, ep); 1193 memset(&event, 0, sizeof(event)); 1194 event.event = IW_CM_EVENT_CLOSE; 1195 event.status = status; 1196 1197 if (ep->com.cm_id) { 1198 1199 CTR2(KTR_IW_CXGBE, "%s:ccu1 %1", __func__, ep); 1200 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 1201 ep->com.cm_id->rem_ref(ep->com.cm_id); 1202 ep->com.cm_id = NULL; 1203 ep->com.qp = NULL; 1204 set_bit(CLOSE_UPCALL, &ep->com.history); 1205 } 1206 CTR2(KTR_IW_CXGBE, "%s:ccuE %p", __func__, ep); 1207 } 1208 1209 static int abort_connection(struct c4iw_ep *ep) 1210 { 1211 int err; 1212 1213 CTR2(KTR_IW_CXGBE, "%s:abB %p", __func__, ep); 1214 state_set(&ep->com, ABORTING); 1215 abort_socket(ep); 1216 err = close_socket(&ep->com, 0); 1217 set_bit(ABORT_CONN, &ep->com.history); 1218 CTR2(KTR_IW_CXGBE, "%s:abE %p", __func__, ep); 1219 return err; 1220 } 1221 1222 static void peer_close_upcall(struct c4iw_ep *ep) 1223 { 1224 struct iw_cm_event event; 1225 1226 CTR2(KTR_IW_CXGBE, "%s:pcuB %p", __func__, ep); 1227 memset(&event, 0, sizeof(event)); 1228 event.event = IW_CM_EVENT_DISCONNECT; 1229 1230 if (ep->com.cm_id) { 1231 1232 CTR2(KTR_IW_CXGBE, "%s:pcu1 %p", __func__, ep); 1233 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 1234 set_bit(DISCONN_UPCALL, 
&ep->com.history); 1235 } 1236 CTR2(KTR_IW_CXGBE, "%s:pcuE %p", __func__, ep); 1237 } 1238 1239 static void peer_abort_upcall(struct c4iw_ep *ep) 1240 { 1241 struct iw_cm_event event; 1242 1243 CTR2(KTR_IW_CXGBE, "%s:pauB %p", __func__, ep); 1244 memset(&event, 0, sizeof(event)); 1245 event.event = IW_CM_EVENT_CLOSE; 1246 event.status = -ECONNRESET; 1247 1248 if (ep->com.cm_id) { 1249 1250 CTR2(KTR_IW_CXGBE, "%s:pau1 %p", __func__, ep); 1251 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 1252 ep->com.cm_id->rem_ref(ep->com.cm_id); 1253 ep->com.cm_id = NULL; 1254 ep->com.qp = NULL; 1255 set_bit(ABORT_UPCALL, &ep->com.history); 1256 } 1257 CTR2(KTR_IW_CXGBE, "%s:pauE %p", __func__, ep); 1258 } 1259 1260 static void connect_reply_upcall(struct c4iw_ep *ep, int status) 1261 { 1262 struct iw_cm_event event; 1263 1264 CTR3(KTR_IW_CXGBE, "%s:cruB %p", __func__, ep, status); 1265 memset(&event, 0, sizeof(event)); 1266 event.event = IW_CM_EVENT_CONNECT_REPLY; 1267 event.status = (status ==-ECONNABORTED)?-ECONNRESET: status; 1268 event.local_addr = ep->com.local_addr; 1269 event.remote_addr = ep->com.remote_addr; 1270 1271 if ((status == 0) || (status == -ECONNREFUSED)) { 1272 1273 if (!ep->tried_with_mpa_v1) { 1274 1275 CTR2(KTR_IW_CXGBE, "%s:cru1 %p", __func__, ep); 1276 /* this means MPA_v2 is used */ 1277 event.private_data_len = ep->plen - 1278 sizeof(struct mpa_v2_conn_params); 1279 event.private_data = ep->mpa_pkt + 1280 sizeof(struct mpa_message) + 1281 sizeof(struct mpa_v2_conn_params); 1282 } else { 1283 1284 CTR2(KTR_IW_CXGBE, "%s:cru2 %p", __func__, ep); 1285 /* this means MPA_v1 is used */ 1286 event.private_data_len = ep->plen; 1287 event.private_data = ep->mpa_pkt + 1288 sizeof(struct mpa_message); 1289 } 1290 } 1291 1292 if (ep->com.cm_id) { 1293 1294 CTR2(KTR_IW_CXGBE, "%s:cru3 %p", __func__, ep); 1295 set_bit(CONN_RPL_UPCALL, &ep->com.history); 1296 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 1297 } 1298 1299 if(status == -ECONNABORTED) { 
1300 1301 CTR3(KTR_IW_CXGBE, "%s:cruE %p %d", __func__, ep, status); 1302 return; 1303 } 1304 1305 if (status < 0) { 1306 1307 CTR3(KTR_IW_CXGBE, "%s:cru4 %p %d", __func__, ep, status); 1308 ep->com.cm_id->rem_ref(ep->com.cm_id); 1309 ep->com.cm_id = NULL; 1310 ep->com.qp = NULL; 1311 } 1312 1313 CTR2(KTR_IW_CXGBE, "%s:cruE %p", __func__, ep); 1314 } 1315 1316 static int connect_request_upcall(struct c4iw_ep *ep) 1317 { 1318 struct iw_cm_event event; 1319 int ret; 1320 1321 CTR3(KTR_IW_CXGBE, "%s: ep %p, mpa_v1 %d", __func__, ep, 1322 ep->tried_with_mpa_v1); 1323 1324 memset(&event, 0, sizeof(event)); 1325 event.event = IW_CM_EVENT_CONNECT_REQUEST; 1326 event.local_addr = ep->com.local_addr; 1327 event.remote_addr = ep->com.remote_addr; 1328 event.provider_data = ep; 1329 event.so = ep->com.so; 1330 1331 if (!ep->tried_with_mpa_v1) { 1332 /* this means MPA_v2 is used */ 1333 event.ord = ep->ord; 1334 event.ird = ep->ird; 1335 event.private_data_len = ep->plen - 1336 sizeof(struct mpa_v2_conn_params); 1337 event.private_data = ep->mpa_pkt + sizeof(struct mpa_message) + 1338 sizeof(struct mpa_v2_conn_params); 1339 } else { 1340 1341 /* this means MPA_v1 is used. 
Send max supported */ 1342 event.ord = c4iw_max_read_depth; 1343 event.ird = c4iw_max_read_depth; 1344 event.private_data_len = ep->plen; 1345 event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); 1346 } 1347 1348 c4iw_get_ep(&ep->com); 1349 ret = ep->parent_ep->com.cm_id->event_handler(ep->parent_ep->com.cm_id, 1350 &event); 1351 if(ret) 1352 c4iw_put_ep(&ep->com); 1353 1354 set_bit(CONNREQ_UPCALL, &ep->com.history); 1355 c4iw_put_ep(&ep->parent_ep->com); 1356 return ret; 1357 } 1358 1359 static void established_upcall(struct c4iw_ep *ep) 1360 { 1361 struct iw_cm_event event; 1362 1363 CTR2(KTR_IW_CXGBE, "%s:euB %p", __func__, ep); 1364 memset(&event, 0, sizeof(event)); 1365 event.event = IW_CM_EVENT_ESTABLISHED; 1366 event.ird = ep->ird; 1367 event.ord = ep->ord; 1368 1369 if (ep->com.cm_id) { 1370 1371 CTR2(KTR_IW_CXGBE, "%s:eu1 %p", __func__, ep); 1372 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 1373 set_bit(ESTAB_UPCALL, &ep->com.history); 1374 } 1375 CTR2(KTR_IW_CXGBE, "%s:euE %p", __func__, ep); 1376 } 1377 1378 1379 1380 static void process_mpa_reply(struct c4iw_ep *ep) 1381 { 1382 struct mpa_message *mpa; 1383 struct mpa_v2_conn_params *mpa_v2_params; 1384 u16 plen; 1385 u16 resp_ird, resp_ord; 1386 u8 rtr_mismatch = 0, insuff_ird = 0; 1387 struct c4iw_qp_attributes attrs; 1388 enum c4iw_qp_attr_mask mask; 1389 int err; 1390 struct mbuf *top, *m; 1391 int flags = MSG_DONTWAIT; 1392 struct uio uio; 1393 1394 CTR2(KTR_IW_CXGBE, "%s:pmrB %p", __func__, ep); 1395 1396 /* 1397 * Stop mpa timer. If it expired, then the state has 1398 * changed and we bail since ep_timeout already aborted 1399 * the connection. 
1400 */ 1401 STOP_EP_TIMER(ep); 1402 if (state_read(&ep->com) != MPA_REQ_SENT) 1403 return; 1404 1405 uio.uio_resid = 1000000; 1406 uio.uio_td = ep->com.thread; 1407 err = soreceive(ep->com.so, NULL, &uio, &top, NULL, &flags); 1408 1409 if (err) { 1410 1411 if (err == EWOULDBLOCK) { 1412 1413 CTR2(KTR_IW_CXGBE, "%s:pmr1 %p", __func__, ep); 1414 START_EP_TIMER(ep); 1415 return; 1416 } 1417 err = -err; 1418 CTR2(KTR_IW_CXGBE, "%s:pmr2 %p", __func__, ep); 1419 goto err; 1420 } 1421 1422 if (ep->com.so->so_rcv.sb_mb) { 1423 1424 CTR2(KTR_IW_CXGBE, "%s:pmr3 %p", __func__, ep); 1425 printf("%s data after soreceive called! so %p sb_mb %p top %p\n", 1426 __func__, ep->com.so, ep->com.so->so_rcv.sb_mb, top); 1427 } 1428 1429 m = top; 1430 1431 do { 1432 1433 CTR2(KTR_IW_CXGBE, "%s:pmr4 %p", __func__, ep); 1434 /* 1435 * If we get more than the supported amount of private data 1436 * then we must fail this connection. 1437 */ 1438 if (ep->mpa_pkt_len + m->m_len > sizeof(ep->mpa_pkt)) { 1439 1440 CTR3(KTR_IW_CXGBE, "%s:pmr5 %p %d", __func__, ep, 1441 ep->mpa_pkt_len + m->m_len); 1442 err = (-EINVAL); 1443 goto err; 1444 } 1445 1446 /* 1447 * copy the new data into our accumulation buffer. 1448 */ 1449 m_copydata(m, 0, m->m_len, &(ep->mpa_pkt[ep->mpa_pkt_len])); 1450 ep->mpa_pkt_len += m->m_len; 1451 if (!m->m_next) 1452 m = m->m_nextpkt; 1453 else 1454 m = m->m_next; 1455 } while (m); 1456 1457 m_freem(top); 1458 /* 1459 * if we don't even have the mpa message, then bail. 1460 */ 1461 if (ep->mpa_pkt_len < sizeof(*mpa)) 1462 return; 1463 mpa = (struct mpa_message *) ep->mpa_pkt; 1464 1465 /* Validate MPA header. */ 1466 if (mpa->revision > mpa_rev) { 1467 1468 CTR4(KTR_IW_CXGBE, "%s:pmr6 %p %d %d", __func__, ep, 1469 mpa->revision, mpa_rev); 1470 printk(KERN_ERR MOD "%s MPA version mismatch. 
Local = %d, " 1471 " Received = %d\n", __func__, mpa_rev, mpa->revision); 1472 err = -EPROTO; 1473 goto err; 1474 } 1475 1476 if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) { 1477 1478 CTR2(KTR_IW_CXGBE, "%s:pmr7 %p", __func__, ep); 1479 err = -EPROTO; 1480 goto err; 1481 } 1482 1483 plen = ntohs(mpa->private_data_size); 1484 1485 /* 1486 * Fail if there's too much private data. 1487 */ 1488 if (plen > MPA_MAX_PRIVATE_DATA) { 1489 1490 CTR2(KTR_IW_CXGBE, "%s:pmr8 %p", __func__, ep); 1491 err = -EPROTO; 1492 goto err; 1493 } 1494 1495 /* 1496 * If plen does not account for pkt size 1497 */ 1498 if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) { 1499 1500 CTR2(KTR_IW_CXGBE, "%s:pmr9 %p", __func__, ep); 1501 err = -EPROTO; 1502 goto err; 1503 } 1504 1505 ep->plen = (u8) plen; 1506 1507 /* 1508 * If we don't have all the pdata yet, then bail. 1509 * We'll continue process when more data arrives. 1510 */ 1511 if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) { 1512 1513 CTR2(KTR_IW_CXGBE, "%s:pmra %p", __func__, ep); 1514 return; 1515 } 1516 1517 if (mpa->flags & MPA_REJECT) { 1518 1519 CTR2(KTR_IW_CXGBE, "%s:pmrb %p", __func__, ep); 1520 err = -ECONNREFUSED; 1521 goto err; 1522 } 1523 1524 /* 1525 * If we get here we have accumulated the entire mpa 1526 * start reply message including private data. And 1527 * the MPA header is valid. 1528 */ 1529 state_set(&ep->com, FPDU_MODE); 1530 ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; 1531 ep->mpa_attr.recv_marker_enabled = markers_enabled; 1532 ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; 1533 ep->mpa_attr.version = mpa->revision; 1534 ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED; 1535 1536 if (mpa->revision == 2) { 1537 1538 CTR2(KTR_IW_CXGBE, "%s:pmrc %p", __func__, ep); 1539 ep->mpa_attr.enhanced_rdma_conn = 1540 mpa->flags & MPA_ENHANCED_RDMA_CONN ? 
1 : 0; 1541 1542 if (ep->mpa_attr.enhanced_rdma_conn) { 1543 1544 CTR2(KTR_IW_CXGBE, "%s:pmrd %p", __func__, ep); 1545 mpa_v2_params = (struct mpa_v2_conn_params *) 1546 (ep->mpa_pkt + sizeof(*mpa)); 1547 resp_ird = ntohs(mpa_v2_params->ird) & 1548 MPA_V2_IRD_ORD_MASK; 1549 resp_ord = ntohs(mpa_v2_params->ord) & 1550 MPA_V2_IRD_ORD_MASK; 1551 1552 /* 1553 * This is a double-check. Ideally, below checks are 1554 * not required since ird/ord stuff has been taken 1555 * care of in c4iw_accept_cr 1556 */ 1557 if ((ep->ird < resp_ord) || (ep->ord > resp_ird)) { 1558 1559 CTR2(KTR_IW_CXGBE, "%s:pmre %p", __func__, ep); 1560 err = -ENOMEM; 1561 ep->ird = resp_ord; 1562 ep->ord = resp_ird; 1563 insuff_ird = 1; 1564 } 1565 1566 if (ntohs(mpa_v2_params->ird) & 1567 MPA_V2_PEER2PEER_MODEL) { 1568 1569 CTR2(KTR_IW_CXGBE, "%s:pmrf %p", __func__, ep); 1570 if (ntohs(mpa_v2_params->ord) & 1571 MPA_V2_RDMA_WRITE_RTR) { 1572 1573 CTR2(KTR_IW_CXGBE, "%s:pmrg %p", __func__, ep); 1574 ep->mpa_attr.p2p_type = 1575 FW_RI_INIT_P2PTYPE_RDMA_WRITE; 1576 } 1577 else if (ntohs(mpa_v2_params->ord) & 1578 MPA_V2_RDMA_READ_RTR) { 1579 1580 CTR2(KTR_IW_CXGBE, "%s:pmrh %p", __func__, ep); 1581 ep->mpa_attr.p2p_type = 1582 FW_RI_INIT_P2PTYPE_READ_REQ; 1583 } 1584 } 1585 } 1586 } else { 1587 1588 CTR2(KTR_IW_CXGBE, "%s:pmri %p", __func__, ep); 1589 1590 if (mpa->revision == 1) { 1591 1592 CTR2(KTR_IW_CXGBE, "%s:pmrj %p", __func__, ep); 1593 1594 if (peer2peer) { 1595 1596 CTR2(KTR_IW_CXGBE, "%s:pmrk %p", __func__, ep); 1597 ep->mpa_attr.p2p_type = p2p_type; 1598 } 1599 } 1600 } 1601 1602 if (set_tcpinfo(ep)) { 1603 1604 CTR2(KTR_IW_CXGBE, "%s:pmrl %p", __func__, ep); 1605 printf("%s set_tcpinfo error\n", __func__); 1606 goto err; 1607 } 1608 1609 CTR6(KTR_IW_CXGBE, "%s - crc_enabled = %d, recv_marker_enabled = %d, " 1610 "xmit_marker_enabled = %d, version = %d p2p_type = %d", __func__, 1611 ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled, 1612 ep->mpa_attr.xmit_marker_enabled, 
ep->mpa_attr.version, 1613 ep->mpa_attr.p2p_type); 1614 1615 /* 1616 * If responder's RTR does not match with that of initiator, assign 1617 * FW_RI_INIT_P2PTYPE_DISABLED in mpa attributes so that RTR is not 1618 * generated when moving QP to RTS state. 1619 * A TERM message will be sent after QP has moved to RTS state 1620 */ 1621 if ((ep->mpa_attr.version == 2) && peer2peer && 1622 (ep->mpa_attr.p2p_type != p2p_type)) { 1623 1624 CTR2(KTR_IW_CXGBE, "%s:pmrm %p", __func__, ep); 1625 ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED; 1626 rtr_mismatch = 1; 1627 } 1628 1629 1630 //ep->ofld_txq = TOEPCB(ep->com.so)->ofld_txq; 1631 attrs.mpa_attr = ep->mpa_attr; 1632 attrs.max_ird = ep->ird; 1633 attrs.max_ord = ep->ord; 1634 attrs.llp_stream_handle = ep; 1635 attrs.next_state = C4IW_QP_STATE_RTS; 1636 1637 mask = C4IW_QP_ATTR_NEXT_STATE | 1638 C4IW_QP_ATTR_LLP_STREAM_HANDLE | C4IW_QP_ATTR_MPA_ATTR | 1639 C4IW_QP_ATTR_MAX_IRD | C4IW_QP_ATTR_MAX_ORD; 1640 1641 /* bind QP and TID with INIT_WR */ 1642 err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, mask, &attrs, 1); 1643 1644 if (err) { 1645 1646 CTR2(KTR_IW_CXGBE, "%s:pmrn %p", __func__, ep); 1647 goto err; 1648 } 1649 1650 /* 1651 * If responder's RTR requirement did not match with what initiator 1652 * supports, generate TERM message 1653 */ 1654 if (rtr_mismatch) { 1655 1656 CTR2(KTR_IW_CXGBE, "%s:pmro %p", __func__, ep); 1657 printk(KERN_ERR "%s: RTR mismatch, sending TERM\n", __func__); 1658 attrs.layer_etype = LAYER_MPA | DDP_LLP; 1659 attrs.ecode = MPA_NOMATCH_RTR; 1660 attrs.next_state = C4IW_QP_STATE_TERMINATE; 1661 err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, 1662 C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); 1663 err = -ENOMEM; 1664 goto out; 1665 } 1666 1667 /* 1668 * Generate TERM if initiator IRD is not sufficient for responder 1669 * provided ORD. Currently, we do the same behaviour even when 1670 * responder provided IRD is also not sufficient as regards to 1671 * initiator ORD. 
1672 */ 1673 if (insuff_ird) { 1674 1675 CTR2(KTR_IW_CXGBE, "%s:pmrp %p", __func__, ep); 1676 printk(KERN_ERR "%s: Insufficient IRD, sending TERM\n", 1677 __func__); 1678 attrs.layer_etype = LAYER_MPA | DDP_LLP; 1679 attrs.ecode = MPA_INSUFF_IRD; 1680 attrs.next_state = C4IW_QP_STATE_TERMINATE; 1681 err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, 1682 C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); 1683 err = -ENOMEM; 1684 goto out; 1685 } 1686 goto out; 1687 err: 1688 state_set(&ep->com, ABORTING); 1689 abort_connection(ep); 1690 out: 1691 connect_reply_upcall(ep, err); 1692 CTR2(KTR_IW_CXGBE, "%s:pmrE %p", __func__, ep); 1693 return; 1694 } 1695 1696 static void 1697 process_mpa_request(struct c4iw_ep *ep) 1698 { 1699 struct mpa_message *mpa; 1700 u16 plen; 1701 int flags = MSG_DONTWAIT; 1702 int rc; 1703 struct iovec iov; 1704 struct uio uio; 1705 enum c4iw_ep_state state = state_read(&ep->com); 1706 1707 CTR3(KTR_IW_CXGBE, "%s: ep %p, state %s", __func__, ep, states[state]); 1708 1709 if (state != MPA_REQ_WAIT) 1710 return; 1711 1712 iov.iov_base = &ep->mpa_pkt[ep->mpa_pkt_len]; 1713 iov.iov_len = sizeof(ep->mpa_pkt) - ep->mpa_pkt_len; 1714 uio.uio_iov = &iov; 1715 uio.uio_iovcnt = 1; 1716 uio.uio_offset = 0; 1717 uio.uio_resid = sizeof(ep->mpa_pkt) - ep->mpa_pkt_len; 1718 uio.uio_segflg = UIO_SYSSPACE; 1719 uio.uio_rw = UIO_READ; 1720 uio.uio_td = NULL; /* uio.uio_td = ep->com.thread; */ 1721 1722 rc = soreceive(ep->com.so, NULL, &uio, NULL, NULL, &flags); 1723 if (rc == EAGAIN) 1724 return; 1725 else if (rc) { 1726 abort: 1727 STOP_EP_TIMER(ep); 1728 abort_connection(ep); 1729 return; 1730 } 1731 KASSERT(uio.uio_offset > 0, ("%s: sorecieve on so %p read no data", 1732 __func__, ep->com.so)); 1733 ep->mpa_pkt_len += uio.uio_offset; 1734 1735 /* 1736 * If we get more than the supported amount of private data then we must 1737 * fail this connection. 
XXX: check so_rcv->sb_cc, or peek with another 1738 * soreceive, or increase the size of mpa_pkt by 1 and abort if the last 1739 * byte is filled by the soreceive above. 1740 */ 1741 1742 /* Don't even have the MPA message. Wait for more data to arrive. */ 1743 if (ep->mpa_pkt_len < sizeof(*mpa)) 1744 return; 1745 mpa = (struct mpa_message *) ep->mpa_pkt; 1746 1747 /* 1748 * Validate MPA Header. 1749 */ 1750 if (mpa->revision > mpa_rev) { 1751 log(LOG_ERR, "%s: MPA version mismatch. Local = %d," 1752 " Received = %d\n", __func__, mpa_rev, mpa->revision); 1753 goto abort; 1754 } 1755 1756 if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) 1757 goto abort; 1758 1759 /* 1760 * Fail if there's too much private data. 1761 */ 1762 plen = ntohs(mpa->private_data_size); 1763 if (plen > MPA_MAX_PRIVATE_DATA) 1764 goto abort; 1765 1766 /* 1767 * If plen does not account for pkt size 1768 */ 1769 if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) 1770 goto abort; 1771 1772 ep->plen = (u8) plen; 1773 1774 /* 1775 * If we don't have all the pdata yet, then bail. 1776 */ 1777 if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) 1778 return; 1779 1780 /* 1781 * If we get here we have accumulated the entire mpa 1782 * start reply message including private data. 1783 */ 1784 ep->mpa_attr.initiator = 0; 1785 ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; 1786 ep->mpa_attr.recv_marker_enabled = markers_enabled; 1787 ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; 1788 ep->mpa_attr.version = mpa->revision; 1789 if (mpa->revision == 1) 1790 ep->tried_with_mpa_v1 = 1; 1791 ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED; 1792 1793 if (mpa->revision == 2) { 1794 ep->mpa_attr.enhanced_rdma_conn = 1795 mpa->flags & MPA_ENHANCED_RDMA_CONN ? 
1 : 0; 1796 if (ep->mpa_attr.enhanced_rdma_conn) { 1797 struct mpa_v2_conn_params *mpa_v2_params; 1798 u16 ird, ord; 1799 1800 mpa_v2_params = (void *)&ep->mpa_pkt[sizeof(*mpa)]; 1801 ird = ntohs(mpa_v2_params->ird); 1802 ord = ntohs(mpa_v2_params->ord); 1803 1804 ep->ird = ird & MPA_V2_IRD_ORD_MASK; 1805 ep->ord = ord & MPA_V2_IRD_ORD_MASK; 1806 if (ird & MPA_V2_PEER2PEER_MODEL && peer2peer) { 1807 if (ord & MPA_V2_RDMA_WRITE_RTR) { 1808 ep->mpa_attr.p2p_type = 1809 FW_RI_INIT_P2PTYPE_RDMA_WRITE; 1810 } else if (ord & MPA_V2_RDMA_READ_RTR) { 1811 ep->mpa_attr.p2p_type = 1812 FW_RI_INIT_P2PTYPE_READ_REQ; 1813 } 1814 } 1815 } 1816 } else if (mpa->revision == 1 && peer2peer) 1817 ep->mpa_attr.p2p_type = p2p_type; 1818 1819 if (set_tcpinfo(ep)) 1820 goto abort; 1821 1822 CTR5(KTR_IW_CXGBE, "%s: crc_enabled = %d, recv_marker_enabled = %d, " 1823 "xmit_marker_enabled = %d, version = %d", __func__, 1824 ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled, 1825 ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version); 1826 1827 state_set(&ep->com, MPA_REQ_RCVD); 1828 STOP_EP_TIMER(ep); 1829 1830 /* drive upcall */ 1831 mutex_lock(&ep->parent_ep->com.mutex); 1832 if (ep->parent_ep->com.state != DEAD) { 1833 if(connect_request_upcall(ep)) { 1834 abort_connection(ep); 1835 } 1836 }else 1837 abort_connection(ep); 1838 mutex_unlock(&ep->parent_ep->com.mutex); 1839 } 1840 1841 /* 1842 * Upcall from the adapter indicating data has been transmitted. 1843 * For us its just the single MPA request or reply. We can now free 1844 * the skb holding the mpa message. 
1845 */ 1846 int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) 1847 { 1848 int err; 1849 struct c4iw_ep *ep = to_ep(cm_id); 1850 CTR2(KTR_IW_CXGBE, "%s:crcB %p", __func__, ep); 1851 1852 if (state_read(&ep->com) == DEAD) { 1853 1854 CTR2(KTR_IW_CXGBE, "%s:crc1 %p", __func__, ep); 1855 c4iw_put_ep(&ep->com); 1856 return -ECONNRESET; 1857 } 1858 set_bit(ULP_REJECT, &ep->com.history); 1859 BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); 1860 1861 if (mpa_rev == 0) { 1862 1863 CTR2(KTR_IW_CXGBE, "%s:crc2 %p", __func__, ep); 1864 abort_connection(ep); 1865 } 1866 else { 1867 1868 CTR2(KTR_IW_CXGBE, "%s:crc3 %p", __func__, ep); 1869 err = send_mpa_reject(ep, pdata, pdata_len); 1870 err = soshutdown(ep->com.so, 3); 1871 } 1872 c4iw_put_ep(&ep->com); 1873 CTR2(KTR_IW_CXGBE, "%s:crc4 %p", __func__, ep); 1874 return 0; 1875 } 1876 1877 int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) 1878 { 1879 int err; 1880 struct c4iw_qp_attributes attrs; 1881 enum c4iw_qp_attr_mask mask; 1882 struct c4iw_ep *ep = to_ep(cm_id); 1883 struct c4iw_dev *h = to_c4iw_dev(cm_id->device); 1884 struct c4iw_qp *qp = get_qhp(h, conn_param->qpn); 1885 1886 CTR2(KTR_IW_CXGBE, "%s:cacB %p", __func__, ep); 1887 1888 if (state_read(&ep->com) == DEAD) { 1889 1890 CTR2(KTR_IW_CXGBE, "%s:cac1 %p", __func__, ep); 1891 err = -ECONNRESET; 1892 goto err; 1893 } 1894 1895 BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); 1896 BUG_ON(!qp); 1897 1898 set_bit(ULP_ACCEPT, &ep->com.history); 1899 1900 if ((conn_param->ord > c4iw_max_read_depth) || 1901 (conn_param->ird > c4iw_max_read_depth)) { 1902 1903 CTR2(KTR_IW_CXGBE, "%s:cac2 %p", __func__, ep); 1904 abort_connection(ep); 1905 err = -EINVAL; 1906 goto err; 1907 } 1908 1909 if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { 1910 1911 CTR2(KTR_IW_CXGBE, "%s:cac3 %p", __func__, ep); 1912 1913 if (conn_param->ord > ep->ird) { 1914 1915 CTR2(KTR_IW_CXGBE, "%s:cac4 %p", __func__, ep); 1916 ep->ird = 
conn_param->ird; 1917 ep->ord = conn_param->ord; 1918 send_mpa_reject(ep, conn_param->private_data, 1919 conn_param->private_data_len); 1920 abort_connection(ep); 1921 err = -ENOMEM; 1922 goto err; 1923 } 1924 1925 if (conn_param->ird > ep->ord) { 1926 1927 CTR2(KTR_IW_CXGBE, "%s:cac5 %p", __func__, ep); 1928 1929 if (!ep->ord) { 1930 1931 CTR2(KTR_IW_CXGBE, "%s:cac6 %p", __func__, ep); 1932 conn_param->ird = 1; 1933 } 1934 else { 1935 CTR2(KTR_IW_CXGBE, "%s:cac7 %p", __func__, ep); 1936 abort_connection(ep); 1937 err = -ENOMEM; 1938 goto err; 1939 } 1940 } 1941 1942 } 1943 ep->ird = conn_param->ird; 1944 ep->ord = conn_param->ord; 1945 1946 if (ep->mpa_attr.version != 2) { 1947 1948 CTR2(KTR_IW_CXGBE, "%s:cac8 %p", __func__, ep); 1949 1950 if (peer2peer && ep->ird == 0) { 1951 1952 CTR2(KTR_IW_CXGBE, "%s:cac9 %p", __func__, ep); 1953 ep->ird = 1; 1954 } 1955 } 1956 1957 1958 cm_id->add_ref(cm_id); 1959 ep->com.cm_id = cm_id; 1960 ep->com.qp = qp; 1961 //ep->ofld_txq = TOEPCB(ep->com.so)->ofld_txq; 1962 1963 /* bind QP to EP and move to RTS */ 1964 attrs.mpa_attr = ep->mpa_attr; 1965 attrs.max_ird = ep->ird; 1966 attrs.max_ord = ep->ord; 1967 attrs.llp_stream_handle = ep; 1968 attrs.next_state = C4IW_QP_STATE_RTS; 1969 1970 /* bind QP and TID with INIT_WR */ 1971 mask = C4IW_QP_ATTR_NEXT_STATE | 1972 C4IW_QP_ATTR_LLP_STREAM_HANDLE | 1973 C4IW_QP_ATTR_MPA_ATTR | 1974 C4IW_QP_ATTR_MAX_IRD | 1975 C4IW_QP_ATTR_MAX_ORD; 1976 1977 err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, mask, &attrs, 1); 1978 1979 if (err) { 1980 1981 CTR2(KTR_IW_CXGBE, "%s:caca %p", __func__, ep); 1982 goto err1; 1983 } 1984 err = send_mpa_reply(ep, conn_param->private_data, 1985 conn_param->private_data_len); 1986 1987 if (err) { 1988 1989 CTR2(KTR_IW_CXGBE, "%s:caca %p", __func__, ep); 1990 goto err1; 1991 } 1992 1993 state_set(&ep->com, FPDU_MODE); 1994 established_upcall(ep); 1995 c4iw_put_ep(&ep->com); 1996 CTR2(KTR_IW_CXGBE, "%s:cacE %p", __func__, ep); 1997 return 0; 1998 err1: 1999 
ep->com.cm_id = NULL; 2000 ep->com.qp = NULL; 2001 cm_id->rem_ref(cm_id); 2002 err: 2003 c4iw_put_ep(&ep->com); 2004 CTR2(KTR_IW_CXGBE, "%s:cacE err %p", __func__, ep); 2005 return err; 2006 } 2007 2008 2009 2010 int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) 2011 { 2012 int err = 0; 2013 struct c4iw_dev *dev = to_c4iw_dev(cm_id->device); 2014 struct c4iw_ep *ep = NULL; 2015 struct rtentry *rt; 2016 struct toedev *tdev; 2017 2018 CTR2(KTR_IW_CXGBE, "%s:ccB %p", __func__, cm_id); 2019 2020 if ((conn_param->ord > c4iw_max_read_depth) || 2021 (conn_param->ird > c4iw_max_read_depth)) { 2022 2023 CTR2(KTR_IW_CXGBE, "%s:cc1 %p", __func__, cm_id); 2024 err = -EINVAL; 2025 goto out; 2026 } 2027 ep = alloc_ep(sizeof(*ep), M_NOWAIT); 2028 2029 if (!ep) { 2030 2031 CTR2(KTR_IW_CXGBE, "%s:cc2 %p", __func__, cm_id); 2032 printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__); 2033 err = -ENOMEM; 2034 goto out; 2035 } 2036 init_timer(&ep->timer); 2037 ep->plen = conn_param->private_data_len; 2038 2039 if (ep->plen) { 2040 2041 CTR2(KTR_IW_CXGBE, "%s:cc3 %p", __func__, ep); 2042 memcpy(ep->mpa_pkt + sizeof(struct mpa_message), 2043 conn_param->private_data, ep->plen); 2044 } 2045 ep->ird = conn_param->ird; 2046 ep->ord = conn_param->ord; 2047 2048 if (peer2peer && ep->ord == 0) { 2049 2050 CTR2(KTR_IW_CXGBE, "%s:cc4 %p", __func__, ep); 2051 ep->ord = 1; 2052 } 2053 2054 cm_id->add_ref(cm_id); 2055 ep->com.dev = dev; 2056 ep->com.cm_id = cm_id; 2057 ep->com.qp = get_qhp(dev, conn_param->qpn); 2058 2059 if (!ep->com.qp) { 2060 2061 CTR2(KTR_IW_CXGBE, "%s:cc5 %p", __func__, ep); 2062 err = -EINVAL; 2063 goto fail2; 2064 } 2065 ep->com.thread = curthread; 2066 ep->com.so = cm_id->so; 2067 2068 init_sock(&ep->com); 2069 2070 /* find a route */ 2071 rt = find_route( 2072 cm_id->local_addr.sin_addr.s_addr, 2073 cm_id->remote_addr.sin_addr.s_addr, 2074 cm_id->local_addr.sin_port, 2075 cm_id->remote_addr.sin_port, 0); 2076 2077 if (!rt) { 2078 2079 
CTR2(KTR_IW_CXGBE, "%s:cc7 %p", __func__, ep); 2080 printk(KERN_ERR MOD "%s - cannot find route.\n", __func__); 2081 err = -EHOSTUNREACH; 2082 goto fail2; 2083 } 2084 2085 if (!(rt->rt_ifp->if_capenable & IFCAP_TOE)) { 2086 2087 CTR2(KTR_IW_CXGBE, "%s:cc8 %p", __func__, ep); 2088 printf("%s - interface not TOE capable.\n", __func__); 2089 close_socket(&ep->com, 0); 2090 err = -ENOPROTOOPT; 2091 goto fail3; 2092 } 2093 tdev = TOEDEV(rt->rt_ifp); 2094 2095 if (tdev == NULL) { 2096 2097 CTR2(KTR_IW_CXGBE, "%s:cc9 %p", __func__, ep); 2098 printf("%s - No toedev for interface.\n", __func__); 2099 goto fail3; 2100 } 2101 RTFREE(rt); 2102 2103 state_set(&ep->com, CONNECTING); 2104 ep->tos = 0; 2105 ep->com.local_addr = cm_id->local_addr; 2106 ep->com.remote_addr = cm_id->remote_addr; 2107 err = soconnect(ep->com.so, (struct sockaddr *)&ep->com.remote_addr, 2108 ep->com.thread); 2109 2110 if (!err) { 2111 CTR2(KTR_IW_CXGBE, "%s:cca %p", __func__, ep); 2112 goto out; 2113 } else { 2114 close_socket(&ep->com, 0); 2115 goto fail2; 2116 } 2117 2118 fail3: 2119 CTR2(KTR_IW_CXGBE, "%s:ccb %p", __func__, ep); 2120 RTFREE(rt); 2121 fail2: 2122 cm_id->rem_ref(cm_id); 2123 c4iw_put_ep(&ep->com); 2124 out: 2125 CTR2(KTR_IW_CXGBE, "%s:ccE %p", __func__, ep); 2126 return err; 2127 } 2128 2129 /* 2130 * iwcm->create_listen. Returns -errno on failure. 
2131 */ 2132 int 2133 c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) 2134 { 2135 int rc; 2136 struct c4iw_dev *dev = to_c4iw_dev(cm_id->device); 2137 struct c4iw_listen_ep *ep; 2138 struct socket *so = cm_id->so; 2139 2140 ep = alloc_ep(sizeof(*ep), GFP_KERNEL); 2141 CTR5(KTR_IW_CXGBE, "%s: cm_id %p, lso %p, ep %p, inp %p", __func__, 2142 cm_id, so, ep, so->so_pcb); 2143 if (ep == NULL) { 2144 log(LOG_ERR, "%s: failed to alloc memory for endpoint\n", 2145 __func__); 2146 rc = ENOMEM; 2147 goto failed; 2148 } 2149 2150 cm_id->add_ref(cm_id); 2151 ep->com.cm_id = cm_id; 2152 ep->com.dev = dev; 2153 ep->backlog = backlog; 2154 ep->com.local_addr = cm_id->local_addr; 2155 ep->com.thread = curthread; 2156 state_set(&ep->com, LISTEN); 2157 ep->com.so = so; 2158 init_sock(&ep->com); 2159 2160 rc = solisten(so, ep->backlog, ep->com.thread); 2161 if (rc != 0) { 2162 log(LOG_ERR, "%s: failed to start listener: %d\n", __func__, 2163 rc); 2164 close_socket(&ep->com, 0); 2165 cm_id->rem_ref(cm_id); 2166 c4iw_put_ep(&ep->com); 2167 goto failed; 2168 } 2169 2170 cm_id->provider_data = ep; 2171 return (0); 2172 2173 failed: 2174 CTR3(KTR_IW_CXGBE, "%s: cm_id %p, FAILED (%d)", __func__, cm_id, rc); 2175 return (-rc); 2176 } 2177 2178 int 2179 c4iw_destroy_listen(struct iw_cm_id *cm_id) 2180 { 2181 int rc; 2182 struct c4iw_listen_ep *ep = to_listen_ep(cm_id); 2183 2184 CTR4(KTR_IW_CXGBE, "%s: cm_id %p, so %p, inp %p", __func__, cm_id, 2185 cm_id->so, cm_id->so->so_pcb); 2186 2187 state_set(&ep->com, DEAD); 2188 rc = close_socket(&ep->com, 0); 2189 cm_id->rem_ref(cm_id); 2190 c4iw_put_ep(&ep->com); 2191 2192 return (rc); 2193 } 2194 2195 int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) 2196 { 2197 int ret = 0; 2198 int close = 0; 2199 int fatal = 0; 2200 struct c4iw_rdev *rdev; 2201 2202 mutex_lock(&ep->com.mutex); 2203 2204 CTR2(KTR_IW_CXGBE, "%s:cedB %p", __func__, ep); 2205 2206 rdev = &ep->com.dev->rdev; 2207 2208 if (c4iw_fatal_error(rdev)) { 2209 2210 
CTR2(KTR_IW_CXGBE, "%s:ced1 %p", __func__, ep); 2211 fatal = 1; 2212 close_complete_upcall(ep, -ECONNRESET); 2213 ep->com.state = DEAD; 2214 } 2215 CTR3(KTR_IW_CXGBE, "%s:ced2 %p %s", __func__, ep, 2216 states[ep->com.state]); 2217 2218 switch (ep->com.state) { 2219 2220 case MPA_REQ_WAIT: 2221 case MPA_REQ_SENT: 2222 case MPA_REQ_RCVD: 2223 case MPA_REP_SENT: 2224 case FPDU_MODE: 2225 close = 1; 2226 if (abrupt) 2227 ep->com.state = ABORTING; 2228 else { 2229 ep->com.state = CLOSING; 2230 START_EP_TIMER(ep); 2231 } 2232 set_bit(CLOSE_SENT, &ep->com.flags); 2233 break; 2234 2235 case CLOSING: 2236 2237 if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) { 2238 2239 close = 1; 2240 if (abrupt) { 2241 STOP_EP_TIMER(ep); 2242 ep->com.state = ABORTING; 2243 } else 2244 ep->com.state = MORIBUND; 2245 } 2246 break; 2247 2248 case MORIBUND: 2249 case ABORTING: 2250 case DEAD: 2251 CTR3(KTR_IW_CXGBE, 2252 "%s ignoring disconnect ep %p state %u", __func__, 2253 ep, ep->com.state); 2254 break; 2255 2256 default: 2257 BUG(); 2258 break; 2259 } 2260 2261 mutex_unlock(&ep->com.mutex); 2262 2263 if (close) { 2264 2265 CTR2(KTR_IW_CXGBE, "%s:ced3 %p", __func__, ep); 2266 2267 if (abrupt) { 2268 2269 CTR2(KTR_IW_CXGBE, "%s:ced4 %p", __func__, ep); 2270 set_bit(EP_DISC_ABORT, &ep->com.history); 2271 ret = abort_connection(ep); 2272 } else { 2273 2274 CTR2(KTR_IW_CXGBE, "%s:ced5 %p", __func__, ep); 2275 set_bit(EP_DISC_CLOSE, &ep->com.history); 2276 2277 if (!ep->parent_ep) 2278 __state_set(&ep->com, MORIBUND); 2279 ret = shutdown_socket(&ep->com); 2280 } 2281 2282 if (ret) { 2283 2284 fatal = 1; 2285 } 2286 } 2287 2288 if (fatal) { 2289 2290 release_ep_resources(ep); 2291 CTR2(KTR_IW_CXGBE, "%s:ced6 %p", __func__, ep); 2292 } 2293 CTR2(KTR_IW_CXGBE, "%s:cedE %p", __func__, ep); 2294 return ret; 2295 } 2296 2297 #ifdef C4IW_EP_REDIRECT 2298 int c4iw_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new, 2299 struct l2t_entry *l2t) 2300 { 2301 struct c4iw_ep *ep = ctx; 
2302 2303 if (ep->dst != old) 2304 return 0; 2305 2306 PDBG("%s ep %p redirect to dst %p l2t %p\n", __func__, ep, new, 2307 l2t); 2308 dst_hold(new); 2309 cxgb4_l2t_release(ep->l2t); 2310 ep->l2t = l2t; 2311 dst_release(old); 2312 ep->dst = new; 2313 return 1; 2314 } 2315 #endif 2316 2317 2318 2319 static void ep_timeout(unsigned long arg) 2320 { 2321 struct c4iw_ep *ep = (struct c4iw_ep *)arg; 2322 int kickit = 0; 2323 2324 CTR2(KTR_IW_CXGBE, "%s:etB %p", __func__, ep); 2325 spin_lock(&timeout_lock); 2326 2327 if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) { 2328 2329 list_add_tail(&ep->entry, &timeout_list); 2330 kickit = 1; 2331 } 2332 spin_unlock(&timeout_lock); 2333 2334 if (kickit) { 2335 2336 CTR2(KTR_IW_CXGBE, "%s:et1 %p", __func__, ep); 2337 queue_work(c4iw_taskq, &c4iw_task); 2338 } 2339 CTR2(KTR_IW_CXGBE, "%s:etE %p", __func__, ep); 2340 } 2341 2342 static int fw6_wr_rpl(struct adapter *sc, const __be64 *rpl) 2343 { 2344 uint64_t val = be64toh(*rpl); 2345 int ret; 2346 struct c4iw_wr_wait *wr_waitp; 2347 2348 ret = (int)((val >> 8) & 0xff); 2349 wr_waitp = (struct c4iw_wr_wait *)rpl[1]; 2350 CTR3(KTR_IW_CXGBE, "%s wr_waitp %p ret %u", __func__, wr_waitp, ret); 2351 if (wr_waitp) 2352 c4iw_wake_up(wr_waitp, ret ? 
-ret : 0); 2353 2354 return (0); 2355 } 2356 2357 static int fw6_cqe_handler(struct adapter *sc, const __be64 *rpl) 2358 { 2359 struct t4_cqe cqe =*(const struct t4_cqe *)(&rpl[0]); 2360 2361 CTR2(KTR_IW_CXGBE, "%s rpl %p", __func__, rpl); 2362 c4iw_ev_dispatch(sc->iwarp_softc, &cqe); 2363 2364 return (0); 2365 } 2366 2367 static int terminate(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) 2368 { 2369 2370 struct adapter *sc = iq->adapter; 2371 2372 const struct cpl_rdma_terminate *rpl = (const void *)(rss + 1); 2373 unsigned int tid = GET_TID(rpl); 2374 struct c4iw_qp_attributes attrs; 2375 struct toepcb *toep = lookup_tid(sc, tid); 2376 struct socket *so = inp_inpcbtosocket(toep->inp); 2377 struct c4iw_ep *ep = so->so_rcv.sb_upcallarg; 2378 2379 CTR2(KTR_IW_CXGBE, "%s:tB %p %d", __func__, ep); 2380 2381 if (ep && ep->com.qp) { 2382 2383 printk(KERN_WARNING MOD "TERM received tid %u qpid %u\n", tid, 2384 ep->com.qp->wq.sq.qid); 2385 attrs.next_state = C4IW_QP_STATE_TERMINATE; 2386 c4iw_modify_qp(ep->com.dev, ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 2387 1); 2388 } else 2389 printk(KERN_WARNING MOD "TERM received tid %u no ep/qp\n", tid); 2390 CTR2(KTR_IW_CXGBE, "%s:tE %p %d", __func__, ep); 2391 2392 return 0; 2393 } 2394 2395 void 2396 c4iw_cm_init_cpl(struct adapter *sc) 2397 { 2398 2399 t4_register_cpl_handler(sc, CPL_RDMA_TERMINATE, terminate); 2400 t4_register_fw_msg_handler(sc, FW6_TYPE_WR_RPL, fw6_wr_rpl); 2401 t4_register_fw_msg_handler(sc, FW6_TYPE_CQE, fw6_cqe_handler); 2402 t4_register_an_handler(sc, c4iw_ev_handler); 2403 } 2404 2405 void 2406 c4iw_cm_term_cpl(struct adapter *sc) 2407 { 2408 2409 t4_register_cpl_handler(sc, CPL_RDMA_TERMINATE, NULL); 2410 t4_register_fw_msg_handler(sc, FW6_TYPE_WR_RPL, NULL); 2411 t4_register_fw_msg_handler(sc, FW6_TYPE_CQE, NULL); 2412 } 2413 2414 int __init c4iw_cm_init(void) 2415 { 2416 2417 TAILQ_INIT(&req_list); 2418 spin_lock_init(&req_lock); 2419 INIT_LIST_HEAD(&timeout_list); 2420 
spin_lock_init(&timeout_lock); 2421 2422 INIT_WORK(&c4iw_task, process_req); 2423 2424 c4iw_taskq = create_singlethread_workqueue("iw_cxgbe"); 2425 if (!c4iw_taskq) 2426 return -ENOMEM; 2427 2428 2429 return 0; 2430 } 2431 2432 void __exit c4iw_cm_term(void) 2433 { 2434 WARN_ON(!TAILQ_EMPTY(&req_list)); 2435 WARN_ON(!list_empty(&timeout_list)); 2436 flush_workqueue(c4iw_taskq); 2437 destroy_workqueue(c4iw_taskq); 2438 } 2439 #endif 2440