/*
 * Copyright (c) 2009-2013 Chelsio, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"

#ifdef TCP_OFFLOAD
#include <sys/types.h>
#include <sys/malloc.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sockio.h>
#include <sys/taskqueue.h>
#include <netinet/in.h>
#include <net/route.h>

#include <netinet/in_systm.h>
#include <netinet/in_pcb.h>
#include <netinet/ip.h>
#include <netinet/in_fib.h>
#include <netinet/ip_var.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp.h>
#include <netinet/tcpip.h>

#include <netinet/toecore.h>

struct sge_iq;
struct rss_header;
#include <linux/types.h>
#include "offload.h"
#include "tom/t4_tom.h"

#define TOEPCB(so)  ((struct toepcb *)(so_sototcpcb((so))->t_toe))

#include "iw_cxgbe.h"
#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/notifier.h>
#include <linux/inetdevice.h>
#include <linux/if_vlan.h>
#include <net/netevent.h>
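/*
 * Deferred event processing: the socket receive upcall queues endpoints on
 * req_list (protected by req_lock) and schedules c4iw_task on c4iw_taskq;
 * process_req() drains the list from workqueue context.  Endpoints whose
 * MPA timer expires are parked on timeout_list under timeout_lock and the
 * same taskqueue is kicked.
 */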
static spinlock_t req_lock;
static TAILQ_HEAD(c4iw_ep_list, c4iw_ep_common) req_list;
static struct work_struct c4iw_task;
static struct workqueue_struct *c4iw_taskq;
static LIST_HEAD(timeout_list);
static spinlock_t timeout_lock;

static void process_req(struct work_struct *ctx);
static void start_ep_timer(struct c4iw_ep *ep);
static void stop_ep_timer(struct c4iw_ep *ep);
static int set_tcpinfo(struct c4iw_ep *ep);
static enum c4iw_ep_state state_read(struct c4iw_ep_common *epc);
static void __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state tostate);
static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state tostate);
static void *alloc_ep(int size, gfp_t flags);
void __free_ep(struct c4iw_ep_common *epc);
static int find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port,
		__be16 peer_port, u8 tos, struct nhop4_extended *pnh4);
static int close_socket(struct c4iw_ep_common *epc, int close);
static int shutdown_socket(struct c4iw_ep_common *epc);
static void abort_socket(struct c4iw_ep *ep);
static void send_mpa_req(struct c4iw_ep *ep);
static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen);
static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen);
static void close_complete_upcall(struct c4iw_ep *ep, int status);
static int abort_connection(struct c4iw_ep *ep);
static void peer_close_upcall(struct c4iw_ep *ep);
static void peer_abort_upcall(struct c4iw_ep *ep);
static void connect_reply_upcall(struct c4iw_ep *ep, int status);
static int connect_request_upcall(struct c4iw_ep *ep);
static void established_upcall(struct c4iw_ep *ep);
static void process_mpa_reply(struct c4iw_ep *ep);
static void process_mpa_request(struct c4iw_ep *ep);
static void process_peer_close(struct c4iw_ep *ep);
static void process_conn_error(struct c4iw_ep *ep);
static void process_close_complete(struct c4iw_ep *ep);
static void ep_timeout(unsigned long arg);
static void init_sock(struct c4iw_ep_common *epc);
static void process_data(struct c4iw_ep *ep);
static void process_connected(struct c4iw_ep *ep);
static struct socket *dequeue_socket(struct socket *head,
		struct sockaddr_in **remote, struct c4iw_ep *child_ep);
static void process_newconn(struct c4iw_ep *parent_ep);
static int c4iw_so_upcall(struct socket *so, void *arg, int waitflag);
static void process_socket_event(struct c4iw_ep *ep);
static void release_ep_resources(struct c4iw_ep *ep);

#define START_EP_TIMER(ep) \
    do { \
	    CTR3(KTR_IW_CXGBE, "start_ep_timer (%s:%d) ep %p", \
		__func__, __LINE__, (ep)); \
	    start_ep_timer(ep); \
    } while (0)

#define STOP_EP_TIMER(ep) \
    do { \
	    CTR3(KTR_IW_CXGBE, "stop_ep_timer (%s:%d) ep %p", \
		__func__, __LINE__, (ep)); \
	    stop_ep_timer(ep); \
    } while (0)

#ifdef KTR
static char *states[] = {
	"idle",
	"listen",
	"connecting",
	"mpa_wait_req",
	"mpa_req_sent",
	"mpa_req_rcvd",
	"mpa_rep_sent",
	"fpdu_mode",
	"aborting",
	"closing",
	"moribund",
	"dead",
	NULL,
};
#endif
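/*
 * Work handler for c4iw_task: pull endpoints off req_list one at a time,
 * dropping req_lock while each entry is processed.  Endpoints that still
 * have a socket are run through process_socket_event(); the reference taken
 * by c4iw_so_upcall() when the endpoint was queued is released here.
 */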
static void
process_req(struct work_struct *ctx)
{
	struct c4iw_ep_common *epc;

	spin_lock(&req_lock);
	while (!TAILQ_EMPTY(&req_list)) {
		epc = TAILQ_FIRST(&req_list);
		TAILQ_REMOVE(&req_list, epc, entry);
		epc->entry.tqe_prev = NULL;
		spin_unlock(&req_lock);
		if (epc->so)
			process_socket_event((struct c4iw_ep *)epc);
		c4iw_put_ep(epc);
		spin_lock(&req_lock);
	}
	spin_unlock(&req_lock);
}

/*
 * XXX: doesn't belong here in the iWARP driver.
 * XXX: assumes that the connection was offloaded by cxgbe/t4_tom if TF_TOE is
 * set.  Is this a valid assumption for active open?
 */
static int
set_tcpinfo(struct c4iw_ep *ep)
{
	struct socket *so = ep->com.so;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp;
	struct toepcb *toep;
	int rc = 0;

	INP_WLOCK(inp);
	tp = intotcpcb(inp);
	if ((tp->t_flags & TF_TOE) == 0) {
		rc = EINVAL;
		log(LOG_ERR, "%s: connection not offloaded (so %p, ep %p)\n",
		    __func__, so, ep);
		goto done;
	}
	toep = TOEPCB(so);

	ep->hwtid = toep->tid;
	ep->snd_seq = tp->snd_nxt;
	ep->rcv_seq = tp->rcv_nxt;
	ep->emss = max(tp->t_maxseg, 128);
done:
	INP_WUNLOCK(inp);
	return (rc);
}

static int
find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port,
		__be16 peer_port, u8 tos, struct nhop4_extended *pnh4)
{
	struct in_addr addr;
	int err;

	CTR5(KTR_IW_CXGBE, "%s:frtB %x, %x, %d, %d", __func__, local_ip,
	    peer_ip, ntohs(local_port), ntohs(peer_port));

	addr.s_addr = peer_ip;
	err = fib4_lookup_nh_ext(RT_DEFAULT_FIB, addr, NHR_REF, 0, pnh4);

	CTR2(KTR_IW_CXGBE, "%s:frtE %d", __func__, err);
	return err;
}

static int
close_socket(struct c4iw_ep_common *epc, int close)
{
	struct socket *so = epc->so;
	int rc;

	CTR4(KTR_IW_CXGBE, "%s: so %p, ep %p, state %s", __func__, so, epc,
	    states[epc->state]);

	SOCK_LOCK(so);
	soupcall_clear(so, SO_RCV);
	SOCK_UNLOCK(so);

	if (close)
		rc = soclose(so);
	else
		rc = soshutdown(so, SHUT_WR | SHUT_RD);
	epc->so = NULL;

	return (rc);
}

static int
shutdown_socket(struct c4iw_ep_common *epc)
{

	CTR4(KTR_IW_CXGBE, "%s: so %p, ep %p, state %s", __func__, epc->so, epc,
	    states[epc->state]);

	return (soshutdown(epc->so, SHUT_WR));
}

static void
abort_socket(struct c4iw_ep *ep)
{
	struct sockopt sopt;
	int rc;
	struct linger l;

	CTR4(KTR_IW_CXGBE, "%s ep %p so %p state %s", __func__, ep, ep->com.so,
	    states[ep->com.state]);

	l.l_onoff = 1;
	l.l_linger = 0;

	/* linger_time of 0 forces RST to be sent */
	sopt.sopt_dir = SOPT_SET;
	sopt.sopt_level = SOL_SOCKET;
	sopt.sopt_name = SO_LINGER;
	sopt.sopt_val = (caddr_t)&l;
	sopt.sopt_valsize = sizeof l;
	sopt.sopt_td = NULL;
	rc = sosetopt(ep->com.so, &sopt);
	if (rc) {
		log(LOG_ERR, "%s: can't set linger to 0, no RST! err %d\n",
		    __func__, rc);
	}
}
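/*
 * The peer closed its half of the connection (SBS_CANTRCVMORE).  Advance the
 * endpoint state machine: in FPDU_MODE move the QP to CLOSING and deliver the
 * disconnect upcall; in MORIBUND the close handshake has completed, so idle
 * the QP, close the socket and report the close.  "disconnect" requests a
 * local close via c4iw_ep_disconnect(); "release" drops an endpoint
 * reference.
 */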
static void
process_peer_close(struct c4iw_ep *ep)
{
	struct c4iw_qp_attributes attrs;
	int disconnect = 1;
	int release = 0;

	CTR4(KTR_IW_CXGBE, "%s:ppcB ep %p so %p state %s", __func__, ep,
	    ep->com.so, states[ep->com.state]);

	mutex_lock(&ep->com.mutex);
	switch (ep->com.state) {
	case MPA_REQ_WAIT:
		CTR2(KTR_IW_CXGBE, "%s:ppc1 %p MPA_REQ_WAIT CLOSING",
		    __func__, ep);
		__state_set(&ep->com, CLOSING);
		break;

	case MPA_REQ_SENT:
		CTR2(KTR_IW_CXGBE, "%s:ppc2 %p MPA_REQ_SENT CLOSING",
		    __func__, ep);
		__state_set(&ep->com, DEAD);
		connect_reply_upcall(ep, -ECONNABORTED);

		disconnect = 0;
		STOP_EP_TIMER(ep);
		close_socket(&ep->com, 0);
		ep->com.cm_id->rem_ref(ep->com.cm_id);
		ep->com.cm_id = NULL;
		ep->com.qp = NULL;
		release = 1;
		break;

	case MPA_REQ_RCVD:
		/*
		 * We're gonna mark this puppy DEAD, but keep
		 * the reference on it until the ULP accepts or
		 * rejects the CR.
		 */
		CTR2(KTR_IW_CXGBE, "%s:ppc3 %p MPA_REQ_RCVD CLOSING",
		    __func__, ep);
		__state_set(&ep->com, CLOSING);
		c4iw_get_ep(&ep->com);
		break;

	case MPA_REP_SENT:
		CTR2(KTR_IW_CXGBE, "%s:ppc4 %p MPA_REP_SENT CLOSING",
		    __func__, ep);
		__state_set(&ep->com, CLOSING);
		break;

	case FPDU_MODE:
		CTR2(KTR_IW_CXGBE, "%s:ppc5 %p FPDU_MODE CLOSING",
		    __func__, ep);
		START_EP_TIMER(ep);
		__state_set(&ep->com, CLOSING);
		attrs.next_state = C4IW_QP_STATE_CLOSING;
		c4iw_modify_qp(ep->com.dev, ep->com.qp,
		    C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
		peer_close_upcall(ep);
		break;

	case ABORTING:
		CTR2(KTR_IW_CXGBE, "%s:ppc6 %p ABORTING (disconn)",
		    __func__, ep);
		disconnect = 0;
		break;

	case CLOSING:
		CTR2(KTR_IW_CXGBE, "%s:ppc7 %p CLOSING MORIBUND",
		    __func__, ep);
		__state_set(&ep->com, MORIBUND);
		disconnect = 0;
		break;

	case MORIBUND:
		CTR2(KTR_IW_CXGBE, "%s:ppc8 %p MORIBUND DEAD", __func__, ep);
		STOP_EP_TIMER(ep);
		if (ep->com.cm_id && ep->com.qp) {
			attrs.next_state = C4IW_QP_STATE_IDLE;
			c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
			    C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
		}
		close_socket(&ep->com, 0);
		close_complete_upcall(ep, 0);
		__state_set(&ep->com, DEAD);
		release = 1;
		disconnect = 0;
		break;

	case DEAD:
		CTR2(KTR_IW_CXGBE, "%s:ppc9 %p DEAD (disconn)",
		    __func__, ep);
		disconnect = 0;
		break;

	default:
		panic("%s: ep %p state %d", __func__, ep,
		    ep->com.state);
		break;
	}

	mutex_unlock(&ep->com.mutex);

	if (disconnect) {
		CTR2(KTR_IW_CXGBE, "%s:ppca %p", __func__, ep);
		c4iw_ep_disconnect(ep, 0, M_NOWAIT);
	}
	if (release) {
		CTR2(KTR_IW_CXGBE, "%s:ppcb %p", __func__, ep);
		c4iw_put_ep(&ep->com);
	}
	CTR2(KTR_IW_CXGBE, "%s:ppcE %p", __func__, ep);
	return;
}

static void
process_conn_error(struct c4iw_ep *ep)
{
	struct c4iw_qp_attributes attrs;
	int ret;
	int state;

	state = state_read(&ep->com);
	CTR5(KTR_IW_CXGBE, "%s:pceB ep %p so %p so->so_error %u state %s",
	    __func__, ep, ep->com.so, ep->com.so->so_error,
	    states[ep->com.state]);

	switch (state) {
	case MPA_REQ_WAIT:
		STOP_EP_TIMER(ep);
		break;

	case MPA_REQ_SENT:
		STOP_EP_TIMER(ep);
		connect_reply_upcall(ep, -ECONNRESET);
		break;

	case MPA_REP_SENT:
		ep->com.rpl_err = ECONNRESET;
		CTR1(KTR_IW_CXGBE, "waking up ep %p", ep);
		break;

	case MPA_REQ_RCVD:
		/*
		 * We're gonna mark this puppy DEAD, but keep
		 * the reference on it until the ULP accepts or
		 * rejects the CR.
436 */ 437 c4iw_get_ep(&ep->com); 438 break; 439 440 case MORIBUND: 441 case CLOSING: 442 STOP_EP_TIMER(ep); 443 /*FALLTHROUGH*/ 444 case FPDU_MODE: 445 446 if (ep->com.cm_id && ep->com.qp) { 447 448 attrs.next_state = C4IW_QP_STATE_ERROR; 449 ret = c4iw_modify_qp(ep->com.qp->rhp, 450 ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, 451 &attrs, 1); 452 if (ret) 453 log(LOG_ERR, 454 "%s - qp <- error failed!\n", 455 __func__); 456 } 457 peer_abort_upcall(ep); 458 break; 459 460 case ABORTING: 461 break; 462 463 case DEAD: 464 CTR2(KTR_IW_CXGBE, "%s so_error %d IN DEAD STATE!!!!", 465 __func__, ep->com.so->so_error); 466 return; 467 468 default: 469 panic("%s: ep %p state %d", __func__, ep, state); 470 break; 471 } 472 473 if (state != ABORTING) { 474 475 CTR2(KTR_IW_CXGBE, "%s:pce1 %p", __func__, ep); 476 close_socket(&ep->com, 0); 477 state_set(&ep->com, DEAD); 478 c4iw_put_ep(&ep->com); 479 } 480 CTR2(KTR_IW_CXGBE, "%s:pceE %p", __func__, ep); 481 return; 482 } 483 484 static void 485 process_close_complete(struct c4iw_ep *ep) 486 { 487 struct c4iw_qp_attributes attrs; 488 int release = 0; 489 490 CTR4(KTR_IW_CXGBE, "%s:pccB ep %p so %p state %s", __func__, ep, 491 ep->com.so, states[ep->com.state]); 492 493 /* The cm_id may be null if we failed to connect */ 494 mutex_lock(&ep->com.mutex); 495 496 switch (ep->com.state) { 497 498 case CLOSING: 499 CTR2(KTR_IW_CXGBE, "%s:pcc1 %p CLOSING MORIBUND", 500 __func__, ep); 501 __state_set(&ep->com, MORIBUND); 502 break; 503 504 case MORIBUND: 505 CTR2(KTR_IW_CXGBE, "%s:pcc1 %p MORIBUND DEAD", __func__, 506 ep); 507 STOP_EP_TIMER(ep); 508 509 if ((ep->com.cm_id) && (ep->com.qp)) { 510 511 CTR2(KTR_IW_CXGBE, "%s:pcc2 %p QP_STATE_IDLE", 512 __func__, ep); 513 attrs.next_state = C4IW_QP_STATE_IDLE; 514 c4iw_modify_qp(ep->com.dev, 515 ep->com.qp, 516 C4IW_QP_ATTR_NEXT_STATE, 517 &attrs, 1); 518 } 519 520 if (ep->parent_ep) { 521 522 CTR2(KTR_IW_CXGBE, "%s:pcc3 %p", __func__, ep); 523 close_socket(&ep->com, 1); 524 } 525 else { 526 527 CTR2(KTR_IW_CXGBE, "%s:pcc4 %p", __func__, ep); 528 close_socket(&ep->com, 0); 529 } 530 close_complete_upcall(ep, 0); 531 __state_set(&ep->com, DEAD); 532 release = 1; 533 break; 534 535 case ABORTING: 536 CTR2(KTR_IW_CXGBE, "%s:pcc5 %p ABORTING", __func__, ep); 537 break; 538 539 case DEAD: 540 default: 541 CTR2(KTR_IW_CXGBE, "%s:pcc6 %p DEAD", __func__, ep); 542 panic("%s:pcc6 %p DEAD", __func__, ep); 543 break; 544 } 545 mutex_unlock(&ep->com.mutex); 546 547 if (release) { 548 549 CTR2(KTR_IW_CXGBE, "%s:pcc7 %p", __func__, ep); 550 c4iw_put_ep(&ep->com); 551 } 552 CTR2(KTR_IW_CXGBE, "%s:pccE %p", __func__, ep); 553 return; 554 } 555 556 static void 557 init_sock(struct c4iw_ep_common *epc) 558 { 559 int rc; 560 struct sockopt sopt; 561 struct socket *so = epc->so; 562 int on = 1; 563 564 SOCK_LOCK(so); 565 soupcall_set(so, SO_RCV, c4iw_so_upcall, epc); 566 so->so_state |= SS_NBIO; 567 SOCK_UNLOCK(so); 568 sopt.sopt_dir = SOPT_SET; 569 sopt.sopt_level = IPPROTO_TCP; 570 sopt.sopt_name = TCP_NODELAY; 571 sopt.sopt_val = (caddr_t)&on; 572 sopt.sopt_valsize = sizeof on; 573 sopt.sopt_td = NULL; 574 rc = sosetopt(so, &sopt); 575 if (rc) { 576 log(LOG_ERR, "%s: can't set TCP_NODELAY on so %p (%d)\n", 577 __func__, so, rc); 578 } 579 } 580 581 static void 582 process_data(struct c4iw_ep *ep) 583 { 584 struct sockaddr_in *local, *remote; 585 586 CTR5(KTR_IW_CXGBE, "%s: so %p, ep %p, state %s, sbused %d", __func__, 587 ep->com.so, ep, states[ep->com.state], sbused(&ep->com.so->so_rcv)); 588 589 switch (state_read(&ep->com)) { 590 case 
MPA_REQ_SENT: 591 process_mpa_reply(ep); 592 break; 593 case MPA_REQ_WAIT: 594 in_getsockaddr(ep->com.so, (struct sockaddr **)&local); 595 in_getpeeraddr(ep->com.so, (struct sockaddr **)&remote); 596 ep->com.local_addr = *local; 597 ep->com.remote_addr = *remote; 598 free(local, M_SONAME); 599 free(remote, M_SONAME); 600 process_mpa_request(ep); 601 break; 602 default: 603 if (sbused(&ep->com.so->so_rcv)) 604 log(LOG_ERR, "%s: Unexpected streaming data. ep %p, " 605 "state %d, so %p, so_state 0x%x, sbused %u\n", 606 __func__, ep, state_read(&ep->com), ep->com.so, 607 ep->com.so->so_state, sbused(&ep->com.so->so_rcv)); 608 break; 609 } 610 } 611 612 static void 613 process_connected(struct c4iw_ep *ep) 614 { 615 616 if ((ep->com.so->so_state & SS_ISCONNECTED) && !ep->com.so->so_error) 617 send_mpa_req(ep); 618 else { 619 connect_reply_upcall(ep, -ep->com.so->so_error); 620 close_socket(&ep->com, 0); 621 state_set(&ep->com, DEAD); 622 c4iw_put_ep(&ep->com); 623 } 624 } 625 626 static struct socket * 627 dequeue_socket(struct socket *head, struct sockaddr_in **remote, 628 struct c4iw_ep *child_ep) 629 { 630 struct socket *so; 631 632 ACCEPT_LOCK(); 633 so = TAILQ_FIRST(&head->so_comp); 634 if (!so) { 635 ACCEPT_UNLOCK(); 636 return (NULL); 637 } 638 TAILQ_REMOVE(&head->so_comp, so, so_list); 639 head->so_qlen--; 640 SOCK_LOCK(so); 641 so->so_qstate &= ~SQ_COMP; 642 so->so_head = NULL; 643 soref(so); 644 soupcall_set(so, SO_RCV, c4iw_so_upcall, child_ep); 645 so->so_state |= SS_NBIO; 646 SOCK_UNLOCK(so); 647 ACCEPT_UNLOCK(); 648 soaccept(so, (struct sockaddr **)remote); 649 650 return (so); 651 } 652 653 static void 654 process_newconn(struct c4iw_ep *parent_ep) 655 { 656 struct socket *child_so; 657 struct c4iw_ep *child_ep; 658 struct sockaddr_in *remote; 659 660 child_ep = alloc_ep(sizeof(*child_ep), M_NOWAIT); 661 if (!child_ep) { 662 CTR3(KTR_IW_CXGBE, "%s: parent so %p, parent ep %p, ENOMEM", 663 __func__, parent_ep->com.so, parent_ep); 664 log(LOG_ERR, "%s: failed to allocate ep entry\n", __func__); 665 return; 666 } 667 668 child_so = dequeue_socket(parent_ep->com.so, &remote, child_ep); 669 if (!child_so) { 670 CTR4(KTR_IW_CXGBE, 671 "%s: parent so %p, parent ep %p, child ep %p, dequeue err", 672 __func__, parent_ep->com.so, parent_ep, child_ep); 673 log(LOG_ERR, "%s: failed to dequeue child socket\n", __func__); 674 __free_ep(&child_ep->com); 675 return; 676 677 } 678 679 CTR5(KTR_IW_CXGBE, 680 "%s: parent so %p, parent ep %p, child so %p, child ep %p", 681 __func__, parent_ep->com.so, parent_ep, child_so, child_ep); 682 683 child_ep->com.local_addr = parent_ep->com.local_addr; 684 child_ep->com.remote_addr = *remote; 685 child_ep->com.dev = parent_ep->com.dev; 686 child_ep->com.so = child_so; 687 child_ep->com.cm_id = NULL; 688 child_ep->com.thread = parent_ep->com.thread; 689 child_ep->parent_ep = parent_ep; 690 691 free(remote, M_SONAME); 692 c4iw_get_ep(&parent_ep->com); 693 child_ep->parent_ep = parent_ep; 694 init_timer(&child_ep->timer); 695 state_set(&child_ep->com, MPA_REQ_WAIT); 696 START_EP_TIMER(child_ep); 697 698 /* maybe the request has already been queued up on the socket... 
*/ 699 process_mpa_request(child_ep); 700 } 701 702 static int 703 c4iw_so_upcall(struct socket *so, void *arg, int waitflag) 704 { 705 struct c4iw_ep *ep = arg; 706 707 spin_lock(&req_lock); 708 709 CTR6(KTR_IW_CXGBE, 710 "%s: so %p, so_state 0x%x, ep %p, ep_state %s, tqe_prev %p", 711 __func__, so, so->so_state, ep, states[ep->com.state], 712 ep->com.entry.tqe_prev); 713 714 if (ep && ep->com.so && !ep->com.entry.tqe_prev) { 715 KASSERT(ep->com.so == so, ("%s: XXX review.", __func__)); 716 c4iw_get_ep(&ep->com); 717 TAILQ_INSERT_TAIL(&req_list, &ep->com, entry); 718 queue_work(c4iw_taskq, &c4iw_task); 719 } 720 721 spin_unlock(&req_lock); 722 return (SU_OK); 723 } 724 725 static void 726 process_socket_event(struct c4iw_ep *ep) 727 { 728 int state = state_read(&ep->com); 729 struct socket *so = ep->com.so; 730 731 CTR6(KTR_IW_CXGBE, "process_socket_event: so %p, so_state 0x%x, " 732 "so_err %d, sb_state 0x%x, ep %p, ep_state %s", so, so->so_state, 733 so->so_error, so->so_rcv.sb_state, ep, states[state]); 734 735 if (state == CONNECTING) { 736 process_connected(ep); 737 return; 738 } 739 740 if (state == LISTEN) { 741 process_newconn(ep); 742 return; 743 } 744 745 /* connection error */ 746 if (so->so_error) { 747 process_conn_error(ep); 748 return; 749 } 750 751 /* peer close */ 752 if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) && state < CLOSING) { 753 process_peer_close(ep); 754 return; 755 } 756 757 /* close complete */ 758 if (so->so_state & SS_ISDISCONNECTED) { 759 process_close_complete(ep); 760 return; 761 } 762 763 /* rx data */ 764 process_data(ep); 765 } 766 767 SYSCTL_NODE(_hw, OID_AUTO, iw_cxgbe, CTLFLAG_RD, 0, "iw_cxgbe driver parameters"); 768 769 int db_delay_usecs = 1; 770 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, db_delay_usecs, CTLFLAG_RWTUN, &db_delay_usecs, 0, 771 "Usecs to delay awaiting db fifo to drain"); 772 773 static int dack_mode = 1; 774 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, dack_mode, CTLFLAG_RWTUN, &dack_mode, 0, 775 "Delayed ack mode (default = 1)"); 776 777 int c4iw_max_read_depth = 8; 778 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, c4iw_max_read_depth, CTLFLAG_RWTUN, &c4iw_max_read_depth, 0, 779 "Per-connection max ORD/IRD (default = 8)"); 780 781 static int enable_tcp_timestamps; 782 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, enable_tcp_timestamps, CTLFLAG_RWTUN, &enable_tcp_timestamps, 0, 783 "Enable tcp timestamps (default = 0)"); 784 785 static int enable_tcp_sack; 786 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, enable_tcp_sack, CTLFLAG_RWTUN, &enable_tcp_sack, 0, 787 "Enable tcp SACK (default = 0)"); 788 789 static int enable_tcp_window_scaling = 1; 790 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, enable_tcp_window_scaling, CTLFLAG_RWTUN, &enable_tcp_window_scaling, 0, 791 "Enable tcp window scaling (default = 1)"); 792 793 int c4iw_debug = 1; 794 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, c4iw_debug, CTLFLAG_RWTUN, &c4iw_debug, 0, 795 "Enable debug logging (default = 0)"); 796 797 static int peer2peer; 798 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, peer2peer, CTLFLAG_RWTUN, &peer2peer, 0, 799 "Support peer2peer ULPs (default = 0)"); 800 801 static int p2p_type = FW_RI_INIT_P2PTYPE_READ_REQ; 802 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, p2p_type, CTLFLAG_RWTUN, &p2p_type, 0, 803 "RDMAP opcode to use for the RTR message: 1 = RDMA_READ 0 = RDMA_WRITE (default 1)"); 804 805 static int ep_timeout_secs = 60; 806 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, ep_timeout_secs, CTLFLAG_RWTUN, &ep_timeout_secs, 0, 807 "CM Endpoint operation timeout in seconds (default = 60)"); 808 809 static int mpa_rev = 1; 810 SYSCTL_INT(_hw_iw_cxgbe, 
OID_AUTO, mpa_rev, CTLFLAG_RWTUN, &mpa_rev, 0, 811 "MPA Revision, 0 supports amso1100, 1 is RFC5044 spec compliant, 2 is IETF MPA Peer Connect Draft compliant (default = 1)"); 812 813 static int markers_enabled; 814 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, markers_enabled, CTLFLAG_RWTUN, &markers_enabled, 0, 815 "Enable MPA MARKERS (default(0) = disabled)"); 816 817 static int crc_enabled = 1; 818 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, crc_enabled, CTLFLAG_RWTUN, &crc_enabled, 0, 819 "Enable MPA CRC (default(1) = enabled)"); 820 821 static int rcv_win = 256 * 1024; 822 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, rcv_win, CTLFLAG_RWTUN, &rcv_win, 0, 823 "TCP receive window in bytes (default = 256KB)"); 824 825 static int snd_win = 128 * 1024; 826 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, snd_win, CTLFLAG_RWTUN, &snd_win, 0, 827 "TCP send window in bytes (default = 128KB)"); 828 829 int db_fc_threshold = 2000; 830 SYSCTL_INT(_hw_iw_cxgbe, OID_AUTO, db_fc_threshold, CTLFLAG_RWTUN, &db_fc_threshold, 0, 831 "QP count/threshold that triggers automatic"); 832 833 static void 834 start_ep_timer(struct c4iw_ep *ep) 835 { 836 837 if (timer_pending(&ep->timer)) { 838 CTR2(KTR_IW_CXGBE, "%s: ep %p, already started", __func__, ep); 839 printk(KERN_ERR "%s timer already started! ep %p\n", __func__, 840 ep); 841 return; 842 } 843 clear_bit(TIMEOUT, &ep->com.flags); 844 c4iw_get_ep(&ep->com); 845 ep->timer.expires = jiffies + ep_timeout_secs * HZ; 846 ep->timer.data = (unsigned long)ep; 847 ep->timer.function = ep_timeout; 848 add_timer(&ep->timer); 849 } 850 851 static void 852 stop_ep_timer(struct c4iw_ep *ep) 853 { 854 855 del_timer_sync(&ep->timer); 856 if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) { 857 c4iw_put_ep(&ep->com); 858 } 859 } 860 861 static enum 862 c4iw_ep_state state_read(struct c4iw_ep_common *epc) 863 { 864 enum c4iw_ep_state state; 865 866 mutex_lock(&epc->mutex); 867 state = epc->state; 868 mutex_unlock(&epc->mutex); 869 870 return (state); 871 } 872 873 static void 874 __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new) 875 { 876 877 epc->state = new; 878 } 879 880 static void 881 state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new) 882 { 883 884 mutex_lock(&epc->mutex); 885 __state_set(epc, new); 886 mutex_unlock(&epc->mutex); 887 } 888 889 static void * 890 alloc_ep(int size, gfp_t gfp) 891 { 892 struct c4iw_ep_common *epc; 893 894 epc = kzalloc(size, gfp); 895 if (epc == NULL) 896 return (NULL); 897 898 kref_init(&epc->kref); 899 mutex_init(&epc->mutex); 900 c4iw_init_wr_wait(&epc->wr_wait); 901 902 return (epc); 903 } 904 905 void 906 __free_ep(struct c4iw_ep_common *epc) 907 { 908 CTR2(KTR_IW_CXGBE, "%s:feB %p", __func__, epc); 909 KASSERT(!epc->so, ("%s warning ep->so %p \n", __func__, epc->so)); 910 KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list!\n", __func__, epc)); 911 free(epc, M_DEVBUF); 912 CTR2(KTR_IW_CXGBE, "%s:feE %p", __func__, epc); 913 } 914 915 void _c4iw_free_ep(struct kref *kref) 916 { 917 struct c4iw_ep *ep; 918 struct c4iw_ep_common *epc; 919 920 ep = container_of(kref, struct c4iw_ep, com.kref); 921 epc = &ep->com; 922 KASSERT(!epc->so, ("%s ep->so %p", __func__, epc->so)); 923 KASSERT(!epc->entry.tqe_prev, ("%s epc %p still on req list", 924 __func__, epc)); 925 kfree(ep); 926 } 927 928 static void release_ep_resources(struct c4iw_ep *ep) 929 { 930 CTR2(KTR_IW_CXGBE, "%s:rerB %p", __func__, ep); 931 set_bit(RELEASE_RESOURCES, &ep->com.flags); 932 c4iw_put_ep(&ep->com); 933 CTR2(KTR_IW_CXGBE, "%s:rerE %p", __func__, ep); 934 } 935 936 static void 
937 send_mpa_req(struct c4iw_ep *ep) 938 { 939 int mpalen; 940 struct mpa_message *mpa; 941 struct mpa_v2_conn_params mpa_v2_params; 942 struct mbuf *m; 943 char mpa_rev_to_use = mpa_rev; 944 int err; 945 946 if (ep->retry_with_mpa_v1) 947 mpa_rev_to_use = 1; 948 mpalen = sizeof(*mpa) + ep->plen; 949 if (mpa_rev_to_use == 2) 950 mpalen += sizeof(struct mpa_v2_conn_params); 951 952 mpa = malloc(mpalen, M_CXGBE, M_NOWAIT); 953 if (mpa == NULL) { 954 failed: 955 connect_reply_upcall(ep, -ENOMEM); 956 return; 957 } 958 959 memset(mpa, 0, mpalen); 960 memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key)); 961 mpa->flags = (crc_enabled ? MPA_CRC : 0) | 962 (markers_enabled ? MPA_MARKERS : 0) | 963 (mpa_rev_to_use == 2 ? MPA_ENHANCED_RDMA_CONN : 0); 964 mpa->private_data_size = htons(ep->plen); 965 mpa->revision = mpa_rev_to_use; 966 967 if (mpa_rev_to_use == 1) { 968 ep->tried_with_mpa_v1 = 1; 969 ep->retry_with_mpa_v1 = 0; 970 } 971 972 if (mpa_rev_to_use == 2) { 973 mpa->private_data_size += 974 htons(sizeof(struct mpa_v2_conn_params)); 975 mpa_v2_params.ird = htons((u16)ep->ird); 976 mpa_v2_params.ord = htons((u16)ep->ord); 977 978 if (peer2peer) { 979 mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL); 980 981 if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE) { 982 mpa_v2_params.ord |= 983 htons(MPA_V2_RDMA_WRITE_RTR); 984 } else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) { 985 mpa_v2_params.ord |= 986 htons(MPA_V2_RDMA_READ_RTR); 987 } 988 } 989 memcpy(mpa->private_data, &mpa_v2_params, 990 sizeof(struct mpa_v2_conn_params)); 991 992 if (ep->plen) { 993 994 memcpy(mpa->private_data + 995 sizeof(struct mpa_v2_conn_params), 996 ep->mpa_pkt + sizeof(*mpa), ep->plen); 997 } 998 } else { 999 1000 if (ep->plen) 1001 memcpy(mpa->private_data, 1002 ep->mpa_pkt + sizeof(*mpa), ep->plen); 1003 CTR2(KTR_IW_CXGBE, "%s:smr7 %p", __func__, ep); 1004 } 1005 1006 m = m_getm(NULL, mpalen, M_NOWAIT, MT_DATA); 1007 if (m == NULL) { 1008 free(mpa, M_CXGBE); 1009 goto failed; 1010 } 1011 m_copyback(m, 0, mpalen, (void *)mpa); 1012 free(mpa, M_CXGBE); 1013 1014 err = sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, 1015 ep->com.thread); 1016 if (err) 1017 goto failed; 1018 1019 START_EP_TIMER(ep); 1020 state_set(&ep->com, MPA_REQ_SENT); 1021 ep->mpa_attr.initiator = 1; 1022 } 1023 1024 static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen) 1025 { 1026 int mpalen ; 1027 struct mpa_message *mpa; 1028 struct mpa_v2_conn_params mpa_v2_params; 1029 struct mbuf *m; 1030 int err; 1031 1032 CTR4(KTR_IW_CXGBE, "%s:smrejB %p %u %d", __func__, ep, ep->hwtid, 1033 ep->plen); 1034 1035 mpalen = sizeof(*mpa) + plen; 1036 1037 if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { 1038 1039 mpalen += sizeof(struct mpa_v2_conn_params); 1040 CTR4(KTR_IW_CXGBE, "%s:smrej1 %p %u %d", __func__, ep, 1041 ep->mpa_attr.version, mpalen); 1042 } 1043 1044 mpa = malloc(mpalen, M_CXGBE, M_NOWAIT); 1045 if (mpa == NULL) 1046 return (-ENOMEM); 1047 1048 memset(mpa, 0, mpalen); 1049 memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); 1050 mpa->flags = MPA_REJECT; 1051 mpa->revision = mpa_rev; 1052 mpa->private_data_size = htons(plen); 1053 1054 if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { 1055 1056 mpa->flags |= MPA_ENHANCED_RDMA_CONN; 1057 mpa->private_data_size += 1058 htons(sizeof(struct mpa_v2_conn_params)); 1059 mpa_v2_params.ird = htons(((u16)ep->ird) | 1060 (peer2peer ? MPA_V2_PEER2PEER_MODEL : 1061 0)); 1062 mpa_v2_params.ord = htons(((u16)ep->ord) | (peer2peer ? 
1063 (p2p_type == 1064 FW_RI_INIT_P2PTYPE_RDMA_WRITE ? 1065 MPA_V2_RDMA_WRITE_RTR : p2p_type == 1066 FW_RI_INIT_P2PTYPE_READ_REQ ? 1067 MPA_V2_RDMA_READ_RTR : 0) : 0)); 1068 memcpy(mpa->private_data, &mpa_v2_params, 1069 sizeof(struct mpa_v2_conn_params)); 1070 1071 if (ep->plen) 1072 memcpy(mpa->private_data + 1073 sizeof(struct mpa_v2_conn_params), pdata, plen); 1074 CTR5(KTR_IW_CXGBE, "%s:smrej3 %p %d %d %d", __func__, ep, 1075 mpa_v2_params.ird, mpa_v2_params.ord, ep->plen); 1076 } else 1077 if (plen) 1078 memcpy(mpa->private_data, pdata, plen); 1079 1080 m = m_getm(NULL, mpalen, M_NOWAIT, MT_DATA); 1081 if (m == NULL) { 1082 free(mpa, M_CXGBE); 1083 return (-ENOMEM); 1084 } 1085 m_copyback(m, 0, mpalen, (void *)mpa); 1086 free(mpa, M_CXGBE); 1087 1088 err = -sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread); 1089 if (!err) 1090 ep->snd_seq += mpalen; 1091 CTR4(KTR_IW_CXGBE, "%s:smrejE %p %u %d", __func__, ep, ep->hwtid, err); 1092 return err; 1093 } 1094 1095 static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen) 1096 { 1097 int mpalen; 1098 struct mpa_message *mpa; 1099 struct mbuf *m; 1100 struct mpa_v2_conn_params mpa_v2_params; 1101 int err; 1102 1103 CTR2(KTR_IW_CXGBE, "%s:smrepB %p", __func__, ep); 1104 1105 mpalen = sizeof(*mpa) + plen; 1106 1107 if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { 1108 1109 CTR3(KTR_IW_CXGBE, "%s:smrep1 %p %d", __func__, ep, 1110 ep->mpa_attr.version); 1111 mpalen += sizeof(struct mpa_v2_conn_params); 1112 } 1113 1114 mpa = malloc(mpalen, M_CXGBE, M_NOWAIT); 1115 if (mpa == NULL) 1116 return (-ENOMEM); 1117 1118 memset(mpa, 0, sizeof(*mpa)); 1119 memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); 1120 mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) | 1121 (markers_enabled ? 
MPA_MARKERS : 0); 1122 mpa->revision = ep->mpa_attr.version; 1123 mpa->private_data_size = htons(plen); 1124 1125 if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { 1126 1127 mpa->flags |= MPA_ENHANCED_RDMA_CONN; 1128 mpa->private_data_size += 1129 htons(sizeof(struct mpa_v2_conn_params)); 1130 mpa_v2_params.ird = htons((u16)ep->ird); 1131 mpa_v2_params.ord = htons((u16)ep->ord); 1132 CTR5(KTR_IW_CXGBE, "%s:smrep3 %p %d %d %d", __func__, ep, 1133 ep->mpa_attr.version, mpa_v2_params.ird, mpa_v2_params.ord); 1134 1135 if (peer2peer && (ep->mpa_attr.p2p_type != 1136 FW_RI_INIT_P2PTYPE_DISABLED)) { 1137 1138 mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL); 1139 1140 if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE) { 1141 1142 mpa_v2_params.ord |= 1143 htons(MPA_V2_RDMA_WRITE_RTR); 1144 CTR5(KTR_IW_CXGBE, "%s:smrep4 %p %d %d %d", 1145 __func__, ep, p2p_type, mpa_v2_params.ird, 1146 mpa_v2_params.ord); 1147 } 1148 else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) { 1149 1150 mpa_v2_params.ord |= 1151 htons(MPA_V2_RDMA_READ_RTR); 1152 CTR5(KTR_IW_CXGBE, "%s:smrep5 %p %d %d %d", 1153 __func__, ep, p2p_type, mpa_v2_params.ird, 1154 mpa_v2_params.ord); 1155 } 1156 } 1157 1158 memcpy(mpa->private_data, &mpa_v2_params, 1159 sizeof(struct mpa_v2_conn_params)); 1160 1161 if (ep->plen) 1162 memcpy(mpa->private_data + 1163 sizeof(struct mpa_v2_conn_params), pdata, plen); 1164 } else 1165 if (plen) 1166 memcpy(mpa->private_data, pdata, plen); 1167 1168 m = m_getm(NULL, mpalen, M_NOWAIT, MT_DATA); 1169 if (m == NULL) { 1170 free(mpa, M_CXGBE); 1171 return (-ENOMEM); 1172 } 1173 m_copyback(m, 0, mpalen, (void *)mpa); 1174 free(mpa, M_CXGBE); 1175 1176 1177 state_set(&ep->com, MPA_REP_SENT); 1178 ep->snd_seq += mpalen; 1179 err = -sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, 1180 ep->com.thread); 1181 CTR3(KTR_IW_CXGBE, "%s:smrepE %p %d", __func__, ep, err); 1182 return err; 1183 } 1184 1185 1186 1187 static void close_complete_upcall(struct c4iw_ep *ep, int status) 1188 { 1189 struct iw_cm_event event; 1190 1191 CTR2(KTR_IW_CXGBE, "%s:ccuB %p", __func__, ep); 1192 memset(&event, 0, sizeof(event)); 1193 event.event = IW_CM_EVENT_CLOSE; 1194 event.status = status; 1195 1196 if (ep->com.cm_id) { 1197 1198 CTR2(KTR_IW_CXGBE, "%s:ccu1 %1", __func__, ep); 1199 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 1200 ep->com.cm_id->rem_ref(ep->com.cm_id); 1201 ep->com.cm_id = NULL; 1202 ep->com.qp = NULL; 1203 set_bit(CLOSE_UPCALL, &ep->com.history); 1204 } 1205 CTR2(KTR_IW_CXGBE, "%s:ccuE %p", __func__, ep); 1206 } 1207 1208 static int abort_connection(struct c4iw_ep *ep) 1209 { 1210 int err; 1211 1212 CTR2(KTR_IW_CXGBE, "%s:abB %p", __func__, ep); 1213 state_set(&ep->com, ABORTING); 1214 abort_socket(ep); 1215 err = close_socket(&ep->com, 0); 1216 set_bit(ABORT_CONN, &ep->com.history); 1217 CTR2(KTR_IW_CXGBE, "%s:abE %p", __func__, ep); 1218 return err; 1219 } 1220 1221 static void peer_close_upcall(struct c4iw_ep *ep) 1222 { 1223 struct iw_cm_event event; 1224 1225 CTR2(KTR_IW_CXGBE, "%s:pcuB %p", __func__, ep); 1226 memset(&event, 0, sizeof(event)); 1227 event.event = IW_CM_EVENT_DISCONNECT; 1228 1229 if (ep->com.cm_id) { 1230 1231 CTR2(KTR_IW_CXGBE, "%s:pcu1 %p", __func__, ep); 1232 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 1233 set_bit(DISCONN_UPCALL, &ep->com.history); 1234 } 1235 CTR2(KTR_IW_CXGBE, "%s:pcuE %p", __func__, ep); 1236 } 1237 1238 static void peer_abort_upcall(struct c4iw_ep *ep) 1239 { 1240 struct iw_cm_event event; 1241 1242 CTR2(KTR_IW_CXGBE, "%s:pauB 
%p", __func__, ep); 1243 memset(&event, 0, sizeof(event)); 1244 event.event = IW_CM_EVENT_CLOSE; 1245 event.status = -ECONNRESET; 1246 1247 if (ep->com.cm_id) { 1248 1249 CTR2(KTR_IW_CXGBE, "%s:pau1 %p", __func__, ep); 1250 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 1251 ep->com.cm_id->rem_ref(ep->com.cm_id); 1252 ep->com.cm_id = NULL; 1253 ep->com.qp = NULL; 1254 set_bit(ABORT_UPCALL, &ep->com.history); 1255 } 1256 CTR2(KTR_IW_CXGBE, "%s:pauE %p", __func__, ep); 1257 } 1258 1259 static void connect_reply_upcall(struct c4iw_ep *ep, int status) 1260 { 1261 struct iw_cm_event event; 1262 1263 CTR3(KTR_IW_CXGBE, "%s:cruB %p", __func__, ep, status); 1264 memset(&event, 0, sizeof(event)); 1265 event.event = IW_CM_EVENT_CONNECT_REPLY; 1266 event.status = (status ==-ECONNABORTED)?-ECONNRESET: status; 1267 event.local_addr = ep->com.local_addr; 1268 event.remote_addr = ep->com.remote_addr; 1269 1270 if ((status == 0) || (status == -ECONNREFUSED)) { 1271 1272 if (!ep->tried_with_mpa_v1) { 1273 1274 CTR2(KTR_IW_CXGBE, "%s:cru1 %p", __func__, ep); 1275 /* this means MPA_v2 is used */ 1276 event.private_data_len = ep->plen - 1277 sizeof(struct mpa_v2_conn_params); 1278 event.private_data = ep->mpa_pkt + 1279 sizeof(struct mpa_message) + 1280 sizeof(struct mpa_v2_conn_params); 1281 } else { 1282 1283 CTR2(KTR_IW_CXGBE, "%s:cru2 %p", __func__, ep); 1284 /* this means MPA_v1 is used */ 1285 event.private_data_len = ep->plen; 1286 event.private_data = ep->mpa_pkt + 1287 sizeof(struct mpa_message); 1288 } 1289 } 1290 1291 if (ep->com.cm_id) { 1292 1293 CTR2(KTR_IW_CXGBE, "%s:cru3 %p", __func__, ep); 1294 set_bit(CONN_RPL_UPCALL, &ep->com.history); 1295 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 1296 } 1297 1298 if(status == -ECONNABORTED) { 1299 1300 CTR3(KTR_IW_CXGBE, "%s:cruE %p %d", __func__, ep, status); 1301 return; 1302 } 1303 1304 if (status < 0) { 1305 1306 CTR3(KTR_IW_CXGBE, "%s:cru4 %p %d", __func__, ep, status); 1307 ep->com.cm_id->rem_ref(ep->com.cm_id); 1308 ep->com.cm_id = NULL; 1309 ep->com.qp = NULL; 1310 } 1311 1312 CTR2(KTR_IW_CXGBE, "%s:cruE %p", __func__, ep); 1313 } 1314 1315 static int connect_request_upcall(struct c4iw_ep *ep) 1316 { 1317 struct iw_cm_event event; 1318 int ret; 1319 1320 CTR3(KTR_IW_CXGBE, "%s: ep %p, mpa_v1 %d", __func__, ep, 1321 ep->tried_with_mpa_v1); 1322 1323 memset(&event, 0, sizeof(event)); 1324 event.event = IW_CM_EVENT_CONNECT_REQUEST; 1325 event.local_addr = ep->com.local_addr; 1326 event.remote_addr = ep->com.remote_addr; 1327 event.provider_data = ep; 1328 event.so = ep->com.so; 1329 1330 if (!ep->tried_with_mpa_v1) { 1331 /* this means MPA_v2 is used */ 1332 event.ord = ep->ord; 1333 event.ird = ep->ird; 1334 event.private_data_len = ep->plen - 1335 sizeof(struct mpa_v2_conn_params); 1336 event.private_data = ep->mpa_pkt + sizeof(struct mpa_message) + 1337 sizeof(struct mpa_v2_conn_params); 1338 } else { 1339 1340 /* this means MPA_v1 is used. 
Send max supported */ 1341 event.ord = c4iw_max_read_depth; 1342 event.ird = c4iw_max_read_depth; 1343 event.private_data_len = ep->plen; 1344 event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); 1345 } 1346 1347 c4iw_get_ep(&ep->com); 1348 ret = ep->parent_ep->com.cm_id->event_handler(ep->parent_ep->com.cm_id, 1349 &event); 1350 if(ret) 1351 c4iw_put_ep(&ep->com); 1352 1353 set_bit(CONNREQ_UPCALL, &ep->com.history); 1354 c4iw_put_ep(&ep->parent_ep->com); 1355 return ret; 1356 } 1357 1358 static void established_upcall(struct c4iw_ep *ep) 1359 { 1360 struct iw_cm_event event; 1361 1362 CTR2(KTR_IW_CXGBE, "%s:euB %p", __func__, ep); 1363 memset(&event, 0, sizeof(event)); 1364 event.event = IW_CM_EVENT_ESTABLISHED; 1365 event.ird = ep->ird; 1366 event.ord = ep->ord; 1367 1368 if (ep->com.cm_id) { 1369 1370 CTR2(KTR_IW_CXGBE, "%s:eu1 %p", __func__, ep); 1371 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 1372 set_bit(ESTAB_UPCALL, &ep->com.history); 1373 } 1374 CTR2(KTR_IW_CXGBE, "%s:euE %p", __func__, ep); 1375 } 1376 1377 1378 1379 static void process_mpa_reply(struct c4iw_ep *ep) 1380 { 1381 struct mpa_message *mpa; 1382 struct mpa_v2_conn_params *mpa_v2_params; 1383 u16 plen; 1384 u16 resp_ird, resp_ord; 1385 u8 rtr_mismatch = 0, insuff_ird = 0; 1386 struct c4iw_qp_attributes attrs; 1387 enum c4iw_qp_attr_mask mask; 1388 int err; 1389 struct mbuf *top, *m; 1390 int flags = MSG_DONTWAIT; 1391 struct uio uio; 1392 1393 CTR2(KTR_IW_CXGBE, "%s:pmrB %p", __func__, ep); 1394 1395 /* 1396 * Stop mpa timer. If it expired, then the state has 1397 * changed and we bail since ep_timeout already aborted 1398 * the connection. 1399 */ 1400 STOP_EP_TIMER(ep); 1401 if (state_read(&ep->com) != MPA_REQ_SENT) 1402 return; 1403 1404 uio.uio_resid = 1000000; 1405 uio.uio_td = ep->com.thread; 1406 err = soreceive(ep->com.so, NULL, &uio, &top, NULL, &flags); 1407 1408 if (err) { 1409 1410 if (err == EWOULDBLOCK) { 1411 1412 CTR2(KTR_IW_CXGBE, "%s:pmr1 %p", __func__, ep); 1413 START_EP_TIMER(ep); 1414 return; 1415 } 1416 err = -err; 1417 CTR2(KTR_IW_CXGBE, "%s:pmr2 %p", __func__, ep); 1418 goto err; 1419 } 1420 1421 if (ep->com.so->so_rcv.sb_mb) { 1422 1423 CTR2(KTR_IW_CXGBE, "%s:pmr3 %p", __func__, ep); 1424 printf("%s data after soreceive called! so %p sb_mb %p top %p\n", 1425 __func__, ep->com.so, ep->com.so->so_rcv.sb_mb, top); 1426 } 1427 1428 m = top; 1429 1430 do { 1431 1432 CTR2(KTR_IW_CXGBE, "%s:pmr4 %p", __func__, ep); 1433 /* 1434 * If we get more than the supported amount of private data 1435 * then we must fail this connection. 1436 */ 1437 if (ep->mpa_pkt_len + m->m_len > sizeof(ep->mpa_pkt)) { 1438 1439 CTR3(KTR_IW_CXGBE, "%s:pmr5 %p %d", __func__, ep, 1440 ep->mpa_pkt_len + m->m_len); 1441 err = (-EINVAL); 1442 goto err; 1443 } 1444 1445 /* 1446 * copy the new data into our accumulation buffer. 1447 */ 1448 m_copydata(m, 0, m->m_len, &(ep->mpa_pkt[ep->mpa_pkt_len])); 1449 ep->mpa_pkt_len += m->m_len; 1450 if (!m->m_next) 1451 m = m->m_nextpkt; 1452 else 1453 m = m->m_next; 1454 } while (m); 1455 1456 m_freem(top); 1457 /* 1458 * if we don't even have the mpa message, then bail. 1459 */ 1460 if (ep->mpa_pkt_len < sizeof(*mpa)) 1461 return; 1462 mpa = (struct mpa_message *) ep->mpa_pkt; 1463 1464 /* Validate MPA header. */ 1465 if (mpa->revision > mpa_rev) { 1466 1467 CTR4(KTR_IW_CXGBE, "%s:pmr6 %p %d %d", __func__, ep, 1468 mpa->revision, mpa_rev); 1469 printk(KERN_ERR MOD "%s MPA version mismatch. 
Local = %d, " 1470 " Received = %d\n", __func__, mpa_rev, mpa->revision); 1471 err = -EPROTO; 1472 goto err; 1473 } 1474 1475 if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) { 1476 1477 CTR2(KTR_IW_CXGBE, "%s:pmr7 %p", __func__, ep); 1478 err = -EPROTO; 1479 goto err; 1480 } 1481 1482 plen = ntohs(mpa->private_data_size); 1483 1484 /* 1485 * Fail if there's too much private data. 1486 */ 1487 if (plen > MPA_MAX_PRIVATE_DATA) { 1488 1489 CTR2(KTR_IW_CXGBE, "%s:pmr8 %p", __func__, ep); 1490 err = -EPROTO; 1491 goto err; 1492 } 1493 1494 /* 1495 * If plen does not account for pkt size 1496 */ 1497 if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) { 1498 1499 CTR2(KTR_IW_CXGBE, "%s:pmr9 %p", __func__, ep); 1500 err = -EPROTO; 1501 goto err; 1502 } 1503 1504 ep->plen = (u8) plen; 1505 1506 /* 1507 * If we don't have all the pdata yet, then bail. 1508 * We'll continue process when more data arrives. 1509 */ 1510 if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) { 1511 1512 CTR2(KTR_IW_CXGBE, "%s:pmra %p", __func__, ep); 1513 return; 1514 } 1515 1516 if (mpa->flags & MPA_REJECT) { 1517 1518 CTR2(KTR_IW_CXGBE, "%s:pmrb %p", __func__, ep); 1519 err = -ECONNREFUSED; 1520 goto err; 1521 } 1522 1523 /* 1524 * If we get here we have accumulated the entire mpa 1525 * start reply message including private data. And 1526 * the MPA header is valid. 1527 */ 1528 state_set(&ep->com, FPDU_MODE); 1529 ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; 1530 ep->mpa_attr.recv_marker_enabled = markers_enabled; 1531 ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; 1532 ep->mpa_attr.version = mpa->revision; 1533 ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED; 1534 1535 if (mpa->revision == 2) { 1536 1537 CTR2(KTR_IW_CXGBE, "%s:pmrc %p", __func__, ep); 1538 ep->mpa_attr.enhanced_rdma_conn = 1539 mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0; 1540 1541 if (ep->mpa_attr.enhanced_rdma_conn) { 1542 1543 CTR2(KTR_IW_CXGBE, "%s:pmrd %p", __func__, ep); 1544 mpa_v2_params = (struct mpa_v2_conn_params *) 1545 (ep->mpa_pkt + sizeof(*mpa)); 1546 resp_ird = ntohs(mpa_v2_params->ird) & 1547 MPA_V2_IRD_ORD_MASK; 1548 resp_ord = ntohs(mpa_v2_params->ord) & 1549 MPA_V2_IRD_ORD_MASK; 1550 1551 /* 1552 * This is a double-check. 
Ideally, below checks are 1553 * not required since ird/ord stuff has been taken 1554 * care of in c4iw_accept_cr 1555 */ 1556 if ((ep->ird < resp_ord) || (ep->ord > resp_ird)) { 1557 1558 CTR2(KTR_IW_CXGBE, "%s:pmre %p", __func__, ep); 1559 err = -ENOMEM; 1560 ep->ird = resp_ord; 1561 ep->ord = resp_ird; 1562 insuff_ird = 1; 1563 } 1564 1565 if (ntohs(mpa_v2_params->ird) & 1566 MPA_V2_PEER2PEER_MODEL) { 1567 1568 CTR2(KTR_IW_CXGBE, "%s:pmrf %p", __func__, ep); 1569 if (ntohs(mpa_v2_params->ord) & 1570 MPA_V2_RDMA_WRITE_RTR) { 1571 1572 CTR2(KTR_IW_CXGBE, "%s:pmrg %p", __func__, ep); 1573 ep->mpa_attr.p2p_type = 1574 FW_RI_INIT_P2PTYPE_RDMA_WRITE; 1575 } 1576 else if (ntohs(mpa_v2_params->ord) & 1577 MPA_V2_RDMA_READ_RTR) { 1578 1579 CTR2(KTR_IW_CXGBE, "%s:pmrh %p", __func__, ep); 1580 ep->mpa_attr.p2p_type = 1581 FW_RI_INIT_P2PTYPE_READ_REQ; 1582 } 1583 } 1584 } 1585 } else { 1586 1587 CTR2(KTR_IW_CXGBE, "%s:pmri %p", __func__, ep); 1588 1589 if (mpa->revision == 1) { 1590 1591 CTR2(KTR_IW_CXGBE, "%s:pmrj %p", __func__, ep); 1592 1593 if (peer2peer) { 1594 1595 CTR2(KTR_IW_CXGBE, "%s:pmrk %p", __func__, ep); 1596 ep->mpa_attr.p2p_type = p2p_type; 1597 } 1598 } 1599 } 1600 1601 if (set_tcpinfo(ep)) { 1602 1603 CTR2(KTR_IW_CXGBE, "%s:pmrl %p", __func__, ep); 1604 printf("%s set_tcpinfo error\n", __func__); 1605 goto err; 1606 } 1607 1608 CTR6(KTR_IW_CXGBE, "%s - crc_enabled = %d, recv_marker_enabled = %d, " 1609 "xmit_marker_enabled = %d, version = %d p2p_type = %d", __func__, 1610 ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled, 1611 ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version, 1612 ep->mpa_attr.p2p_type); 1613 1614 /* 1615 * If responder's RTR does not match with that of initiator, assign 1616 * FW_RI_INIT_P2PTYPE_DISABLED in mpa attributes so that RTR is not 1617 * generated when moving QP to RTS state. 1618 * A TERM message will be sent after QP has moved to RTS state 1619 */ 1620 if ((ep->mpa_attr.version == 2) && peer2peer && 1621 (ep->mpa_attr.p2p_type != p2p_type)) { 1622 1623 CTR2(KTR_IW_CXGBE, "%s:pmrm %p", __func__, ep); 1624 ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED; 1625 rtr_mismatch = 1; 1626 } 1627 1628 1629 //ep->ofld_txq = TOEPCB(ep->com.so)->ofld_txq; 1630 attrs.mpa_attr = ep->mpa_attr; 1631 attrs.max_ird = ep->ird; 1632 attrs.max_ord = ep->ord; 1633 attrs.llp_stream_handle = ep; 1634 attrs.next_state = C4IW_QP_STATE_RTS; 1635 1636 mask = C4IW_QP_ATTR_NEXT_STATE | 1637 C4IW_QP_ATTR_LLP_STREAM_HANDLE | C4IW_QP_ATTR_MPA_ATTR | 1638 C4IW_QP_ATTR_MAX_IRD | C4IW_QP_ATTR_MAX_ORD; 1639 1640 /* bind QP and TID with INIT_WR */ 1641 err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, mask, &attrs, 1); 1642 1643 if (err) { 1644 1645 CTR2(KTR_IW_CXGBE, "%s:pmrn %p", __func__, ep); 1646 goto err; 1647 } 1648 1649 /* 1650 * If responder's RTR requirement did not match with what initiator 1651 * supports, generate TERM message 1652 */ 1653 if (rtr_mismatch) { 1654 1655 CTR2(KTR_IW_CXGBE, "%s:pmro %p", __func__, ep); 1656 printk(KERN_ERR "%s: RTR mismatch, sending TERM\n", __func__); 1657 attrs.layer_etype = LAYER_MPA | DDP_LLP; 1658 attrs.ecode = MPA_NOMATCH_RTR; 1659 attrs.next_state = C4IW_QP_STATE_TERMINATE; 1660 err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, 1661 C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); 1662 err = -ENOMEM; 1663 goto out; 1664 } 1665 1666 /* 1667 * Generate TERM if initiator IRD is not sufficient for responder 1668 * provided ORD. 
Currently, we do the same behaviour even when 1669 * responder provided IRD is also not sufficient as regards to 1670 * initiator ORD. 1671 */ 1672 if (insuff_ird) { 1673 1674 CTR2(KTR_IW_CXGBE, "%s:pmrp %p", __func__, ep); 1675 printk(KERN_ERR "%s: Insufficient IRD, sending TERM\n", 1676 __func__); 1677 attrs.layer_etype = LAYER_MPA | DDP_LLP; 1678 attrs.ecode = MPA_INSUFF_IRD; 1679 attrs.next_state = C4IW_QP_STATE_TERMINATE; 1680 err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, 1681 C4IW_QP_ATTR_NEXT_STATE, &attrs, 0); 1682 err = -ENOMEM; 1683 goto out; 1684 } 1685 goto out; 1686 err: 1687 state_set(&ep->com, ABORTING); 1688 abort_connection(ep); 1689 out: 1690 connect_reply_upcall(ep, err); 1691 CTR2(KTR_IW_CXGBE, "%s:pmrE %p", __func__, ep); 1692 return; 1693 } 1694 1695 static void 1696 process_mpa_request(struct c4iw_ep *ep) 1697 { 1698 struct mpa_message *mpa; 1699 u16 plen; 1700 int flags = MSG_DONTWAIT; 1701 int rc; 1702 struct iovec iov; 1703 struct uio uio; 1704 enum c4iw_ep_state state = state_read(&ep->com); 1705 1706 CTR3(KTR_IW_CXGBE, "%s: ep %p, state %s", __func__, ep, states[state]); 1707 1708 if (state != MPA_REQ_WAIT) 1709 return; 1710 1711 iov.iov_base = &ep->mpa_pkt[ep->mpa_pkt_len]; 1712 iov.iov_len = sizeof(ep->mpa_pkt) - ep->mpa_pkt_len; 1713 uio.uio_iov = &iov; 1714 uio.uio_iovcnt = 1; 1715 uio.uio_offset = 0; 1716 uio.uio_resid = sizeof(ep->mpa_pkt) - ep->mpa_pkt_len; 1717 uio.uio_segflg = UIO_SYSSPACE; 1718 uio.uio_rw = UIO_READ; 1719 uio.uio_td = NULL; /* uio.uio_td = ep->com.thread; */ 1720 1721 rc = soreceive(ep->com.so, NULL, &uio, NULL, NULL, &flags); 1722 if (rc == EAGAIN) 1723 return; 1724 else if (rc) { 1725 abort: 1726 STOP_EP_TIMER(ep); 1727 abort_connection(ep); 1728 return; 1729 } 1730 KASSERT(uio.uio_offset > 0, ("%s: sorecieve on so %p read no data", 1731 __func__, ep->com.so)); 1732 ep->mpa_pkt_len += uio.uio_offset; 1733 1734 /* 1735 * If we get more than the supported amount of private data then we must 1736 * fail this connection. XXX: check so_rcv->sb_cc, or peek with another 1737 * soreceive, or increase the size of mpa_pkt by 1 and abort if the last 1738 * byte is filled by the soreceive above. 1739 */ 1740 1741 /* Don't even have the MPA message. Wait for more data to arrive. */ 1742 if (ep->mpa_pkt_len < sizeof(*mpa)) 1743 return; 1744 mpa = (struct mpa_message *) ep->mpa_pkt; 1745 1746 /* 1747 * Validate MPA Header. 1748 */ 1749 if (mpa->revision > mpa_rev) { 1750 log(LOG_ERR, "%s: MPA version mismatch. Local = %d," 1751 " Received = %d\n", __func__, mpa_rev, mpa->revision); 1752 goto abort; 1753 } 1754 1755 if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key))) 1756 goto abort; 1757 1758 /* 1759 * Fail if there's too much private data. 1760 */ 1761 plen = ntohs(mpa->private_data_size); 1762 if (plen > MPA_MAX_PRIVATE_DATA) 1763 goto abort; 1764 1765 /* 1766 * If plen does not account for pkt size 1767 */ 1768 if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) 1769 goto abort; 1770 1771 ep->plen = (u8) plen; 1772 1773 /* 1774 * If we don't have all the pdata yet, then bail. 1775 */ 1776 if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) 1777 return; 1778 1779 /* 1780 * If we get here we have accumulated the entire mpa 1781 * start reply message including private data. 1782 */ 1783 ep->mpa_attr.initiator = 0; 1784 ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; 1785 ep->mpa_attr.recv_marker_enabled = markers_enabled; 1786 ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 
1 : 0; 1787 ep->mpa_attr.version = mpa->revision; 1788 if (mpa->revision == 1) 1789 ep->tried_with_mpa_v1 = 1; 1790 ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED; 1791 1792 if (mpa->revision == 2) { 1793 ep->mpa_attr.enhanced_rdma_conn = 1794 mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0; 1795 if (ep->mpa_attr.enhanced_rdma_conn) { 1796 struct mpa_v2_conn_params *mpa_v2_params; 1797 u16 ird, ord; 1798 1799 mpa_v2_params = (void *)&ep->mpa_pkt[sizeof(*mpa)]; 1800 ird = ntohs(mpa_v2_params->ird); 1801 ord = ntohs(mpa_v2_params->ord); 1802 1803 ep->ird = ird & MPA_V2_IRD_ORD_MASK; 1804 ep->ord = ord & MPA_V2_IRD_ORD_MASK; 1805 if (ird & MPA_V2_PEER2PEER_MODEL && peer2peer) { 1806 if (ord & MPA_V2_RDMA_WRITE_RTR) { 1807 ep->mpa_attr.p2p_type = 1808 FW_RI_INIT_P2PTYPE_RDMA_WRITE; 1809 } else if (ord & MPA_V2_RDMA_READ_RTR) { 1810 ep->mpa_attr.p2p_type = 1811 FW_RI_INIT_P2PTYPE_READ_REQ; 1812 } 1813 } 1814 } 1815 } else if (mpa->revision == 1 && peer2peer) 1816 ep->mpa_attr.p2p_type = p2p_type; 1817 1818 if (set_tcpinfo(ep)) 1819 goto abort; 1820 1821 CTR5(KTR_IW_CXGBE, "%s: crc_enabled = %d, recv_marker_enabled = %d, " 1822 "xmit_marker_enabled = %d, version = %d", __func__, 1823 ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled, 1824 ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version); 1825 1826 state_set(&ep->com, MPA_REQ_RCVD); 1827 STOP_EP_TIMER(ep); 1828 1829 /* drive upcall */ 1830 mutex_lock(&ep->parent_ep->com.mutex); 1831 if (ep->parent_ep->com.state != DEAD) { 1832 if(connect_request_upcall(ep)) { 1833 abort_connection(ep); 1834 } 1835 }else 1836 abort_connection(ep); 1837 mutex_unlock(&ep->parent_ep->com.mutex); 1838 } 1839 1840 /* 1841 * Upcall from the adapter indicating data has been transmitted. 1842 * For us its just the single MPA request or reply. We can now free 1843 * the skb holding the mpa message. 
1844 */ 1845 int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len) 1846 { 1847 int err; 1848 struct c4iw_ep *ep = to_ep(cm_id); 1849 CTR2(KTR_IW_CXGBE, "%s:crcB %p", __func__, ep); 1850 1851 if (state_read(&ep->com) == DEAD) { 1852 1853 CTR2(KTR_IW_CXGBE, "%s:crc1 %p", __func__, ep); 1854 c4iw_put_ep(&ep->com); 1855 return -ECONNRESET; 1856 } 1857 set_bit(ULP_REJECT, &ep->com.history); 1858 BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); 1859 1860 if (mpa_rev == 0) { 1861 1862 CTR2(KTR_IW_CXGBE, "%s:crc2 %p", __func__, ep); 1863 abort_connection(ep); 1864 } 1865 else { 1866 1867 CTR2(KTR_IW_CXGBE, "%s:crc3 %p", __func__, ep); 1868 err = send_mpa_reject(ep, pdata, pdata_len); 1869 err = soshutdown(ep->com.so, 3); 1870 } 1871 c4iw_put_ep(&ep->com); 1872 CTR2(KTR_IW_CXGBE, "%s:crc4 %p", __func__, ep); 1873 return 0; 1874 } 1875 1876 int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) 1877 { 1878 int err; 1879 struct c4iw_qp_attributes attrs; 1880 enum c4iw_qp_attr_mask mask; 1881 struct c4iw_ep *ep = to_ep(cm_id); 1882 struct c4iw_dev *h = to_c4iw_dev(cm_id->device); 1883 struct c4iw_qp *qp = get_qhp(h, conn_param->qpn); 1884 1885 CTR2(KTR_IW_CXGBE, "%s:cacB %p", __func__, ep); 1886 1887 if (state_read(&ep->com) == DEAD) { 1888 1889 CTR2(KTR_IW_CXGBE, "%s:cac1 %p", __func__, ep); 1890 err = -ECONNRESET; 1891 goto err; 1892 } 1893 1894 BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); 1895 BUG_ON(!qp); 1896 1897 set_bit(ULP_ACCEPT, &ep->com.history); 1898 1899 if ((conn_param->ord > c4iw_max_read_depth) || 1900 (conn_param->ird > c4iw_max_read_depth)) { 1901 1902 CTR2(KTR_IW_CXGBE, "%s:cac2 %p", __func__, ep); 1903 abort_connection(ep); 1904 err = -EINVAL; 1905 goto err; 1906 } 1907 1908 if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { 1909 1910 CTR2(KTR_IW_CXGBE, "%s:cac3 %p", __func__, ep); 1911 1912 if (conn_param->ord > ep->ird) { 1913 1914 CTR2(KTR_IW_CXGBE, "%s:cac4 %p", __func__, ep); 1915 ep->ird = conn_param->ird; 1916 ep->ord = conn_param->ord; 1917 send_mpa_reject(ep, conn_param->private_data, 1918 conn_param->private_data_len); 1919 abort_connection(ep); 1920 err = -ENOMEM; 1921 goto err; 1922 } 1923 1924 if (conn_param->ird > ep->ord) { 1925 1926 CTR2(KTR_IW_CXGBE, "%s:cac5 %p", __func__, ep); 1927 1928 if (!ep->ord) { 1929 1930 CTR2(KTR_IW_CXGBE, "%s:cac6 %p", __func__, ep); 1931 conn_param->ird = 1; 1932 } 1933 else { 1934 CTR2(KTR_IW_CXGBE, "%s:cac7 %p", __func__, ep); 1935 abort_connection(ep); 1936 err = -ENOMEM; 1937 goto err; 1938 } 1939 } 1940 1941 } 1942 ep->ird = conn_param->ird; 1943 ep->ord = conn_param->ord; 1944 1945 if (ep->mpa_attr.version != 2) { 1946 1947 CTR2(KTR_IW_CXGBE, "%s:cac8 %p", __func__, ep); 1948 1949 if (peer2peer && ep->ird == 0) { 1950 1951 CTR2(KTR_IW_CXGBE, "%s:cac9 %p", __func__, ep); 1952 ep->ird = 1; 1953 } 1954 } 1955 1956 1957 cm_id->add_ref(cm_id); 1958 ep->com.cm_id = cm_id; 1959 ep->com.qp = qp; 1960 //ep->ofld_txq = TOEPCB(ep->com.so)->ofld_txq; 1961 1962 /* bind QP to EP and move to RTS */ 1963 attrs.mpa_attr = ep->mpa_attr; 1964 attrs.max_ird = ep->ird; 1965 attrs.max_ord = ep->ord; 1966 attrs.llp_stream_handle = ep; 1967 attrs.next_state = C4IW_QP_STATE_RTS; 1968 1969 /* bind QP and TID with INIT_WR */ 1970 mask = C4IW_QP_ATTR_NEXT_STATE | 1971 C4IW_QP_ATTR_LLP_STREAM_HANDLE | 1972 C4IW_QP_ATTR_MPA_ATTR | 1973 C4IW_QP_ATTR_MAX_IRD | 1974 C4IW_QP_ATTR_MAX_ORD; 1975 1976 err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp, mask, &attrs, 1); 1977 1978 if (err) { 1979 1980 
CTR2(KTR_IW_CXGBE, "%s:caca %p", __func__, ep); 1981 goto err1; 1982 } 1983 err = send_mpa_reply(ep, conn_param->private_data, 1984 conn_param->private_data_len); 1985 1986 if (err) { 1987 1988 CTR2(KTR_IW_CXGBE, "%s:caca %p", __func__, ep); 1989 goto err1; 1990 } 1991 1992 state_set(&ep->com, FPDU_MODE); 1993 established_upcall(ep); 1994 c4iw_put_ep(&ep->com); 1995 CTR2(KTR_IW_CXGBE, "%s:cacE %p", __func__, ep); 1996 return 0; 1997 err1: 1998 ep->com.cm_id = NULL; 1999 ep->com.qp = NULL; 2000 cm_id->rem_ref(cm_id); 2001 err: 2002 c4iw_put_ep(&ep->com); 2003 CTR2(KTR_IW_CXGBE, "%s:cacE err %p", __func__, ep); 2004 return err; 2005 } 2006 2007 2008 2009 int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) 2010 { 2011 int err = 0; 2012 struct c4iw_dev *dev = to_c4iw_dev(cm_id->device); 2013 struct c4iw_ep *ep = NULL; 2014 struct nhop4_extended nh4; 2015 struct toedev *tdev; 2016 2017 CTR2(KTR_IW_CXGBE, "%s:ccB %p", __func__, cm_id); 2018 2019 if ((conn_param->ord > c4iw_max_read_depth) || 2020 (conn_param->ird > c4iw_max_read_depth)) { 2021 2022 CTR2(KTR_IW_CXGBE, "%s:cc1 %p", __func__, cm_id); 2023 err = -EINVAL; 2024 goto out; 2025 } 2026 ep = alloc_ep(sizeof(*ep), M_NOWAIT); 2027 2028 if (!ep) { 2029 2030 CTR2(KTR_IW_CXGBE, "%s:cc2 %p", __func__, cm_id); 2031 printk(KERN_ERR MOD "%s - cannot alloc ep.\n", __func__); 2032 err = -ENOMEM; 2033 goto out; 2034 } 2035 init_timer(&ep->timer); 2036 ep->plen = conn_param->private_data_len; 2037 2038 if (ep->plen) { 2039 2040 CTR2(KTR_IW_CXGBE, "%s:cc3 %p", __func__, ep); 2041 memcpy(ep->mpa_pkt + sizeof(struct mpa_message), 2042 conn_param->private_data, ep->plen); 2043 } 2044 ep->ird = conn_param->ird; 2045 ep->ord = conn_param->ord; 2046 2047 if (peer2peer && ep->ord == 0) { 2048 2049 CTR2(KTR_IW_CXGBE, "%s:cc4 %p", __func__, ep); 2050 ep->ord = 1; 2051 } 2052 2053 cm_id->add_ref(cm_id); 2054 ep->com.dev = dev; 2055 ep->com.cm_id = cm_id; 2056 ep->com.qp = get_qhp(dev, conn_param->qpn); 2057 2058 if (!ep->com.qp) { 2059 2060 CTR2(KTR_IW_CXGBE, "%s:cc5 %p", __func__, ep); 2061 err = -EINVAL; 2062 goto fail2; 2063 } 2064 ep->com.thread = curthread; 2065 ep->com.so = cm_id->so; 2066 2067 init_sock(&ep->com); 2068 2069 /* find a route */ 2070 err = find_route( 2071 cm_id->local_addr.sin_addr.s_addr, 2072 cm_id->remote_addr.sin_addr.s_addr, 2073 cm_id->local_addr.sin_port, 2074 cm_id->remote_addr.sin_port, 0, &nh4); 2075 2076 if (err) { 2077 2078 CTR2(KTR_IW_CXGBE, "%s:cc7 %p", __func__, ep); 2079 printk(KERN_ERR MOD "%s - cannot find route.\n", __func__); 2080 err = -EHOSTUNREACH; 2081 goto fail2; 2082 } 2083 2084 if (!(nh4.nh_ifp->if_capenable & IFCAP_TOE)) { 2085 2086 CTR2(KTR_IW_CXGBE, "%s:cc8 %p", __func__, ep); 2087 printf("%s - interface not TOE capable.\n", __func__); 2088 close_socket(&ep->com, 0); 2089 err = -ENOPROTOOPT; 2090 goto fail3; 2091 } 2092 tdev = TOEDEV(nh4.nh_ifp); 2093 2094 if (tdev == NULL) { 2095 2096 CTR2(KTR_IW_CXGBE, "%s:cc9 %p", __func__, ep); 2097 printf("%s - No toedev for interface.\n", __func__); 2098 goto fail3; 2099 } 2100 fib4_free_nh_ext(RT_DEFAULT_FIB, &nh4); 2101 2102 state_set(&ep->com, CONNECTING); 2103 ep->tos = 0; 2104 ep->com.local_addr = cm_id->local_addr; 2105 ep->com.remote_addr = cm_id->remote_addr; 2106 err = soconnect(ep->com.so, (struct sockaddr *)&ep->com.remote_addr, 2107 ep->com.thread); 2108 2109 if (!err) { 2110 CTR2(KTR_IW_CXGBE, "%s:cca %p", __func__, ep); 2111 goto out; 2112 } else { 2113 close_socket(&ep->com, 0); 2114 goto fail2; 2115 } 2116 2117 fail3: 2118 
CTR2(KTR_IW_CXGBE, "%s:ccb %p", __func__, ep); 2119 fib4_free_nh_ext(RT_DEFAULT_FIB, &nh4); 2120 fail2: 2121 cm_id->rem_ref(cm_id); 2122 c4iw_put_ep(&ep->com); 2123 out: 2124 CTR2(KTR_IW_CXGBE, "%s:ccE %p", __func__, ep); 2125 return err; 2126 } 2127 2128 /* 2129 * iwcm->create_listen. Returns -errno on failure. 2130 */ 2131 int 2132 c4iw_create_listen(struct iw_cm_id *cm_id, int backlog) 2133 { 2134 int rc; 2135 struct c4iw_dev *dev = to_c4iw_dev(cm_id->device); 2136 struct c4iw_listen_ep *ep; 2137 struct socket *so = cm_id->so; 2138 2139 ep = alloc_ep(sizeof(*ep), GFP_KERNEL); 2140 CTR5(KTR_IW_CXGBE, "%s: cm_id %p, lso %p, ep %p, inp %p", __func__, 2141 cm_id, so, ep, so->so_pcb); 2142 if (ep == NULL) { 2143 log(LOG_ERR, "%s: failed to alloc memory for endpoint\n", 2144 __func__); 2145 rc = ENOMEM; 2146 goto failed; 2147 } 2148 2149 cm_id->add_ref(cm_id); 2150 ep->com.cm_id = cm_id; 2151 ep->com.dev = dev; 2152 ep->backlog = backlog; 2153 ep->com.local_addr = cm_id->local_addr; 2154 ep->com.thread = curthread; 2155 state_set(&ep->com, LISTEN); 2156 ep->com.so = so; 2157 init_sock(&ep->com); 2158 2159 rc = solisten(so, ep->backlog, ep->com.thread); 2160 if (rc != 0) { 2161 log(LOG_ERR, "%s: failed to start listener: %d\n", __func__, 2162 rc); 2163 close_socket(&ep->com, 0); 2164 cm_id->rem_ref(cm_id); 2165 c4iw_put_ep(&ep->com); 2166 goto failed; 2167 } 2168 2169 cm_id->provider_data = ep; 2170 return (0); 2171 2172 failed: 2173 CTR3(KTR_IW_CXGBE, "%s: cm_id %p, FAILED (%d)", __func__, cm_id, rc); 2174 return (-rc); 2175 } 2176 2177 int 2178 c4iw_destroy_listen(struct iw_cm_id *cm_id) 2179 { 2180 int rc; 2181 struct c4iw_listen_ep *ep = to_listen_ep(cm_id); 2182 2183 CTR4(KTR_IW_CXGBE, "%s: cm_id %p, so %p, inp %p", __func__, cm_id, 2184 cm_id->so, cm_id->so->so_pcb); 2185 2186 state_set(&ep->com, DEAD); 2187 rc = close_socket(&ep->com, 0); 2188 cm_id->rem_ref(cm_id); 2189 c4iw_put_ep(&ep->com); 2190 2191 return (rc); 2192 } 2193 2194 int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp) 2195 { 2196 int ret = 0; 2197 int close = 0; 2198 int fatal = 0; 2199 struct c4iw_rdev *rdev; 2200 2201 mutex_lock(&ep->com.mutex); 2202 2203 CTR2(KTR_IW_CXGBE, "%s:cedB %p", __func__, ep); 2204 2205 rdev = &ep->com.dev->rdev; 2206 2207 if (c4iw_fatal_error(rdev)) { 2208 2209 CTR2(KTR_IW_CXGBE, "%s:ced1 %p", __func__, ep); 2210 fatal = 1; 2211 close_complete_upcall(ep, -ECONNRESET); 2212 ep->com.state = DEAD; 2213 } 2214 CTR3(KTR_IW_CXGBE, "%s:ced2 %p %s", __func__, ep, 2215 states[ep->com.state]); 2216 2217 switch (ep->com.state) { 2218 2219 case MPA_REQ_WAIT: 2220 case MPA_REQ_SENT: 2221 case MPA_REQ_RCVD: 2222 case MPA_REP_SENT: 2223 case FPDU_MODE: 2224 close = 1; 2225 if (abrupt) 2226 ep->com.state = ABORTING; 2227 else { 2228 ep->com.state = CLOSING; 2229 START_EP_TIMER(ep); 2230 } 2231 set_bit(CLOSE_SENT, &ep->com.flags); 2232 break; 2233 2234 case CLOSING: 2235 2236 if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) { 2237 2238 close = 1; 2239 if (abrupt) { 2240 STOP_EP_TIMER(ep); 2241 ep->com.state = ABORTING; 2242 } else 2243 ep->com.state = MORIBUND; 2244 } 2245 break; 2246 2247 case MORIBUND: 2248 case ABORTING: 2249 case DEAD: 2250 CTR3(KTR_IW_CXGBE, 2251 "%s ignoring disconnect ep %p state %u", __func__, 2252 ep, ep->com.state); 2253 break; 2254 2255 default: 2256 BUG(); 2257 break; 2258 } 2259 2260 mutex_unlock(&ep->com.mutex); 2261 2262 if (close) { 2263 2264 CTR2(KTR_IW_CXGBE, "%s:ced3 %p", __func__, ep); 2265 2266 if (abrupt) { 2267 2268 CTR2(KTR_IW_CXGBE, "%s:ced4 %p", 
static void ep_timeout(unsigned long arg)
{
        struct c4iw_ep *ep = (struct c4iw_ep *)arg;
        int kickit = 0;

        CTR2(KTR_IW_CXGBE, "%s:etB %p", __func__, ep);
        spin_lock(&timeout_lock);

        if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {

                list_add_tail(&ep->entry, &timeout_list);
                kickit = 1;
        }
        spin_unlock(&timeout_lock);

        if (kickit) {

                CTR2(KTR_IW_CXGBE, "%s:et1 %p", __func__, ep);
                queue_work(c4iw_taskq, &c4iw_task);
        }
        CTR2(KTR_IW_CXGBE, "%s:etE %p", __func__, ep);
}

static int fw6_wr_rpl(struct adapter *sc, const __be64 *rpl)
{
        uint64_t val = be64toh(*rpl);
        int ret;
        struct c4iw_wr_wait *wr_waitp;

        /*
         * The return status is carried in bits 15:8 of the first reply word;
         * the second word is the c4iw_wr_wait cookie for the completed work
         * request.
         */
        ret = (int)((val >> 8) & 0xff);
        wr_waitp = (struct c4iw_wr_wait *)rpl[1];
        CTR3(KTR_IW_CXGBE, "%s wr_waitp %p ret %u", __func__, wr_waitp, ret);
        if (wr_waitp)
                c4iw_wake_up(wr_waitp, ret ? -ret : 0);

        return (0);
}
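
/*
 * FW6 CQE notification: the reply payload is a raw t4_cqe, which is copied
 * out and handed to the completion-queue event dispatcher.
 */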
static int fw6_cqe_handler(struct adapter *sc, const __be64 *rpl)
{
        struct t4_cqe cqe = *(const struct t4_cqe *)(&rpl[0]);

        CTR2(KTR_IW_CXGBE, "%s rpl %p", __func__, rpl);
        c4iw_ev_dispatch(sc->iwarp_softc, &cqe);

        return (0);
}

static int terminate(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{

        struct adapter *sc = iq->adapter;

        const struct cpl_rdma_terminate *rpl = (const void *)(rss + 1);
        unsigned int tid = GET_TID(rpl);
        struct c4iw_qp_attributes attrs;
        struct toepcb *toep = lookup_tid(sc, tid);
        struct socket *so = inp_inpcbtosocket(toep->inp);
        struct c4iw_ep *ep = so->so_rcv.sb_upcallarg;

        CTR2(KTR_IW_CXGBE, "%s:tB %p", __func__, ep);

        if (ep && ep->com.qp) {

                printk(KERN_WARNING MOD "TERM received tid %u qpid %u\n", tid,
                    ep->com.qp->wq.sq.qid);
                attrs.next_state = C4IW_QP_STATE_TERMINATE;
                c4iw_modify_qp(ep->com.dev, ep->com.qp, C4IW_QP_ATTR_NEXT_STATE,
                    &attrs, 1);
        } else
                printk(KERN_WARNING MOD "TERM received tid %u no ep/qp\n", tid);
        CTR2(KTR_IW_CXGBE, "%s:tE %p", __func__, ep);

        return 0;
}

void
c4iw_cm_init_cpl(struct adapter *sc)
{

        t4_register_cpl_handler(sc, CPL_RDMA_TERMINATE, terminate);
        t4_register_fw_msg_handler(sc, FW6_TYPE_WR_RPL, fw6_wr_rpl);
        t4_register_fw_msg_handler(sc, FW6_TYPE_CQE, fw6_cqe_handler);
        t4_register_an_handler(sc, c4iw_ev_handler);
}

void
c4iw_cm_term_cpl(struct adapter *sc)
{

        t4_register_cpl_handler(sc, CPL_RDMA_TERMINATE, NULL);
        t4_register_fw_msg_handler(sc, FW6_TYPE_WR_RPL, NULL);
        t4_register_fw_msg_handler(sc, FW6_TYPE_CQE, NULL);
}

int __init c4iw_cm_init(void)
{

        TAILQ_INIT(&req_list);
        spin_lock_init(&req_lock);
        INIT_LIST_HEAD(&timeout_list);
        spin_lock_init(&timeout_lock);

        INIT_WORK(&c4iw_task, process_req);

        c4iw_taskq = create_singlethread_workqueue("iw_cxgbe");
        if (!c4iw_taskq)
                return -ENOMEM;

        return 0;
}

void __exit c4iw_cm_term(void)
{
        WARN_ON(!TAILQ_EMPTY(&req_list));
        WARN_ON(!list_empty(&timeout_list));
        flush_workqueue(c4iw_taskq);
        destroy_workqueue(c4iw_taskq);
}
#endif