1 /*- 2 * Copyright (c) 2012 Chelsio Communications, Inc. 3 * All rights reserved. 4 * Written by: Navdeep Parhar <np@FreeBSD.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 #include "opt_inet.h" 32 33 #ifdef TCP_OFFLOAD 34 #include <sys/param.h> 35 #include <sys/types.h> 36 #include <sys/kernel.h> 37 #include <sys/ktr.h> 38 #include <sys/module.h> 39 #include <sys/protosw.h> 40 #include <sys/domain.h> 41 #include <sys/socket.h> 42 #include <sys/socketvar.h> 43 #include <sys/sglist.h> 44 #include <netinet/in.h> 45 #include <netinet/in_pcb.h> 46 #include <netinet/ip.h> 47 #include <netinet/ip6.h> 48 #include <netinet/tcp_var.h> 49 #define TCPSTATES 50 #include <netinet/tcp_fsm.h> 51 #include <netinet/tcp_seq.h> 52 #include <netinet/toecore.h> 53 54 #include "common/common.h" 55 #include "common/t4_msg.h" 56 #include "common/t4_regs.h" 57 #include "common/t4_tcb.h" 58 #include "tom/t4_tom_l2t.h" 59 #include "tom/t4_tom.h" 60 61 VNET_DECLARE(int, tcp_do_autosndbuf); 62 #define V_tcp_do_autosndbuf VNET(tcp_do_autosndbuf) 63 VNET_DECLARE(int, tcp_autosndbuf_inc); 64 #define V_tcp_autosndbuf_inc VNET(tcp_autosndbuf_inc) 65 VNET_DECLARE(int, tcp_autosndbuf_max); 66 #define V_tcp_autosndbuf_max VNET(tcp_autosndbuf_max) 67 VNET_DECLARE(int, tcp_do_autorcvbuf); 68 #define V_tcp_do_autorcvbuf VNET(tcp_do_autorcvbuf) 69 VNET_DECLARE(int, tcp_autorcvbuf_inc); 70 #define V_tcp_autorcvbuf_inc VNET(tcp_autorcvbuf_inc) 71 VNET_DECLARE(int, tcp_autorcvbuf_max); 72 #define V_tcp_autorcvbuf_max VNET(tcp_autorcvbuf_max) 73 74 /* 75 * For ULP connections HW may add headers, e.g., for digests, that aren't part 76 * of the messages sent by the host but that are part of the TCP payload and 77 * therefore consume TCP sequence space. Tx connection parameters that 78 * operate in TCP sequence space are affected by the HW additions and need to 79 * compensate for them to accurately track TCP sequence numbers. This array 80 * contains the compensating extra lengths for ULP packets. It is indexed by 81 * a packet's ULP submode. 82 */ 83 const unsigned int t4_ulp_extra_len[] = {0, 4, 4, 8}; 84 85 /* 86 * Return the length of any HW additions that will be made to a Tx packet. 87 * Such additions can happen for some types of ULP packets. 88 */ 89 static inline unsigned int 90 ulp_extra_len(struct mbuf *m, int *ulp_mode) 91 { 92 struct m_tag *mtag; 93 94 if ((mtag = m_tag_find(m, CXGBE_ISCSI_MBUF_TAG, NULL)) == NULL) 95 return (0); 96 *ulp_mode = *((int *)(mtag + 1)); 97 98 return (t4_ulp_extra_len[*ulp_mode & 3]); 99 } 100 101 void 102 send_flowc_wr(struct toepcb *toep, struct flowc_tx_params *ftxp) 103 { 104 struct wrqe *wr; 105 struct fw_flowc_wr *flowc; 106 unsigned int nparams = ftxp ? 8 : 6, flowclen; 107 struct port_info *pi = toep->port; 108 struct adapter *sc = pi->adapter; 109 unsigned int pfvf = G_FW_VIID_PFN(pi->viid) << S_FW_VIID_PFN; 110 struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx]; 111 112 KASSERT(!(toep->flags & TPF_FLOWC_WR_SENT), 113 ("%s: flowc for tid %u sent already", __func__, toep->tid)); 114 115 flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval); 116 117 wr = alloc_wrqe(roundup2(flowclen, 16), toep->ofld_txq); 118 if (wr == NULL) { 119 /* XXX */ 120 panic("%s: allocation failure.", __func__); 121 } 122 flowc = wrtod(wr); 123 memset(flowc, 0, wr->wr_len); 124 125 flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) | 126 V_FW_FLOWC_WR_NPARAMS(nparams)); 127 flowc->flowid_len16 = htonl(V_FW_WR_LEN16(howmany(flowclen, 16)) | 128 V_FW_WR_FLOWID(toep->tid)); 129 130 flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN; 131 flowc->mnemval[0].val = htobe32(pfvf); 132 flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH; 133 flowc->mnemval[1].val = htobe32(pi->tx_chan); 134 flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT; 135 flowc->mnemval[2].val = htobe32(pi->tx_chan); 136 flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID; 137 flowc->mnemval[3].val = htobe32(toep->ofld_rxq->iq.abs_id); 138 if (ftxp) { 139 uint32_t sndbuf = min(ftxp->snd_space, sc->tt.sndbuf); 140 141 flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDNXT; 142 flowc->mnemval[4].val = htobe32(ftxp->snd_nxt); 143 flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_RCVNXT; 144 flowc->mnemval[5].val = htobe32(ftxp->rcv_nxt); 145 flowc->mnemval[6].mnemonic = FW_FLOWC_MNEM_SNDBUF; 146 flowc->mnemval[6].val = htobe32(sndbuf); 147 flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS; 148 flowc->mnemval[7].val = htobe32(ftxp->mss); 149 150 CTR6(KTR_CXGBE, 151 "%s: tid %u, mss %u, sndbuf %u, snd_nxt 0x%x, rcv_nxt 0x%x", 152 __func__, toep->tid, ftxp->mss, sndbuf, ftxp->snd_nxt, 153 ftxp->rcv_nxt); 154 } else { 155 flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDBUF; 156 flowc->mnemval[4].val = htobe32(512); 157 flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_MSS; 158 flowc->mnemval[5].val = htobe32(512); 159 160 CTR2(KTR_CXGBE, "%s: tid %u", __func__, toep->tid); 161 } 162 163 txsd->tx_credits = howmany(flowclen, 16); 164 txsd->plen = 0; 165 KASSERT(toep->tx_credits >= txsd->tx_credits && toep->txsd_avail > 0, 166 ("%s: not enough credits (%d)", __func__, toep->tx_credits)); 167 toep->tx_credits -= txsd->tx_credits; 168 if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) 169 toep->txsd_pidx = 0; 170 toep->txsd_avail--; 171 172 toep->flags |= TPF_FLOWC_WR_SENT; 173 t4_wrq_tx(sc, wr); 174 } 175 176 void 177 send_reset(struct adapter *sc, struct toepcb *toep, uint32_t snd_nxt) 178 { 179 struct wrqe *wr; 180 struct cpl_abort_req *req; 181 int tid = toep->tid; 182 struct inpcb *inp = toep->inp; 183 struct tcpcb *tp = intotcpcb(inp); /* don't use if INP_DROPPED */ 184 185 INP_WLOCK_ASSERT(inp); 186 187 CTR6(KTR_CXGBE, "%s: tid %d (%s), toep_flags 0x%x, inp_flags 0x%x%s", 188 __func__, toep->tid, 189 inp->inp_flags & INP_DROPPED ? "inp dropped" : 190 tcpstates[tp->t_state], 191 toep->flags, inp->inp_flags, 192 toep->flags & TPF_ABORT_SHUTDOWN ? 193 " (abort already in progress)" : ""); 194 195 if (toep->flags & TPF_ABORT_SHUTDOWN) 196 return; /* abort already in progress */ 197 198 toep->flags |= TPF_ABORT_SHUTDOWN; 199 200 KASSERT(toep->flags & TPF_FLOWC_WR_SENT, 201 ("%s: flowc_wr not sent for tid %d.", __func__, tid)); 202 203 wr = alloc_wrqe(sizeof(*req), toep->ofld_txq); 204 if (wr == NULL) { 205 /* XXX */ 206 panic("%s: allocation failure.", __func__); 207 } 208 req = wrtod(wr); 209 210 INIT_TP_WR_MIT_CPL(req, CPL_ABORT_REQ, tid); 211 if (inp->inp_flags & INP_DROPPED) 212 req->rsvd0 = htobe32(snd_nxt); 213 else 214 req->rsvd0 = htobe32(tp->snd_nxt); 215 req->rsvd1 = !(toep->flags & TPF_TX_DATA_SENT); 216 req->cmd = CPL_ABORT_SEND_RST; 217 218 /* 219 * XXX: What's the correct way to tell that the inp hasn't been detached 220 * from its socket? Should I even be flushing the snd buffer here? 221 */ 222 if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) == 0) { 223 struct socket *so = inp->inp_socket; 224 225 if (so != NULL) /* because I'm not sure. See comment above */ 226 sbflush(&so->so_snd); 227 } 228 229 t4_l2t_send(sc, wr, toep->l2te); 230 } 231 232 /* 233 * Called when a connection is established to translate the TCP options 234 * reported by HW to FreeBSD's native format. 235 */ 236 static void 237 assign_rxopt(struct tcpcb *tp, unsigned int opt) 238 { 239 struct toepcb *toep = tp->t_toe; 240 struct inpcb *inp = tp->t_inpcb; 241 struct adapter *sc = td_adapter(toep->td); 242 int n; 243 244 INP_LOCK_ASSERT(inp); 245 246 if (inp->inp_inc.inc_flags & INC_ISIPV6) 247 n = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); 248 else 249 n = sizeof(struct ip) + sizeof(struct tcphdr); 250 tp->t_maxseg = tp->t_maxopd = sc->params.mtus[G_TCPOPT_MSS(opt)] - n; 251 252 CTR4(KTR_CXGBE, "%s: tid %d, mtu_idx %u (%u)", __func__, toep->tid, 253 G_TCPOPT_MSS(opt), sc->params.mtus[G_TCPOPT_MSS(opt)]); 254 255 if (G_TCPOPT_TSTAMP(opt)) { 256 tp->t_flags |= TF_RCVD_TSTMP; /* timestamps ok */ 257 tp->ts_recent = 0; /* hmmm */ 258 tp->ts_recent_age = tcp_ts_getticks(); 259 tp->t_maxseg -= TCPOLEN_TSTAMP_APPA; 260 } 261 262 if (G_TCPOPT_SACK(opt)) 263 tp->t_flags |= TF_SACK_PERMIT; /* should already be set */ 264 else 265 tp->t_flags &= ~TF_SACK_PERMIT; /* sack disallowed by peer */ 266 267 if (G_TCPOPT_WSCALE_OK(opt)) 268 tp->t_flags |= TF_RCVD_SCALE; 269 270 /* Doing window scaling? */ 271 if ((tp->t_flags & (TF_RCVD_SCALE | TF_REQ_SCALE)) == 272 (TF_RCVD_SCALE | TF_REQ_SCALE)) { 273 tp->rcv_scale = tp->request_r_scale; 274 tp->snd_scale = G_TCPOPT_SND_WSCALE(opt); 275 } 276 } 277 278 /* 279 * Completes some final bits of initialization for just established connections 280 * and changes their state to TCPS_ESTABLISHED. 281 * 282 * The ISNs are from after the exchange of SYNs. i.e., the true ISN + 1. 283 */ 284 void 285 make_established(struct toepcb *toep, uint32_t snd_isn, uint32_t rcv_isn, 286 uint16_t opt) 287 { 288 struct inpcb *inp = toep->inp; 289 struct socket *so = inp->inp_socket; 290 struct tcpcb *tp = intotcpcb(inp); 291 long bufsize; 292 uint32_t iss = be32toh(snd_isn) - 1; /* true ISS */ 293 uint32_t irs = be32toh(rcv_isn) - 1; /* true IRS */ 294 uint16_t tcpopt = be16toh(opt); 295 struct flowc_tx_params ftxp; 296 297 INP_WLOCK_ASSERT(inp); 298 KASSERT(tp->t_state == TCPS_SYN_SENT || 299 tp->t_state == TCPS_SYN_RECEIVED, 300 ("%s: TCP state %s", __func__, tcpstates[tp->t_state])); 301 302 CTR4(KTR_CXGBE, "%s: tid %d, toep %p, inp %p", 303 __func__, toep->tid, toep, inp); 304 305 tp->t_state = TCPS_ESTABLISHED; 306 tp->t_starttime = ticks; 307 TCPSTAT_INC(tcps_connects); 308 309 tp->irs = irs; 310 tcp_rcvseqinit(tp); 311 tp->rcv_wnd = toep->rx_credits << 10; 312 tp->rcv_adv += tp->rcv_wnd; 313 tp->last_ack_sent = tp->rcv_nxt; 314 315 /* 316 * If we were unable to send all rx credits via opt0, save the remainder 317 * in rx_credits so that they can be handed over with the next credit 318 * update. 319 */ 320 SOCKBUF_LOCK(&so->so_rcv); 321 bufsize = select_rcv_wnd(so); 322 SOCKBUF_UNLOCK(&so->so_rcv); 323 toep->rx_credits = bufsize - tp->rcv_wnd; 324 325 tp->iss = iss; 326 tcp_sendseqinit(tp); 327 tp->snd_una = iss + 1; 328 tp->snd_nxt = iss + 1; 329 tp->snd_max = iss + 1; 330 331 assign_rxopt(tp, tcpopt); 332 333 SOCKBUF_LOCK(&so->so_snd); 334 if (so->so_snd.sb_flags & SB_AUTOSIZE && V_tcp_do_autosndbuf) 335 bufsize = V_tcp_autosndbuf_max; 336 else 337 bufsize = sbspace(&so->so_snd); 338 SOCKBUF_UNLOCK(&so->so_snd); 339 340 ftxp.snd_nxt = tp->snd_nxt; 341 ftxp.rcv_nxt = tp->rcv_nxt; 342 ftxp.snd_space = bufsize; 343 ftxp.mss = tp->t_maxseg; 344 send_flowc_wr(toep, &ftxp); 345 346 soisconnected(so); 347 } 348 349 static int 350 send_rx_credits(struct adapter *sc, struct toepcb *toep, int credits) 351 { 352 struct wrqe *wr; 353 struct cpl_rx_data_ack *req; 354 uint32_t dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1); 355 356 KASSERT(credits >= 0, ("%s: %d credits", __func__, credits)); 357 358 wr = alloc_wrqe(sizeof(*req), toep->ctrlq); 359 if (wr == NULL) 360 return (0); 361 req = wrtod(wr); 362 363 INIT_TP_WR_MIT_CPL(req, CPL_RX_DATA_ACK, toep->tid); 364 req->credit_dack = htobe32(dack | V_RX_CREDITS(credits)); 365 366 t4_wrq_tx(sc, wr); 367 return (credits); 368 } 369 370 void 371 t4_rcvd(struct toedev *tod, struct tcpcb *tp) 372 { 373 struct adapter *sc = tod->tod_softc; 374 struct inpcb *inp = tp->t_inpcb; 375 struct socket *so = inp->inp_socket; 376 struct sockbuf *sb = &so->so_rcv; 377 struct toepcb *toep = tp->t_toe; 378 int credits; 379 380 INP_WLOCK_ASSERT(inp); 381 382 SOCKBUF_LOCK(sb); 383 KASSERT(toep->sb_cc >= sbused(sb), 384 ("%s: sb %p has more data (%d) than last time (%d).", 385 __func__, sb, sbused(sb), toep->sb_cc)); 386 if (toep->ulp_mode == ULP_MODE_ISCSI) { 387 toep->rx_credits += toep->sb_cc; 388 toep->sb_cc = 0; 389 } else { 390 toep->rx_credits += toep->sb_cc - sbused(sb); 391 toep->sb_cc = sbused(sb); 392 } 393 if (toep->rx_credits > 0 && 394 (tp->rcv_wnd <= 32 * 1024 || toep->rx_credits >= 64 * 1024 || 395 (toep->rx_credits >= 16 * 1024 && tp->rcv_wnd <= 128 * 1024) || 396 toep->sb_cc + tp->rcv_wnd < sb->sb_lowat)) { 397 398 credits = send_rx_credits(sc, toep, toep->rx_credits); 399 toep->rx_credits -= credits; 400 tp->rcv_wnd += credits; 401 tp->rcv_adv += credits; 402 } 403 SOCKBUF_UNLOCK(sb); 404 } 405 406 /* 407 * Close a connection by sending a CPL_CLOSE_CON_REQ message. 408 */ 409 static int 410 close_conn(struct adapter *sc, struct toepcb *toep) 411 { 412 struct wrqe *wr; 413 struct cpl_close_con_req *req; 414 unsigned int tid = toep->tid; 415 416 CTR3(KTR_CXGBE, "%s: tid %u%s", __func__, toep->tid, 417 toep->flags & TPF_FIN_SENT ? ", IGNORED" : ""); 418 419 if (toep->flags & TPF_FIN_SENT) 420 return (0); 421 422 KASSERT(toep->flags & TPF_FLOWC_WR_SENT, 423 ("%s: flowc_wr not sent for tid %u.", __func__, tid)); 424 425 wr = alloc_wrqe(sizeof(*req), toep->ofld_txq); 426 if (wr == NULL) { 427 /* XXX */ 428 panic("%s: allocation failure.", __func__); 429 } 430 req = wrtod(wr); 431 432 req->wr.wr_hi = htonl(V_FW_WR_OP(FW_TP_WR) | 433 V_FW_WR_IMMDLEN(sizeof(*req) - sizeof(req->wr))); 434 req->wr.wr_mid = htonl(V_FW_WR_LEN16(howmany(sizeof(*req), 16)) | 435 V_FW_WR_FLOWID(tid)); 436 req->wr.wr_lo = cpu_to_be64(0); 437 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid)); 438 req->rsvd = 0; 439 440 toep->flags |= TPF_FIN_SENT; 441 toep->flags &= ~TPF_SEND_FIN; 442 t4_l2t_send(sc, wr, toep->l2te); 443 444 return (0); 445 } 446 447 #define MAX_OFLD_TX_CREDITS (SGE_MAX_WR_LEN / 16) 448 #define MIN_OFLD_TX_CREDITS (howmany(sizeof(struct fw_ofld_tx_data_wr) + 1, 16)) 449 450 /* Maximum amount of immediate data we could stuff in a WR */ 451 static inline int 452 max_imm_payload(int tx_credits) 453 { 454 const int n = 2; /* Use only up to 2 desc for imm. data WR */ 455 456 KASSERT(tx_credits >= 0 && 457 tx_credits <= MAX_OFLD_TX_CREDITS, 458 ("%s: %d credits", __func__, tx_credits)); 459 460 if (tx_credits < MIN_OFLD_TX_CREDITS) 461 return (0); 462 463 if (tx_credits >= (n * EQ_ESIZE) / 16) 464 return ((n * EQ_ESIZE) - sizeof(struct fw_ofld_tx_data_wr)); 465 else 466 return (tx_credits * 16 - sizeof(struct fw_ofld_tx_data_wr)); 467 } 468 469 /* Maximum number of SGL entries we could stuff in a WR */ 470 static inline int 471 max_dsgl_nsegs(int tx_credits) 472 { 473 int nseg = 1; /* ulptx_sgl has room for 1, rest ulp_tx_sge_pair */ 474 int sge_pair_credits = tx_credits - MIN_OFLD_TX_CREDITS; 475 476 KASSERT(tx_credits >= 0 && 477 tx_credits <= MAX_OFLD_TX_CREDITS, 478 ("%s: %d credits", __func__, tx_credits)); 479 480 if (tx_credits < MIN_OFLD_TX_CREDITS) 481 return (0); 482 483 nseg += 2 * (sge_pair_credits * 16 / 24); 484 if ((sge_pair_credits * 16) % 24 == 16) 485 nseg++; 486 487 return (nseg); 488 } 489 490 static inline void 491 write_tx_wr(void *dst, struct toepcb *toep, unsigned int immdlen, 492 unsigned int plen, uint8_t credits, int shove, int ulp_mode, int txalign) 493 { 494 struct fw_ofld_tx_data_wr *txwr = dst; 495 unsigned int wr_ulp_mode; 496 497 txwr->op_to_immdlen = htobe32(V_WR_OP(FW_OFLD_TX_DATA_WR) | 498 V_FW_WR_IMMDLEN(immdlen)); 499 txwr->flowid_len16 = htobe32(V_FW_WR_FLOWID(toep->tid) | 500 V_FW_WR_LEN16(credits)); 501 502 /* for iscsi, the mode & submode setting is per-packet */ 503 if (toep->ulp_mode == ULP_MODE_ISCSI) 504 wr_ulp_mode = V_TX_ULP_MODE(ulp_mode >> 4) | 505 V_TX_ULP_SUBMODE(ulp_mode & 3); 506 else 507 wr_ulp_mode = V_TX_ULP_MODE(toep->ulp_mode); 508 509 txwr->lsodisable_to_flags = htobe32(wr_ulp_mode | V_TX_URG(0) | /*XXX*/ 510 V_TX_SHOVE(shove)); 511 txwr->plen = htobe32(plen); 512 513 if (txalign > 0) { 514 struct tcpcb *tp = intotcpcb(toep->inp); 515 516 if (plen < 2 * tp->t_maxseg || is_10G_port(toep->port)) 517 txwr->lsodisable_to_flags |= 518 htobe32(F_FW_OFLD_TX_DATA_WR_LSODISABLE); 519 else 520 txwr->lsodisable_to_flags |= 521 htobe32(F_FW_OFLD_TX_DATA_WR_ALIGNPLD | 522 (tp->t_flags & TF_NODELAY ? 0 : 523 F_FW_OFLD_TX_DATA_WR_ALIGNPLDSHOVE)); 524 } 525 } 526 527 /* 528 * Generate a DSGL from a starting mbuf. The total number of segments and the 529 * maximum segments in any one mbuf are provided. 530 */ 531 static void 532 write_tx_sgl(void *dst, struct mbuf *start, struct mbuf *stop, int nsegs, int n) 533 { 534 struct mbuf *m; 535 struct ulptx_sgl *usgl = dst; 536 int i, j, rc; 537 struct sglist sg; 538 struct sglist_seg segs[n]; 539 540 KASSERT(nsegs > 0, ("%s: nsegs 0", __func__)); 541 542 sglist_init(&sg, n, segs); 543 usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) | 544 V_ULPTX_NSGE(nsegs)); 545 546 i = -1; 547 for (m = start; m != stop; m = m->m_next) { 548 rc = sglist_append(&sg, mtod(m, void *), m->m_len); 549 if (__predict_false(rc != 0)) 550 panic("%s: sglist_append %d", __func__, rc); 551 552 for (j = 0; j < sg.sg_nseg; i++, j++) { 553 if (i < 0) { 554 usgl->len0 = htobe32(segs[j].ss_len); 555 usgl->addr0 = htobe64(segs[j].ss_paddr); 556 } else { 557 usgl->sge[i / 2].len[i & 1] = 558 htobe32(segs[j].ss_len); 559 usgl->sge[i / 2].addr[i & 1] = 560 htobe64(segs[j].ss_paddr); 561 } 562 #ifdef INVARIANTS 563 nsegs--; 564 #endif 565 } 566 sglist_reset(&sg); 567 } 568 if (i & 1) 569 usgl->sge[i / 2].len[1] = htobe32(0); 570 KASSERT(nsegs == 0, ("%s: nsegs %d, start %p, stop %p", 571 __func__, nsegs, start, stop)); 572 } 573 574 /* 575 * Max number of SGL entries an offload tx work request can have. This is 41 576 * (1 + 40) for a full 512B work request. 577 * fw_ofld_tx_data_wr(16B) + ulptx_sgl(16B, 1) + ulptx_sge_pair(480B, 40) 578 */ 579 #define OFLD_SGL_LEN (41) 580 581 /* 582 * Send data and/or a FIN to the peer. 583 * 584 * The socket's so_snd buffer consists of a stream of data starting with sb_mb 585 * and linked together with m_next. sb_sndptr, if set, is the last mbuf that 586 * was transmitted. 587 * 588 * drop indicates the number of bytes that should be dropped from the head of 589 * the send buffer. It is an optimization that lets do_fw4_ack avoid creating 590 * contention on the send buffer lock (before this change it used to do 591 * sowwakeup and then t4_push_frames right after that when recovering from tx 592 * stalls). When drop is set this function MUST drop the bytes and wake up any 593 * writers. 594 */ 595 void 596 t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop) 597 { 598 struct mbuf *sndptr, *m, *sb_sndptr; 599 struct fw_ofld_tx_data_wr *txwr; 600 struct wrqe *wr; 601 u_int plen, nsegs, credits, max_imm, max_nsegs, max_nsegs_1mbuf; 602 struct inpcb *inp = toep->inp; 603 struct tcpcb *tp = intotcpcb(inp); 604 struct socket *so = inp->inp_socket; 605 struct sockbuf *sb = &so->so_snd; 606 int tx_credits, shove, compl, space, sowwakeup; 607 struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx]; 608 609 INP_WLOCK_ASSERT(inp); 610 KASSERT(toep->flags & TPF_FLOWC_WR_SENT, 611 ("%s: flowc_wr not sent for tid %u.", __func__, toep->tid)); 612 613 KASSERT(toep->ulp_mode == ULP_MODE_NONE || 614 toep->ulp_mode == ULP_MODE_TCPDDP || 615 toep->ulp_mode == ULP_MODE_RDMA, 616 ("%s: ulp_mode %u for toep %p", __func__, toep->ulp_mode, toep)); 617 618 /* 619 * This function doesn't resume by itself. Someone else must clear the 620 * flag and call this function. 621 */ 622 if (__predict_false(toep->flags & TPF_TX_SUSPENDED)) { 623 KASSERT(drop == 0, 624 ("%s: drop (%d) != 0 but tx is suspended", __func__, drop)); 625 return; 626 } 627 628 do { 629 tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS); 630 max_imm = max_imm_payload(tx_credits); 631 max_nsegs = max_dsgl_nsegs(tx_credits); 632 633 SOCKBUF_LOCK(sb); 634 sowwakeup = drop; 635 if (drop) { 636 sbdrop_locked(sb, drop); 637 drop = 0; 638 } 639 sb_sndptr = sb->sb_sndptr; 640 sndptr = sb_sndptr ? sb_sndptr->m_next : sb->sb_mb; 641 plen = 0; 642 nsegs = 0; 643 max_nsegs_1mbuf = 0; /* max # of SGL segments in any one mbuf */ 644 for (m = sndptr; m != NULL; m = m->m_next) { 645 int n = sglist_count(mtod(m, void *), m->m_len); 646 647 nsegs += n; 648 plen += m->m_len; 649 650 /* This mbuf sent us _over_ the nsegs limit, back out */ 651 if (plen > max_imm && nsegs > max_nsegs) { 652 nsegs -= n; 653 plen -= m->m_len; 654 if (plen == 0) { 655 /* Too few credits */ 656 toep->flags |= TPF_TX_SUSPENDED; 657 if (sowwakeup) 658 sowwakeup_locked(so); 659 else 660 SOCKBUF_UNLOCK(sb); 661 SOCKBUF_UNLOCK_ASSERT(sb); 662 return; 663 } 664 break; 665 } 666 667 if (max_nsegs_1mbuf < n) 668 max_nsegs_1mbuf = n; 669 sb_sndptr = m; /* new sb->sb_sndptr if all goes well */ 670 671 /* This mbuf put us right at the max_nsegs limit */ 672 if (plen > max_imm && nsegs == max_nsegs) { 673 m = m->m_next; 674 break; 675 } 676 } 677 678 space = sbspace(sb); 679 680 if (space <= sb->sb_hiwat * 3 / 8 && 681 toep->plen_nocompl + plen >= sb->sb_hiwat / 4) 682 compl = 1; 683 else 684 compl = 0; 685 686 if (sb->sb_flags & SB_AUTOSIZE && 687 V_tcp_do_autosndbuf && 688 sb->sb_hiwat < V_tcp_autosndbuf_max && 689 space < sb->sb_hiwat / 8) { 690 int newsize = min(sb->sb_hiwat + V_tcp_autosndbuf_inc, 691 V_tcp_autosndbuf_max); 692 693 if (!sbreserve_locked(sb, newsize, so, NULL)) 694 sb->sb_flags &= ~SB_AUTOSIZE; 695 else 696 sowwakeup = 1; /* room available */ 697 } 698 if (sowwakeup) 699 sowwakeup_locked(so); 700 else 701 SOCKBUF_UNLOCK(sb); 702 SOCKBUF_UNLOCK_ASSERT(sb); 703 704 /* nothing to send */ 705 if (plen == 0) { 706 KASSERT(m == NULL, 707 ("%s: nothing to send, but m != NULL", __func__)); 708 break; 709 } 710 711 if (__predict_false(toep->flags & TPF_FIN_SENT)) 712 panic("%s: excess tx.", __func__); 713 714 shove = m == NULL && !(tp->t_flags & TF_MORETOCOME); 715 if (plen <= max_imm) { 716 717 /* Immediate data tx */ 718 719 wr = alloc_wrqe(roundup2(sizeof(*txwr) + plen, 16), 720 toep->ofld_txq); 721 if (wr == NULL) { 722 /* XXX: how will we recover from this? */ 723 toep->flags |= TPF_TX_SUSPENDED; 724 return; 725 } 726 txwr = wrtod(wr); 727 credits = howmany(wr->wr_len, 16); 728 write_tx_wr(txwr, toep, plen, plen, credits, shove, 0, 729 sc->tt.tx_align); 730 m_copydata(sndptr, 0, plen, (void *)(txwr + 1)); 731 nsegs = 0; 732 } else { 733 int wr_len; 734 735 /* DSGL tx */ 736 737 wr_len = sizeof(*txwr) + sizeof(struct ulptx_sgl) + 738 ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8; 739 wr = alloc_wrqe(roundup2(wr_len, 16), toep->ofld_txq); 740 if (wr == NULL) { 741 /* XXX: how will we recover from this? */ 742 toep->flags |= TPF_TX_SUSPENDED; 743 return; 744 } 745 txwr = wrtod(wr); 746 credits = howmany(wr_len, 16); 747 write_tx_wr(txwr, toep, 0, plen, credits, shove, 0, 748 sc->tt.tx_align); 749 write_tx_sgl(txwr + 1, sndptr, m, nsegs, 750 max_nsegs_1mbuf); 751 if (wr_len & 0xf) { 752 uint64_t *pad = (uint64_t *) 753 ((uintptr_t)txwr + wr_len); 754 *pad = 0; 755 } 756 } 757 758 KASSERT(toep->tx_credits >= credits, 759 ("%s: not enough credits", __func__)); 760 761 toep->tx_credits -= credits; 762 toep->tx_nocompl += credits; 763 toep->plen_nocompl += plen; 764 if (toep->tx_credits <= toep->tx_total * 3 / 8 && 765 toep->tx_nocompl >= toep->tx_total / 4) 766 compl = 1; 767 768 if (compl || toep->ulp_mode == ULP_MODE_RDMA) { 769 txwr->op_to_immdlen |= htobe32(F_FW_WR_COMPL); 770 toep->tx_nocompl = 0; 771 toep->plen_nocompl = 0; 772 } 773 774 tp->snd_nxt += plen; 775 tp->snd_max += plen; 776 777 SOCKBUF_LOCK(sb); 778 KASSERT(sb_sndptr, ("%s: sb_sndptr is NULL", __func__)); 779 sb->sb_sndptr = sb_sndptr; 780 SOCKBUF_UNLOCK(sb); 781 782 toep->flags |= TPF_TX_DATA_SENT; 783 if (toep->tx_credits < MIN_OFLD_TX_CREDITS) 784 toep->flags |= TPF_TX_SUSPENDED; 785 786 KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__)); 787 txsd->plen = plen; 788 txsd->tx_credits = credits; 789 txsd++; 790 if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) { 791 toep->txsd_pidx = 0; 792 txsd = &toep->txsd[0]; 793 } 794 toep->txsd_avail--; 795 796 t4_l2t_send(sc, wr, toep->l2te); 797 } while (m != NULL); 798 799 /* Send a FIN if requested, but only if there's no more data to send */ 800 if (m == NULL && toep->flags & TPF_SEND_FIN) 801 close_conn(sc, toep); 802 } 803 804 /* Send ULP data over TOE using TX_DATA_WR. We send whole mbuf at once */ 805 void 806 t4_ulp_push_frames(struct adapter *sc, struct toepcb *toep, int drop) 807 { 808 struct mbuf *sndptr, *m = NULL; 809 struct fw_ofld_tx_data_wr *txwr; 810 struct wrqe *wr; 811 unsigned int plen, nsegs, credits, max_imm, max_nsegs, max_nsegs_1mbuf; 812 struct inpcb *inp = toep->inp; 813 struct tcpcb *tp; 814 struct socket *so; 815 struct sockbuf *sb; 816 int tx_credits, ulp_len = 0, ulp_mode = 0, qlen = 0; 817 int shove, compl; 818 struct ofld_tx_sdesc *txsd; 819 820 INP_WLOCK_ASSERT(inp); 821 if (toep->flags & TPF_ABORT_SHUTDOWN) 822 return; 823 824 tp = intotcpcb(inp); 825 so = inp->inp_socket; 826 sb = &so->so_snd; 827 txsd = &toep->txsd[toep->txsd_pidx]; 828 829 KASSERT(toep->flags & TPF_FLOWC_WR_SENT, 830 ("%s: flowc_wr not sent for tid %u.", __func__, toep->tid)); 831 832 /* 833 * This function doesn't resume by itself. Someone else must clear the 834 * flag and call this function. 835 */ 836 if (__predict_false(toep->flags & TPF_TX_SUSPENDED)) 837 return; 838 839 sndptr = t4_queue_iscsi_callback(so, toep, 1, &qlen); 840 if (!qlen) 841 return; 842 843 do { 844 tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS); 845 max_imm = max_imm_payload(tx_credits); 846 max_nsegs = max_dsgl_nsegs(tx_credits); 847 848 if (drop) { 849 t4_cpl_iscsi_callback(toep->td, toep, &drop, 850 CPL_FW4_ACK); 851 drop = 0; 852 } 853 854 plen = 0; 855 nsegs = 0; 856 max_nsegs_1mbuf = 0; /* max # of SGL segments in any one mbuf */ 857 for (m = sndptr; m != NULL; m = m->m_next) { 858 int n = sglist_count(mtod(m, void *), m->m_len); 859 860 nsegs += n; 861 plen += m->m_len; 862 863 /* This mbuf sent us _over_ the nsegs limit, return */ 864 if (plen > max_imm && nsegs > max_nsegs) { 865 toep->flags |= TPF_TX_SUSPENDED; 866 return; 867 } 868 869 if (max_nsegs_1mbuf < n) 870 max_nsegs_1mbuf = n; 871 872 /* This mbuf put us right at the max_nsegs limit */ 873 if (plen > max_imm && nsegs == max_nsegs) { 874 toep->flags |= TPF_TX_SUSPENDED; 875 return; 876 } 877 } 878 879 shove = m == NULL && !(tp->t_flags & TF_MORETOCOME); 880 /* nothing to send */ 881 if (plen == 0) { 882 KASSERT(m == NULL, 883 ("%s: nothing to send, but m != NULL", __func__)); 884 break; 885 } 886 887 if (__predict_false(toep->flags & TPF_FIN_SENT)) 888 panic("%s: excess tx.", __func__); 889 890 ulp_len = plen + ulp_extra_len(sndptr, &ulp_mode); 891 if (plen <= max_imm) { 892 893 /* Immediate data tx */ 894 wr = alloc_wrqe(roundup(sizeof(*txwr) + plen, 16), 895 toep->ofld_txq); 896 if (wr == NULL) { 897 /* XXX: how will we recover from this? */ 898 toep->flags |= TPF_TX_SUSPENDED; 899 return; 900 } 901 txwr = wrtod(wr); 902 credits = howmany(wr->wr_len, 16); 903 write_tx_wr(txwr, toep, plen, ulp_len, credits, shove, 904 ulp_mode, 0); 905 m_copydata(sndptr, 0, plen, (void *)(txwr + 1)); 906 } else { 907 int wr_len; 908 909 /* DSGL tx */ 910 wr_len = sizeof(*txwr) + sizeof(struct ulptx_sgl) + 911 ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8; 912 wr = alloc_wrqe(roundup(wr_len, 16), toep->ofld_txq); 913 if (wr == NULL) { 914 /* XXX: how will we recover from this? */ 915 toep->flags |= TPF_TX_SUSPENDED; 916 return; 917 } 918 txwr = wrtod(wr); 919 credits = howmany(wr_len, 16); 920 write_tx_wr(txwr, toep, 0, ulp_len, credits, shove, 921 ulp_mode, 0); 922 write_tx_sgl(txwr + 1, sndptr, m, nsegs, 923 max_nsegs_1mbuf); 924 if (wr_len & 0xf) { 925 uint64_t *pad = (uint64_t *) 926 ((uintptr_t)txwr + wr_len); 927 *pad = 0; 928 } 929 } 930 931 KASSERT(toep->tx_credits >= credits, 932 ("%s: not enough credits", __func__)); 933 934 toep->tx_credits -= credits; 935 toep->tx_nocompl += credits; 936 toep->plen_nocompl += plen; 937 if (toep->tx_credits <= toep->tx_total * 3 / 8 && 938 toep->tx_nocompl >= toep->tx_total / 4) 939 compl = 1; 940 941 if (compl) { 942 txwr->op_to_immdlen |= htobe32(F_FW_WR_COMPL); 943 toep->tx_nocompl = 0; 944 toep->plen_nocompl = 0; 945 } 946 tp->snd_nxt += ulp_len; 947 tp->snd_max += ulp_len; 948 949 /* goto next mbuf */ 950 sndptr = m = t4_queue_iscsi_callback(so, toep, 2, &qlen); 951 952 toep->flags |= TPF_TX_DATA_SENT; 953 if (toep->tx_credits < MIN_OFLD_TX_CREDITS) { 954 toep->flags |= TPF_TX_SUSPENDED; 955 } 956 957 KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__)); 958 txsd->plen = plen; 959 txsd->tx_credits = credits; 960 txsd++; 961 if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) { 962 toep->txsd_pidx = 0; 963 txsd = &toep->txsd[0]; 964 } 965 toep->txsd_avail--; 966 967 t4_l2t_send(sc, wr, toep->l2te); 968 } while (m != NULL); 969 970 /* Send a FIN if requested, but only if there's no more data to send */ 971 if (m == NULL && toep->flags & TPF_SEND_FIN) 972 close_conn(sc, toep); 973 } 974 975 int 976 t4_tod_output(struct toedev *tod, struct tcpcb *tp) 977 { 978 struct adapter *sc = tod->tod_softc; 979 #ifdef INVARIANTS 980 struct inpcb *inp = tp->t_inpcb; 981 #endif 982 struct toepcb *toep = tp->t_toe; 983 984 INP_WLOCK_ASSERT(inp); 985 KASSERT((inp->inp_flags & INP_DROPPED) == 0, 986 ("%s: inp %p dropped.", __func__, inp)); 987 KASSERT(toep != NULL, ("%s: toep is NULL", __func__)); 988 989 t4_push_frames(sc, toep, 0); 990 991 return (0); 992 } 993 994 int 995 t4_send_fin(struct toedev *tod, struct tcpcb *tp) 996 { 997 struct adapter *sc = tod->tod_softc; 998 #ifdef INVARIANTS 999 struct inpcb *inp = tp->t_inpcb; 1000 #endif 1001 struct toepcb *toep = tp->t_toe; 1002 1003 INP_WLOCK_ASSERT(inp); 1004 KASSERT((inp->inp_flags & INP_DROPPED) == 0, 1005 ("%s: inp %p dropped.", __func__, inp)); 1006 KASSERT(toep != NULL, ("%s: toep is NULL", __func__)); 1007 1008 toep->flags |= TPF_SEND_FIN; 1009 if (tp->t_state >= TCPS_ESTABLISHED) { 1010 if (toep->ulp_mode == ULP_MODE_ISCSI) 1011 t4_ulp_push_frames(sc, toep, 0); 1012 else 1013 t4_push_frames(sc, toep, 0); 1014 } 1015 1016 return (0); 1017 } 1018 1019 int 1020 t4_send_rst(struct toedev *tod, struct tcpcb *tp) 1021 { 1022 struct adapter *sc = tod->tod_softc; 1023 #if defined(INVARIANTS) 1024 struct inpcb *inp = tp->t_inpcb; 1025 #endif 1026 struct toepcb *toep = tp->t_toe; 1027 1028 INP_WLOCK_ASSERT(inp); 1029 KASSERT((inp->inp_flags & INP_DROPPED) == 0, 1030 ("%s: inp %p dropped.", __func__, inp)); 1031 KASSERT(toep != NULL, ("%s: toep is NULL", __func__)); 1032 1033 /* hmmmm */ 1034 KASSERT(toep->flags & TPF_FLOWC_WR_SENT, 1035 ("%s: flowc for tid %u [%s] not sent already", 1036 __func__, toep->tid, tcpstates[tp->t_state])); 1037 1038 send_reset(sc, toep, 0); 1039 return (0); 1040 } 1041 1042 /* 1043 * Peer has sent us a FIN. 1044 */ 1045 static int 1046 do_peer_close(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) 1047 { 1048 struct adapter *sc = iq->adapter; 1049 const struct cpl_peer_close *cpl = (const void *)(rss + 1); 1050 unsigned int tid = GET_TID(cpl); 1051 struct toepcb *toep = lookup_tid(sc, tid); 1052 struct inpcb *inp = toep->inp; 1053 struct tcpcb *tp = NULL; 1054 struct socket *so; 1055 struct sockbuf *sb; 1056 #ifdef INVARIANTS 1057 unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); 1058 #endif 1059 1060 KASSERT(opcode == CPL_PEER_CLOSE, 1061 ("%s: unexpected opcode 0x%x", __func__, opcode)); 1062 KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); 1063 1064 if (__predict_false(toep->flags & TPF_SYNQE)) { 1065 #ifdef INVARIANTS 1066 struct synq_entry *synqe = (void *)toep; 1067 1068 INP_WLOCK(synqe->lctx->inp); 1069 if (synqe->flags & TPF_SYNQE_HAS_L2TE) { 1070 KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN, 1071 ("%s: listen socket closed but tid %u not aborted.", 1072 __func__, tid)); 1073 } else { 1074 /* 1075 * do_pass_accept_req is still running and will 1076 * eventually take care of this tid. 1077 */ 1078 } 1079 INP_WUNLOCK(synqe->lctx->inp); 1080 #endif 1081 CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid, 1082 toep, toep->flags); 1083 return (0); 1084 } 1085 1086 KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); 1087 1088 INP_INFO_RLOCK(&V_tcbinfo); 1089 INP_WLOCK(inp); 1090 tp = intotcpcb(inp); 1091 1092 CTR5(KTR_CXGBE, "%s: tid %u (%s), toep_flags 0x%x, inp %p", __func__, 1093 tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags, inp); 1094 1095 if (toep->flags & TPF_ABORT_SHUTDOWN) 1096 goto done; 1097 1098 tp->rcv_nxt++; /* FIN */ 1099 1100 so = inp->inp_socket; 1101 sb = &so->so_rcv; 1102 SOCKBUF_LOCK(sb); 1103 if (__predict_false(toep->ddp_flags & (DDP_BUF0_ACTIVE | DDP_BUF1_ACTIVE))) { 1104 handle_ddp_close(toep, tp, sb, cpl->rcv_nxt); 1105 } 1106 socantrcvmore_locked(so); /* unlocks the sockbuf */ 1107 1108 if (toep->ulp_mode != ULP_MODE_RDMA) { 1109 KASSERT(tp->rcv_nxt == be32toh(cpl->rcv_nxt), 1110 ("%s: rcv_nxt mismatch: %u %u", __func__, tp->rcv_nxt, 1111 be32toh(cpl->rcv_nxt))); 1112 } 1113 1114 switch (tp->t_state) { 1115 case TCPS_SYN_RECEIVED: 1116 tp->t_starttime = ticks; 1117 /* FALLTHROUGH */ 1118 1119 case TCPS_ESTABLISHED: 1120 tp->t_state = TCPS_CLOSE_WAIT; 1121 break; 1122 1123 case TCPS_FIN_WAIT_1: 1124 tp->t_state = TCPS_CLOSING; 1125 break; 1126 1127 case TCPS_FIN_WAIT_2: 1128 tcp_twstart(tp); 1129 INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */ 1130 INP_INFO_RUNLOCK(&V_tcbinfo); 1131 1132 INP_WLOCK(inp); 1133 final_cpl_received(toep); 1134 return (0); 1135 1136 default: 1137 log(LOG_ERR, "%s: TID %u received CPL_PEER_CLOSE in state %d\n", 1138 __func__, tid, tp->t_state); 1139 } 1140 done: 1141 INP_WUNLOCK(inp); 1142 INP_INFO_RUNLOCK(&V_tcbinfo); 1143 return (0); 1144 } 1145 1146 /* 1147 * Peer has ACK'd our FIN. 1148 */ 1149 static int 1150 do_close_con_rpl(struct sge_iq *iq, const struct rss_header *rss, 1151 struct mbuf *m) 1152 { 1153 struct adapter *sc = iq->adapter; 1154 const struct cpl_close_con_rpl *cpl = (const void *)(rss + 1); 1155 unsigned int tid = GET_TID(cpl); 1156 struct toepcb *toep = lookup_tid(sc, tid); 1157 struct inpcb *inp = toep->inp; 1158 struct tcpcb *tp = NULL; 1159 struct socket *so = NULL; 1160 #ifdef INVARIANTS 1161 unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); 1162 #endif 1163 1164 KASSERT(opcode == CPL_CLOSE_CON_RPL, 1165 ("%s: unexpected opcode 0x%x", __func__, opcode)); 1166 KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); 1167 KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); 1168 1169 INP_INFO_RLOCK(&V_tcbinfo); 1170 INP_WLOCK(inp); 1171 tp = intotcpcb(inp); 1172 1173 CTR4(KTR_CXGBE, "%s: tid %u (%s), toep_flags 0x%x", 1174 __func__, tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags); 1175 1176 if (toep->flags & TPF_ABORT_SHUTDOWN) 1177 goto done; 1178 1179 so = inp->inp_socket; 1180 tp->snd_una = be32toh(cpl->snd_nxt) - 1; /* exclude FIN */ 1181 1182 switch (tp->t_state) { 1183 case TCPS_CLOSING: /* see TCPS_FIN_WAIT_2 in do_peer_close too */ 1184 tcp_twstart(tp); 1185 release: 1186 INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */ 1187 INP_INFO_RUNLOCK(&V_tcbinfo); 1188 1189 INP_WLOCK(inp); 1190 final_cpl_received(toep); /* no more CPLs expected */ 1191 1192 return (0); 1193 case TCPS_LAST_ACK: 1194 if (tcp_close(tp)) 1195 INP_WUNLOCK(inp); 1196 goto release; 1197 1198 case TCPS_FIN_WAIT_1: 1199 if (so->so_rcv.sb_state & SBS_CANTRCVMORE) 1200 soisdisconnected(so); 1201 tp->t_state = TCPS_FIN_WAIT_2; 1202 break; 1203 1204 default: 1205 log(LOG_ERR, 1206 "%s: TID %u received CPL_CLOSE_CON_RPL in state %s\n", 1207 __func__, tid, tcpstates[tp->t_state]); 1208 } 1209 done: 1210 INP_WUNLOCK(inp); 1211 INP_INFO_RUNLOCK(&V_tcbinfo); 1212 return (0); 1213 } 1214 1215 void 1216 send_abort_rpl(struct adapter *sc, struct sge_wrq *ofld_txq, int tid, 1217 int rst_status) 1218 { 1219 struct wrqe *wr; 1220 struct cpl_abort_rpl *cpl; 1221 1222 wr = alloc_wrqe(sizeof(*cpl), ofld_txq); 1223 if (wr == NULL) { 1224 /* XXX */ 1225 panic("%s: allocation failure.", __func__); 1226 } 1227 cpl = wrtod(wr); 1228 1229 INIT_TP_WR_MIT_CPL(cpl, CPL_ABORT_RPL, tid); 1230 cpl->cmd = rst_status; 1231 1232 t4_wrq_tx(sc, wr); 1233 } 1234 1235 static int 1236 abort_status_to_errno(struct tcpcb *tp, unsigned int abort_reason) 1237 { 1238 switch (abort_reason) { 1239 case CPL_ERR_BAD_SYN: 1240 case CPL_ERR_CONN_RESET: 1241 return (tp->t_state == TCPS_CLOSE_WAIT ? EPIPE : ECONNRESET); 1242 case CPL_ERR_XMIT_TIMEDOUT: 1243 case CPL_ERR_PERSIST_TIMEDOUT: 1244 case CPL_ERR_FINWAIT2_TIMEDOUT: 1245 case CPL_ERR_KEEPALIVE_TIMEDOUT: 1246 return (ETIMEDOUT); 1247 default: 1248 return (EIO); 1249 } 1250 } 1251 1252 int 1253 cpl_not_handled(struct sge_iq *, const struct rss_header *, struct mbuf *); 1254 /* 1255 * tom_cpl_iscsi_callback - 1256 * iscsi and tom would share the following cpl messages, so when any of these 1257 * message is received, after tom is done with processing it, the messages 1258 * needs to be forwarded to iscsi for further processing: 1259 * - CPL_SET_TCB_RPL 1260 * - CPL_RX_DATA_DDP 1261 */ 1262 void (*tom_cpl_iscsi_callback)(struct tom_data *, struct socket *, void *, 1263 unsigned int); 1264 1265 struct mbuf *(*tom_queue_iscsi_callback)(struct socket *, unsigned int, int *); 1266 /* 1267 * Check if the handler function is set for a given CPL 1268 * return 0 if the function is NULL or cpl_not_handled, 1 otherwise. 1269 */ 1270 int 1271 t4tom_cpl_handler_registered(struct adapter *sc, unsigned int opcode) 1272 { 1273 1274 MPASS(opcode < nitems(sc->cpl_handler)); 1275 1276 return (sc->cpl_handler[opcode] && 1277 sc->cpl_handler[opcode] != cpl_not_handled); 1278 } 1279 1280 /* 1281 * set the tom_cpl_iscsi_callback function, this function should be used 1282 * whenever both toe and iscsi need to process the same cpl msg. 1283 */ 1284 void 1285 t4tom_register_cpl_iscsi_callback(void (*fp)(struct tom_data *, struct socket *, 1286 void *, unsigned int)) 1287 { 1288 1289 tom_cpl_iscsi_callback = fp; 1290 } 1291 1292 void 1293 t4tom_register_queue_iscsi_callback(struct mbuf *(*fp)(struct socket *, 1294 unsigned int, int *qlen)) 1295 { 1296 1297 tom_queue_iscsi_callback = fp; 1298 } 1299 1300 int 1301 t4_cpl_iscsi_callback(struct tom_data *td, struct toepcb *toep, void *m, 1302 unsigned int opcode) 1303 { 1304 struct socket *so; 1305 1306 if (opcode == CPL_FW4_ACK) 1307 so = toep->inp->inp_socket; 1308 else { 1309 INP_WLOCK(toep->inp); 1310 so = toep->inp->inp_socket; 1311 INP_WUNLOCK(toep->inp); 1312 } 1313 1314 if (tom_cpl_iscsi_callback && so) { 1315 if (toep->ulp_mode == ULP_MODE_ISCSI) { 1316 tom_cpl_iscsi_callback(td, so, m, opcode); 1317 return (0); 1318 } 1319 } 1320 1321 return (1); 1322 } 1323 1324 struct mbuf * 1325 t4_queue_iscsi_callback(struct socket *so, struct toepcb *toep, 1326 unsigned int cmd, int *qlen) 1327 { 1328 1329 if (tom_queue_iscsi_callback && so) { 1330 if (toep->ulp_mode == ULP_MODE_ISCSI) 1331 return (tom_queue_iscsi_callback(so, cmd, qlen)); 1332 } 1333 1334 return (NULL); 1335 } 1336 1337 /* 1338 * TCP RST from the peer, timeout, or some other such critical error. 1339 */ 1340 static int 1341 do_abort_req(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) 1342 { 1343 struct adapter *sc = iq->adapter; 1344 const struct cpl_abort_req_rss *cpl = (const void *)(rss + 1); 1345 unsigned int tid = GET_TID(cpl); 1346 struct toepcb *toep = lookup_tid(sc, tid); 1347 struct sge_wrq *ofld_txq = toep->ofld_txq; 1348 struct inpcb *inp; 1349 struct tcpcb *tp; 1350 #ifdef INVARIANTS 1351 unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); 1352 #endif 1353 1354 KASSERT(opcode == CPL_ABORT_REQ_RSS, 1355 ("%s: unexpected opcode 0x%x", __func__, opcode)); 1356 KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); 1357 1358 if (toep->flags & TPF_SYNQE) 1359 return (do_abort_req_synqe(iq, rss, m)); 1360 1361 KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); 1362 1363 if (negative_advice(cpl->status)) { 1364 CTR4(KTR_CXGBE, "%s: negative advice %d for tid %d (0x%x)", 1365 __func__, cpl->status, tid, toep->flags); 1366 return (0); /* Ignore negative advice */ 1367 } 1368 1369 inp = toep->inp; 1370 INP_INFO_RLOCK(&V_tcbinfo); /* for tcp_close */ 1371 INP_WLOCK(inp); 1372 1373 tp = intotcpcb(inp); 1374 1375 CTR6(KTR_CXGBE, 1376 "%s: tid %d (%s), toep_flags 0x%x, inp_flags 0x%x, status %d", 1377 __func__, tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags, 1378 inp->inp_flags, cpl->status); 1379 1380 /* 1381 * If we'd initiated an abort earlier the reply to it is responsible for 1382 * cleaning up resources. Otherwise we tear everything down right here 1383 * right now. We owe the T4 a CPL_ABORT_RPL no matter what. 1384 */ 1385 if (toep->flags & TPF_ABORT_SHUTDOWN) { 1386 INP_WUNLOCK(inp); 1387 goto done; 1388 } 1389 toep->flags |= TPF_ABORT_SHUTDOWN; 1390 1391 if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) == 0) { 1392 struct socket *so = inp->inp_socket; 1393 1394 if (so != NULL) 1395 so_error_set(so, abort_status_to_errno(tp, 1396 cpl->status)); 1397 tp = tcp_close(tp); 1398 if (tp == NULL) 1399 INP_WLOCK(inp); /* re-acquire */ 1400 } 1401 1402 final_cpl_received(toep); 1403 done: 1404 INP_INFO_RUNLOCK(&V_tcbinfo); 1405 send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST); 1406 return (0); 1407 } 1408 1409 /* 1410 * Reply to the CPL_ABORT_REQ (send_reset) 1411 */ 1412 static int 1413 do_abort_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) 1414 { 1415 struct adapter *sc = iq->adapter; 1416 const struct cpl_abort_rpl_rss *cpl = (const void *)(rss + 1); 1417 unsigned int tid = GET_TID(cpl); 1418 struct toepcb *toep = lookup_tid(sc, tid); 1419 struct inpcb *inp = toep->inp; 1420 #ifdef INVARIANTS 1421 unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); 1422 #endif 1423 1424 KASSERT(opcode == CPL_ABORT_RPL_RSS, 1425 ("%s: unexpected opcode 0x%x", __func__, opcode)); 1426 KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); 1427 1428 if (toep->flags & TPF_SYNQE) 1429 return (do_abort_rpl_synqe(iq, rss, m)); 1430 1431 KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); 1432 1433 CTR5(KTR_CXGBE, "%s: tid %u, toep %p, inp %p, status %d", 1434 __func__, tid, toep, inp, cpl->status); 1435 1436 KASSERT(toep->flags & TPF_ABORT_SHUTDOWN, 1437 ("%s: wasn't expecting abort reply", __func__)); 1438 1439 INP_WLOCK(inp); 1440 final_cpl_received(toep); 1441 1442 return (0); 1443 } 1444 1445 static int 1446 do_rx_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) 1447 { 1448 struct adapter *sc = iq->adapter; 1449 const struct cpl_rx_data *cpl = mtod(m, const void *); 1450 unsigned int tid = GET_TID(cpl); 1451 struct toepcb *toep = lookup_tid(sc, tid); 1452 struct inpcb *inp = toep->inp; 1453 struct tcpcb *tp; 1454 struct socket *so; 1455 struct sockbuf *sb; 1456 int len; 1457 uint32_t ddp_placed = 0; 1458 1459 if (__predict_false(toep->flags & TPF_SYNQE)) { 1460 #ifdef INVARIANTS 1461 struct synq_entry *synqe = (void *)toep; 1462 1463 INP_WLOCK(synqe->lctx->inp); 1464 if (synqe->flags & TPF_SYNQE_HAS_L2TE) { 1465 KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN, 1466 ("%s: listen socket closed but tid %u not aborted.", 1467 __func__, tid)); 1468 } else { 1469 /* 1470 * do_pass_accept_req is still running and will 1471 * eventually take care of this tid. 1472 */ 1473 } 1474 INP_WUNLOCK(synqe->lctx->inp); 1475 #endif 1476 CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid, 1477 toep, toep->flags); 1478 m_freem(m); 1479 return (0); 1480 } 1481 1482 KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); 1483 1484 /* strip off CPL header */ 1485 m_adj(m, sizeof(*cpl)); 1486 len = m->m_pkthdr.len; 1487 1488 INP_WLOCK(inp); 1489 if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) { 1490 CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x", 1491 __func__, tid, len, inp->inp_flags); 1492 INP_WUNLOCK(inp); 1493 m_freem(m); 1494 return (0); 1495 } 1496 1497 tp = intotcpcb(inp); 1498 1499 if (__predict_false(tp->rcv_nxt != be32toh(cpl->seq))) 1500 ddp_placed = be32toh(cpl->seq) - tp->rcv_nxt; 1501 1502 tp->rcv_nxt += len; 1503 KASSERT(tp->rcv_wnd >= len, ("%s: negative window size", __func__)); 1504 tp->rcv_wnd -= len; 1505 tp->t_rcvtime = ticks; 1506 1507 so = inp_inpcbtosocket(inp); 1508 sb = &so->so_rcv; 1509 SOCKBUF_LOCK(sb); 1510 1511 if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) { 1512 CTR3(KTR_CXGBE, "%s: tid %u, excess rx (%d bytes)", 1513 __func__, tid, len); 1514 m_freem(m); 1515 SOCKBUF_UNLOCK(sb); 1516 INP_WUNLOCK(inp); 1517 1518 INP_INFO_RLOCK(&V_tcbinfo); 1519 INP_WLOCK(inp); 1520 tp = tcp_drop(tp, ECONNRESET); 1521 if (tp) 1522 INP_WUNLOCK(inp); 1523 INP_INFO_RUNLOCK(&V_tcbinfo); 1524 1525 return (0); 1526 } 1527 1528 /* receive buffer autosize */ 1529 if (sb->sb_flags & SB_AUTOSIZE && 1530 V_tcp_do_autorcvbuf && 1531 sb->sb_hiwat < V_tcp_autorcvbuf_max && 1532 len > (sbspace(sb) / 8 * 7)) { 1533 unsigned int hiwat = sb->sb_hiwat; 1534 unsigned int newsize = min(hiwat + V_tcp_autorcvbuf_inc, 1535 V_tcp_autorcvbuf_max); 1536 1537 if (!sbreserve_locked(sb, newsize, so, NULL)) 1538 sb->sb_flags &= ~SB_AUTOSIZE; 1539 else 1540 toep->rx_credits += newsize - hiwat; 1541 } 1542 1543 if (toep->ulp_mode == ULP_MODE_TCPDDP) { 1544 int changed = !(toep->ddp_flags & DDP_ON) ^ cpl->ddp_off; 1545 1546 if (changed) { 1547 if (toep->ddp_flags & DDP_SC_REQ) 1548 toep->ddp_flags ^= DDP_ON | DDP_SC_REQ; 1549 else { 1550 KASSERT(cpl->ddp_off == 1, 1551 ("%s: DDP switched on by itself.", 1552 __func__)); 1553 1554 /* Fell out of DDP mode */ 1555 toep->ddp_flags &= ~(DDP_ON | DDP_BUF0_ACTIVE | 1556 DDP_BUF1_ACTIVE); 1557 1558 if (ddp_placed) 1559 insert_ddp_data(toep, ddp_placed); 1560 } 1561 } 1562 1563 if ((toep->ddp_flags & DDP_OK) == 0 && 1564 time_uptime >= toep->ddp_disabled + DDP_RETRY_WAIT) { 1565 toep->ddp_score = DDP_LOW_SCORE; 1566 toep->ddp_flags |= DDP_OK; 1567 CTR3(KTR_CXGBE, "%s: tid %u DDP_OK @ %u", 1568 __func__, tid, time_uptime); 1569 } 1570 1571 if (toep->ddp_flags & DDP_ON) { 1572 1573 /* 1574 * CPL_RX_DATA with DDP on can only be an indicate. Ask 1575 * soreceive to post a buffer or disable DDP. The 1576 * payload that arrived in this indicate is appended to 1577 * the socket buffer as usual. 1578 */ 1579 1580 #if 0 1581 CTR5(KTR_CXGBE, 1582 "%s: tid %u (0x%x) DDP indicate (seq 0x%x, len %d)", 1583 __func__, tid, toep->flags, be32toh(cpl->seq), len); 1584 #endif 1585 sb->sb_flags |= SB_DDP_INDICATE; 1586 } else if ((toep->ddp_flags & (DDP_OK|DDP_SC_REQ)) == DDP_OK && 1587 tp->rcv_wnd > DDP_RSVD_WIN && len >= sc->tt.ddp_thres) { 1588 1589 /* 1590 * DDP allowed but isn't on (and a request to switch it 1591 * on isn't pending either), and conditions are ripe for 1592 * it to work. Switch it on. 1593 */ 1594 1595 enable_ddp(sc, toep); 1596 } 1597 } 1598 1599 KASSERT(toep->sb_cc >= sbused(sb), 1600 ("%s: sb %p has more data (%d) than last time (%d).", 1601 __func__, sb, sbused(sb), toep->sb_cc)); 1602 toep->rx_credits += toep->sb_cc - sbused(sb); 1603 sbappendstream_locked(sb, m, 0); 1604 toep->sb_cc = sbused(sb); 1605 if (toep->rx_credits > 0 && toep->sb_cc + tp->rcv_wnd < sb->sb_lowat) { 1606 int credits; 1607 1608 credits = send_rx_credits(sc, toep, toep->rx_credits); 1609 toep->rx_credits -= credits; 1610 tp->rcv_wnd += credits; 1611 tp->rcv_adv += credits; 1612 } 1613 sorwakeup_locked(so); 1614 SOCKBUF_UNLOCK_ASSERT(sb); 1615 1616 INP_WUNLOCK(inp); 1617 return (0); 1618 } 1619 1620 #define S_CPL_FW4_ACK_OPCODE 24 1621 #define M_CPL_FW4_ACK_OPCODE 0xff 1622 #define V_CPL_FW4_ACK_OPCODE(x) ((x) << S_CPL_FW4_ACK_OPCODE) 1623 #define G_CPL_FW4_ACK_OPCODE(x) \ 1624 (((x) >> S_CPL_FW4_ACK_OPCODE) & M_CPL_FW4_ACK_OPCODE) 1625 1626 #define S_CPL_FW4_ACK_FLOWID 0 1627 #define M_CPL_FW4_ACK_FLOWID 0xffffff 1628 #define V_CPL_FW4_ACK_FLOWID(x) ((x) << S_CPL_FW4_ACK_FLOWID) 1629 #define G_CPL_FW4_ACK_FLOWID(x) \ 1630 (((x) >> S_CPL_FW4_ACK_FLOWID) & M_CPL_FW4_ACK_FLOWID) 1631 1632 #define S_CPL_FW4_ACK_CR 24 1633 #define M_CPL_FW4_ACK_CR 0xff 1634 #define V_CPL_FW4_ACK_CR(x) ((x) << S_CPL_FW4_ACK_CR) 1635 #define G_CPL_FW4_ACK_CR(x) (((x) >> S_CPL_FW4_ACK_CR) & M_CPL_FW4_ACK_CR) 1636 1637 #define S_CPL_FW4_ACK_SEQVAL 0 1638 #define M_CPL_FW4_ACK_SEQVAL 0x1 1639 #define V_CPL_FW4_ACK_SEQVAL(x) ((x) << S_CPL_FW4_ACK_SEQVAL) 1640 #define G_CPL_FW4_ACK_SEQVAL(x) \ 1641 (((x) >> S_CPL_FW4_ACK_SEQVAL) & M_CPL_FW4_ACK_SEQVAL) 1642 #define F_CPL_FW4_ACK_SEQVAL V_CPL_FW4_ACK_SEQVAL(1U) 1643 1644 static int 1645 do_fw4_ack(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) 1646 { 1647 struct adapter *sc = iq->adapter; 1648 const struct cpl_fw4_ack *cpl = (const void *)(rss + 1); 1649 unsigned int tid = G_CPL_FW4_ACK_FLOWID(be32toh(OPCODE_TID(cpl))); 1650 struct toepcb *toep = lookup_tid(sc, tid); 1651 struct inpcb *inp; 1652 struct tcpcb *tp; 1653 struct socket *so; 1654 uint8_t credits = cpl->credits; 1655 struct ofld_tx_sdesc *txsd; 1656 int plen; 1657 #ifdef INVARIANTS 1658 unsigned int opcode = G_CPL_FW4_ACK_OPCODE(be32toh(OPCODE_TID(cpl))); 1659 #endif 1660 1661 /* 1662 * Very unusual case: we'd sent a flowc + abort_req for a synq entry and 1663 * now this comes back carrying the credits for the flowc. 1664 */ 1665 if (__predict_false(toep->flags & TPF_SYNQE)) { 1666 KASSERT(toep->flags & TPF_ABORT_SHUTDOWN, 1667 ("%s: credits for a synq entry %p", __func__, toep)); 1668 return (0); 1669 } 1670 1671 inp = toep->inp; 1672 1673 KASSERT(opcode == CPL_FW4_ACK, 1674 ("%s: unexpected opcode 0x%x", __func__, opcode)); 1675 KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); 1676 KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); 1677 1678 INP_WLOCK(inp); 1679 1680 if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN)) { 1681 INP_WUNLOCK(inp); 1682 return (0); 1683 } 1684 1685 KASSERT((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) == 0, 1686 ("%s: inp_flags 0x%x", __func__, inp->inp_flags)); 1687 1688 tp = intotcpcb(inp); 1689 1690 if (cpl->flags & CPL_FW4_ACK_FLAGS_SEQVAL) { 1691 tcp_seq snd_una = be32toh(cpl->snd_una); 1692 1693 #ifdef INVARIANTS 1694 if (__predict_false(SEQ_LT(snd_una, tp->snd_una))) { 1695 log(LOG_ERR, 1696 "%s: unexpected seq# %x for TID %u, snd_una %x\n", 1697 __func__, snd_una, toep->tid, tp->snd_una); 1698 } 1699 #endif 1700 1701 if (tp->snd_una != snd_una) { 1702 tp->snd_una = snd_una; 1703 tp->ts_recent_age = tcp_ts_getticks(); 1704 } 1705 } 1706 1707 so = inp->inp_socket; 1708 txsd = &toep->txsd[toep->txsd_cidx]; 1709 plen = 0; 1710 while (credits) { 1711 KASSERT(credits >= txsd->tx_credits, 1712 ("%s: too many (or partial) credits", __func__)); 1713 credits -= txsd->tx_credits; 1714 toep->tx_credits += txsd->tx_credits; 1715 plen += txsd->plen; 1716 txsd++; 1717 toep->txsd_avail++; 1718 KASSERT(toep->txsd_avail <= toep->txsd_total, 1719 ("%s: txsd avail > total", __func__)); 1720 if (__predict_false(++toep->txsd_cidx == toep->txsd_total)) { 1721 txsd = &toep->txsd[0]; 1722 toep->txsd_cidx = 0; 1723 } 1724 } 1725 1726 if (toep->tx_credits == toep->tx_total) { 1727 toep->tx_nocompl = 0; 1728 toep->plen_nocompl = 0; 1729 } 1730 1731 if (toep->flags & TPF_TX_SUSPENDED && 1732 toep->tx_credits >= toep->tx_total / 4) { 1733 toep->flags &= ~TPF_TX_SUSPENDED; 1734 if (toep->ulp_mode == ULP_MODE_ISCSI) 1735 t4_ulp_push_frames(sc, toep, plen); 1736 else 1737 t4_push_frames(sc, toep, plen); 1738 } else if (plen > 0) { 1739 struct sockbuf *sb = &so->so_snd; 1740 1741 if (toep->ulp_mode == ULP_MODE_ISCSI) 1742 t4_cpl_iscsi_callback(toep->td, toep, &plen, 1743 CPL_FW4_ACK); 1744 else { 1745 SOCKBUF_LOCK(sb); 1746 sbdrop_locked(sb, plen); 1747 sowwakeup_locked(so); 1748 SOCKBUF_UNLOCK_ASSERT(sb); 1749 } 1750 } 1751 1752 INP_WUNLOCK(inp); 1753 1754 return (0); 1755 } 1756 1757 static int 1758 do_set_tcb_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) 1759 { 1760 struct adapter *sc = iq->adapter; 1761 const struct cpl_set_tcb_rpl *cpl = (const void *)(rss + 1); 1762 unsigned int tid = GET_TID(cpl); 1763 #ifdef INVARIANTS 1764 unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); 1765 #endif 1766 1767 KASSERT(opcode == CPL_SET_TCB_RPL, 1768 ("%s: unexpected opcode 0x%x", __func__, opcode)); 1769 KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); 1770 1771 if (is_ftid(sc, tid)) 1772 return (t4_filter_rpl(iq, rss, m)); /* TCB is a filter */ 1773 else { 1774 struct toepcb *toep = lookup_tid(sc, tid); 1775 1776 t4_cpl_iscsi_callback(toep->td, toep, m, CPL_SET_TCB_RPL); 1777 return (0); 1778 } 1779 1780 CXGBE_UNIMPLEMENTED(__func__); 1781 } 1782 1783 void 1784 t4_set_tcb_field(struct adapter *sc, struct toepcb *toep, int ctrl, 1785 uint16_t word, uint64_t mask, uint64_t val) 1786 { 1787 struct wrqe *wr; 1788 struct cpl_set_tcb_field *req; 1789 1790 wr = alloc_wrqe(sizeof(*req), ctrl ? toep->ctrlq : toep->ofld_txq); 1791 if (wr == NULL) { 1792 /* XXX */ 1793 panic("%s: allocation failure.", __func__); 1794 } 1795 req = wrtod(wr); 1796 1797 INIT_TP_WR_MIT_CPL(req, CPL_SET_TCB_FIELD, toep->tid); 1798 req->reply_ctrl = htobe16(V_NO_REPLY(1) | 1799 V_QUEUENO(toep->ofld_rxq->iq.abs_id)); 1800 req->word_cookie = htobe16(V_WORD(word) | V_COOKIE(0)); 1801 req->mask = htobe64(mask); 1802 req->val = htobe64(val); 1803 1804 t4_wrq_tx(sc, wr); 1805 } 1806 1807 void 1808 t4_init_cpl_io_handlers(struct adapter *sc) 1809 { 1810 1811 t4_register_cpl_handler(sc, CPL_PEER_CLOSE, do_peer_close); 1812 t4_register_cpl_handler(sc, CPL_CLOSE_CON_RPL, do_close_con_rpl); 1813 t4_register_cpl_handler(sc, CPL_ABORT_REQ_RSS, do_abort_req); 1814 t4_register_cpl_handler(sc, CPL_ABORT_RPL_RSS, do_abort_rpl); 1815 t4_register_cpl_handler(sc, CPL_RX_DATA, do_rx_data); 1816 t4_register_cpl_handler(sc, CPL_FW4_ACK, do_fw4_ack); 1817 t4_register_cpl_handler(sc, CPL_SET_TCB_RPL, do_set_tcb_rpl); 1818 } 1819 1820 void 1821 t4_uninit_cpl_io_handlers(struct adapter *sc) 1822 { 1823 1824 t4_register_cpl_handler(sc, CPL_SET_TCB_RPL, t4_filter_rpl); 1825 } 1826 #endif 1827