1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2012, 2015 Chelsio Communications, Inc. 5 * All rights reserved. 6 * Written by: Navdeep Parhar <np@FreeBSD.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include "opt_inet.h" 34 #include "opt_inet6.h" 35 #include "opt_ratelimit.h" 36 37 #ifdef TCP_OFFLOAD 38 #include <sys/param.h> 39 #include <sys/aio.h> 40 #include <sys/file.h> 41 #include <sys/kernel.h> 42 #include <sys/ktr.h> 43 #include <sys/module.h> 44 #include <sys/proc.h> 45 #include <sys/protosw.h> 46 #include <sys/domain.h> 47 #include <sys/socket.h> 48 #include <sys/socketvar.h> 49 #include <sys/sglist.h> 50 #include <sys/taskqueue.h> 51 #include <netinet/in.h> 52 #include <netinet/in_pcb.h> 53 #include <netinet/ip.h> 54 #include <netinet/ip6.h> 55 #define TCPSTATES 56 #include <netinet/tcp_fsm.h> 57 #include <netinet/tcp_seq.h> 58 #include <netinet/tcp_var.h> 59 #include <netinet/toecore.h> 60 61 #include <security/mac/mac_framework.h> 62 63 #include <vm/vm.h> 64 #include <vm/vm_extern.h> 65 #include <vm/pmap.h> 66 #include <vm/vm_map.h> 67 #include <vm/vm_page.h> 68 69 #include "common/common.h" 70 #include "common/t4_msg.h" 71 #include "common/t4_regs.h" 72 #include "common/t4_tcb.h" 73 #include "tom/t4_tom_l2t.h" 74 #include "tom/t4_tom.h" 75 76 static void t4_aiotx_cancel(struct kaiocb *job); 77 static void t4_aiotx_queue_toep(struct toepcb *toep); 78 79 static size_t 80 aiotx_mbuf_pgoff(struct mbuf *m) 81 { 82 struct aiotx_buffer *ab; 83 84 MPASS(IS_AIOTX_MBUF(m)); 85 ab = m->m_ext.ext_arg1; 86 return ((ab->ps.offset + (uintptr_t)m->m_ext.ext_arg2) % PAGE_SIZE); 87 } 88 89 static vm_page_t * 90 aiotx_mbuf_pages(struct mbuf *m) 91 { 92 struct aiotx_buffer *ab; 93 int npages; 94 95 MPASS(IS_AIOTX_MBUF(m)); 96 ab = m->m_ext.ext_arg1; 97 npages = (ab->ps.offset + (uintptr_t)m->m_ext.ext_arg2) / PAGE_SIZE; 98 return (ab->ps.pages + npages); 99 } 100 101 void 102 send_flowc_wr(struct toepcb *toep, struct flowc_tx_params *ftxp) 103 { 104 struct wrqe *wr; 105 struct fw_flowc_wr *flowc; 106 unsigned int nparams, flowclen, paramidx; 107 struct vi_info *vi = toep->vi; 108 struct port_info *pi = vi->pi; 109 struct adapter *sc = pi->adapter; 
110 unsigned int pfvf = G_FW_VIID_PFN(vi->viid) << S_FW_VIID_PFN; 111 struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx]; 112 113 KASSERT(!(toep->flags & TPF_FLOWC_WR_SENT), 114 ("%s: flowc for tid %u sent already", __func__, toep->tid)); 115 116 if (ftxp != NULL) 117 nparams = 8; 118 else 119 nparams = 6; 120 if (toep->ulp_mode == ULP_MODE_TLS) 121 nparams++; 122 if (toep->tls.fcplenmax != 0) 123 nparams++; 124 125 flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval); 126 127 wr = alloc_wrqe(roundup2(flowclen, 16), toep->ofld_txq); 128 if (wr == NULL) { 129 /* XXX */ 130 panic("%s: allocation failure.", __func__); 131 } 132 flowc = wrtod(wr); 133 memset(flowc, 0, wr->wr_len); 134 135 flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) | 136 V_FW_FLOWC_WR_NPARAMS(nparams)); 137 flowc->flowid_len16 = htonl(V_FW_WR_LEN16(howmany(flowclen, 16)) | 138 V_FW_WR_FLOWID(toep->tid)); 139 140 #define FLOWC_PARAM(__m, __v) \ 141 do { \ 142 flowc->mnemval[paramidx].mnemonic = FW_FLOWC_MNEM_##__m; \ 143 flowc->mnemval[paramidx].val = htobe32(__v); \ 144 paramidx++; \ 145 } while (0) 146 147 paramidx = 0; 148 149 FLOWC_PARAM(PFNVFN, pfvf); 150 FLOWC_PARAM(CH, pi->tx_chan); 151 FLOWC_PARAM(PORT, pi->tx_chan); 152 FLOWC_PARAM(IQID, toep->ofld_rxq->iq.abs_id); 153 if (ftxp) { 154 uint32_t sndbuf = min(ftxp->snd_space, sc->tt.sndbuf); 155 156 FLOWC_PARAM(SNDNXT, ftxp->snd_nxt); 157 FLOWC_PARAM(RCVNXT, ftxp->rcv_nxt); 158 FLOWC_PARAM(SNDBUF, sndbuf); 159 FLOWC_PARAM(MSS, ftxp->mss); 160 161 CTR6(KTR_CXGBE, 162 "%s: tid %u, mss %u, sndbuf %u, snd_nxt 0x%x, rcv_nxt 0x%x", 163 __func__, toep->tid, ftxp->mss, sndbuf, ftxp->snd_nxt, 164 ftxp->rcv_nxt); 165 } else { 166 FLOWC_PARAM(SNDBUF, 512); 167 FLOWC_PARAM(MSS, 512); 168 169 CTR2(KTR_CXGBE, "%s: tid %u", __func__, toep->tid); 170 } 171 if (toep->ulp_mode == ULP_MODE_TLS) 172 FLOWC_PARAM(ULP_MODE, toep->ulp_mode); 173 if (toep->tls.fcplenmax != 0) 174 FLOWC_PARAM(TXDATAPLEN_MAX, toep->tls.fcplenmax); 175 #undef FLOWC_PARAM 176 177 KASSERT(paramidx == nparams, ("nparams mismatch")); 178 179 txsd->tx_credits = howmany(flowclen, 16); 180 txsd->plen = 0; 181 KASSERT(toep->tx_credits >= txsd->tx_credits && toep->txsd_avail > 0, 182 ("%s: not enough credits (%d)", __func__, toep->tx_credits)); 183 toep->tx_credits -= txsd->tx_credits; 184 if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) 185 toep->txsd_pidx = 0; 186 toep->txsd_avail--; 187 188 toep->flags |= TPF_FLOWC_WR_SENT; 189 t4_wrq_tx(sc, wr); 190 } 191 192 #ifdef RATELIMIT 193 /* 194 * Input is Bytes/second (so_max_pacing-rate), chip counts in Kilobits/second. 
195 */ 196 static int 197 update_tx_rate_limit(struct adapter *sc, struct toepcb *toep, u_int Bps) 198 { 199 int tc_idx, rc; 200 const u_int kbps = (u_int) (uint64_t)Bps * 8ULL / 1000; 201 const int port_id = toep->vi->pi->port_id; 202 203 CTR3(KTR_CXGBE, "%s: tid %u, rate %uKbps", __func__, toep->tid, kbps); 204 205 if (kbps == 0) { 206 /* unbind */ 207 tc_idx = -1; 208 } else { 209 rc = t4_reserve_cl_rl_kbps(sc, port_id, kbps, &tc_idx); 210 if (rc != 0) 211 return (rc); 212 MPASS(tc_idx >= 0 && tc_idx < sc->chip_params->nsched_cls); 213 } 214 215 if (toep->tc_idx != tc_idx) { 216 struct wrqe *wr; 217 struct fw_flowc_wr *flowc; 218 int nparams = 1, flowclen, flowclen16; 219 struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx]; 220 221 flowclen = sizeof(*flowc) + nparams * sizeof(struct 222 fw_flowc_mnemval); 223 flowclen16 = howmany(flowclen, 16); 224 if (toep->tx_credits < flowclen16 || toep->txsd_avail == 0 || 225 (wr = alloc_wrqe(roundup2(flowclen, 16), toep->ofld_txq)) == NULL) { 226 if (tc_idx >= 0) 227 t4_release_cl_rl_kbps(sc, port_id, tc_idx); 228 return (ENOMEM); 229 } 230 231 flowc = wrtod(wr); 232 memset(flowc, 0, wr->wr_len); 233 234 flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) | 235 V_FW_FLOWC_WR_NPARAMS(nparams)); 236 flowc->flowid_len16 = htonl(V_FW_WR_LEN16(flowclen16) | 237 V_FW_WR_FLOWID(toep->tid)); 238 239 flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS; 240 if (tc_idx == -1) 241 flowc->mnemval[0].val = htobe32(0xff); 242 else 243 flowc->mnemval[0].val = htobe32(tc_idx); 244 245 txsd->tx_credits = flowclen16; 246 txsd->plen = 0; 247 toep->tx_credits -= txsd->tx_credits; 248 if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) 249 toep->txsd_pidx = 0; 250 toep->txsd_avail--; 251 t4_wrq_tx(sc, wr); 252 } 253 254 if (toep->tc_idx >= 0) 255 t4_release_cl_rl_kbps(sc, port_id, toep->tc_idx); 256 toep->tc_idx = tc_idx; 257 258 return (0); 259 } 260 #endif 261 262 void 263 send_reset(struct adapter *sc, struct toepcb *toep, uint32_t snd_nxt) 264 { 265 struct wrqe *wr; 266 struct cpl_abort_req *req; 267 int tid = toep->tid; 268 struct inpcb *inp = toep->inp; 269 struct tcpcb *tp = intotcpcb(inp); /* don't use if INP_DROPPED */ 270 271 INP_WLOCK_ASSERT(inp); 272 273 CTR6(KTR_CXGBE, "%s: tid %d (%s), toep_flags 0x%x, inp_flags 0x%x%s", 274 __func__, toep->tid, 275 inp->inp_flags & INP_DROPPED ? "inp dropped" : 276 tcpstates[tp->t_state], 277 toep->flags, inp->inp_flags, 278 toep->flags & TPF_ABORT_SHUTDOWN ? 279 " (abort already in progress)" : ""); 280 281 if (toep->flags & TPF_ABORT_SHUTDOWN) 282 return; /* abort already in progress */ 283 284 toep->flags |= TPF_ABORT_SHUTDOWN; 285 286 KASSERT(toep->flags & TPF_FLOWC_WR_SENT, 287 ("%s: flowc_wr not sent for tid %d.", __func__, tid)); 288 289 wr = alloc_wrqe(sizeof(*req), toep->ofld_txq); 290 if (wr == NULL) { 291 /* XXX */ 292 panic("%s: allocation failure.", __func__); 293 } 294 req = wrtod(wr); 295 296 INIT_TP_WR_MIT_CPL(req, CPL_ABORT_REQ, tid); 297 if (inp->inp_flags & INP_DROPPED) 298 req->rsvd0 = htobe32(snd_nxt); 299 else 300 req->rsvd0 = htobe32(tp->snd_nxt); 301 req->rsvd1 = !(toep->flags & TPF_TX_DATA_SENT); 302 req->cmd = CPL_ABORT_SEND_RST; 303 304 /* 305 * XXX: What's the correct way to tell that the inp hasn't been detached 306 * from its socket? Should I even be flushing the snd buffer here? 307 */ 308 if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) == 0) { 309 struct socket *so = inp->inp_socket; 310 311 if (so != NULL) /* because I'm not sure. 
See comment above */ 312 sbflush(&so->so_snd); 313 } 314 315 t4_l2t_send(sc, wr, toep->l2te); 316 } 317 318 /* 319 * Called when a connection is established to translate the TCP options 320 * reported by HW to FreeBSD's native format. 321 */ 322 static void 323 assign_rxopt(struct tcpcb *tp, unsigned int opt) 324 { 325 struct toepcb *toep = tp->t_toe; 326 struct inpcb *inp = tp->t_inpcb; 327 struct adapter *sc = td_adapter(toep->td); 328 int n; 329 330 INP_LOCK_ASSERT(inp); 331 332 if (inp->inp_inc.inc_flags & INC_ISIPV6) 333 n = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); 334 else 335 n = sizeof(struct ip) + sizeof(struct tcphdr); 336 if (V_tcp_do_rfc1323) 337 n += TCPOLEN_TSTAMP_APPA; 338 tp->t_maxseg = sc->params.mtus[G_TCPOPT_MSS(opt)] - n; 339 340 CTR4(KTR_CXGBE, "%s: tid %d, mtu_idx %u (%u)", __func__, toep->tid, 341 G_TCPOPT_MSS(opt), sc->params.mtus[G_TCPOPT_MSS(opt)]); 342 343 if (G_TCPOPT_TSTAMP(opt)) { 344 tp->t_flags |= TF_RCVD_TSTMP; /* timestamps ok */ 345 tp->ts_recent = 0; /* hmmm */ 346 tp->ts_recent_age = tcp_ts_getticks(); 347 } 348 349 if (G_TCPOPT_SACK(opt)) 350 tp->t_flags |= TF_SACK_PERMIT; /* should already be set */ 351 else 352 tp->t_flags &= ~TF_SACK_PERMIT; /* sack disallowed by peer */ 353 354 if (G_TCPOPT_WSCALE_OK(opt)) 355 tp->t_flags |= TF_RCVD_SCALE; 356 357 /* Doing window scaling? */ 358 if ((tp->t_flags & (TF_RCVD_SCALE | TF_REQ_SCALE)) == 359 (TF_RCVD_SCALE | TF_REQ_SCALE)) { 360 tp->rcv_scale = tp->request_r_scale; 361 tp->snd_scale = G_TCPOPT_SND_WSCALE(opt); 362 } 363 } 364 365 /* 366 * Completes some final bits of initialization for just established connections 367 * and changes their state to TCPS_ESTABLISHED. 368 * 369 * The ISNs are from after the exchange of SYNs. i.e., the true ISN + 1. 370 */ 371 void 372 make_established(struct toepcb *toep, uint32_t snd_isn, uint32_t rcv_isn, 373 uint16_t opt) 374 { 375 struct inpcb *inp = toep->inp; 376 struct socket *so = inp->inp_socket; 377 struct tcpcb *tp = intotcpcb(inp); 378 long bufsize; 379 uint32_t iss = be32toh(snd_isn) - 1; /* true ISS */ 380 uint32_t irs = be32toh(rcv_isn) - 1; /* true IRS */ 381 uint16_t tcpopt = be16toh(opt); 382 struct flowc_tx_params ftxp; 383 384 INP_WLOCK_ASSERT(inp); 385 KASSERT(tp->t_state == TCPS_SYN_SENT || 386 tp->t_state == TCPS_SYN_RECEIVED, 387 ("%s: TCP state %s", __func__, tcpstates[tp->t_state])); 388 389 CTR6(KTR_CXGBE, "%s: tid %d, so %p, inp %p, tp %p, toep %p", 390 __func__, toep->tid, so, inp, tp, toep); 391 392 tp->t_state = TCPS_ESTABLISHED; 393 tp->t_starttime = ticks; 394 TCPSTAT_INC(tcps_connects); 395 396 tp->irs = irs; 397 tcp_rcvseqinit(tp); 398 tp->rcv_wnd = toep->rx_credits << 10; 399 tp->rcv_adv += tp->rcv_wnd; 400 tp->last_ack_sent = tp->rcv_nxt; 401 402 /* 403 * If we were unable to send all rx credits via opt0, save the remainder 404 * in rx_credits so that they can be handed over with the next credit 405 * update. 
406 */ 407 SOCKBUF_LOCK(&so->so_rcv); 408 bufsize = select_rcv_wnd(so); 409 SOCKBUF_UNLOCK(&so->so_rcv); 410 toep->rx_credits = bufsize - tp->rcv_wnd; 411 412 tp->iss = iss; 413 tcp_sendseqinit(tp); 414 tp->snd_una = iss + 1; 415 tp->snd_nxt = iss + 1; 416 tp->snd_max = iss + 1; 417 418 assign_rxopt(tp, tcpopt); 419 420 SOCKBUF_LOCK(&so->so_snd); 421 if (so->so_snd.sb_flags & SB_AUTOSIZE && V_tcp_do_autosndbuf) 422 bufsize = V_tcp_autosndbuf_max; 423 else 424 bufsize = sbspace(&so->so_snd); 425 SOCKBUF_UNLOCK(&so->so_snd); 426 427 ftxp.snd_nxt = tp->snd_nxt; 428 ftxp.rcv_nxt = tp->rcv_nxt; 429 ftxp.snd_space = bufsize; 430 ftxp.mss = tp->t_maxseg; 431 send_flowc_wr(toep, &ftxp); 432 433 soisconnected(so); 434 } 435 436 int 437 send_rx_credits(struct adapter *sc, struct toepcb *toep, int credits) 438 { 439 struct wrqe *wr; 440 struct cpl_rx_data_ack *req; 441 uint32_t dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1); 442 443 KASSERT(credits >= 0, ("%s: %d credits", __func__, credits)); 444 445 wr = alloc_wrqe(sizeof(*req), toep->ctrlq); 446 if (wr == NULL) 447 return (0); 448 req = wrtod(wr); 449 450 INIT_TP_WR_MIT_CPL(req, CPL_RX_DATA_ACK, toep->tid); 451 req->credit_dack = htobe32(dack | V_RX_CREDITS(credits)); 452 453 t4_wrq_tx(sc, wr); 454 return (credits); 455 } 456 457 void 458 send_rx_modulate(struct adapter *sc, struct toepcb *toep) 459 { 460 struct wrqe *wr; 461 struct cpl_rx_data_ack *req; 462 463 wr = alloc_wrqe(sizeof(*req), toep->ctrlq); 464 if (wr == NULL) 465 return; 466 req = wrtod(wr); 467 468 INIT_TP_WR_MIT_CPL(req, CPL_RX_DATA_ACK, toep->tid); 469 req->credit_dack = htobe32(F_RX_MODULATE_RX); 470 471 t4_wrq_tx(sc, wr); 472 } 473 474 void 475 t4_rcvd_locked(struct toedev *tod, struct tcpcb *tp) 476 { 477 struct adapter *sc = tod->tod_softc; 478 struct inpcb *inp = tp->t_inpcb; 479 struct socket *so = inp->inp_socket; 480 struct sockbuf *sb = &so->so_rcv; 481 struct toepcb *toep = tp->t_toe; 482 int credits; 483 484 INP_WLOCK_ASSERT(inp); 485 486 SOCKBUF_LOCK_ASSERT(sb); 487 KASSERT(toep->sb_cc >= sbused(sb), 488 ("%s: sb %p has more data (%d) than last time (%d).", 489 __func__, sb, sbused(sb), toep->sb_cc)); 490 491 credits = toep->sb_cc - sbused(sb); 492 toep->sb_cc = sbused(sb); 493 if (toep->ulp_mode == ULP_MODE_TLS) { 494 if (toep->tls.rcv_over >= credits) { 495 toep->tls.rcv_over -= credits; 496 credits = 0; 497 } else { 498 credits -= toep->tls.rcv_over; 499 toep->tls.rcv_over = 0; 500 } 501 } 502 toep->rx_credits += credits; 503 504 if (toep->rx_credits > 0 && 505 (tp->rcv_wnd <= 32 * 1024 || toep->rx_credits >= 64 * 1024 || 506 (toep->rx_credits >= 16 * 1024 && tp->rcv_wnd <= 128 * 1024) || 507 toep->sb_cc + tp->rcv_wnd < sb->sb_lowat)) { 508 509 credits = send_rx_credits(sc, toep, toep->rx_credits); 510 toep->rx_credits -= credits; 511 tp->rcv_wnd += credits; 512 tp->rcv_adv += credits; 513 } else if (toep->flags & TPF_FORCE_CREDITS) 514 send_rx_modulate(sc, toep); 515 } 516 517 void 518 t4_rcvd(struct toedev *tod, struct tcpcb *tp) 519 { 520 struct inpcb *inp = tp->t_inpcb; 521 struct socket *so = inp->inp_socket; 522 struct sockbuf *sb = &so->so_rcv; 523 524 SOCKBUF_LOCK(sb); 525 t4_rcvd_locked(tod, tp); 526 SOCKBUF_UNLOCK(sb); 527 } 528 529 /* 530 * Close a connection by sending a CPL_CLOSE_CON_REQ message. 
531 */ 532 int 533 t4_close_conn(struct adapter *sc, struct toepcb *toep) 534 { 535 struct wrqe *wr; 536 struct cpl_close_con_req *req; 537 unsigned int tid = toep->tid; 538 539 CTR3(KTR_CXGBE, "%s: tid %u%s", __func__, toep->tid, 540 toep->flags & TPF_FIN_SENT ? ", IGNORED" : ""); 541 542 if (toep->flags & TPF_FIN_SENT) 543 return (0); 544 545 KASSERT(toep->flags & TPF_FLOWC_WR_SENT, 546 ("%s: flowc_wr not sent for tid %u.", __func__, tid)); 547 548 wr = alloc_wrqe(sizeof(*req), toep->ofld_txq); 549 if (wr == NULL) { 550 /* XXX */ 551 panic("%s: allocation failure.", __func__); 552 } 553 req = wrtod(wr); 554 555 req->wr.wr_hi = htonl(V_FW_WR_OP(FW_TP_WR) | 556 V_FW_WR_IMMDLEN(sizeof(*req) - sizeof(req->wr))); 557 req->wr.wr_mid = htonl(V_FW_WR_LEN16(howmany(sizeof(*req), 16)) | 558 V_FW_WR_FLOWID(tid)); 559 req->wr.wr_lo = cpu_to_be64(0); 560 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid)); 561 req->rsvd = 0; 562 563 toep->flags |= TPF_FIN_SENT; 564 toep->flags &= ~TPF_SEND_FIN; 565 t4_l2t_send(sc, wr, toep->l2te); 566 567 return (0); 568 } 569 570 #define MAX_OFLD_TX_CREDITS (SGE_MAX_WR_LEN / 16) 571 #define MIN_OFLD_TX_CREDITS (howmany(sizeof(struct fw_ofld_tx_data_wr) + 1, 16)) 572 573 /* Maximum amount of immediate data we could stuff in a WR */ 574 static inline int 575 max_imm_payload(int tx_credits) 576 { 577 const int n = 2; /* Use only up to 2 desc for imm. data WR */ 578 579 KASSERT(tx_credits >= 0 && 580 tx_credits <= MAX_OFLD_TX_CREDITS, 581 ("%s: %d credits", __func__, tx_credits)); 582 583 if (tx_credits < MIN_OFLD_TX_CREDITS) 584 return (0); 585 586 if (tx_credits >= (n * EQ_ESIZE) / 16) 587 return ((n * EQ_ESIZE) - sizeof(struct fw_ofld_tx_data_wr)); 588 else 589 return (tx_credits * 16 - sizeof(struct fw_ofld_tx_data_wr)); 590 } 591 592 /* Maximum number of SGL entries we could stuff in a WR */ 593 static inline int 594 max_dsgl_nsegs(int tx_credits) 595 { 596 int nseg = 1; /* ulptx_sgl has room for 1, rest ulp_tx_sge_pair */ 597 int sge_pair_credits = tx_credits - MIN_OFLD_TX_CREDITS; 598 599 KASSERT(tx_credits >= 0 && 600 tx_credits <= MAX_OFLD_TX_CREDITS, 601 ("%s: %d credits", __func__, tx_credits)); 602 603 if (tx_credits < MIN_OFLD_TX_CREDITS) 604 return (0); 605 606 nseg += 2 * (sge_pair_credits * 16 / 24); 607 if ((sge_pair_credits * 16) % 24 == 16) 608 nseg++; 609 610 return (nseg); 611 } 612 613 static inline void 614 write_tx_wr(void *dst, struct toepcb *toep, unsigned int immdlen, 615 unsigned int plen, uint8_t credits, int shove, int ulp_submode, int txalign) 616 { 617 struct fw_ofld_tx_data_wr *txwr = dst; 618 619 txwr->op_to_immdlen = htobe32(V_WR_OP(FW_OFLD_TX_DATA_WR) | 620 V_FW_WR_IMMDLEN(immdlen)); 621 txwr->flowid_len16 = htobe32(V_FW_WR_FLOWID(toep->tid) | 622 V_FW_WR_LEN16(credits)); 623 txwr->lsodisable_to_flags = htobe32(V_TX_ULP_MODE(toep->ulp_mode) | 624 V_TX_ULP_SUBMODE(ulp_submode) | V_TX_URG(0) | V_TX_SHOVE(shove)); 625 txwr->plen = htobe32(plen); 626 627 if (txalign > 0) { 628 struct tcpcb *tp = intotcpcb(toep->inp); 629 630 if (plen < 2 * tp->t_maxseg || is_10G_port(toep->vi->pi)) 631 txwr->lsodisable_to_flags |= 632 htobe32(F_FW_OFLD_TX_DATA_WR_LSODISABLE); 633 else 634 txwr->lsodisable_to_flags |= 635 htobe32(F_FW_OFLD_TX_DATA_WR_ALIGNPLD | 636 (tp->t_flags & TF_NODELAY ? 0 : 637 F_FW_OFLD_TX_DATA_WR_ALIGNPLDSHOVE)); 638 } 639 } 640 641 /* 642 * Generate a DSGL from a starting mbuf. The total number of segments and the 643 * maximum segments in any one mbuf are provided. 
644 */ 645 static void 646 write_tx_sgl(void *dst, struct mbuf *start, struct mbuf *stop, int nsegs, int n) 647 { 648 struct mbuf *m; 649 struct ulptx_sgl *usgl = dst; 650 int i, j, rc; 651 struct sglist sg; 652 struct sglist_seg segs[n]; 653 654 KASSERT(nsegs > 0, ("%s: nsegs 0", __func__)); 655 656 sglist_init(&sg, n, segs); 657 usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) | 658 V_ULPTX_NSGE(nsegs)); 659 660 i = -1; 661 for (m = start; m != stop; m = m->m_next) { 662 if (IS_AIOTX_MBUF(m)) 663 rc = sglist_append_vmpages(&sg, aiotx_mbuf_pages(m), 664 aiotx_mbuf_pgoff(m), m->m_len); 665 else 666 rc = sglist_append(&sg, mtod(m, void *), m->m_len); 667 if (__predict_false(rc != 0)) 668 panic("%s: sglist_append %d", __func__, rc); 669 670 for (j = 0; j < sg.sg_nseg; i++, j++) { 671 if (i < 0) { 672 usgl->len0 = htobe32(segs[j].ss_len); 673 usgl->addr0 = htobe64(segs[j].ss_paddr); 674 } else { 675 usgl->sge[i / 2].len[i & 1] = 676 htobe32(segs[j].ss_len); 677 usgl->sge[i / 2].addr[i & 1] = 678 htobe64(segs[j].ss_paddr); 679 } 680 #ifdef INVARIANTS 681 nsegs--; 682 #endif 683 } 684 sglist_reset(&sg); 685 } 686 if (i & 1) 687 usgl->sge[i / 2].len[1] = htobe32(0); 688 KASSERT(nsegs == 0, ("%s: nsegs %d, start %p, stop %p", 689 __func__, nsegs, start, stop)); 690 } 691 692 /* 693 * Max number of SGL entries an offload tx work request can have. This is 41 694 * (1 + 40) for a full 512B work request. 695 * fw_ofld_tx_data_wr(16B) + ulptx_sgl(16B, 1) + ulptx_sge_pair(480B, 40) 696 */ 697 #define OFLD_SGL_LEN (41) 698 699 /* 700 * Send data and/or a FIN to the peer. 701 * 702 * The socket's so_snd buffer consists of a stream of data starting with sb_mb 703 * and linked together with m_next. sb_sndptr, if set, is the last mbuf that 704 * was transmitted. 705 * 706 * drop indicates the number of bytes that should be dropped from the head of 707 * the send buffer. It is an optimization that lets do_fw4_ack avoid creating 708 * contention on the send buffer lock (before this change it used to do 709 * sowwakeup and then t4_push_frames right after that when recovering from tx 710 * stalls). When drop is set this function MUST drop the bytes and wake up any 711 * writers. 
712 */ 713 void 714 t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop) 715 { 716 struct mbuf *sndptr, *m, *sb_sndptr; 717 struct fw_ofld_tx_data_wr *txwr; 718 struct wrqe *wr; 719 u_int plen, nsegs, credits, max_imm, max_nsegs, max_nsegs_1mbuf; 720 struct inpcb *inp = toep->inp; 721 struct tcpcb *tp = intotcpcb(inp); 722 struct socket *so = inp->inp_socket; 723 struct sockbuf *sb = &so->so_snd; 724 int tx_credits, shove, compl, sowwakeup; 725 struct ofld_tx_sdesc *txsd; 726 bool aiotx_mbuf_seen; 727 728 INP_WLOCK_ASSERT(inp); 729 KASSERT(toep->flags & TPF_FLOWC_WR_SENT, 730 ("%s: flowc_wr not sent for tid %u.", __func__, toep->tid)); 731 732 KASSERT(toep->ulp_mode == ULP_MODE_NONE || 733 toep->ulp_mode == ULP_MODE_TCPDDP || 734 toep->ulp_mode == ULP_MODE_TLS || 735 toep->ulp_mode == ULP_MODE_RDMA, 736 ("%s: ulp_mode %u for toep %p", __func__, toep->ulp_mode, toep)); 737 738 #ifdef VERBOSE_TRACES 739 CTR4(KTR_CXGBE, "%s: tid %d toep flags %#x tp flags %#x drop %d", 740 __func__, toep->tid, toep->flags, tp->t_flags); 741 #endif 742 if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN)) 743 return; 744 745 #ifdef RATELIMIT 746 if (__predict_false(inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) && 747 (update_tx_rate_limit(sc, toep, so->so_max_pacing_rate) == 0)) { 748 inp->inp_flags2 &= ~INP_RATE_LIMIT_CHANGED; 749 } 750 #endif 751 752 /* 753 * This function doesn't resume by itself. Someone else must clear the 754 * flag and call this function. 755 */ 756 if (__predict_false(toep->flags & TPF_TX_SUSPENDED)) { 757 KASSERT(drop == 0, 758 ("%s: drop (%d) != 0 but tx is suspended", __func__, drop)); 759 return; 760 } 761 762 txsd = &toep->txsd[toep->txsd_pidx]; 763 do { 764 tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS); 765 max_imm = max_imm_payload(tx_credits); 766 max_nsegs = max_dsgl_nsegs(tx_credits); 767 768 SOCKBUF_LOCK(sb); 769 sowwakeup = drop; 770 if (drop) { 771 sbdrop_locked(sb, drop); 772 drop = 0; 773 } 774 sb_sndptr = sb->sb_sndptr; 775 sndptr = sb_sndptr ? 
sb_sndptr->m_next : sb->sb_mb; 776 plen = 0; 777 nsegs = 0; 778 max_nsegs_1mbuf = 0; /* max # of SGL segments in any one mbuf */ 779 aiotx_mbuf_seen = false; 780 for (m = sndptr; m != NULL; m = m->m_next) { 781 int n; 782 783 if (IS_AIOTX_MBUF(m)) 784 n = sglist_count_vmpages(aiotx_mbuf_pages(m), 785 aiotx_mbuf_pgoff(m), m->m_len); 786 else 787 n = sglist_count(mtod(m, void *), m->m_len); 788 789 nsegs += n; 790 plen += m->m_len; 791 792 /* This mbuf sent us _over_ the nsegs limit, back out */ 793 if (plen > max_imm && nsegs > max_nsegs) { 794 nsegs -= n; 795 plen -= m->m_len; 796 if (plen == 0) { 797 /* Too few credits */ 798 toep->flags |= TPF_TX_SUSPENDED; 799 if (sowwakeup) { 800 if (!TAILQ_EMPTY( 801 &toep->aiotx_jobq)) 802 t4_aiotx_queue_toep( 803 toep); 804 sowwakeup_locked(so); 805 } else 806 SOCKBUF_UNLOCK(sb); 807 SOCKBUF_UNLOCK_ASSERT(sb); 808 return; 809 } 810 break; 811 } 812 813 if (IS_AIOTX_MBUF(m)) 814 aiotx_mbuf_seen = true; 815 if (max_nsegs_1mbuf < n) 816 max_nsegs_1mbuf = n; 817 sb_sndptr = m; /* new sb->sb_sndptr if all goes well */ 818 819 /* This mbuf put us right at the max_nsegs limit */ 820 if (plen > max_imm && nsegs == max_nsegs) { 821 m = m->m_next; 822 break; 823 } 824 } 825 826 if (sbused(sb) > sb->sb_hiwat * 5 / 8 && 827 toep->plen_nocompl + plen >= sb->sb_hiwat / 4) 828 compl = 1; 829 else 830 compl = 0; 831 832 if (sb->sb_flags & SB_AUTOSIZE && 833 V_tcp_do_autosndbuf && 834 sb->sb_hiwat < V_tcp_autosndbuf_max && 835 sbused(sb) >= sb->sb_hiwat * 7 / 8) { 836 int newsize = min(sb->sb_hiwat + V_tcp_autosndbuf_inc, 837 V_tcp_autosndbuf_max); 838 839 if (!sbreserve_locked(sb, newsize, so, NULL)) 840 sb->sb_flags &= ~SB_AUTOSIZE; 841 else 842 sowwakeup = 1; /* room available */ 843 } 844 if (sowwakeup) { 845 if (!TAILQ_EMPTY(&toep->aiotx_jobq)) 846 t4_aiotx_queue_toep(toep); 847 sowwakeup_locked(so); 848 } else 849 SOCKBUF_UNLOCK(sb); 850 SOCKBUF_UNLOCK_ASSERT(sb); 851 852 /* nothing to send */ 853 if (plen == 0) { 854 KASSERT(m == NULL, 855 ("%s: nothing to send, but m != NULL", __func__)); 856 break; 857 } 858 859 if (__predict_false(toep->flags & TPF_FIN_SENT)) 860 panic("%s: excess tx.", __func__); 861 862 shove = m == NULL && !(tp->t_flags & TF_MORETOCOME); 863 if (plen <= max_imm && !aiotx_mbuf_seen) { 864 865 /* Immediate data tx */ 866 867 wr = alloc_wrqe(roundup2(sizeof(*txwr) + plen, 16), 868 toep->ofld_txq); 869 if (wr == NULL) { 870 /* XXX: how will we recover from this? */ 871 toep->flags |= TPF_TX_SUSPENDED; 872 return; 873 } 874 txwr = wrtod(wr); 875 credits = howmany(wr->wr_len, 16); 876 write_tx_wr(txwr, toep, plen, plen, credits, shove, 0, 877 sc->tt.tx_align); 878 m_copydata(sndptr, 0, plen, (void *)(txwr + 1)); 879 nsegs = 0; 880 } else { 881 int wr_len; 882 883 /* DSGL tx */ 884 885 wr_len = sizeof(*txwr) + sizeof(struct ulptx_sgl) + 886 ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8; 887 wr = alloc_wrqe(roundup2(wr_len, 16), toep->ofld_txq); 888 if (wr == NULL) { 889 /* XXX: how will we recover from this? 
*/ 890 toep->flags |= TPF_TX_SUSPENDED; 891 return; 892 } 893 txwr = wrtod(wr); 894 credits = howmany(wr_len, 16); 895 write_tx_wr(txwr, toep, 0, plen, credits, shove, 0, 896 sc->tt.tx_align); 897 write_tx_sgl(txwr + 1, sndptr, m, nsegs, 898 max_nsegs_1mbuf); 899 if (wr_len & 0xf) { 900 uint64_t *pad = (uint64_t *) 901 ((uintptr_t)txwr + wr_len); 902 *pad = 0; 903 } 904 } 905 906 KASSERT(toep->tx_credits >= credits, 907 ("%s: not enough credits", __func__)); 908 909 toep->tx_credits -= credits; 910 toep->tx_nocompl += credits; 911 toep->plen_nocompl += plen; 912 if (toep->tx_credits <= toep->tx_total * 3 / 8 && 913 toep->tx_nocompl >= toep->tx_total / 4) 914 compl = 1; 915 916 if (compl || toep->ulp_mode == ULP_MODE_RDMA) { 917 txwr->op_to_immdlen |= htobe32(F_FW_WR_COMPL); 918 toep->tx_nocompl = 0; 919 toep->plen_nocompl = 0; 920 } 921 922 tp->snd_nxt += plen; 923 tp->snd_max += plen; 924 925 SOCKBUF_LOCK(sb); 926 KASSERT(sb_sndptr, ("%s: sb_sndptr is NULL", __func__)); 927 sb->sb_sndptr = sb_sndptr; 928 SOCKBUF_UNLOCK(sb); 929 930 toep->flags |= TPF_TX_DATA_SENT; 931 if (toep->tx_credits < MIN_OFLD_TX_CREDITS) 932 toep->flags |= TPF_TX_SUSPENDED; 933 934 KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__)); 935 txsd->plen = plen; 936 txsd->tx_credits = credits; 937 txsd++; 938 if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) { 939 toep->txsd_pidx = 0; 940 txsd = &toep->txsd[0]; 941 } 942 toep->txsd_avail--; 943 944 t4_l2t_send(sc, wr, toep->l2te); 945 } while (m != NULL); 946 947 /* Send a FIN if requested, but only if there's no more data to send */ 948 if (m == NULL && toep->flags & TPF_SEND_FIN) 949 t4_close_conn(sc, toep); 950 } 951 952 static inline void 953 rqdrop_locked(struct mbufq *q, int plen) 954 { 955 struct mbuf *m; 956 957 while (plen > 0) { 958 m = mbufq_dequeue(q); 959 960 /* Too many credits. */ 961 MPASS(m != NULL); 962 M_ASSERTPKTHDR(m); 963 964 /* Partial credits. */ 965 MPASS(plen >= m->m_pkthdr.len); 966 967 plen -= m->m_pkthdr.len; 968 m_freem(m); 969 } 970 } 971 972 void 973 t4_push_pdus(struct adapter *sc, struct toepcb *toep, int drop) 974 { 975 struct mbuf *sndptr, *m; 976 struct fw_ofld_tx_data_wr *txwr; 977 struct wrqe *wr; 978 u_int plen, nsegs, credits, max_imm, max_nsegs, max_nsegs_1mbuf; 979 u_int adjusted_plen, ulp_submode; 980 struct inpcb *inp = toep->inp; 981 struct tcpcb *tp = intotcpcb(inp); 982 int tx_credits, shove; 983 struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx]; 984 struct mbufq *pduq = &toep->ulp_pduq; 985 static const u_int ulp_extra_len[] = {0, 4, 4, 8}; 986 987 INP_WLOCK_ASSERT(inp); 988 KASSERT(toep->flags & TPF_FLOWC_WR_SENT, 989 ("%s: flowc_wr not sent for tid %u.", __func__, toep->tid)); 990 KASSERT(toep->ulp_mode == ULP_MODE_ISCSI, 991 ("%s: ulp_mode %u for toep %p", __func__, toep->ulp_mode, toep)); 992 993 if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN)) 994 return; 995 996 /* 997 * This function doesn't resume by itself. Someone else must clear the 998 * flag and call this function. 
999 */ 1000 if (__predict_false(toep->flags & TPF_TX_SUSPENDED)) { 1001 KASSERT(drop == 0, 1002 ("%s: drop (%d) != 0 but tx is suspended", __func__, drop)); 1003 return; 1004 } 1005 1006 if (drop) 1007 rqdrop_locked(&toep->ulp_pdu_reclaimq, drop); 1008 1009 while ((sndptr = mbufq_first(pduq)) != NULL) { 1010 M_ASSERTPKTHDR(sndptr); 1011 1012 tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS); 1013 max_imm = max_imm_payload(tx_credits); 1014 max_nsegs = max_dsgl_nsegs(tx_credits); 1015 1016 plen = 0; 1017 nsegs = 0; 1018 max_nsegs_1mbuf = 0; /* max # of SGL segments in any one mbuf */ 1019 for (m = sndptr; m != NULL; m = m->m_next) { 1020 int n = sglist_count(mtod(m, void *), m->m_len); 1021 1022 nsegs += n; 1023 plen += m->m_len; 1024 1025 /* 1026 * This mbuf would send us _over_ the nsegs limit. 1027 * Suspend tx because the PDU can't be sent out. 1028 */ 1029 if (plen > max_imm && nsegs > max_nsegs) { 1030 toep->flags |= TPF_TX_SUSPENDED; 1031 return; 1032 } 1033 1034 if (max_nsegs_1mbuf < n) 1035 max_nsegs_1mbuf = n; 1036 } 1037 1038 if (__predict_false(toep->flags & TPF_FIN_SENT)) 1039 panic("%s: excess tx.", __func__); 1040 1041 /* 1042 * We have a PDU to send. All of it goes out in one WR so 'm' 1043 * is NULL. A PDU's length is always a multiple of 4. 1044 */ 1045 MPASS(m == NULL); 1046 MPASS((plen & 3) == 0); 1047 MPASS(sndptr->m_pkthdr.len == plen); 1048 1049 shove = !(tp->t_flags & TF_MORETOCOME); 1050 ulp_submode = mbuf_ulp_submode(sndptr); 1051 MPASS(ulp_submode < nitems(ulp_extra_len)); 1052 1053 /* 1054 * plen doesn't include header and data digests, which are 1055 * generated and inserted in the right places by the TOE, but 1056 * they do occupy TCP sequence space and need to be accounted 1057 * for. 1058 */ 1059 adjusted_plen = plen + ulp_extra_len[ulp_submode]; 1060 if (plen <= max_imm) { 1061 1062 /* Immediate data tx */ 1063 1064 wr = alloc_wrqe(roundup2(sizeof(*txwr) + plen, 16), 1065 toep->ofld_txq); 1066 if (wr == NULL) { 1067 /* XXX: how will we recover from this? */ 1068 toep->flags |= TPF_TX_SUSPENDED; 1069 return; 1070 } 1071 txwr = wrtod(wr); 1072 credits = howmany(wr->wr_len, 16); 1073 write_tx_wr(txwr, toep, plen, adjusted_plen, credits, 1074 shove, ulp_submode, sc->tt.tx_align); 1075 m_copydata(sndptr, 0, plen, (void *)(txwr + 1)); 1076 nsegs = 0; 1077 } else { 1078 int wr_len; 1079 1080 /* DSGL tx */ 1081 wr_len = sizeof(*txwr) + sizeof(struct ulptx_sgl) + 1082 ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8; 1083 wr = alloc_wrqe(roundup2(wr_len, 16), toep->ofld_txq); 1084 if (wr == NULL) { 1085 /* XXX: how will we recover from this? 
*/ 1086 toep->flags |= TPF_TX_SUSPENDED; 1087 return; 1088 } 1089 txwr = wrtod(wr); 1090 credits = howmany(wr_len, 16); 1091 write_tx_wr(txwr, toep, 0, adjusted_plen, credits, 1092 shove, ulp_submode, sc->tt.tx_align); 1093 write_tx_sgl(txwr + 1, sndptr, m, nsegs, 1094 max_nsegs_1mbuf); 1095 if (wr_len & 0xf) { 1096 uint64_t *pad = (uint64_t *) 1097 ((uintptr_t)txwr + wr_len); 1098 *pad = 0; 1099 } 1100 } 1101 1102 KASSERT(toep->tx_credits >= credits, 1103 ("%s: not enough credits", __func__)); 1104 1105 m = mbufq_dequeue(pduq); 1106 MPASS(m == sndptr); 1107 mbufq_enqueue(&toep->ulp_pdu_reclaimq, m); 1108 1109 toep->tx_credits -= credits; 1110 toep->tx_nocompl += credits; 1111 toep->plen_nocompl += plen; 1112 if (toep->tx_credits <= toep->tx_total * 3 / 8 && 1113 toep->tx_nocompl >= toep->tx_total / 4) { 1114 txwr->op_to_immdlen |= htobe32(F_FW_WR_COMPL); 1115 toep->tx_nocompl = 0; 1116 toep->plen_nocompl = 0; 1117 } 1118 1119 tp->snd_nxt += adjusted_plen; 1120 tp->snd_max += adjusted_plen; 1121 1122 toep->flags |= TPF_TX_DATA_SENT; 1123 if (toep->tx_credits < MIN_OFLD_TX_CREDITS) 1124 toep->flags |= TPF_TX_SUSPENDED; 1125 1126 KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__)); 1127 txsd->plen = plen; 1128 txsd->tx_credits = credits; 1129 txsd++; 1130 if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) { 1131 toep->txsd_pidx = 0; 1132 txsd = &toep->txsd[0]; 1133 } 1134 toep->txsd_avail--; 1135 1136 t4_l2t_send(sc, wr, toep->l2te); 1137 } 1138 1139 /* Send a FIN if requested, but only if there are no more PDUs to send */ 1140 if (mbufq_first(pduq) == NULL && toep->flags & TPF_SEND_FIN) 1141 t4_close_conn(sc, toep); 1142 } 1143 1144 int 1145 t4_tod_output(struct toedev *tod, struct tcpcb *tp) 1146 { 1147 struct adapter *sc = tod->tod_softc; 1148 #ifdef INVARIANTS 1149 struct inpcb *inp = tp->t_inpcb; 1150 #endif 1151 struct toepcb *toep = tp->t_toe; 1152 1153 INP_WLOCK_ASSERT(inp); 1154 KASSERT((inp->inp_flags & INP_DROPPED) == 0, 1155 ("%s: inp %p dropped.", __func__, inp)); 1156 KASSERT(toep != NULL, ("%s: toep is NULL", __func__)); 1157 1158 if (toep->ulp_mode == ULP_MODE_ISCSI) 1159 t4_push_pdus(sc, toep, 0); 1160 else if (tls_tx_key(toep)) 1161 t4_push_tls_records(sc, toep, 0); 1162 else 1163 t4_push_frames(sc, toep, 0); 1164 1165 return (0); 1166 } 1167 1168 int 1169 t4_send_fin(struct toedev *tod, struct tcpcb *tp) 1170 { 1171 struct adapter *sc = tod->tod_softc; 1172 #ifdef INVARIANTS 1173 struct inpcb *inp = tp->t_inpcb; 1174 #endif 1175 struct toepcb *toep = tp->t_toe; 1176 1177 INP_WLOCK_ASSERT(inp); 1178 KASSERT((inp->inp_flags & INP_DROPPED) == 0, 1179 ("%s: inp %p dropped.", __func__, inp)); 1180 KASSERT(toep != NULL, ("%s: toep is NULL", __func__)); 1181 1182 toep->flags |= TPF_SEND_FIN; 1183 if (tp->t_state >= TCPS_ESTABLISHED) { 1184 if (toep->ulp_mode == ULP_MODE_ISCSI) 1185 t4_push_pdus(sc, toep, 0); 1186 else if (tls_tx_key(toep)) 1187 t4_push_tls_records(sc, toep, 0); 1188 else 1189 t4_push_frames(sc, toep, 0); 1190 } 1191 1192 return (0); 1193 } 1194 1195 int 1196 t4_send_rst(struct toedev *tod, struct tcpcb *tp) 1197 { 1198 struct adapter *sc = tod->tod_softc; 1199 #if defined(INVARIANTS) 1200 struct inpcb *inp = tp->t_inpcb; 1201 #endif 1202 struct toepcb *toep = tp->t_toe; 1203 1204 INP_WLOCK_ASSERT(inp); 1205 KASSERT((inp->inp_flags & INP_DROPPED) == 0, 1206 ("%s: inp %p dropped.", __func__, inp)); 1207 KASSERT(toep != NULL, ("%s: toep is NULL", __func__)); 1208 1209 /* hmmmm */ 1210 KASSERT(toep->flags & TPF_FLOWC_WR_SENT, 1211 ("%s: flowc for tid %u 
[%s] not sent already", 1212 __func__, toep->tid, tcpstates[tp->t_state])); 1213 1214 send_reset(sc, toep, 0); 1215 return (0); 1216 } 1217 1218 /* 1219 * Peer has sent us a FIN. 1220 */ 1221 static int 1222 do_peer_close(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) 1223 { 1224 struct adapter *sc = iq->adapter; 1225 const struct cpl_peer_close *cpl = (const void *)(rss + 1); 1226 unsigned int tid = GET_TID(cpl); 1227 struct toepcb *toep = lookup_tid(sc, tid); 1228 struct inpcb *inp = toep->inp; 1229 struct tcpcb *tp = NULL; 1230 struct socket *so; 1231 #ifdef INVARIANTS 1232 unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); 1233 #endif 1234 1235 KASSERT(opcode == CPL_PEER_CLOSE, 1236 ("%s: unexpected opcode 0x%x", __func__, opcode)); 1237 KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); 1238 1239 if (__predict_false(toep->flags & TPF_SYNQE)) { 1240 #ifdef INVARIANTS 1241 struct synq_entry *synqe = (void *)toep; 1242 1243 INP_WLOCK(synqe->lctx->inp); 1244 if (synqe->flags & TPF_SYNQE_HAS_L2TE) { 1245 KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN, 1246 ("%s: listen socket closed but tid %u not aborted.", 1247 __func__, tid)); 1248 } else { 1249 /* 1250 * do_pass_accept_req is still running and will 1251 * eventually take care of this tid. 1252 */ 1253 } 1254 INP_WUNLOCK(synqe->lctx->inp); 1255 #endif 1256 CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid, 1257 toep, toep->flags); 1258 return (0); 1259 } 1260 1261 KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); 1262 1263 CURVNET_SET(toep->vnet); 1264 INP_INFO_RLOCK(&V_tcbinfo); 1265 INP_WLOCK(inp); 1266 tp = intotcpcb(inp); 1267 1268 CTR5(KTR_CXGBE, "%s: tid %u (%s), toep_flags 0x%x, inp %p", __func__, 1269 tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags, inp); 1270 1271 if (toep->flags & TPF_ABORT_SHUTDOWN) 1272 goto done; 1273 1274 tp->rcv_nxt++; /* FIN */ 1275 1276 so = inp->inp_socket; 1277 if (toep->ulp_mode == ULP_MODE_TCPDDP) { 1278 DDP_LOCK(toep); 1279 if (__predict_false(toep->ddp.flags & 1280 (DDP_BUF0_ACTIVE | DDP_BUF1_ACTIVE))) 1281 handle_ddp_close(toep, tp, cpl->rcv_nxt); 1282 DDP_UNLOCK(toep); 1283 } 1284 socantrcvmore(so); 1285 1286 if (toep->ulp_mode != ULP_MODE_RDMA) { 1287 KASSERT(tp->rcv_nxt == be32toh(cpl->rcv_nxt), 1288 ("%s: rcv_nxt mismatch: %u %u", __func__, tp->rcv_nxt, 1289 be32toh(cpl->rcv_nxt))); 1290 } 1291 1292 switch (tp->t_state) { 1293 case TCPS_SYN_RECEIVED: 1294 tp->t_starttime = ticks; 1295 /* FALLTHROUGH */ 1296 1297 case TCPS_ESTABLISHED: 1298 tp->t_state = TCPS_CLOSE_WAIT; 1299 break; 1300 1301 case TCPS_FIN_WAIT_1: 1302 tp->t_state = TCPS_CLOSING; 1303 break; 1304 1305 case TCPS_FIN_WAIT_2: 1306 tcp_twstart(tp); 1307 INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */ 1308 INP_INFO_RUNLOCK(&V_tcbinfo); 1309 CURVNET_RESTORE(); 1310 1311 INP_WLOCK(inp); 1312 final_cpl_received(toep); 1313 return (0); 1314 1315 default: 1316 log(LOG_ERR, "%s: TID %u received CPL_PEER_CLOSE in state %d\n", 1317 __func__, tid, tp->t_state); 1318 } 1319 done: 1320 INP_WUNLOCK(inp); 1321 INP_INFO_RUNLOCK(&V_tcbinfo); 1322 CURVNET_RESTORE(); 1323 return (0); 1324 } 1325 1326 /* 1327 * Peer has ACK'd our FIN. 
1328 */ 1329 static int 1330 do_close_con_rpl(struct sge_iq *iq, const struct rss_header *rss, 1331 struct mbuf *m) 1332 { 1333 struct adapter *sc = iq->adapter; 1334 const struct cpl_close_con_rpl *cpl = (const void *)(rss + 1); 1335 unsigned int tid = GET_TID(cpl); 1336 struct toepcb *toep = lookup_tid(sc, tid); 1337 struct inpcb *inp = toep->inp; 1338 struct tcpcb *tp = NULL; 1339 struct socket *so = NULL; 1340 #ifdef INVARIANTS 1341 unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); 1342 #endif 1343 1344 KASSERT(opcode == CPL_CLOSE_CON_RPL, 1345 ("%s: unexpected opcode 0x%x", __func__, opcode)); 1346 KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); 1347 KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); 1348 1349 CURVNET_SET(toep->vnet); 1350 INP_INFO_RLOCK(&V_tcbinfo); 1351 INP_WLOCK(inp); 1352 tp = intotcpcb(inp); 1353 1354 CTR4(KTR_CXGBE, "%s: tid %u (%s), toep_flags 0x%x", 1355 __func__, tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags); 1356 1357 if (toep->flags & TPF_ABORT_SHUTDOWN) 1358 goto done; 1359 1360 so = inp->inp_socket; 1361 tp->snd_una = be32toh(cpl->snd_nxt) - 1; /* exclude FIN */ 1362 1363 switch (tp->t_state) { 1364 case TCPS_CLOSING: /* see TCPS_FIN_WAIT_2 in do_peer_close too */ 1365 tcp_twstart(tp); 1366 release: 1367 INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */ 1368 INP_INFO_RUNLOCK(&V_tcbinfo); 1369 CURVNET_RESTORE(); 1370 1371 INP_WLOCK(inp); 1372 final_cpl_received(toep); /* no more CPLs expected */ 1373 1374 return (0); 1375 case TCPS_LAST_ACK: 1376 if (tcp_close(tp)) 1377 INP_WUNLOCK(inp); 1378 goto release; 1379 1380 case TCPS_FIN_WAIT_1: 1381 if (so->so_rcv.sb_state & SBS_CANTRCVMORE) 1382 soisdisconnected(so); 1383 tp->t_state = TCPS_FIN_WAIT_2; 1384 break; 1385 1386 default: 1387 log(LOG_ERR, 1388 "%s: TID %u received CPL_CLOSE_CON_RPL in state %s\n", 1389 __func__, tid, tcpstates[tp->t_state]); 1390 } 1391 done: 1392 INP_WUNLOCK(inp); 1393 INP_INFO_RUNLOCK(&V_tcbinfo); 1394 CURVNET_RESTORE(); 1395 return (0); 1396 } 1397 1398 void 1399 send_abort_rpl(struct adapter *sc, struct sge_wrq *ofld_txq, int tid, 1400 int rst_status) 1401 { 1402 struct wrqe *wr; 1403 struct cpl_abort_rpl *cpl; 1404 1405 wr = alloc_wrqe(sizeof(*cpl), ofld_txq); 1406 if (wr == NULL) { 1407 /* XXX */ 1408 panic("%s: allocation failure.", __func__); 1409 } 1410 cpl = wrtod(wr); 1411 1412 INIT_TP_WR_MIT_CPL(cpl, CPL_ABORT_RPL, tid); 1413 cpl->cmd = rst_status; 1414 1415 t4_wrq_tx(sc, wr); 1416 } 1417 1418 static int 1419 abort_status_to_errno(struct tcpcb *tp, unsigned int abort_reason) 1420 { 1421 switch (abort_reason) { 1422 case CPL_ERR_BAD_SYN: 1423 case CPL_ERR_CONN_RESET: 1424 return (tp->t_state == TCPS_CLOSE_WAIT ? EPIPE : ECONNRESET); 1425 case CPL_ERR_XMIT_TIMEDOUT: 1426 case CPL_ERR_PERSIST_TIMEDOUT: 1427 case CPL_ERR_FINWAIT2_TIMEDOUT: 1428 case CPL_ERR_KEEPALIVE_TIMEDOUT: 1429 return (ETIMEDOUT); 1430 default: 1431 return (EIO); 1432 } 1433 } 1434 1435 /* 1436 * TCP RST from the peer, timeout, or some other such critical error. 
1437 */ 1438 static int 1439 do_abort_req(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) 1440 { 1441 struct adapter *sc = iq->adapter; 1442 const struct cpl_abort_req_rss *cpl = (const void *)(rss + 1); 1443 unsigned int tid = GET_TID(cpl); 1444 struct toepcb *toep = lookup_tid(sc, tid); 1445 struct sge_wrq *ofld_txq = toep->ofld_txq; 1446 struct inpcb *inp; 1447 struct tcpcb *tp; 1448 #ifdef INVARIANTS 1449 unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); 1450 #endif 1451 1452 KASSERT(opcode == CPL_ABORT_REQ_RSS, 1453 ("%s: unexpected opcode 0x%x", __func__, opcode)); 1454 KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); 1455 1456 if (toep->flags & TPF_SYNQE) 1457 return (do_abort_req_synqe(iq, rss, m)); 1458 1459 KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); 1460 1461 if (negative_advice(cpl->status)) { 1462 CTR4(KTR_CXGBE, "%s: negative advice %d for tid %d (0x%x)", 1463 __func__, cpl->status, tid, toep->flags); 1464 return (0); /* Ignore negative advice */ 1465 } 1466 1467 inp = toep->inp; 1468 CURVNET_SET(toep->vnet); 1469 INP_INFO_RLOCK(&V_tcbinfo); /* for tcp_close */ 1470 INP_WLOCK(inp); 1471 1472 tp = intotcpcb(inp); 1473 1474 CTR6(KTR_CXGBE, 1475 "%s: tid %d (%s), toep_flags 0x%x, inp_flags 0x%x, status %d", 1476 __func__, tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags, 1477 inp->inp_flags, cpl->status); 1478 1479 /* 1480 * If we'd initiated an abort earlier the reply to it is responsible for 1481 * cleaning up resources. Otherwise we tear everything down right here 1482 * right now. We owe the T4 a CPL_ABORT_RPL no matter what. 1483 */ 1484 if (toep->flags & TPF_ABORT_SHUTDOWN) { 1485 INP_WUNLOCK(inp); 1486 goto done; 1487 } 1488 toep->flags |= TPF_ABORT_SHUTDOWN; 1489 1490 if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) == 0) { 1491 struct socket *so = inp->inp_socket; 1492 1493 if (so != NULL) 1494 so_error_set(so, abort_status_to_errno(tp, 1495 cpl->status)); 1496 tp = tcp_close(tp); 1497 if (tp == NULL) 1498 INP_WLOCK(inp); /* re-acquire */ 1499 } 1500 1501 final_cpl_received(toep); 1502 done: 1503 INP_INFO_RUNLOCK(&V_tcbinfo); 1504 CURVNET_RESTORE(); 1505 send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST); 1506 return (0); 1507 } 1508 1509 /* 1510 * Reply to the CPL_ABORT_REQ (send_reset) 1511 */ 1512 static int 1513 do_abort_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) 1514 { 1515 struct adapter *sc = iq->adapter; 1516 const struct cpl_abort_rpl_rss *cpl = (const void *)(rss + 1); 1517 unsigned int tid = GET_TID(cpl); 1518 struct toepcb *toep = lookup_tid(sc, tid); 1519 struct inpcb *inp = toep->inp; 1520 #ifdef INVARIANTS 1521 unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); 1522 #endif 1523 1524 KASSERT(opcode == CPL_ABORT_RPL_RSS, 1525 ("%s: unexpected opcode 0x%x", __func__, opcode)); 1526 KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); 1527 1528 if (toep->flags & TPF_SYNQE) 1529 return (do_abort_rpl_synqe(iq, rss, m)); 1530 1531 KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); 1532 1533 CTR5(KTR_CXGBE, "%s: tid %u, toep %p, inp %p, status %d", 1534 __func__, tid, toep, inp, cpl->status); 1535 1536 KASSERT(toep->flags & TPF_ABORT_SHUTDOWN, 1537 ("%s: wasn't expecting abort reply", __func__)); 1538 1539 INP_WLOCK(inp); 1540 final_cpl_received(toep); 1541 1542 return (0); 1543 } 1544 1545 static int 1546 do_rx_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) 1547 { 1548 struct adapter *sc = iq->adapter; 
1549 const struct cpl_rx_data *cpl = mtod(m, const void *); 1550 unsigned int tid = GET_TID(cpl); 1551 struct toepcb *toep = lookup_tid(sc, tid); 1552 struct inpcb *inp = toep->inp; 1553 struct tcpcb *tp; 1554 struct socket *so; 1555 struct sockbuf *sb; 1556 int len; 1557 uint32_t ddp_placed = 0; 1558 1559 if (__predict_false(toep->flags & TPF_SYNQE)) { 1560 #ifdef INVARIANTS 1561 struct synq_entry *synqe = (void *)toep; 1562 1563 INP_WLOCK(synqe->lctx->inp); 1564 if (synqe->flags & TPF_SYNQE_HAS_L2TE) { 1565 KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN, 1566 ("%s: listen socket closed but tid %u not aborted.", 1567 __func__, tid)); 1568 } else { 1569 /* 1570 * do_pass_accept_req is still running and will 1571 * eventually take care of this tid. 1572 */ 1573 } 1574 INP_WUNLOCK(synqe->lctx->inp); 1575 #endif 1576 CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid, 1577 toep, toep->flags); 1578 m_freem(m); 1579 return (0); 1580 } 1581 1582 KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); 1583 1584 /* strip off CPL header */ 1585 m_adj(m, sizeof(*cpl)); 1586 len = m->m_pkthdr.len; 1587 1588 INP_WLOCK(inp); 1589 if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) { 1590 CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x", 1591 __func__, tid, len, inp->inp_flags); 1592 INP_WUNLOCK(inp); 1593 m_freem(m); 1594 return (0); 1595 } 1596 1597 tp = intotcpcb(inp); 1598 1599 if (__predict_false(tp->rcv_nxt != be32toh(cpl->seq))) 1600 ddp_placed = be32toh(cpl->seq) - tp->rcv_nxt; 1601 1602 tp->rcv_nxt += len; 1603 if (tp->rcv_wnd < len) { 1604 KASSERT(toep->ulp_mode == ULP_MODE_RDMA, 1605 ("%s: negative window size", __func__)); 1606 } 1607 1608 tp->rcv_wnd -= len; 1609 tp->t_rcvtime = ticks; 1610 1611 if (toep->ulp_mode == ULP_MODE_TCPDDP) 1612 DDP_LOCK(toep); 1613 so = inp_inpcbtosocket(inp); 1614 sb = &so->so_rcv; 1615 SOCKBUF_LOCK(sb); 1616 1617 if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) { 1618 CTR3(KTR_CXGBE, "%s: tid %u, excess rx (%d bytes)", 1619 __func__, tid, len); 1620 m_freem(m); 1621 SOCKBUF_UNLOCK(sb); 1622 if (toep->ulp_mode == ULP_MODE_TCPDDP) 1623 DDP_UNLOCK(toep); 1624 INP_WUNLOCK(inp); 1625 1626 CURVNET_SET(toep->vnet); 1627 INP_INFO_RLOCK(&V_tcbinfo); 1628 INP_WLOCK(inp); 1629 tp = tcp_drop(tp, ECONNRESET); 1630 if (tp) 1631 INP_WUNLOCK(inp); 1632 INP_INFO_RUNLOCK(&V_tcbinfo); 1633 CURVNET_RESTORE(); 1634 1635 return (0); 1636 } 1637 1638 /* receive buffer autosize */ 1639 MPASS(toep->vnet == so->so_vnet); 1640 CURVNET_SET(toep->vnet); 1641 if (sb->sb_flags & SB_AUTOSIZE && 1642 V_tcp_do_autorcvbuf && 1643 sb->sb_hiwat < V_tcp_autorcvbuf_max && 1644 len > (sbspace(sb) / 8 * 7)) { 1645 unsigned int hiwat = sb->sb_hiwat; 1646 unsigned int newsize = min(hiwat + V_tcp_autorcvbuf_inc, 1647 V_tcp_autorcvbuf_max); 1648 1649 if (!sbreserve_locked(sb, newsize, so, NULL)) 1650 sb->sb_flags &= ~SB_AUTOSIZE; 1651 else 1652 toep->rx_credits += newsize - hiwat; 1653 } 1654 1655 if (toep->ulp_mode == ULP_MODE_TCPDDP) { 1656 int changed = !(toep->ddp.flags & DDP_ON) ^ cpl->ddp_off; 1657 1658 if (toep->ddp.waiting_count != 0 || toep->ddp.active_count != 0) 1659 CTR3(KTR_CXGBE, "%s: tid %u, non-ddp rx (%d bytes)", 1660 __func__, tid, len); 1661 1662 if (changed) { 1663 if (toep->ddp.flags & DDP_SC_REQ) 1664 toep->ddp.flags ^= DDP_ON | DDP_SC_REQ; 1665 else { 1666 KASSERT(cpl->ddp_off == 1, 1667 ("%s: DDP switched on by itself.", 1668 __func__)); 1669 1670 /* Fell out of DDP mode */ 1671 toep->ddp.flags &= ~DDP_ON; 1672 CTR1(KTR_CXGBE, "%s: fell out of DDP mode", 
1673 __func__); 1674 1675 insert_ddp_data(toep, ddp_placed); 1676 } 1677 } 1678 1679 if (toep->ddp.flags & DDP_ON) { 1680 /* 1681 * CPL_RX_DATA with DDP on can only be an indicate. 1682 * Start posting queued AIO requests via DDP. The 1683 * payload that arrived in this indicate is appended 1684 * to the socket buffer as usual. 1685 */ 1686 handle_ddp_indicate(toep); 1687 } 1688 } 1689 1690 KASSERT(toep->sb_cc >= sbused(sb), 1691 ("%s: sb %p has more data (%d) than last time (%d).", 1692 __func__, sb, sbused(sb), toep->sb_cc)); 1693 toep->rx_credits += toep->sb_cc - sbused(sb); 1694 sbappendstream_locked(sb, m, 0); 1695 toep->sb_cc = sbused(sb); 1696 if (toep->rx_credits > 0 && toep->sb_cc + tp->rcv_wnd < sb->sb_lowat) { 1697 int credits; 1698 1699 credits = send_rx_credits(sc, toep, toep->rx_credits); 1700 toep->rx_credits -= credits; 1701 tp->rcv_wnd += credits; 1702 tp->rcv_adv += credits; 1703 } 1704 1705 if (toep->ulp_mode == ULP_MODE_TCPDDP && toep->ddp.waiting_count > 0 && 1706 sbavail(sb) != 0) { 1707 CTR2(KTR_CXGBE, "%s: tid %u queueing AIO task", __func__, 1708 tid); 1709 ddp_queue_toep(toep); 1710 } 1711 sorwakeup_locked(so); 1712 SOCKBUF_UNLOCK_ASSERT(sb); 1713 if (toep->ulp_mode == ULP_MODE_TCPDDP) 1714 DDP_UNLOCK(toep); 1715 1716 INP_WUNLOCK(inp); 1717 CURVNET_RESTORE(); 1718 return (0); 1719 } 1720 1721 #define S_CPL_FW4_ACK_OPCODE 24 1722 #define M_CPL_FW4_ACK_OPCODE 0xff 1723 #define V_CPL_FW4_ACK_OPCODE(x) ((x) << S_CPL_FW4_ACK_OPCODE) 1724 #define G_CPL_FW4_ACK_OPCODE(x) \ 1725 (((x) >> S_CPL_FW4_ACK_OPCODE) & M_CPL_FW4_ACK_OPCODE) 1726 1727 #define S_CPL_FW4_ACK_FLOWID 0 1728 #define M_CPL_FW4_ACK_FLOWID 0xffffff 1729 #define V_CPL_FW4_ACK_FLOWID(x) ((x) << S_CPL_FW4_ACK_FLOWID) 1730 #define G_CPL_FW4_ACK_FLOWID(x) \ 1731 (((x) >> S_CPL_FW4_ACK_FLOWID) & M_CPL_FW4_ACK_FLOWID) 1732 1733 #define S_CPL_FW4_ACK_CR 24 1734 #define M_CPL_FW4_ACK_CR 0xff 1735 #define V_CPL_FW4_ACK_CR(x) ((x) << S_CPL_FW4_ACK_CR) 1736 #define G_CPL_FW4_ACK_CR(x) (((x) >> S_CPL_FW4_ACK_CR) & M_CPL_FW4_ACK_CR) 1737 1738 #define S_CPL_FW4_ACK_SEQVAL 0 1739 #define M_CPL_FW4_ACK_SEQVAL 0x1 1740 #define V_CPL_FW4_ACK_SEQVAL(x) ((x) << S_CPL_FW4_ACK_SEQVAL) 1741 #define G_CPL_FW4_ACK_SEQVAL(x) \ 1742 (((x) >> S_CPL_FW4_ACK_SEQVAL) & M_CPL_FW4_ACK_SEQVAL) 1743 #define F_CPL_FW4_ACK_SEQVAL V_CPL_FW4_ACK_SEQVAL(1U) 1744 1745 static int 1746 do_fw4_ack(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) 1747 { 1748 struct adapter *sc = iq->adapter; 1749 const struct cpl_fw4_ack *cpl = (const void *)(rss + 1); 1750 unsigned int tid = G_CPL_FW4_ACK_FLOWID(be32toh(OPCODE_TID(cpl))); 1751 struct toepcb *toep = lookup_tid(sc, tid); 1752 struct inpcb *inp; 1753 struct tcpcb *tp; 1754 struct socket *so; 1755 uint8_t credits = cpl->credits; 1756 struct ofld_tx_sdesc *txsd; 1757 int plen; 1758 #ifdef INVARIANTS 1759 unsigned int opcode = G_CPL_FW4_ACK_OPCODE(be32toh(OPCODE_TID(cpl))); 1760 #endif 1761 1762 /* 1763 * Very unusual case: we'd sent a flowc + abort_req for a synq entry and 1764 * now this comes back carrying the credits for the flowc. 
1765 */ 1766 if (__predict_false(toep->flags & TPF_SYNQE)) { 1767 KASSERT(toep->flags & TPF_ABORT_SHUTDOWN, 1768 ("%s: credits for a synq entry %p", __func__, toep)); 1769 return (0); 1770 } 1771 1772 inp = toep->inp; 1773 1774 KASSERT(opcode == CPL_FW4_ACK, 1775 ("%s: unexpected opcode 0x%x", __func__, opcode)); 1776 KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); 1777 KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); 1778 1779 INP_WLOCK(inp); 1780 1781 if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN)) { 1782 INP_WUNLOCK(inp); 1783 return (0); 1784 } 1785 1786 KASSERT((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) == 0, 1787 ("%s: inp_flags 0x%x", __func__, inp->inp_flags)); 1788 1789 tp = intotcpcb(inp); 1790 1791 if (cpl->flags & CPL_FW4_ACK_FLAGS_SEQVAL) { 1792 tcp_seq snd_una = be32toh(cpl->snd_una); 1793 1794 #ifdef INVARIANTS 1795 if (__predict_false(SEQ_LT(snd_una, tp->snd_una))) { 1796 log(LOG_ERR, 1797 "%s: unexpected seq# %x for TID %u, snd_una %x\n", 1798 __func__, snd_una, toep->tid, tp->snd_una); 1799 } 1800 #endif 1801 1802 if (tp->snd_una != snd_una) { 1803 tp->snd_una = snd_una; 1804 tp->ts_recent_age = tcp_ts_getticks(); 1805 } 1806 } 1807 1808 #ifdef VERBOSE_TRACES 1809 CTR3(KTR_CXGBE, "%s: tid %d credits %u", __func__, tid, credits); 1810 #endif 1811 so = inp->inp_socket; 1812 txsd = &toep->txsd[toep->txsd_cidx]; 1813 plen = 0; 1814 while (credits) { 1815 KASSERT(credits >= txsd->tx_credits, 1816 ("%s: too many (or partial) credits", __func__)); 1817 credits -= txsd->tx_credits; 1818 toep->tx_credits += txsd->tx_credits; 1819 plen += txsd->plen; 1820 if (txsd->iv_buffer) { 1821 free(txsd->iv_buffer, M_CXGBE); 1822 txsd->iv_buffer = NULL; 1823 } 1824 txsd++; 1825 toep->txsd_avail++; 1826 KASSERT(toep->txsd_avail <= toep->txsd_total, 1827 ("%s: txsd avail > total", __func__)); 1828 if (__predict_false(++toep->txsd_cidx == toep->txsd_total)) { 1829 txsd = &toep->txsd[0]; 1830 toep->txsd_cidx = 0; 1831 } 1832 } 1833 1834 if (toep->tx_credits == toep->tx_total) { 1835 toep->tx_nocompl = 0; 1836 toep->plen_nocompl = 0; 1837 } 1838 1839 if (toep->flags & TPF_TX_SUSPENDED && 1840 toep->tx_credits >= toep->tx_total / 4) { 1841 #ifdef VERBOSE_TRACES 1842 CTR2(KTR_CXGBE, "%s: tid %d calling t4_push_frames", __func__, 1843 tid); 1844 #endif 1845 toep->flags &= ~TPF_TX_SUSPENDED; 1846 CURVNET_SET(toep->vnet); 1847 if (toep->ulp_mode == ULP_MODE_ISCSI) 1848 t4_push_pdus(sc, toep, plen); 1849 else if (tls_tx_key(toep)) 1850 t4_push_tls_records(sc, toep, plen); 1851 else 1852 t4_push_frames(sc, toep, plen); 1853 CURVNET_RESTORE(); 1854 } else if (plen > 0) { 1855 struct sockbuf *sb = &so->so_snd; 1856 int sbu; 1857 1858 SOCKBUF_LOCK(sb); 1859 sbu = sbused(sb); 1860 if (toep->ulp_mode == ULP_MODE_ISCSI) { 1861 1862 if (__predict_false(sbu > 0)) { 1863 /* 1864 * The data trasmitted before the tid's ULP mode 1865 * changed to ISCSI is still in so_snd. 1866 * Incoming credits should account for so_snd 1867 * first. 
1868 */ 1869 sbdrop_locked(sb, min(sbu, plen)); 1870 plen -= min(sbu, plen); 1871 } 1872 sowwakeup_locked(so); /* unlocks so_snd */ 1873 rqdrop_locked(&toep->ulp_pdu_reclaimq, plen); 1874 } else { 1875 #ifdef VERBOSE_TRACES 1876 CTR3(KTR_CXGBE, "%s: tid %d dropped %d bytes", __func__, 1877 tid, plen); 1878 #endif 1879 sbdrop_locked(sb, plen); 1880 if (tls_tx_key(toep)) { 1881 struct tls_ofld_info *tls_ofld = &toep->tls; 1882 1883 MPASS(tls_ofld->sb_off >= plen); 1884 tls_ofld->sb_off -= plen; 1885 } 1886 if (!TAILQ_EMPTY(&toep->aiotx_jobq)) 1887 t4_aiotx_queue_toep(toep); 1888 sowwakeup_locked(so); /* unlocks so_snd */ 1889 } 1890 SOCKBUF_UNLOCK_ASSERT(sb); 1891 } 1892 1893 INP_WUNLOCK(inp); 1894 1895 return (0); 1896 } 1897 1898 int 1899 do_set_tcb_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) 1900 { 1901 struct adapter *sc = iq->adapter; 1902 const struct cpl_set_tcb_rpl *cpl = (const void *)(rss + 1); 1903 unsigned int tid = GET_TID(cpl); 1904 struct toepcb *toep; 1905 #ifdef INVARIANTS 1906 unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); 1907 #endif 1908 1909 KASSERT(opcode == CPL_SET_TCB_RPL, 1910 ("%s: unexpected opcode 0x%x", __func__, opcode)); 1911 KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); 1912 MPASS(iq != &sc->sge.fwq); 1913 1914 toep = lookup_tid(sc, tid); 1915 if (toep->ulp_mode == ULP_MODE_TCPDDP) { 1916 handle_ddp_tcb_rpl(toep, cpl); 1917 return (0); 1918 } 1919 1920 /* 1921 * TOM and/or other ULPs don't request replies for CPL_SET_TCB or 1922 * CPL_SET_TCB_FIELD requests. This can easily change and when it does 1923 * the dispatch code will go here. 1924 */ 1925 #ifdef INVARIANTS 1926 panic("%s: Unexpected CPL_SET_TCB_RPL for tid %u on iq %p", __func__, 1927 tid, iq); 1928 #else 1929 log(LOG_ERR, "%s: Unexpected CPL_SET_TCB_RPL for tid %u on iq %p\n", 1930 __func__, tid, iq); 1931 #endif 1932 1933 return (0); 1934 } 1935 1936 void 1937 t4_set_tcb_field(struct adapter *sc, struct sge_wrq *wrq, struct toepcb *toep, 1938 uint16_t word, uint64_t mask, uint64_t val, int reply, int cookie) 1939 { 1940 struct wrqe *wr; 1941 struct cpl_set_tcb_field *req; 1942 struct ofld_tx_sdesc *txsd; 1943 1944 MPASS((cookie & ~M_COOKIE) == 0); 1945 1946 wr = alloc_wrqe(sizeof(*req), wrq); 1947 if (wr == NULL) { 1948 /* XXX */ 1949 panic("%s: allocation failure.", __func__); 1950 } 1951 req = wrtod(wr); 1952 1953 INIT_TP_WR_MIT_CPL(req, CPL_SET_TCB_FIELD, toep->tid); 1954 req->reply_ctrl = htobe16(V_QUEUENO(toep->ofld_rxq->iq.abs_id)); 1955 if (reply == 0) 1956 req->reply_ctrl |= htobe16(F_NO_REPLY); 1957 req->word_cookie = htobe16(V_WORD(word) | V_COOKIE(cookie)); 1958 req->mask = htobe64(mask); 1959 req->val = htobe64(val); 1960 if ((wrq->eq.flags & EQ_TYPEMASK) == EQ_OFLD) { 1961 txsd = &toep->txsd[toep->txsd_pidx]; 1962 txsd->tx_credits = howmany(sizeof(*req), 16); 1963 txsd->plen = 0; 1964 KASSERT(toep->tx_credits >= txsd->tx_credits && 1965 toep->txsd_avail > 0, 1966 ("%s: not enough credits (%d)", __func__, 1967 toep->tx_credits)); 1968 toep->tx_credits -= txsd->tx_credits; 1969 if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) 1970 toep->txsd_pidx = 0; 1971 toep->txsd_avail--; 1972 } 1973 1974 t4_wrq_tx(sc, wr); 1975 } 1976 1977 void 1978 t4_init_cpl_io_handlers(void) 1979 { 1980 1981 t4_register_cpl_handler(CPL_PEER_CLOSE, do_peer_close); 1982 t4_register_cpl_handler(CPL_CLOSE_CON_RPL, do_close_con_rpl); 1983 t4_register_cpl_handler(CPL_ABORT_REQ_RSS, do_abort_req); 1984 t4_register_cpl_handler(CPL_ABORT_RPL_RSS, 
void
t4_set_tcb_field(struct adapter *sc, struct sge_wrq *wrq, struct toepcb *toep,
    uint16_t word, uint64_t mask, uint64_t val, int reply, int cookie)
{
	struct wrqe *wr;
	struct cpl_set_tcb_field *req;
	struct ofld_tx_sdesc *txsd;

	MPASS((cookie & ~M_COOKIE) == 0);

	wr = alloc_wrqe(sizeof(*req), wrq);
	if (wr == NULL) {
		/* XXX */
		panic("%s: allocation failure.", __func__);
	}
	req = wrtod(wr);

	INIT_TP_WR_MIT_CPL(req, CPL_SET_TCB_FIELD, toep->tid);
	req->reply_ctrl = htobe16(V_QUEUENO(toep->ofld_rxq->iq.abs_id));
	if (reply == 0)
		req->reply_ctrl |= htobe16(F_NO_REPLY);
	req->word_cookie = htobe16(V_WORD(word) | V_COOKIE(cookie));
	req->mask = htobe64(mask);
	req->val = htobe64(val);
	if ((wrq->eq.flags & EQ_TYPEMASK) == EQ_OFLD) {
		txsd = &toep->txsd[toep->txsd_pidx];
		txsd->tx_credits = howmany(sizeof(*req), 16);
		txsd->plen = 0;
		KASSERT(toep->tx_credits >= txsd->tx_credits &&
		    toep->txsd_avail > 0,
		    ("%s: not enough credits (%d)", __func__,
		    toep->tx_credits));
		toep->tx_credits -= txsd->tx_credits;
		if (__predict_false(++toep->txsd_pidx == toep->txsd_total))
			toep->txsd_pidx = 0;
		toep->txsd_avail--;
	}

	t4_wrq_tx(sc, wr);
}

void
t4_init_cpl_io_handlers(void)
{

	t4_register_cpl_handler(CPL_PEER_CLOSE, do_peer_close);
	t4_register_cpl_handler(CPL_CLOSE_CON_RPL, do_close_con_rpl);
	t4_register_cpl_handler(CPL_ABORT_REQ_RSS, do_abort_req);
	t4_register_cpl_handler(CPL_ABORT_RPL_RSS, do_abort_rpl);
	t4_register_cpl_handler(CPL_RX_DATA, do_rx_data);
	t4_register_cpl_handler(CPL_FW4_ACK, do_fw4_ack);
}

void
t4_uninit_cpl_io_handlers(void)
{

	t4_register_cpl_handler(CPL_PEER_CLOSE, NULL);
	t4_register_cpl_handler(CPL_CLOSE_CON_RPL, NULL);
	t4_register_cpl_handler(CPL_ABORT_REQ_RSS, NULL);
	t4_register_cpl_handler(CPL_ABORT_RPL_RSS, NULL);
	t4_register_cpl_handler(CPL_RX_DATA, NULL);
	t4_register_cpl_handler(CPL_FW4_ACK, NULL);
}

/*
 * Use the 'backend3' field in AIO jobs to store the amount of data
 * sent by the AIO job so far and the 'backend4' field to hold an
 * error that should be reported when the job is completed.
 */
#define	aio_sent	backend3
#define	aio_error	backend4

#define	jobtotid(job)							\
	(((struct toepcb *)(so_sototcpcb((job)->fd_file->f_data)->t_toe))->tid)

static void
free_aiotx_buffer(struct aiotx_buffer *ab)
{
	struct kaiocb *job;
	long status;
	int error;

	if (refcount_release(&ab->refcount) == 0)
		return;

	job = ab->job;
	error = job->aio_error;
	status = job->aio_sent;
	vm_page_unhold_pages(ab->ps.pages, ab->ps.npages);
	free(ab, M_CXGBE);
#ifdef VERBOSE_TRACES
	CTR5(KTR_CXGBE, "%s: tid %d completed %p len %ld, error %d", __func__,
	    jobtotid(job), job, status, error);
#endif
	if (error == ECANCELED && status != 0)
		error = 0;
	if (error == ECANCELED)
		aio_cancel(job);
	else if (error)
		aio_complete(job, -1, error);
	else
		aio_complete(job, status, 0);
}
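
/*
 * External free routine for aiotx mbufs.  Each mbuf built by
 * t4_aiotx_process_job holds a reference on the job's aiotx buffer; that
 * reference is dropped here once the mbuf has been transmitted and freed.
 */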
static void
t4_aiotx_mbuf_free(struct mbuf *m)
{
	struct aiotx_buffer *ab = m->m_ext.ext_arg1;

#ifdef VERBOSE_TRACES
	CTR3(KTR_CXGBE, "%s: completed %d bytes for tid %d", __func__,
	    m->m_len, jobtotid(ab->job));
#endif
	free_aiotx_buffer(ab);
}

/*
 * Hold the buffer backing an AIO request and return an AIO transmit
 * buffer.
 */
static int
hold_aio(struct kaiocb *job)
{
	struct aiotx_buffer *ab;
	struct vmspace *vm;
	vm_map_t map;
	vm_offset_t start, end, pgoff;
	int n;

	MPASS(job->backend1 == NULL);

	/*
	 * The AIO subsystem will cancel and drain all requests before
	 * permitting a process to exit or exec, so p_vmspace should
	 * be stable here.
	 */
	vm = job->userproc->p_vmspace;
	map = &vm->vm_map;
	start = (uintptr_t)job->uaiocb.aio_buf;
	pgoff = start & PAGE_MASK;
	end = round_page(start + job->uaiocb.aio_nbytes);
	start = trunc_page(start);
	n = atop(end - start);

	ab = malloc(sizeof(*ab) + n * sizeof(vm_page_t), M_CXGBE, M_WAITOK |
	    M_ZERO);
	refcount_init(&ab->refcount, 1);
	ab->ps.pages = (vm_page_t *)(ab + 1);
	ab->ps.npages = vm_fault_quick_hold_pages(map, start, end - start,
	    VM_PROT_WRITE, ab->ps.pages, n);
	if (ab->ps.npages < 0) {
		free(ab, M_CXGBE);
		return (EFAULT);
	}

	KASSERT(ab->ps.npages == n,
	    ("hold_aio: page count mismatch: %d vs %d", ab->ps.npages, n));

	ab->ps.offset = pgoff;
	ab->ps.len = job->uaiocb.aio_nbytes;
	ab->job = job;
	job->backend1 = ab;
#ifdef VERBOSE_TRACES
	CTR5(KTR_CXGBE, "%s: tid %d, new pageset %p for job %p, npages %d",
	    __func__, jobtotid(job), &ab->ps, job, ab->ps.npages);
#endif
	return (0);
}
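
/*
 * Try to transmit an AIO write job on an offloaded connection.  The job's
 * user buffer is wired via hold_aio() and queued to the socket as zero-copy
 * external mbufs.  If the socket cannot take more data, the job is put back
 * on the toepcb's aiotx job queue and retried when space opens up.  Called
 * with so_snd locked; the lock is dropped while working and held again on
 * return.
 */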
static void
t4_aiotx_process_job(struct toepcb *toep, struct socket *so, struct kaiocb *job)
{
	struct adapter *sc;
	struct sockbuf *sb;
	struct file *fp;
	struct aiotx_buffer *ab;
	struct inpcb *inp;
	struct tcpcb *tp;
	struct mbuf *m;
	int error;
	bool moretocome, sendmore;

	sc = td_adapter(toep->td);
	sb = &so->so_snd;
	SOCKBUF_UNLOCK(sb);
	fp = job->fd_file;
	ab = job->backend1;
	m = NULL;

#ifdef MAC
	error = mac_socket_check_send(fp->f_cred, so);
	if (error != 0)
		goto out;
#endif

	if (ab == NULL) {
		error = hold_aio(job);
		if (error != 0)
			goto out;
		ab = job->backend1;
	}

	/* Inline sosend_generic(). */

	job->msgsnd = 1;

	error = sblock(sb, SBL_WAIT);
	MPASS(error == 0);

sendanother:
	m = m_get(M_WAITOK, MT_DATA);

	SOCKBUF_LOCK(sb);
	if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
		SOCKBUF_UNLOCK(sb);
		sbunlock(sb);
		if ((so->so_options & SO_NOSIGPIPE) == 0) {
			PROC_LOCK(job->userproc);
			kern_psignal(job->userproc, SIGPIPE);
			PROC_UNLOCK(job->userproc);
		}
		error = EPIPE;
		goto out;
	}
	if (so->so_error) {
		error = so->so_error;
		so->so_error = 0;
		SOCKBUF_UNLOCK(sb);
		sbunlock(sb);
		goto out;
	}
	if ((so->so_state & SS_ISCONNECTED) == 0) {
		SOCKBUF_UNLOCK(sb);
		sbunlock(sb);
		error = ENOTCONN;
		goto out;
	}
	if (sbspace(sb) < sb->sb_lowat) {
		MPASS(job->aio_sent == 0 || !(so->so_state & SS_NBIO));

		/*
		 * Don't block if there is too little room in the socket
		 * buffer.  Instead, requeue the request.
		 */
		if (!aio_set_cancel_function(job, t4_aiotx_cancel)) {
			SOCKBUF_UNLOCK(sb);
			sbunlock(sb);
			error = ECANCELED;
			goto out;
		}
		TAILQ_INSERT_HEAD(&toep->aiotx_jobq, job, list);
		SOCKBUF_UNLOCK(sb);
		sbunlock(sb);
		goto out;
	}

	/*
	 * Write as much data as the socket permits, but no more than
	 * a single sndbuf at a time.
	 */
	m->m_len = sbspace(sb);
	if (m->m_len > ab->ps.len - job->aio_sent) {
		m->m_len = ab->ps.len - job->aio_sent;
		moretocome = false;
	} else
		moretocome = true;
	if (m->m_len > sc->tt.sndbuf) {
		m->m_len = sc->tt.sndbuf;
		sendmore = true;
	} else
		sendmore = false;

	if (!TAILQ_EMPTY(&toep->aiotx_jobq))
		moretocome = true;
	SOCKBUF_UNLOCK(sb);
	MPASS(m->m_len != 0);

	/* Inlined tcp_usr_send(). */

	inp = toep->inp;
	INP_WLOCK(inp);
	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
		INP_WUNLOCK(inp);
		sbunlock(sb);
		error = ECONNRESET;
		goto out;
	}

	refcount_acquire(&ab->refcount);
	m_extadd(m, NULL, ab->ps.len, t4_aiotx_mbuf_free, ab,
	    (void *)(uintptr_t)job->aio_sent, 0, EXT_NET_DRV);
	m->m_ext.ext_flags |= EXT_FLAG_AIOTX;
	job->aio_sent += m->m_len;

	sbappendstream(sb, m, 0);
	m = NULL;

	if (!(inp->inp_flags & INP_DROPPED)) {
		tp = intotcpcb(inp);
		if (moretocome)
			tp->t_flags |= TF_MORETOCOME;
		error = tp->t_fb->tfb_tcp_output(tp);
		if (moretocome)
			tp->t_flags &= ~TF_MORETOCOME;
	}

	INP_WUNLOCK(inp);
	if (sendmore)
		goto sendanother;
	sbunlock(sb);

	if (error)
		goto out;

	/*
	 * If this is a blocking socket and the request has not been
	 * fully completed, requeue it until the socket is ready
	 * again.
	 */
	if (job->aio_sent < job->uaiocb.aio_nbytes &&
	    !(so->so_state & SS_NBIO)) {
		SOCKBUF_LOCK(sb);
		if (!aio_set_cancel_function(job, t4_aiotx_cancel)) {
			SOCKBUF_UNLOCK(sb);
			error = ECANCELED;
			goto out;
		}
		TAILQ_INSERT_HEAD(&toep->aiotx_jobq, job, list);
		return;
	}

	/*
	 * If the request will not be requeued, drop a reference on
	 * the aiotx buffer.  Any mbufs in flight should still
	 * contain a reference, but this drops the reference that the
	 * job owns while it is waiting to queue mbufs to the socket.
	 */
	free_aiotx_buffer(ab);

out:
	if (error) {
		if (ab != NULL) {
			job->aio_error = error;
			free_aiotx_buffer(ab);
		} else {
			MPASS(job->aio_sent == 0);
			aio_complete(job, -1, error);
		}
	}
	if (m != NULL)
		m_free(m);
	SOCKBUF_LOCK(sb);
}

/*
 * Taskqueue handler that pushes queued AIO writes for a connection while
 * the socket remains writeable.
 */
static void
t4_aiotx_task(void *context, int pending)
{
	struct toepcb *toep = context;
	struct inpcb *inp = toep->inp;
	struct socket *so = inp->inp_socket;
	struct kaiocb *job;

	CURVNET_SET(toep->vnet);
	SOCKBUF_LOCK(&so->so_snd);
	while (!TAILQ_EMPTY(&toep->aiotx_jobq) && sowriteable(so)) {
		job = TAILQ_FIRST(&toep->aiotx_jobq);
		TAILQ_REMOVE(&toep->aiotx_jobq, job, list);
		if (!aio_clear_cancel_function(job))
			continue;

		t4_aiotx_process_job(toep, so, job);
	}
	toep->aiotx_task_active = false;
	SOCKBUF_UNLOCK(&so->so_snd);
	CURVNET_RESTORE();

	free_toepcb(toep);
}
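
/*
 * Schedule the aiotx task for this connection if it isn't already running.
 * The task owns a reference on the toepcb (taken here with hold_toepcb())
 * that it drops when it is done draining the job queue.
 */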
"true" : "false"); 2324 #endif 2325 if (toep->aiotx_task_active) 2326 return; 2327 toep->aiotx_task_active = true; 2328 hold_toepcb(toep); 2329 soaio_enqueue(&toep->aiotx_task); 2330 } 2331 2332 static void 2333 t4_aiotx_cancel(struct kaiocb *job) 2334 { 2335 struct aiotx_buffer *ab; 2336 struct socket *so; 2337 struct sockbuf *sb; 2338 struct tcpcb *tp; 2339 struct toepcb *toep; 2340 2341 so = job->fd_file->f_data; 2342 tp = so_sototcpcb(so); 2343 toep = tp->t_toe; 2344 MPASS(job->uaiocb.aio_lio_opcode == LIO_WRITE); 2345 sb = &so->so_snd; 2346 2347 SOCKBUF_LOCK(sb); 2348 if (!aio_cancel_cleared(job)) 2349 TAILQ_REMOVE(&toep->aiotx_jobq, job, list); 2350 SOCKBUF_UNLOCK(sb); 2351 2352 ab = job->backend1; 2353 if (ab != NULL) 2354 free_aiotx_buffer(ab); 2355 else 2356 aio_cancel(job); 2357 } 2358 2359 int 2360 t4_aio_queue_aiotx(struct socket *so, struct kaiocb *job) 2361 { 2362 struct tcpcb *tp = so_sototcpcb(so); 2363 struct toepcb *toep = tp->t_toe; 2364 struct adapter *sc = td_adapter(toep->td); 2365 2366 /* This only handles writes. */ 2367 if (job->uaiocb.aio_lio_opcode != LIO_WRITE) 2368 return (EOPNOTSUPP); 2369 2370 if (!sc->tt.tx_zcopy) 2371 return (EOPNOTSUPP); 2372 2373 if (tls_tx_key(toep)) 2374 return (EOPNOTSUPP); 2375 2376 SOCKBUF_LOCK(&so->so_snd); 2377 #ifdef VERBOSE_TRACES 2378 CTR2(KTR_CXGBE, "%s: queueing %p", __func__, job); 2379 #endif 2380 if (!aio_set_cancel_function(job, t4_aiotx_cancel)) 2381 panic("new job was cancelled"); 2382 TAILQ_INSERT_TAIL(&toep->aiotx_jobq, job, list); 2383 if (sowriteable(so)) 2384 t4_aiotx_queue_toep(toep); 2385 SOCKBUF_UNLOCK(&so->so_snd); 2386 return (0); 2387 } 2388 2389 void 2390 aiotx_init_toep(struct toepcb *toep) 2391 { 2392 2393 TAILQ_INIT(&toep->aiotx_jobq); 2394 TASK_INIT(&toep->aiotx_task, 0, t4_aiotx_task, toep); 2395 } 2396 #endif 2397