1 /*- 2 * Copyright (c) 2012, 2015 Chelsio Communications, Inc. 3 * All rights reserved. 4 * Written by: Navdeep Parhar <np@FreeBSD.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 #include "opt_inet.h" 32 #include "opt_inet6.h" 33 #include "opt_ratelimit.h" 34 35 #ifdef TCP_OFFLOAD 36 #include <sys/param.h> 37 #include <sys/aio.h> 38 #include <sys/file.h> 39 #include <sys/kernel.h> 40 #include <sys/ktr.h> 41 #include <sys/module.h> 42 #include <sys/proc.h> 43 #include <sys/protosw.h> 44 #include <sys/domain.h> 45 #include <sys/socket.h> 46 #include <sys/socketvar.h> 47 #include <sys/sglist.h> 48 #include <sys/taskqueue.h> 49 #include <netinet/in.h> 50 #include <netinet/in_pcb.h> 51 #include <netinet/ip.h> 52 #include <netinet/ip6.h> 53 #define TCPSTATES 54 #include <netinet/tcp_fsm.h> 55 #include <netinet/tcp_seq.h> 56 #include <netinet/tcp_var.h> 57 #include <netinet/toecore.h> 58 59 #include <security/mac/mac_framework.h> 60 61 #include <vm/vm.h> 62 #include <vm/vm_extern.h> 63 #include <vm/pmap.h> 64 #include <vm/vm_map.h> 65 #include <vm/vm_page.h> 66 67 #include "common/common.h" 68 #include "common/t4_msg.h" 69 #include "common/t4_regs.h" 70 #include "common/t4_tcb.h" 71 #include "tom/t4_tom_l2t.h" 72 #include "tom/t4_tom.h" 73 74 VNET_DECLARE(int, tcp_do_autosndbuf); 75 #define V_tcp_do_autosndbuf VNET(tcp_do_autosndbuf) 76 VNET_DECLARE(int, tcp_autosndbuf_inc); 77 #define V_tcp_autosndbuf_inc VNET(tcp_autosndbuf_inc) 78 VNET_DECLARE(int, tcp_autosndbuf_max); 79 #define V_tcp_autosndbuf_max VNET(tcp_autosndbuf_max) 80 VNET_DECLARE(int, tcp_do_autorcvbuf); 81 #define V_tcp_do_autorcvbuf VNET(tcp_do_autorcvbuf) 82 VNET_DECLARE(int, tcp_autorcvbuf_inc); 83 #define V_tcp_autorcvbuf_inc VNET(tcp_autorcvbuf_inc) 84 VNET_DECLARE(int, tcp_autorcvbuf_max); 85 #define V_tcp_autorcvbuf_max VNET(tcp_autorcvbuf_max) 86 87 #define IS_AIOTX_MBUF(m) \ 88 ((m)->m_flags & M_EXT && (m)->m_ext.ext_flags & EXT_FLAG_AIOTX) 89 90 static void t4_aiotx_cancel(struct kaiocb *job); 91 static void t4_aiotx_queue_toep(struct toepcb *toep); 92 93 static size_t 94 aiotx_mbuf_pgoff(struct mbuf *m) 95 { 96 struct aiotx_buffer *ab; 97 98 MPASS(IS_AIOTX_MBUF(m)); 99 ab = m->m_ext.ext_arg1; 100 return 
((ab->ps.offset + (uintptr_t)m->m_ext.ext_arg2) % PAGE_SIZE); 101 } 102 103 static vm_page_t * 104 aiotx_mbuf_pages(struct mbuf *m) 105 { 106 struct aiotx_buffer *ab; 107 int npages; 108 109 MPASS(IS_AIOTX_MBUF(m)); 110 ab = m->m_ext.ext_arg1; 111 npages = (ab->ps.offset + (uintptr_t)m->m_ext.ext_arg2) / PAGE_SIZE; 112 return (ab->ps.pages + npages); 113 } 114 115 void 116 send_flowc_wr(struct toepcb *toep, struct flowc_tx_params *ftxp) 117 { 118 struct wrqe *wr; 119 struct fw_flowc_wr *flowc; 120 unsigned int nparams = ftxp ? 8 : 6, flowclen; 121 struct vi_info *vi = toep->vi; 122 struct port_info *pi = vi->pi; 123 struct adapter *sc = pi->adapter; 124 unsigned int pfvf = G_FW_VIID_PFN(vi->viid) << S_FW_VIID_PFN; 125 struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx]; 126 127 KASSERT(!(toep->flags & TPF_FLOWC_WR_SENT), 128 ("%s: flowc for tid %u sent already", __func__, toep->tid)); 129 130 flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval); 131 132 wr = alloc_wrqe(roundup2(flowclen, 16), toep->ofld_txq); 133 if (wr == NULL) { 134 /* XXX */ 135 panic("%s: allocation failure.", __func__); 136 } 137 flowc = wrtod(wr); 138 memset(flowc, 0, wr->wr_len); 139 140 flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) | 141 V_FW_FLOWC_WR_NPARAMS(nparams)); 142 flowc->flowid_len16 = htonl(V_FW_WR_LEN16(howmany(flowclen, 16)) | 143 V_FW_WR_FLOWID(toep->tid)); 144 145 flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN; 146 flowc->mnemval[0].val = htobe32(pfvf); 147 flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH; 148 flowc->mnemval[1].val = htobe32(pi->tx_chan); 149 flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT; 150 flowc->mnemval[2].val = htobe32(pi->tx_chan); 151 flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID; 152 flowc->mnemval[3].val = htobe32(toep->ofld_rxq->iq.abs_id); 153 if (ftxp) { 154 uint32_t sndbuf = min(ftxp->snd_space, sc->tt.sndbuf); 155 156 flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDNXT; 157 flowc->mnemval[4].val = htobe32(ftxp->snd_nxt); 158 flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_RCVNXT; 159 flowc->mnemval[5].val = htobe32(ftxp->rcv_nxt); 160 flowc->mnemval[6].mnemonic = FW_FLOWC_MNEM_SNDBUF; 161 flowc->mnemval[6].val = htobe32(sndbuf); 162 flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS; 163 flowc->mnemval[7].val = htobe32(ftxp->mss); 164 165 CTR6(KTR_CXGBE, 166 "%s: tid %u, mss %u, sndbuf %u, snd_nxt 0x%x, rcv_nxt 0x%x", 167 __func__, toep->tid, ftxp->mss, sndbuf, ftxp->snd_nxt, 168 ftxp->rcv_nxt); 169 } else { 170 flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDBUF; 171 flowc->mnemval[4].val = htobe32(512); 172 flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_MSS; 173 flowc->mnemval[5].val = htobe32(512); 174 175 CTR2(KTR_CXGBE, "%s: tid %u", __func__, toep->tid); 176 } 177 178 txsd->tx_credits = howmany(flowclen, 16); 179 txsd->plen = 0; 180 KASSERT(toep->tx_credits >= txsd->tx_credits && toep->txsd_avail > 0, 181 ("%s: not enough credits (%d)", __func__, toep->tx_credits)); 182 toep->tx_credits -= txsd->tx_credits; 183 if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) 184 toep->txsd_pidx = 0; 185 toep->txsd_avail--; 186 187 toep->flags |= TPF_FLOWC_WR_SENT; 188 t4_wrq_tx(sc, wr); 189 } 190 191 #ifdef RATELIMIT 192 /* 193 * Input is Bytes/second (so_max_pacing_rate), chip counts in Kilobits/second.
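* For example, with the kbps = Bps * 8 / 1000 conversion below, 125000000 Bytes/s (1 Gb/s) becomes 1000000 Kb/s.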
194 */ 195 static int 196 update_tx_rate_limit(struct adapter *sc, struct toepcb *toep, u_int Bps) 197 { 198 int tc_idx, rc; 199 const u_int kbps = (u_int) (uint64_t)Bps * 8ULL / 1000; 200 const int port_id = toep->vi->pi->port_id; 201 202 CTR3(KTR_CXGBE, "%s: tid %u, rate %uKbps", __func__, toep->tid, kbps); 203 204 if (kbps == 0) { 205 /* unbind */ 206 tc_idx = -1; 207 } else { 208 rc = t4_reserve_cl_rl_kbps(sc, port_id, kbps, &tc_idx); 209 if (rc != 0) 210 return (rc); 211 MPASS(tc_idx >= 0 && tc_idx < sc->chip_params->nsched_cls); 212 } 213 214 if (toep->tc_idx != tc_idx) { 215 struct wrqe *wr; 216 struct fw_flowc_wr *flowc; 217 int nparams = 1, flowclen, flowclen16; 218 struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx]; 219 220 flowclen = sizeof(*flowc) + nparams * sizeof(struct 221 fw_flowc_mnemval); 222 flowclen16 = howmany(flowclen, 16); 223 if (toep->tx_credits < flowclen16 || toep->txsd_avail == 0 || 224 (wr = alloc_wrqe(roundup2(flowclen, 16), toep->ofld_txq)) == NULL) { 225 if (tc_idx >= 0) 226 t4_release_cl_rl_kbps(sc, port_id, tc_idx); 227 return (ENOMEM); 228 } 229 230 flowc = wrtod(wr); 231 memset(flowc, 0, wr->wr_len); 232 233 flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) | 234 V_FW_FLOWC_WR_NPARAMS(nparams)); 235 flowc->flowid_len16 = htonl(V_FW_WR_LEN16(flowclen16) | 236 V_FW_WR_FLOWID(toep->tid)); 237 238 flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS; 239 if (tc_idx == -1) 240 flowc->mnemval[0].val = htobe32(0xff); 241 else 242 flowc->mnemval[0].val = htobe32(tc_idx); 243 244 txsd->tx_credits = flowclen16; 245 txsd->plen = 0; 246 toep->tx_credits -= txsd->tx_credits; 247 if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) 248 toep->txsd_pidx = 0; 249 toep->txsd_avail--; 250 t4_wrq_tx(sc, wr); 251 } 252 253 if (toep->tc_idx >= 0) 254 t4_release_cl_rl_kbps(sc, port_id, toep->tc_idx); 255 toep->tc_idx = tc_idx; 256 257 return (0); 258 } 259 #endif 260 261 void 262 send_reset(struct adapter *sc, struct toepcb *toep, uint32_t snd_nxt) 263 { 264 struct wrqe *wr; 265 struct cpl_abort_req *req; 266 int tid = toep->tid; 267 struct inpcb *inp = toep->inp; 268 struct tcpcb *tp = intotcpcb(inp); /* don't use if INP_DROPPED */ 269 270 INP_WLOCK_ASSERT(inp); 271 272 CTR6(KTR_CXGBE, "%s: tid %d (%s), toep_flags 0x%x, inp_flags 0x%x%s", 273 __func__, toep->tid, 274 inp->inp_flags & INP_DROPPED ? "inp dropped" : 275 tcpstates[tp->t_state], 276 toep->flags, inp->inp_flags, 277 toep->flags & TPF_ABORT_SHUTDOWN ? 278 " (abort already in progress)" : ""); 279 280 if (toep->flags & TPF_ABORT_SHUTDOWN) 281 return; /* abort already in progress */ 282 283 toep->flags |= TPF_ABORT_SHUTDOWN; 284 285 KASSERT(toep->flags & TPF_FLOWC_WR_SENT, 286 ("%s: flowc_wr not sent for tid %d.", __func__, tid)); 287 288 wr = alloc_wrqe(sizeof(*req), toep->ofld_txq); 289 if (wr == NULL) { 290 /* XXX */ 291 panic("%s: allocation failure.", __func__); 292 } 293 req = wrtod(wr); 294 295 INIT_TP_WR_MIT_CPL(req, CPL_ABORT_REQ, tid); 296 if (inp->inp_flags & INP_DROPPED) 297 req->rsvd0 = htobe32(snd_nxt); 298 else 299 req->rsvd0 = htobe32(tp->snd_nxt); 300 req->rsvd1 = !(toep->flags & TPF_TX_DATA_SENT); 301 req->cmd = CPL_ABORT_SEND_RST; 302 303 /* 304 * XXX: What's the correct way to tell that the inp hasn't been detached 305 * from its socket? Should I even be flushing the snd buffer here? 306 */ 307 if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) == 0) { 308 struct socket *so = inp->inp_socket; 309 310 if (so != NULL) /* because I'm not sure. 
See comment above */ 311 sbflush(&so->so_snd); 312 } 313 314 t4_l2t_send(sc, wr, toep->l2te); 315 } 316 317 /* 318 * Called when a connection is established to translate the TCP options 319 * reported by HW to FreeBSD's native format. 320 */ 321 static void 322 assign_rxopt(struct tcpcb *tp, unsigned int opt) 323 { 324 struct toepcb *toep = tp->t_toe; 325 struct inpcb *inp = tp->t_inpcb; 326 struct adapter *sc = td_adapter(toep->td); 327 int n; 328 329 INP_LOCK_ASSERT(inp); 330 331 if (inp->inp_inc.inc_flags & INC_ISIPV6) 332 n = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); 333 else 334 n = sizeof(struct ip) + sizeof(struct tcphdr); 335 if (V_tcp_do_rfc1323) 336 n += TCPOLEN_TSTAMP_APPA; 337 tp->t_maxseg = sc->params.mtus[G_TCPOPT_MSS(opt)] - n; 338 339 CTR4(KTR_CXGBE, "%s: tid %d, mtu_idx %u (%u)", __func__, toep->tid, 340 G_TCPOPT_MSS(opt), sc->params.mtus[G_TCPOPT_MSS(opt)]); 341 342 if (G_TCPOPT_TSTAMP(opt)) { 343 tp->t_flags |= TF_RCVD_TSTMP; /* timestamps ok */ 344 tp->ts_recent = 0; /* hmmm */ 345 tp->ts_recent_age = tcp_ts_getticks(); 346 } 347 348 if (G_TCPOPT_SACK(opt)) 349 tp->t_flags |= TF_SACK_PERMIT; /* should already be set */ 350 else 351 tp->t_flags &= ~TF_SACK_PERMIT; /* sack disallowed by peer */ 352 353 if (G_TCPOPT_WSCALE_OK(opt)) 354 tp->t_flags |= TF_RCVD_SCALE; 355 356 /* Doing window scaling? */ 357 if ((tp->t_flags & (TF_RCVD_SCALE | TF_REQ_SCALE)) == 358 (TF_RCVD_SCALE | TF_REQ_SCALE)) { 359 tp->rcv_scale = tp->request_r_scale; 360 tp->snd_scale = G_TCPOPT_SND_WSCALE(opt); 361 } 362 } 363 364 /* 365 * Completes some final bits of initialization for just established connections 366 * and changes their state to TCPS_ESTABLISHED. 367 * 368 * The ISNs are from after the exchange of SYNs. i.e., the true ISN + 1. 369 */ 370 void 371 make_established(struct toepcb *toep, uint32_t snd_isn, uint32_t rcv_isn, 372 uint16_t opt) 373 { 374 struct inpcb *inp = toep->inp; 375 struct socket *so = inp->inp_socket; 376 struct tcpcb *tp = intotcpcb(inp); 377 long bufsize; 378 uint32_t iss = be32toh(snd_isn) - 1; /* true ISS */ 379 uint32_t irs = be32toh(rcv_isn) - 1; /* true IRS */ 380 uint16_t tcpopt = be16toh(opt); 381 struct flowc_tx_params ftxp; 382 383 INP_WLOCK_ASSERT(inp); 384 KASSERT(tp->t_state == TCPS_SYN_SENT || 385 tp->t_state == TCPS_SYN_RECEIVED, 386 ("%s: TCP state %s", __func__, tcpstates[tp->t_state])); 387 388 CTR6(KTR_CXGBE, "%s: tid %d, so %p, inp %p, tp %p, toep %p", 389 __func__, toep->tid, so, inp, tp, toep); 390 391 tp->t_state = TCPS_ESTABLISHED; 392 tp->t_starttime = ticks; 393 TCPSTAT_INC(tcps_connects); 394 395 tp->irs = irs; 396 tcp_rcvseqinit(tp); 397 tp->rcv_wnd = toep->rx_credits << 10; 398 tp->rcv_adv += tp->rcv_wnd; 399 tp->last_ack_sent = tp->rcv_nxt; 400 401 /* 402 * If we were unable to send all rx credits via opt0, save the remainder 403 * in rx_credits so that they can be handed over with the next credit 404 * update. 
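* (Concretely, toep->rx_credits is set just below to select_rcv_wnd(so) - tp->rcv_wnd.)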
405 */ 406 SOCKBUF_LOCK(&so->so_rcv); 407 bufsize = select_rcv_wnd(so); 408 SOCKBUF_UNLOCK(&so->so_rcv); 409 toep->rx_credits = bufsize - tp->rcv_wnd; 410 411 tp->iss = iss; 412 tcp_sendseqinit(tp); 413 tp->snd_una = iss + 1; 414 tp->snd_nxt = iss + 1; 415 tp->snd_max = iss + 1; 416 417 assign_rxopt(tp, tcpopt); 418 419 SOCKBUF_LOCK(&so->so_snd); 420 if (so->so_snd.sb_flags & SB_AUTOSIZE && V_tcp_do_autosndbuf) 421 bufsize = V_tcp_autosndbuf_max; 422 else 423 bufsize = sbspace(&so->so_snd); 424 SOCKBUF_UNLOCK(&so->so_snd); 425 426 ftxp.snd_nxt = tp->snd_nxt; 427 ftxp.rcv_nxt = tp->rcv_nxt; 428 ftxp.snd_space = bufsize; 429 ftxp.mss = tp->t_maxseg; 430 send_flowc_wr(toep, &ftxp); 431 432 soisconnected(so); 433 } 434 435 static int 436 send_rx_credits(struct adapter *sc, struct toepcb *toep, int credits) 437 { 438 struct wrqe *wr; 439 struct cpl_rx_data_ack *req; 440 uint32_t dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1); 441 442 KASSERT(credits >= 0, ("%s: %d credits", __func__, credits)); 443 444 wr = alloc_wrqe(sizeof(*req), toep->ctrlq); 445 if (wr == NULL) 446 return (0); 447 req = wrtod(wr); 448 449 INIT_TP_WR_MIT_CPL(req, CPL_RX_DATA_ACK, toep->tid); 450 req->credit_dack = htobe32(dack | V_RX_CREDITS(credits)); 451 452 t4_wrq_tx(sc, wr); 453 return (credits); 454 } 455 456 void 457 t4_rcvd_locked(struct toedev *tod, struct tcpcb *tp) 458 { 459 struct adapter *sc = tod->tod_softc; 460 struct inpcb *inp = tp->t_inpcb; 461 struct socket *so = inp->inp_socket; 462 struct sockbuf *sb = &so->so_rcv; 463 struct toepcb *toep = tp->t_toe; 464 int credits; 465 466 INP_WLOCK_ASSERT(inp); 467 468 SOCKBUF_LOCK_ASSERT(sb); 469 KASSERT(toep->sb_cc >= sbused(sb), 470 ("%s: sb %p has more data (%d) than last time (%d).", 471 __func__, sb, sbused(sb), toep->sb_cc)); 472 473 toep->rx_credits += toep->sb_cc - sbused(sb); 474 toep->sb_cc = sbused(sb); 475 476 if (toep->rx_credits > 0 && 477 (tp->rcv_wnd <= 32 * 1024 || toep->rx_credits >= 64 * 1024 || 478 (toep->rx_credits >= 16 * 1024 && tp->rcv_wnd <= 128 * 1024) || 479 toep->sb_cc + tp->rcv_wnd < sb->sb_lowat)) { 480 481 credits = send_rx_credits(sc, toep, toep->rx_credits); 482 toep->rx_credits -= credits; 483 tp->rcv_wnd += credits; 484 tp->rcv_adv += credits; 485 } 486 } 487 488 void 489 t4_rcvd(struct toedev *tod, struct tcpcb *tp) 490 { 491 struct inpcb *inp = tp->t_inpcb; 492 struct socket *so = inp->inp_socket; 493 struct sockbuf *sb = &so->so_rcv; 494 495 SOCKBUF_LOCK(sb); 496 t4_rcvd_locked(tod, tp); 497 SOCKBUF_UNLOCK(sb); 498 } 499 500 /* 501 * Close a connection by sending a CPL_CLOSE_CON_REQ message. 502 */ 503 static int 504 close_conn(struct adapter *sc, struct toepcb *toep) 505 { 506 struct wrqe *wr; 507 struct cpl_close_con_req *req; 508 unsigned int tid = toep->tid; 509 510 CTR3(KTR_CXGBE, "%s: tid %u%s", __func__, toep->tid, 511 toep->flags & TPF_FIN_SENT ? 
", IGNORED" : ""); 512 513 if (toep->flags & TPF_FIN_SENT) 514 return (0); 515 516 KASSERT(toep->flags & TPF_FLOWC_WR_SENT, 517 ("%s: flowc_wr not sent for tid %u.", __func__, tid)); 518 519 wr = alloc_wrqe(sizeof(*req), toep->ofld_txq); 520 if (wr == NULL) { 521 /* XXX */ 522 panic("%s: allocation failure.", __func__); 523 } 524 req = wrtod(wr); 525 526 req->wr.wr_hi = htonl(V_FW_WR_OP(FW_TP_WR) | 527 V_FW_WR_IMMDLEN(sizeof(*req) - sizeof(req->wr))); 528 req->wr.wr_mid = htonl(V_FW_WR_LEN16(howmany(sizeof(*req), 16)) | 529 V_FW_WR_FLOWID(tid)); 530 req->wr.wr_lo = cpu_to_be64(0); 531 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid)); 532 req->rsvd = 0; 533 534 toep->flags |= TPF_FIN_SENT; 535 toep->flags &= ~TPF_SEND_FIN; 536 t4_l2t_send(sc, wr, toep->l2te); 537 538 return (0); 539 } 540 541 #define MAX_OFLD_TX_CREDITS (SGE_MAX_WR_LEN / 16) 542 #define MIN_OFLD_TX_CREDITS (howmany(sizeof(struct fw_ofld_tx_data_wr) + 1, 16)) 543 544 /* Maximum amount of immediate data we could stuff in a WR */ 545 static inline int 546 max_imm_payload(int tx_credits) 547 { 548 const int n = 2; /* Use only up to 2 desc for imm. data WR */ 549 550 KASSERT(tx_credits >= 0 && 551 tx_credits <= MAX_OFLD_TX_CREDITS, 552 ("%s: %d credits", __func__, tx_credits)); 553 554 if (tx_credits < MIN_OFLD_TX_CREDITS) 555 return (0); 556 557 if (tx_credits >= (n * EQ_ESIZE) / 16) 558 return ((n * EQ_ESIZE) - sizeof(struct fw_ofld_tx_data_wr)); 559 else 560 return (tx_credits * 16 - sizeof(struct fw_ofld_tx_data_wr)); 561 } 562 563 /* Maximum number of SGL entries we could stuff in a WR */ 564 static inline int 565 max_dsgl_nsegs(int tx_credits) 566 { 567 int nseg = 1; /* ulptx_sgl has room for 1, rest ulp_tx_sge_pair */ 568 int sge_pair_credits = tx_credits - MIN_OFLD_TX_CREDITS; 569 570 KASSERT(tx_credits >= 0 && 571 tx_credits <= MAX_OFLD_TX_CREDITS, 572 ("%s: %d credits", __func__, tx_credits)); 573 574 if (tx_credits < MIN_OFLD_TX_CREDITS) 575 return (0); 576 577 nseg += 2 * (sge_pair_credits * 16 / 24); 578 if ((sge_pair_credits * 16) % 24 == 16) 579 nseg++; 580 581 return (nseg); 582 } 583 584 static inline void 585 write_tx_wr(void *dst, struct toepcb *toep, unsigned int immdlen, 586 unsigned int plen, uint8_t credits, int shove, int ulp_submode, int txalign) 587 { 588 struct fw_ofld_tx_data_wr *txwr = dst; 589 590 txwr->op_to_immdlen = htobe32(V_WR_OP(FW_OFLD_TX_DATA_WR) | 591 V_FW_WR_IMMDLEN(immdlen)); 592 txwr->flowid_len16 = htobe32(V_FW_WR_FLOWID(toep->tid) | 593 V_FW_WR_LEN16(credits)); 594 txwr->lsodisable_to_flags = htobe32(V_TX_ULP_MODE(toep->ulp_mode) | 595 V_TX_ULP_SUBMODE(ulp_submode) | V_TX_URG(0) | V_TX_SHOVE(shove)); 596 txwr->plen = htobe32(plen); 597 598 if (txalign > 0) { 599 struct tcpcb *tp = intotcpcb(toep->inp); 600 601 if (plen < 2 * tp->t_maxseg || is_10G_port(toep->vi->pi)) 602 txwr->lsodisable_to_flags |= 603 htobe32(F_FW_OFLD_TX_DATA_WR_LSODISABLE); 604 else 605 txwr->lsodisable_to_flags |= 606 htobe32(F_FW_OFLD_TX_DATA_WR_ALIGNPLD | 607 (tp->t_flags & TF_NODELAY ? 0 : 608 F_FW_OFLD_TX_DATA_WR_ALIGNPLDSHOVE)); 609 } 610 } 611 612 /* 613 * Generate a DSGL from a starting mbuf. The total number of segments and the 614 * maximum segments in any one mbuf are provided. 
615 */ 616 static void 617 write_tx_sgl(void *dst, struct mbuf *start, struct mbuf *stop, int nsegs, int n) 618 { 619 struct mbuf *m; 620 struct ulptx_sgl *usgl = dst; 621 int i, j, rc; 622 struct sglist sg; 623 struct sglist_seg segs[n]; 624 625 KASSERT(nsegs > 0, ("%s: nsegs 0", __func__)); 626 627 sglist_init(&sg, n, segs); 628 usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) | 629 V_ULPTX_NSGE(nsegs)); 630 631 i = -1; 632 for (m = start; m != stop; m = m->m_next) { 633 if (IS_AIOTX_MBUF(m)) 634 rc = sglist_append_vmpages(&sg, aiotx_mbuf_pages(m), 635 aiotx_mbuf_pgoff(m), m->m_len); 636 else 637 rc = sglist_append(&sg, mtod(m, void *), m->m_len); 638 if (__predict_false(rc != 0)) 639 panic("%s: sglist_append %d", __func__, rc); 640 641 for (j = 0; j < sg.sg_nseg; i++, j++) { 642 if (i < 0) { 643 usgl->len0 = htobe32(segs[j].ss_len); 644 usgl->addr0 = htobe64(segs[j].ss_paddr); 645 } else { 646 usgl->sge[i / 2].len[i & 1] = 647 htobe32(segs[j].ss_len); 648 usgl->sge[i / 2].addr[i & 1] = 649 htobe64(segs[j].ss_paddr); 650 } 651 #ifdef INVARIANTS 652 nsegs--; 653 #endif 654 } 655 sglist_reset(&sg); 656 } 657 if (i & 1) 658 usgl->sge[i / 2].len[1] = htobe32(0); 659 KASSERT(nsegs == 0, ("%s: nsegs %d, start %p, stop %p", 660 __func__, nsegs, start, stop)); 661 } 662 663 /* 664 * Max number of SGL entries an offload tx work request can have. This is 41 665 * (1 + 40) for a full 512B work request. 666 * fw_ofld_tx_data_wr(16B) + ulptx_sgl(16B, 1) + ulptx_sge_pair(480B, 40) 667 */ 668 #define OFLD_SGL_LEN (41) 669 670 /* 671 * Send data and/or a FIN to the peer. 672 * 673 * The socket's so_snd buffer consists of a stream of data starting with sb_mb 674 * and linked together with m_next. sb_sndptr, if set, is the last mbuf that 675 * was transmitted. 676 * 677 * drop indicates the number of bytes that should be dropped from the head of 678 * the send buffer. It is an optimization that lets do_fw4_ack avoid creating 679 * contention on the send buffer lock (before this change it used to do 680 * sowwakeup and then t4_push_frames right after that when recovering from tx 681 * stalls). When drop is set this function MUST drop the bytes and wake up any 682 * writers. 
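* Transmission is paced by toep->tx_credits: each loop iteration below builds at most one work request, sized by max_imm_payload()/max_dsgl_nsegs(), and tx is suspended (TPF_TX_SUSPENDED) when credits run low.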
683 */ 684 void 685 t4_push_frames(struct adapter *sc, struct toepcb *toep, int drop) 686 { 687 struct mbuf *sndptr, *m, *sb_sndptr; 688 struct fw_ofld_tx_data_wr *txwr; 689 struct wrqe *wr; 690 u_int plen, nsegs, credits, max_imm, max_nsegs, max_nsegs_1mbuf; 691 struct inpcb *inp = toep->inp; 692 struct tcpcb *tp = intotcpcb(inp); 693 struct socket *so = inp->inp_socket; 694 struct sockbuf *sb = &so->so_snd; 695 int tx_credits, shove, compl, sowwakeup; 696 struct ofld_tx_sdesc *txsd; 697 bool aiotx_mbuf_seen; 698 699 INP_WLOCK_ASSERT(inp); 700 KASSERT(toep->flags & TPF_FLOWC_WR_SENT, 701 ("%s: flowc_wr not sent for tid %u.", __func__, toep->tid)); 702 703 KASSERT(toep->ulp_mode == ULP_MODE_NONE || 704 toep->ulp_mode == ULP_MODE_TCPDDP || 705 toep->ulp_mode == ULP_MODE_RDMA, 706 ("%s: ulp_mode %u for toep %p", __func__, toep->ulp_mode, toep)); 707 708 #ifdef VERBOSE_TRACES 709 CTR5(KTR_CXGBE, "%s: tid %d toep flags %#x tp flags %#x drop %d", 710 __func__, toep->tid, toep->flags, tp->t_flags, drop); 711 #endif 712 if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN)) 713 return; 714 715 #ifdef RATELIMIT 716 if (__predict_false(inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) && 717 (update_tx_rate_limit(sc, toep, so->so_max_pacing_rate) == 0)) { 718 inp->inp_flags2 &= ~INP_RATE_LIMIT_CHANGED; 719 } 720 #endif 721 722 /* 723 * This function doesn't resume by itself. Someone else must clear the 724 * flag and call this function. 725 */ 726 if (__predict_false(toep->flags & TPF_TX_SUSPENDED)) { 727 KASSERT(drop == 0, 728 ("%s: drop (%d) != 0 but tx is suspended", __func__, drop)); 729 return; 730 } 731 732 txsd = &toep->txsd[toep->txsd_pidx]; 733 do { 734 tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS); 735 max_imm = max_imm_payload(tx_credits); 736 max_nsegs = max_dsgl_nsegs(tx_credits); 737 738 SOCKBUF_LOCK(sb); 739 sowwakeup = drop; 740 if (drop) { 741 sbdrop_locked(sb, drop); 742 drop = 0; 743 } 744 sb_sndptr = sb->sb_sndptr; 745 sndptr = sb_sndptr ?
sb_sndptr->m_next : sb->sb_mb; 746 plen = 0; 747 nsegs = 0; 748 max_nsegs_1mbuf = 0; /* max # of SGL segments in any one mbuf */ 749 aiotx_mbuf_seen = false; 750 for (m = sndptr; m != NULL; m = m->m_next) { 751 int n; 752 753 if (IS_AIOTX_MBUF(m)) 754 n = sglist_count_vmpages(aiotx_mbuf_pages(m), 755 aiotx_mbuf_pgoff(m), m->m_len); 756 else 757 n = sglist_count(mtod(m, void *), m->m_len); 758 759 nsegs += n; 760 plen += m->m_len; 761 762 /* This mbuf sent us _over_ the nsegs limit, back out */ 763 if (plen > max_imm && nsegs > max_nsegs) { 764 nsegs -= n; 765 plen -= m->m_len; 766 if (plen == 0) { 767 /* Too few credits */ 768 toep->flags |= TPF_TX_SUSPENDED; 769 if (sowwakeup) { 770 if (!TAILQ_EMPTY( 771 &toep->aiotx_jobq)) 772 t4_aiotx_queue_toep( 773 toep); 774 sowwakeup_locked(so); 775 } else 776 SOCKBUF_UNLOCK(sb); 777 SOCKBUF_UNLOCK_ASSERT(sb); 778 return; 779 } 780 break; 781 } 782 783 if (IS_AIOTX_MBUF(m)) 784 aiotx_mbuf_seen = true; 785 if (max_nsegs_1mbuf < n) 786 max_nsegs_1mbuf = n; 787 sb_sndptr = m; /* new sb->sb_sndptr if all goes well */ 788 789 /* This mbuf put us right at the max_nsegs limit */ 790 if (plen > max_imm && nsegs == max_nsegs) { 791 m = m->m_next; 792 break; 793 } 794 } 795 796 if (sbused(sb) > sb->sb_hiwat * 5 / 8 && 797 toep->plen_nocompl + plen >= sb->sb_hiwat / 4) 798 compl = 1; 799 else 800 compl = 0; 801 802 if (sb->sb_flags & SB_AUTOSIZE && 803 V_tcp_do_autosndbuf && 804 sb->sb_hiwat < V_tcp_autosndbuf_max && 805 sbused(sb) >= sb->sb_hiwat * 7 / 8) { 806 int newsize = min(sb->sb_hiwat + V_tcp_autosndbuf_inc, 807 V_tcp_autosndbuf_max); 808 809 if (!sbreserve_locked(sb, newsize, so, NULL)) 810 sb->sb_flags &= ~SB_AUTOSIZE; 811 else 812 sowwakeup = 1; /* room available */ 813 } 814 if (sowwakeup) { 815 if (!TAILQ_EMPTY(&toep->aiotx_jobq)) 816 t4_aiotx_queue_toep(toep); 817 sowwakeup_locked(so); 818 } else 819 SOCKBUF_UNLOCK(sb); 820 SOCKBUF_UNLOCK_ASSERT(sb); 821 822 /* nothing to send */ 823 if (plen == 0) { 824 KASSERT(m == NULL, 825 ("%s: nothing to send, but m != NULL", __func__)); 826 break; 827 } 828 829 if (__predict_false(toep->flags & TPF_FIN_SENT)) 830 panic("%s: excess tx.", __func__); 831 832 shove = m == NULL && !(tp->t_flags & TF_MORETOCOME); 833 if (plen <= max_imm && !aiotx_mbuf_seen) { 834 835 /* Immediate data tx */ 836 837 wr = alloc_wrqe(roundup2(sizeof(*txwr) + plen, 16), 838 toep->ofld_txq); 839 if (wr == NULL) { 840 /* XXX: how will we recover from this? */ 841 toep->flags |= TPF_TX_SUSPENDED; 842 return; 843 } 844 txwr = wrtod(wr); 845 credits = howmany(wr->wr_len, 16); 846 write_tx_wr(txwr, toep, plen, plen, credits, shove, 0, 847 sc->tt.tx_align); 848 m_copydata(sndptr, 0, plen, (void *)(txwr + 1)); 849 nsegs = 0; 850 } else { 851 int wr_len; 852 853 /* DSGL tx */ 854 855 wr_len = sizeof(*txwr) + sizeof(struct ulptx_sgl) + 856 ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8; 857 wr = alloc_wrqe(roundup2(wr_len, 16), toep->ofld_txq); 858 if (wr == NULL) { 859 /* XXX: how will we recover from this? 
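* As written, tx simply stays suspended until do_fw4_ack() returns enough credits, clears TPF_TX_SUSPENDED, and calls this function again.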
*/ 860 toep->flags |= TPF_TX_SUSPENDED; 861 return; 862 } 863 txwr = wrtod(wr); 864 credits = howmany(wr_len, 16); 865 write_tx_wr(txwr, toep, 0, plen, credits, shove, 0, 866 sc->tt.tx_align); 867 write_tx_sgl(txwr + 1, sndptr, m, nsegs, 868 max_nsegs_1mbuf); 869 if (wr_len & 0xf) { 870 uint64_t *pad = (uint64_t *) 871 ((uintptr_t)txwr + wr_len); 872 *pad = 0; 873 } 874 } 875 876 KASSERT(toep->tx_credits >= credits, 877 ("%s: not enough credits", __func__)); 878 879 toep->tx_credits -= credits; 880 toep->tx_nocompl += credits; 881 toep->plen_nocompl += plen; 882 if (toep->tx_credits <= toep->tx_total * 3 / 8 && 883 toep->tx_nocompl >= toep->tx_total / 4) 884 compl = 1; 885 886 if (compl || toep->ulp_mode == ULP_MODE_RDMA) { 887 txwr->op_to_immdlen |= htobe32(F_FW_WR_COMPL); 888 toep->tx_nocompl = 0; 889 toep->plen_nocompl = 0; 890 } 891 892 tp->snd_nxt += plen; 893 tp->snd_max += plen; 894 895 SOCKBUF_LOCK(sb); 896 KASSERT(sb_sndptr, ("%s: sb_sndptr is NULL", __func__)); 897 sb->sb_sndptr = sb_sndptr; 898 SOCKBUF_UNLOCK(sb); 899 900 toep->flags |= TPF_TX_DATA_SENT; 901 if (toep->tx_credits < MIN_OFLD_TX_CREDITS) 902 toep->flags |= TPF_TX_SUSPENDED; 903 904 KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__)); 905 txsd->plen = plen; 906 txsd->tx_credits = credits; 907 txsd++; 908 if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) { 909 toep->txsd_pidx = 0; 910 txsd = &toep->txsd[0]; 911 } 912 toep->txsd_avail--; 913 914 t4_l2t_send(sc, wr, toep->l2te); 915 } while (m != NULL); 916 917 /* Send a FIN if requested, but only if there's no more data to send */ 918 if (m == NULL && toep->flags & TPF_SEND_FIN) 919 close_conn(sc, toep); 920 } 921 922 static inline void 923 rqdrop_locked(struct mbufq *q, int plen) 924 { 925 struct mbuf *m; 926 927 while (plen > 0) { 928 m = mbufq_dequeue(q); 929 930 /* Too many credits. */ 931 MPASS(m != NULL); 932 M_ASSERTPKTHDR(m); 933 934 /* Partial credits. */ 935 MPASS(plen >= m->m_pkthdr.len); 936 937 plen -= m->m_pkthdr.len; 938 m_freem(m); 939 } 940 } 941 942 void 943 t4_push_pdus(struct adapter *sc, struct toepcb *toep, int drop) 944 { 945 struct mbuf *sndptr, *m; 946 struct fw_ofld_tx_data_wr *txwr; 947 struct wrqe *wr; 948 u_int plen, nsegs, credits, max_imm, max_nsegs, max_nsegs_1mbuf; 949 u_int adjusted_plen, ulp_submode; 950 struct inpcb *inp = toep->inp; 951 struct tcpcb *tp = intotcpcb(inp); 952 int tx_credits, shove; 953 struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx]; 954 struct mbufq *pduq = &toep->ulp_pduq; 955 static const u_int ulp_extra_len[] = {0, 4, 4, 8}; 956 957 INP_WLOCK_ASSERT(inp); 958 KASSERT(toep->flags & TPF_FLOWC_WR_SENT, 959 ("%s: flowc_wr not sent for tid %u.", __func__, toep->tid)); 960 KASSERT(toep->ulp_mode == ULP_MODE_ISCSI, 961 ("%s: ulp_mode %u for toep %p", __func__, toep->ulp_mode, toep)); 962 963 if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN)) 964 return; 965 966 /* 967 * This function doesn't resume by itself. Someone else must clear the 968 * flag and call this function. 
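* (For ULP_MODE_ISCSI that caller is do_fw4_ack(), which re-invokes t4_push_pdus() once enough credits have been returned.)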
969 */ 970 if (__predict_false(toep->flags & TPF_TX_SUSPENDED)) { 971 KASSERT(drop == 0, 972 ("%s: drop (%d) != 0 but tx is suspended", __func__, drop)); 973 return; 974 } 975 976 if (drop) 977 rqdrop_locked(&toep->ulp_pdu_reclaimq, drop); 978 979 while ((sndptr = mbufq_first(pduq)) != NULL) { 980 M_ASSERTPKTHDR(sndptr); 981 982 tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS); 983 max_imm = max_imm_payload(tx_credits); 984 max_nsegs = max_dsgl_nsegs(tx_credits); 985 986 plen = 0; 987 nsegs = 0; 988 max_nsegs_1mbuf = 0; /* max # of SGL segments in any one mbuf */ 989 for (m = sndptr; m != NULL; m = m->m_next) { 990 int n = sglist_count(mtod(m, void *), m->m_len); 991 992 nsegs += n; 993 plen += m->m_len; 994 995 /* 996 * This mbuf would send us _over_ the nsegs limit. 997 * Suspend tx because the PDU can't be sent out. 998 */ 999 if (plen > max_imm && nsegs > max_nsegs) { 1000 toep->flags |= TPF_TX_SUSPENDED; 1001 return; 1002 } 1003 1004 if (max_nsegs_1mbuf < n) 1005 max_nsegs_1mbuf = n; 1006 } 1007 1008 if (__predict_false(toep->flags & TPF_FIN_SENT)) 1009 panic("%s: excess tx.", __func__); 1010 1011 /* 1012 * We have a PDU to send. All of it goes out in one WR so 'm' 1013 * is NULL. A PDU's length is always a multiple of 4. 1014 */ 1015 MPASS(m == NULL); 1016 MPASS((plen & 3) == 0); 1017 MPASS(sndptr->m_pkthdr.len == plen); 1018 1019 shove = !(tp->t_flags & TF_MORETOCOME); 1020 ulp_submode = mbuf_ulp_submode(sndptr); 1021 MPASS(ulp_submode < nitems(ulp_extra_len)); 1022 1023 /* 1024 * plen doesn't include header and data digests, which are 1025 * generated and inserted in the right places by the TOE, but 1026 * they do occupy TCP sequence space and need to be accounted 1027 * for. 1028 */ 1029 adjusted_plen = plen + ulp_extra_len[ulp_submode]; 1030 if (plen <= max_imm) { 1031 1032 /* Immediate data tx */ 1033 1034 wr = alloc_wrqe(roundup2(sizeof(*txwr) + plen, 16), 1035 toep->ofld_txq); 1036 if (wr == NULL) { 1037 /* XXX: how will we recover from this? */ 1038 toep->flags |= TPF_TX_SUSPENDED; 1039 return; 1040 } 1041 txwr = wrtod(wr); 1042 credits = howmany(wr->wr_len, 16); 1043 write_tx_wr(txwr, toep, plen, adjusted_plen, credits, 1044 shove, ulp_submode, sc->tt.tx_align); 1045 m_copydata(sndptr, 0, plen, (void *)(txwr + 1)); 1046 nsegs = 0; 1047 } else { 1048 int wr_len; 1049 1050 /* DSGL tx */ 1051 wr_len = sizeof(*txwr) + sizeof(struct ulptx_sgl) + 1052 ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8; 1053 wr = alloc_wrqe(roundup2(wr_len, 16), toep->ofld_txq); 1054 if (wr == NULL) { 1055 /* XXX: how will we recover from this? 
*/ 1056 toep->flags |= TPF_TX_SUSPENDED; 1057 return; 1058 } 1059 txwr = wrtod(wr); 1060 credits = howmany(wr_len, 16); 1061 write_tx_wr(txwr, toep, 0, adjusted_plen, credits, 1062 shove, ulp_submode, sc->tt.tx_align); 1063 write_tx_sgl(txwr + 1, sndptr, m, nsegs, 1064 max_nsegs_1mbuf); 1065 if (wr_len & 0xf) { 1066 uint64_t *pad = (uint64_t *) 1067 ((uintptr_t)txwr + wr_len); 1068 *pad = 0; 1069 } 1070 } 1071 1072 KASSERT(toep->tx_credits >= credits, 1073 ("%s: not enough credits", __func__)); 1074 1075 m = mbufq_dequeue(pduq); 1076 MPASS(m == sndptr); 1077 mbufq_enqueue(&toep->ulp_pdu_reclaimq, m); 1078 1079 toep->tx_credits -= credits; 1080 toep->tx_nocompl += credits; 1081 toep->plen_nocompl += plen; 1082 if (toep->tx_credits <= toep->tx_total * 3 / 8 && 1083 toep->tx_nocompl >= toep->tx_total / 4) { 1084 txwr->op_to_immdlen |= htobe32(F_FW_WR_COMPL); 1085 toep->tx_nocompl = 0; 1086 toep->plen_nocompl = 0; 1087 } 1088 1089 tp->snd_nxt += adjusted_plen; 1090 tp->snd_max += adjusted_plen; 1091 1092 toep->flags |= TPF_TX_DATA_SENT; 1093 if (toep->tx_credits < MIN_OFLD_TX_CREDITS) 1094 toep->flags |= TPF_TX_SUSPENDED; 1095 1096 KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__)); 1097 txsd->plen = plen; 1098 txsd->tx_credits = credits; 1099 txsd++; 1100 if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) { 1101 toep->txsd_pidx = 0; 1102 txsd = &toep->txsd[0]; 1103 } 1104 toep->txsd_avail--; 1105 1106 t4_l2t_send(sc, wr, toep->l2te); 1107 } 1108 1109 /* Send a FIN if requested, but only if there are no more PDUs to send */ 1110 if (mbufq_first(pduq) == NULL && toep->flags & TPF_SEND_FIN) 1111 close_conn(sc, toep); 1112 } 1113 1114 int 1115 t4_tod_output(struct toedev *tod, struct tcpcb *tp) 1116 { 1117 struct adapter *sc = tod->tod_softc; 1118 #ifdef INVARIANTS 1119 struct inpcb *inp = tp->t_inpcb; 1120 #endif 1121 struct toepcb *toep = tp->t_toe; 1122 1123 INP_WLOCK_ASSERT(inp); 1124 KASSERT((inp->inp_flags & INP_DROPPED) == 0, 1125 ("%s: inp %p dropped.", __func__, inp)); 1126 KASSERT(toep != NULL, ("%s: toep is NULL", __func__)); 1127 1128 if (toep->ulp_mode == ULP_MODE_ISCSI) 1129 t4_push_pdus(sc, toep, 0); 1130 else 1131 t4_push_frames(sc, toep, 0); 1132 1133 return (0); 1134 } 1135 1136 int 1137 t4_send_fin(struct toedev *tod, struct tcpcb *tp) 1138 { 1139 struct adapter *sc = tod->tod_softc; 1140 #ifdef INVARIANTS 1141 struct inpcb *inp = tp->t_inpcb; 1142 #endif 1143 struct toepcb *toep = tp->t_toe; 1144 1145 INP_WLOCK_ASSERT(inp); 1146 KASSERT((inp->inp_flags & INP_DROPPED) == 0, 1147 ("%s: inp %p dropped.", __func__, inp)); 1148 KASSERT(toep != NULL, ("%s: toep is NULL", __func__)); 1149 1150 toep->flags |= TPF_SEND_FIN; 1151 if (tp->t_state >= TCPS_ESTABLISHED) { 1152 if (toep->ulp_mode == ULP_MODE_ISCSI) 1153 t4_push_pdus(sc, toep, 0); 1154 else 1155 t4_push_frames(sc, toep, 0); 1156 } 1157 1158 return (0); 1159 } 1160 1161 int 1162 t4_send_rst(struct toedev *tod, struct tcpcb *tp) 1163 { 1164 struct adapter *sc = tod->tod_softc; 1165 #if defined(INVARIANTS) 1166 struct inpcb *inp = tp->t_inpcb; 1167 #endif 1168 struct toepcb *toep = tp->t_toe; 1169 1170 INP_WLOCK_ASSERT(inp); 1171 KASSERT((inp->inp_flags & INP_DROPPED) == 0, 1172 ("%s: inp %p dropped.", __func__, inp)); 1173 KASSERT(toep != NULL, ("%s: toep is NULL", __func__)); 1174 1175 /* hmmmm */ 1176 KASSERT(toep->flags & TPF_FLOWC_WR_SENT, 1177 ("%s: flowc for tid %u [%s] not sent already", 1178 __func__, toep->tid, tcpstates[tp->t_state])); 1179 1180 send_reset(sc, toep, 0); 1181 return (0); 1182 } 1183 1184 
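/* The CPL handlers below service messages from the chip; they are registered in t4_init_cpl_io_handlers() near the end of this file. */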
/* 1185 * Peer has sent us a FIN. 1186 */ 1187 static int 1188 do_peer_close(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) 1189 { 1190 struct adapter *sc = iq->adapter; 1191 const struct cpl_peer_close *cpl = (const void *)(rss + 1); 1192 unsigned int tid = GET_TID(cpl); 1193 struct toepcb *toep = lookup_tid(sc, tid); 1194 struct inpcb *inp = toep->inp; 1195 struct tcpcb *tp = NULL; 1196 struct socket *so; 1197 #ifdef INVARIANTS 1198 unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); 1199 #endif 1200 1201 KASSERT(opcode == CPL_PEER_CLOSE, 1202 ("%s: unexpected opcode 0x%x", __func__, opcode)); 1203 KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); 1204 1205 if (__predict_false(toep->flags & TPF_SYNQE)) { 1206 #ifdef INVARIANTS 1207 struct synq_entry *synqe = (void *)toep; 1208 1209 INP_WLOCK(synqe->lctx->inp); 1210 if (synqe->flags & TPF_SYNQE_HAS_L2TE) { 1211 KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN, 1212 ("%s: listen socket closed but tid %u not aborted.", 1213 __func__, tid)); 1214 } else { 1215 /* 1216 * do_pass_accept_req is still running and will 1217 * eventually take care of this tid. 1218 */ 1219 } 1220 INP_WUNLOCK(synqe->lctx->inp); 1221 #endif 1222 CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid, 1223 toep, toep->flags); 1224 return (0); 1225 } 1226 1227 KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); 1228 1229 CURVNET_SET(toep->vnet); 1230 INP_INFO_RLOCK(&V_tcbinfo); 1231 INP_WLOCK(inp); 1232 tp = intotcpcb(inp); 1233 1234 CTR5(KTR_CXGBE, "%s: tid %u (%s), toep_flags 0x%x, inp %p", __func__, 1235 tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags, inp); 1236 1237 if (toep->flags & TPF_ABORT_SHUTDOWN) 1238 goto done; 1239 1240 tp->rcv_nxt++; /* FIN */ 1241 1242 so = inp->inp_socket; 1243 if (toep->ulp_mode == ULP_MODE_TCPDDP) { 1244 DDP_LOCK(toep); 1245 if (__predict_false(toep->ddp_flags & 1246 (DDP_BUF0_ACTIVE | DDP_BUF1_ACTIVE))) 1247 handle_ddp_close(toep, tp, cpl->rcv_nxt); 1248 DDP_UNLOCK(toep); 1249 } 1250 socantrcvmore(so); 1251 1252 if (toep->ulp_mode != ULP_MODE_RDMA) { 1253 KASSERT(tp->rcv_nxt == be32toh(cpl->rcv_nxt), 1254 ("%s: rcv_nxt mismatch: %u %u", __func__, tp->rcv_nxt, 1255 be32toh(cpl->rcv_nxt))); 1256 } 1257 1258 switch (tp->t_state) { 1259 case TCPS_SYN_RECEIVED: 1260 tp->t_starttime = ticks; 1261 /* FALLTHROUGH */ 1262 1263 case TCPS_ESTABLISHED: 1264 tp->t_state = TCPS_CLOSE_WAIT; 1265 break; 1266 1267 case TCPS_FIN_WAIT_1: 1268 tp->t_state = TCPS_CLOSING; 1269 break; 1270 1271 case TCPS_FIN_WAIT_2: 1272 tcp_twstart(tp); 1273 INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */ 1274 INP_INFO_RUNLOCK(&V_tcbinfo); 1275 CURVNET_RESTORE(); 1276 1277 INP_WLOCK(inp); 1278 final_cpl_received(toep); 1279 return (0); 1280 1281 default: 1282 log(LOG_ERR, "%s: TID %u received CPL_PEER_CLOSE in state %d\n", 1283 __func__, tid, tp->t_state); 1284 } 1285 done: 1286 INP_WUNLOCK(inp); 1287 INP_INFO_RUNLOCK(&V_tcbinfo); 1288 CURVNET_RESTORE(); 1289 return (0); 1290 } 1291 1292 /* 1293 * Peer has ACK'd our FIN. 
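* The chip reports snd_nxt including the FIN, so one is subtracted below before updating tp->snd_una.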
1294 */ 1295 static int 1296 do_close_con_rpl(struct sge_iq *iq, const struct rss_header *rss, 1297 struct mbuf *m) 1298 { 1299 struct adapter *sc = iq->adapter; 1300 const struct cpl_close_con_rpl *cpl = (const void *)(rss + 1); 1301 unsigned int tid = GET_TID(cpl); 1302 struct toepcb *toep = lookup_tid(sc, tid); 1303 struct inpcb *inp = toep->inp; 1304 struct tcpcb *tp = NULL; 1305 struct socket *so = NULL; 1306 #ifdef INVARIANTS 1307 unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); 1308 #endif 1309 1310 KASSERT(opcode == CPL_CLOSE_CON_RPL, 1311 ("%s: unexpected opcode 0x%x", __func__, opcode)); 1312 KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); 1313 KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); 1314 1315 CURVNET_SET(toep->vnet); 1316 INP_INFO_RLOCK(&V_tcbinfo); 1317 INP_WLOCK(inp); 1318 tp = intotcpcb(inp); 1319 1320 CTR4(KTR_CXGBE, "%s: tid %u (%s), toep_flags 0x%x", 1321 __func__, tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags); 1322 1323 if (toep->flags & TPF_ABORT_SHUTDOWN) 1324 goto done; 1325 1326 so = inp->inp_socket; 1327 tp->snd_una = be32toh(cpl->snd_nxt) - 1; /* exclude FIN */ 1328 1329 switch (tp->t_state) { 1330 case TCPS_CLOSING: /* see TCPS_FIN_WAIT_2 in do_peer_close too */ 1331 tcp_twstart(tp); 1332 release: 1333 INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */ 1334 INP_INFO_RUNLOCK(&V_tcbinfo); 1335 CURVNET_RESTORE(); 1336 1337 INP_WLOCK(inp); 1338 final_cpl_received(toep); /* no more CPLs expected */ 1339 1340 return (0); 1341 case TCPS_LAST_ACK: 1342 if (tcp_close(tp)) 1343 INP_WUNLOCK(inp); 1344 goto release; 1345 1346 case TCPS_FIN_WAIT_1: 1347 if (so->so_rcv.sb_state & SBS_CANTRCVMORE) 1348 soisdisconnected(so); 1349 tp->t_state = TCPS_FIN_WAIT_2; 1350 break; 1351 1352 default: 1353 log(LOG_ERR, 1354 "%s: TID %u received CPL_CLOSE_CON_RPL in state %s\n", 1355 __func__, tid, tcpstates[tp->t_state]); 1356 } 1357 done: 1358 INP_WUNLOCK(inp); 1359 INP_INFO_RUNLOCK(&V_tcbinfo); 1360 CURVNET_RESTORE(); 1361 return (0); 1362 } 1363 1364 void 1365 send_abort_rpl(struct adapter *sc, struct sge_wrq *ofld_txq, int tid, 1366 int rst_status) 1367 { 1368 struct wrqe *wr; 1369 struct cpl_abort_rpl *cpl; 1370 1371 wr = alloc_wrqe(sizeof(*cpl), ofld_txq); 1372 if (wr == NULL) { 1373 /* XXX */ 1374 panic("%s: allocation failure.", __func__); 1375 } 1376 cpl = wrtod(wr); 1377 1378 INIT_TP_WR_MIT_CPL(cpl, CPL_ABORT_RPL, tid); 1379 cpl->cmd = rst_status; 1380 1381 t4_wrq_tx(sc, wr); 1382 } 1383 1384 static int 1385 abort_status_to_errno(struct tcpcb *tp, unsigned int abort_reason) 1386 { 1387 switch (abort_reason) { 1388 case CPL_ERR_BAD_SYN: 1389 case CPL_ERR_CONN_RESET: 1390 return (tp->t_state == TCPS_CLOSE_WAIT ? EPIPE : ECONNRESET); 1391 case CPL_ERR_XMIT_TIMEDOUT: 1392 case CPL_ERR_PERSIST_TIMEDOUT: 1393 case CPL_ERR_FINWAIT2_TIMEDOUT: 1394 case CPL_ERR_KEEPALIVE_TIMEDOUT: 1395 return (ETIMEDOUT); 1396 default: 1397 return (EIO); 1398 } 1399 } 1400 1401 /* 1402 * TCP RST from the peer, timeout, or some other such critical error. 
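* The CPL status is mapped to an errno by abort_status_to_errno() above before the connection is torn down.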
1403 */ 1404 static int 1405 do_abort_req(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) 1406 { 1407 struct adapter *sc = iq->adapter; 1408 const struct cpl_abort_req_rss *cpl = (const void *)(rss + 1); 1409 unsigned int tid = GET_TID(cpl); 1410 struct toepcb *toep = lookup_tid(sc, tid); 1411 struct sge_wrq *ofld_txq = toep->ofld_txq; 1412 struct inpcb *inp; 1413 struct tcpcb *tp; 1414 #ifdef INVARIANTS 1415 unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); 1416 #endif 1417 1418 KASSERT(opcode == CPL_ABORT_REQ_RSS, 1419 ("%s: unexpected opcode 0x%x", __func__, opcode)); 1420 KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); 1421 1422 if (toep->flags & TPF_SYNQE) 1423 return (do_abort_req_synqe(iq, rss, m)); 1424 1425 KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); 1426 1427 if (negative_advice(cpl->status)) { 1428 CTR4(KTR_CXGBE, "%s: negative advice %d for tid %d (0x%x)", 1429 __func__, cpl->status, tid, toep->flags); 1430 return (0); /* Ignore negative advice */ 1431 } 1432 1433 inp = toep->inp; 1434 CURVNET_SET(toep->vnet); 1435 INP_INFO_RLOCK(&V_tcbinfo); /* for tcp_close */ 1436 INP_WLOCK(inp); 1437 1438 tp = intotcpcb(inp); 1439 1440 CTR6(KTR_CXGBE, 1441 "%s: tid %d (%s), toep_flags 0x%x, inp_flags 0x%x, status %d", 1442 __func__, tid, tp ? tcpstates[tp->t_state] : "no tp", toep->flags, 1443 inp->inp_flags, cpl->status); 1444 1445 /* 1446 * If we'd initiated an abort earlier the reply to it is responsible for 1447 * cleaning up resources. Otherwise we tear everything down right here 1448 * right now. We owe the T4 a CPL_ABORT_RPL no matter what. 1449 */ 1450 if (toep->flags & TPF_ABORT_SHUTDOWN) { 1451 INP_WUNLOCK(inp); 1452 goto done; 1453 } 1454 toep->flags |= TPF_ABORT_SHUTDOWN; 1455 1456 if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) == 0) { 1457 struct socket *so = inp->inp_socket; 1458 1459 if (so != NULL) 1460 so_error_set(so, abort_status_to_errno(tp, 1461 cpl->status)); 1462 tp = tcp_close(tp); 1463 if (tp == NULL) 1464 INP_WLOCK(inp); /* re-acquire */ 1465 } 1466 1467 final_cpl_received(toep); 1468 done: 1469 INP_INFO_RUNLOCK(&V_tcbinfo); 1470 CURVNET_RESTORE(); 1471 send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST); 1472 return (0); 1473 } 1474 1475 /* 1476 * Reply to the CPL_ABORT_REQ (send_reset) 1477 */ 1478 static int 1479 do_abort_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) 1480 { 1481 struct adapter *sc = iq->adapter; 1482 const struct cpl_abort_rpl_rss *cpl = (const void *)(rss + 1); 1483 unsigned int tid = GET_TID(cpl); 1484 struct toepcb *toep = lookup_tid(sc, tid); 1485 struct inpcb *inp = toep->inp; 1486 #ifdef INVARIANTS 1487 unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); 1488 #endif 1489 1490 KASSERT(opcode == CPL_ABORT_RPL_RSS, 1491 ("%s: unexpected opcode 0x%x", __func__, opcode)); 1492 KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); 1493 1494 if (toep->flags & TPF_SYNQE) 1495 return (do_abort_rpl_synqe(iq, rss, m)); 1496 1497 KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); 1498 1499 CTR5(KTR_CXGBE, "%s: tid %u, toep %p, inp %p, status %d", 1500 __func__, tid, toep, inp, cpl->status); 1501 1502 KASSERT(toep->flags & TPF_ABORT_SHUTDOWN, 1503 ("%s: wasn't expecting abort reply", __func__)); 1504 1505 INP_WLOCK(inp); 1506 final_cpl_received(toep); 1507 1508 return (0); 1509 } 1510 1511 static int 1512 do_rx_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) 1513 { 1514 struct adapter *sc = iq->adapter; 
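/* Unlike the handlers above, CPL_RX_DATA arrives with its payload in the mbuf; the header is stripped with m_adj() below. */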
1515 const struct cpl_rx_data *cpl = mtod(m, const void *); 1516 unsigned int tid = GET_TID(cpl); 1517 struct toepcb *toep = lookup_tid(sc, tid); 1518 struct inpcb *inp = toep->inp; 1519 struct tcpcb *tp; 1520 struct socket *so; 1521 struct sockbuf *sb; 1522 int len; 1523 uint32_t ddp_placed = 0; 1524 1525 if (__predict_false(toep->flags & TPF_SYNQE)) { 1526 #ifdef INVARIANTS 1527 struct synq_entry *synqe = (void *)toep; 1528 1529 INP_WLOCK(synqe->lctx->inp); 1530 if (synqe->flags & TPF_SYNQE_HAS_L2TE) { 1531 KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN, 1532 ("%s: listen socket closed but tid %u not aborted.", 1533 __func__, tid)); 1534 } else { 1535 /* 1536 * do_pass_accept_req is still running and will 1537 * eventually take care of this tid. 1538 */ 1539 } 1540 INP_WUNLOCK(synqe->lctx->inp); 1541 #endif 1542 CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid, 1543 toep, toep->flags); 1544 m_freem(m); 1545 return (0); 1546 } 1547 1548 KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); 1549 1550 /* strip off CPL header */ 1551 m_adj(m, sizeof(*cpl)); 1552 len = m->m_pkthdr.len; 1553 1554 INP_WLOCK(inp); 1555 if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) { 1556 CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x", 1557 __func__, tid, len, inp->inp_flags); 1558 INP_WUNLOCK(inp); 1559 m_freem(m); 1560 return (0); 1561 } 1562 1563 tp = intotcpcb(inp); 1564 1565 if (__predict_false(tp->rcv_nxt != be32toh(cpl->seq))) 1566 ddp_placed = be32toh(cpl->seq) - tp->rcv_nxt; 1567 1568 tp->rcv_nxt += len; 1569 if (tp->rcv_wnd < len) { 1570 KASSERT(toep->ulp_mode == ULP_MODE_RDMA, 1571 ("%s: negative window size", __func__)); 1572 } 1573 1574 tp->rcv_wnd -= len; 1575 tp->t_rcvtime = ticks; 1576 1577 if (toep->ulp_mode == ULP_MODE_TCPDDP) 1578 DDP_LOCK(toep); 1579 so = inp_inpcbtosocket(inp); 1580 sb = &so->so_rcv; 1581 SOCKBUF_LOCK(sb); 1582 1583 if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) { 1584 CTR3(KTR_CXGBE, "%s: tid %u, excess rx (%d bytes)", 1585 __func__, tid, len); 1586 m_freem(m); 1587 SOCKBUF_UNLOCK(sb); 1588 if (toep->ulp_mode == ULP_MODE_TCPDDP) 1589 DDP_UNLOCK(toep); 1590 INP_WUNLOCK(inp); 1591 1592 CURVNET_SET(toep->vnet); 1593 INP_INFO_RLOCK(&V_tcbinfo); 1594 INP_WLOCK(inp); 1595 tp = tcp_drop(tp, ECONNRESET); 1596 if (tp) 1597 INP_WUNLOCK(inp); 1598 INP_INFO_RUNLOCK(&V_tcbinfo); 1599 CURVNET_RESTORE(); 1600 1601 return (0); 1602 } 1603 1604 /* receive buffer autosize */ 1605 MPASS(toep->vnet == so->so_vnet); 1606 CURVNET_SET(toep->vnet); 1607 if (sb->sb_flags & SB_AUTOSIZE && 1608 V_tcp_do_autorcvbuf && 1609 sb->sb_hiwat < V_tcp_autorcvbuf_max && 1610 len > (sbspace(sb) / 8 * 7)) { 1611 unsigned int hiwat = sb->sb_hiwat; 1612 unsigned int newsize = min(hiwat + V_tcp_autorcvbuf_inc, 1613 V_tcp_autorcvbuf_max); 1614 1615 if (!sbreserve_locked(sb, newsize, so, NULL)) 1616 sb->sb_flags &= ~SB_AUTOSIZE; 1617 else 1618 toep->rx_credits += newsize - hiwat; 1619 } 1620 1621 if (toep->ddp_waiting_count != 0 || toep->ddp_active_count != 0) 1622 CTR3(KTR_CXGBE, "%s: tid %u, non-ddp rx (%d bytes)", __func__, 1623 tid, len); 1624 1625 if (toep->ulp_mode == ULP_MODE_TCPDDP) { 1626 int changed = !(toep->ddp_flags & DDP_ON) ^ cpl->ddp_off; 1627 1628 if (changed) { 1629 if (toep->ddp_flags & DDP_SC_REQ) 1630 toep->ddp_flags ^= DDP_ON | DDP_SC_REQ; 1631 else { 1632 KASSERT(cpl->ddp_off == 1, 1633 ("%s: DDP switched on by itself.", 1634 __func__)); 1635 1636 /* Fell out of DDP mode */ 1637 toep->ddp_flags &= ~DDP_ON; 1638 CTR1(KTR_CXGBE, "%s: fell out of DDP mode", 
1639 __func__); 1640 1641 insert_ddp_data(toep, ddp_placed); 1642 } 1643 } 1644 1645 if (toep->ddp_flags & DDP_ON) { 1646 /* 1647 * CPL_RX_DATA with DDP on can only be an indicate. 1648 * Start posting queued AIO requests via DDP. The 1649 * payload that arrived in this indicate is appended 1650 * to the socket buffer as usual. 1651 */ 1652 handle_ddp_indicate(toep); 1653 } 1654 } 1655 1656 KASSERT(toep->sb_cc >= sbused(sb), 1657 ("%s: sb %p has more data (%d) than last time (%d).", 1658 __func__, sb, sbused(sb), toep->sb_cc)); 1659 toep->rx_credits += toep->sb_cc - sbused(sb); 1660 sbappendstream_locked(sb, m, 0); 1661 toep->sb_cc = sbused(sb); 1662 if (toep->rx_credits > 0 && toep->sb_cc + tp->rcv_wnd < sb->sb_lowat) { 1663 int credits; 1664 1665 credits = send_rx_credits(sc, toep, toep->rx_credits); 1666 toep->rx_credits -= credits; 1667 tp->rcv_wnd += credits; 1668 tp->rcv_adv += credits; 1669 } 1670 1671 if (toep->ddp_waiting_count > 0 && sbavail(sb) != 0) { 1672 CTR2(KTR_CXGBE, "%s: tid %u queueing AIO task", __func__, 1673 tid); 1674 ddp_queue_toep(toep); 1675 } 1676 sorwakeup_locked(so); 1677 SOCKBUF_UNLOCK_ASSERT(sb); 1678 if (toep->ulp_mode == ULP_MODE_TCPDDP) 1679 DDP_UNLOCK(toep); 1680 1681 INP_WUNLOCK(inp); 1682 CURVNET_RESTORE(); 1683 return (0); 1684 } 1685 1686 #define S_CPL_FW4_ACK_OPCODE 24 1687 #define M_CPL_FW4_ACK_OPCODE 0xff 1688 #define V_CPL_FW4_ACK_OPCODE(x) ((x) << S_CPL_FW4_ACK_OPCODE) 1689 #define G_CPL_FW4_ACK_OPCODE(x) \ 1690 (((x) >> S_CPL_FW4_ACK_OPCODE) & M_CPL_FW4_ACK_OPCODE) 1691 1692 #define S_CPL_FW4_ACK_FLOWID 0 1693 #define M_CPL_FW4_ACK_FLOWID 0xffffff 1694 #define V_CPL_FW4_ACK_FLOWID(x) ((x) << S_CPL_FW4_ACK_FLOWID) 1695 #define G_CPL_FW4_ACK_FLOWID(x) \ 1696 (((x) >> S_CPL_FW4_ACK_FLOWID) & M_CPL_FW4_ACK_FLOWID) 1697 1698 #define S_CPL_FW4_ACK_CR 24 1699 #define M_CPL_FW4_ACK_CR 0xff 1700 #define V_CPL_FW4_ACK_CR(x) ((x) << S_CPL_FW4_ACK_CR) 1701 #define G_CPL_FW4_ACK_CR(x) (((x) >> S_CPL_FW4_ACK_CR) & M_CPL_FW4_ACK_CR) 1702 1703 #define S_CPL_FW4_ACK_SEQVAL 0 1704 #define M_CPL_FW4_ACK_SEQVAL 0x1 1705 #define V_CPL_FW4_ACK_SEQVAL(x) ((x) << S_CPL_FW4_ACK_SEQVAL) 1706 #define G_CPL_FW4_ACK_SEQVAL(x) \ 1707 (((x) >> S_CPL_FW4_ACK_SEQVAL) & M_CPL_FW4_ACK_SEQVAL) 1708 #define F_CPL_FW4_ACK_SEQVAL V_CPL_FW4_ACK_SEQVAL(1U) 1709 1710 static int 1711 do_fw4_ack(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) 1712 { 1713 struct adapter *sc = iq->adapter; 1714 const struct cpl_fw4_ack *cpl = (const void *)(rss + 1); 1715 unsigned int tid = G_CPL_FW4_ACK_FLOWID(be32toh(OPCODE_TID(cpl))); 1716 struct toepcb *toep = lookup_tid(sc, tid); 1717 struct inpcb *inp; 1718 struct tcpcb *tp; 1719 struct socket *so; 1720 uint8_t credits = cpl->credits; 1721 struct ofld_tx_sdesc *txsd; 1722 int plen; 1723 #ifdef INVARIANTS 1724 unsigned int opcode = G_CPL_FW4_ACK_OPCODE(be32toh(OPCODE_TID(cpl))); 1725 #endif 1726 1727 /* 1728 * Very unusual case: we'd sent a flowc + abort_req for a synq entry and 1729 * now this comes back carrying the credits for the flowc. 
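* The credits are simply discarded: the handler just returns 0 for such a tid.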
1730 */ 1731 if (__predict_false(toep->flags & TPF_SYNQE)) { 1732 KASSERT(toep->flags & TPF_ABORT_SHUTDOWN, 1733 ("%s: credits for a synq entry %p", __func__, toep)); 1734 return (0); 1735 } 1736 1737 inp = toep->inp; 1738 1739 KASSERT(opcode == CPL_FW4_ACK, 1740 ("%s: unexpected opcode 0x%x", __func__, opcode)); 1741 KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); 1742 KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); 1743 1744 INP_WLOCK(inp); 1745 1746 if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN)) { 1747 INP_WUNLOCK(inp); 1748 return (0); 1749 } 1750 1751 KASSERT((inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) == 0, 1752 ("%s: inp_flags 0x%x", __func__, inp->inp_flags)); 1753 1754 tp = intotcpcb(inp); 1755 1756 if (cpl->flags & CPL_FW4_ACK_FLAGS_SEQVAL) { 1757 tcp_seq snd_una = be32toh(cpl->snd_una); 1758 1759 #ifdef INVARIANTS 1760 if (__predict_false(SEQ_LT(snd_una, tp->snd_una))) { 1761 log(LOG_ERR, 1762 "%s: unexpected seq# %x for TID %u, snd_una %x\n", 1763 __func__, snd_una, toep->tid, tp->snd_una); 1764 } 1765 #endif 1766 1767 if (tp->snd_una != snd_una) { 1768 tp->snd_una = snd_una; 1769 tp->ts_recent_age = tcp_ts_getticks(); 1770 } 1771 } 1772 1773 #ifdef VERBOSE_TRACES 1774 CTR3(KTR_CXGBE, "%s: tid %d credits %u", __func__, tid, credits); 1775 #endif 1776 so = inp->inp_socket; 1777 txsd = &toep->txsd[toep->txsd_cidx]; 1778 plen = 0; 1779 while (credits) { 1780 KASSERT(credits >= txsd->tx_credits, 1781 ("%s: too many (or partial) credits", __func__)); 1782 credits -= txsd->tx_credits; 1783 toep->tx_credits += txsd->tx_credits; 1784 plen += txsd->plen; 1785 txsd++; 1786 toep->txsd_avail++; 1787 KASSERT(toep->txsd_avail <= toep->txsd_total, 1788 ("%s: txsd avail > total", __func__)); 1789 if (__predict_false(++toep->txsd_cidx == toep->txsd_total)) { 1790 txsd = &toep->txsd[0]; 1791 toep->txsd_cidx = 0; 1792 } 1793 } 1794 1795 if (toep->tx_credits == toep->tx_total) { 1796 toep->tx_nocompl = 0; 1797 toep->plen_nocompl = 0; 1798 } 1799 1800 if (toep->flags & TPF_TX_SUSPENDED && 1801 toep->tx_credits >= toep->tx_total / 4) { 1802 #ifdef VERBOSE_TRACES 1803 CTR2(KTR_CXGBE, "%s: tid %d calling t4_push_frames", __func__, 1804 tid); 1805 #endif 1806 toep->flags &= ~TPF_TX_SUSPENDED; 1807 CURVNET_SET(toep->vnet); 1808 if (toep->ulp_mode == ULP_MODE_ISCSI) 1809 t4_push_pdus(sc, toep, plen); 1810 else 1811 t4_push_frames(sc, toep, plen); 1812 CURVNET_RESTORE(); 1813 } else if (plen > 0) { 1814 struct sockbuf *sb = &so->so_snd; 1815 int sbu; 1816 1817 SOCKBUF_LOCK(sb); 1818 sbu = sbused(sb); 1819 if (toep->ulp_mode == ULP_MODE_ISCSI) { 1820 1821 if (__predict_false(sbu > 0)) { 1822 /* 1823 * The data transmitted before the tid's ULP mode 1824 * changed to ISCSI is still in so_snd. 1825 * Incoming credits should account for so_snd 1826 * first.
1827 */ 1828 sbdrop_locked(sb, min(sbu, plen)); 1829 plen -= min(sbu, plen); 1830 } 1831 sowwakeup_locked(so); /* unlocks so_snd */ 1832 rqdrop_locked(&toep->ulp_pdu_reclaimq, plen); 1833 } else { 1834 #ifdef VERBOSE_TRACES 1835 CTR3(KTR_CXGBE, "%s: tid %d dropped %d bytes", __func__, 1836 tid, plen); 1837 #endif 1838 sbdrop_locked(sb, plen); 1839 if (!TAILQ_EMPTY(&toep->aiotx_jobq)) 1840 t4_aiotx_queue_toep(toep); 1841 sowwakeup_locked(so); /* unlocks so_snd */ 1842 } 1843 SOCKBUF_UNLOCK_ASSERT(sb); 1844 } 1845 1846 INP_WUNLOCK(inp); 1847 1848 return (0); 1849 } 1850 1851 int 1852 do_set_tcb_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) 1853 { 1854 struct adapter *sc = iq->adapter; 1855 const struct cpl_set_tcb_rpl *cpl = (const void *)(rss + 1); 1856 unsigned int tid = GET_TID(cpl); 1857 struct toepcb *toep; 1858 #ifdef INVARIANTS 1859 unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl))); 1860 #endif 1861 1862 KASSERT(opcode == CPL_SET_TCB_RPL, 1863 ("%s: unexpected opcode 0x%x", __func__, opcode)); 1864 KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); 1865 MPASS(iq != &sc->sge.fwq); 1866 1867 toep = lookup_tid(sc, tid); 1868 if (toep->ulp_mode == ULP_MODE_TCPDDP) { 1869 handle_ddp_tcb_rpl(toep, cpl); 1870 return (0); 1871 } 1872 1873 /* 1874 * TOM and/or other ULPs don't request replies for CPL_SET_TCB or 1875 * CPL_SET_TCB_FIELD requests. This can easily change and when it does 1876 * the dispatch code will go here. 1877 */ 1878 #ifdef INVARIANTS 1879 panic("%s: Unexpected CPL_SET_TCB_RPL for tid %u on iq %p", __func__, 1880 tid, iq); 1881 #else 1882 log(LOG_ERR, "%s: Unexpected CPL_SET_TCB_RPL for tid %u on iq %p\n", 1883 __func__, tid, iq); 1884 #endif 1885 1886 return (0); 1887 } 1888 1889 void 1890 t4_set_tcb_field(struct adapter *sc, struct sge_wrq *wrq, int tid, 1891 uint16_t word, uint64_t mask, uint64_t val, int reply, int cookie, int iqid) 1892 { 1893 struct wrqe *wr; 1894 struct cpl_set_tcb_field *req; 1895 1896 MPASS((cookie & ~M_COOKIE) == 0); 1897 MPASS((iqid & ~M_QUEUENO) == 0); 1898 1899 wr = alloc_wrqe(sizeof(*req), wrq); 1900 if (wr == NULL) { 1901 /* XXX */ 1902 panic("%s: allocation failure.", __func__); 1903 } 1904 req = wrtod(wr); 1905 1906 INIT_TP_WR_MIT_CPL(req, CPL_SET_TCB_FIELD, tid); 1907 req->reply_ctrl = htobe16(V_QUEUENO(iqid)); 1908 if (reply == 0) 1909 req->reply_ctrl |= htobe16(F_NO_REPLY); 1910 req->word_cookie = htobe16(V_WORD(word) | V_COOKIE(cookie)); 1911 req->mask = htobe64(mask); 1912 req->val = htobe64(val); 1913 1914 t4_wrq_tx(sc, wr); 1915 } 1916 1917 void 1918 t4_init_cpl_io_handlers(void) 1919 { 1920 1921 t4_register_cpl_handler(CPL_PEER_CLOSE, do_peer_close); 1922 t4_register_cpl_handler(CPL_CLOSE_CON_RPL, do_close_con_rpl); 1923 t4_register_cpl_handler(CPL_ABORT_REQ_RSS, do_abort_req); 1924 t4_register_cpl_handler(CPL_ABORT_RPL_RSS, do_abort_rpl); 1925 t4_register_cpl_handler(CPL_RX_DATA, do_rx_data); 1926 t4_register_cpl_handler(CPL_FW4_ACK, do_fw4_ack); 1927 } 1928 1929 void 1930 t4_uninit_cpl_io_handlers(void) 1931 { 1932 1933 t4_register_cpl_handler(CPL_PEER_CLOSE, NULL); 1934 t4_register_cpl_handler(CPL_CLOSE_CON_RPL, NULL); 1935 t4_register_cpl_handler(CPL_ABORT_REQ_RSS, NULL); 1936 t4_register_cpl_handler(CPL_ABORT_RPL_RSS, NULL); 1937 t4_register_cpl_handler(CPL_RX_DATA, NULL); 1938 t4_register_cpl_handler(CPL_FW4_ACK, NULL); 1939 } 1940 1941 /* 1942 * Use the 'backend3' field in AIO jobs to store the amount of data 1943 * sent by the AIO job so far and the 'backend4' field to hold an 

/*
 * Use the 'backend3' field in AIO jobs to store the amount of data
 * sent by the AIO job so far and the 'backend4' field to hold an
 * error that should be reported when the job is completed.
 */
#define aio_sent backend3
#define aio_error backend4

#define jobtotid(job)							\
	(((struct toepcb *)(so_sototcpcb((job)->fd_file->f_data)->t_toe))->tid)

static void
free_aiotx_buffer(struct aiotx_buffer *ab)
{
	struct kaiocb *job;
	long status;
	int error;

	if (refcount_release(&ab->refcount) == 0)
		return;

	job = ab->job;
	error = job->aio_error;
	status = job->aio_sent;
	vm_page_unhold_pages(ab->ps.pages, ab->ps.npages);
	free(ab, M_CXGBE);
#ifdef VERBOSE_TRACES
	CTR5(KTR_CXGBE, "%s: tid %d completed %p len %ld, error %d", __func__,
	    jobtotid(job), job, status, error);
#endif
	if (error == ECANCELED && status != 0)
		error = 0;
	if (error == ECANCELED)
		aio_cancel(job);
	else if (error)
		aio_complete(job, -1, error);
	else
		aio_complete(job, status, 0);
}

static void
t4_aiotx_mbuf_free(struct mbuf *m, void *buffer, void *arg)
{
	struct aiotx_buffer *ab = buffer;

#ifdef VERBOSE_TRACES
	CTR3(KTR_CXGBE, "%s: completed %d bytes for tid %d", __func__,
	    m->m_len, jobtotid(ab->job));
#endif
	free_aiotx_buffer(ab);
}
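
/*
 * A note on aiotx buffer reference counting: hold_aio() below creates the
 * buffer with a single reference owned by the AIO job, and
 * t4_aiotx_process_job() acquires an additional reference for every mbuf it
 * maps onto the buffer.  t4_aiotx_mbuf_free() above releases the per-mbuf
 * reference when such an mbuf is freed, and the job's own reference is
 * dropped once the job will not be requeued (or from t4_aiotx_cancel()).
 * Whoever drops the last reference ends up completing (or cancelling) the
 * AIO job in free_aiotx_buffer().
 */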

/*
 * Hold the buffer backing an AIO request and return an AIO transmit
 * buffer.
 */
static int
hold_aio(struct kaiocb *job)
{
	struct aiotx_buffer *ab;
	struct vmspace *vm;
	vm_map_t map;
	vm_offset_t start, end, pgoff;
	int n;

	MPASS(job->backend1 == NULL);

	/*
	 * The AIO subsystem will cancel and drain all requests before
	 * permitting a process to exit or exec, so p_vmspace should
	 * be stable here.
	 */
	vm = job->userproc->p_vmspace;
	map = &vm->vm_map;
	start = (uintptr_t)job->uaiocb.aio_buf;
	pgoff = start & PAGE_MASK;
	end = round_page(start + job->uaiocb.aio_nbytes);
	start = trunc_page(start);
	n = atop(end - start);

	ab = malloc(sizeof(*ab) + n * sizeof(vm_page_t), M_CXGBE, M_WAITOK |
	    M_ZERO);
	refcount_init(&ab->refcount, 1);
	ab->ps.pages = (vm_page_t *)(ab + 1);
	ab->ps.npages = vm_fault_quick_hold_pages(map, start, end - start,
	    VM_PROT_WRITE, ab->ps.pages, n);
	if (ab->ps.npages < 0) {
		free(ab, M_CXGBE);
		return (EFAULT);
	}

	KASSERT(ab->ps.npages == n,
	    ("hold_aio: page count mismatch: %d vs %d", ab->ps.npages, n));

	ab->ps.offset = pgoff;
	ab->ps.len = job->uaiocb.aio_nbytes;
	ab->job = job;
	job->backend1 = ab;
#ifdef VERBOSE_TRACES
	CTR5(KTR_CXGBE, "%s: tid %d, new pageset %p for job %p, npages %d",
	    __func__, jobtotid(job), &ab->ps, job, ab->ps.npages);
#endif
	return (0);
}

static void
t4_aiotx_process_job(struct toepcb *toep, struct socket *so, struct kaiocb *job)
{
	struct adapter *sc;
	struct sockbuf *sb;
	struct file *fp;
	struct aiotx_buffer *ab;
	struct inpcb *inp;
	struct tcpcb *tp;
	struct mbuf *m;
	int error;
	bool moretocome, sendmore;

	sc = td_adapter(toep->td);
	sb = &so->so_snd;
	SOCKBUF_UNLOCK(sb);
	fp = job->fd_file;
	ab = job->backend1;
	m = NULL;

#ifdef MAC
	error = mac_socket_check_send(fp->f_cred, so);
	if (error != 0)
		goto out;
#endif

	if (ab == NULL) {
		error = hold_aio(job);
		if (error != 0)
			goto out;
		ab = job->backend1;
	}

	/* Inline sosend_generic(). */

	job->msgsnd = 1;

	error = sblock(sb, SBL_WAIT);
	MPASS(error == 0);

sendanother:
	m = m_get(M_WAITOK, MT_DATA);

	SOCKBUF_LOCK(sb);
	if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
		SOCKBUF_UNLOCK(sb);
		sbunlock(sb);
		if ((so->so_options & SO_NOSIGPIPE) == 0) {
			PROC_LOCK(job->userproc);
			kern_psignal(job->userproc, SIGPIPE);
			PROC_UNLOCK(job->userproc);
		}
		error = EPIPE;
		goto out;
	}
	if (so->so_error) {
		error = so->so_error;
		so->so_error = 0;
		SOCKBUF_UNLOCK(sb);
		sbunlock(sb);
		goto out;
	}
	if ((so->so_state & SS_ISCONNECTED) == 0) {
		SOCKBUF_UNLOCK(sb);
		sbunlock(sb);
		error = ENOTCONN;
		goto out;
	}
	if (sbspace(sb) < sb->sb_lowat) {
		MPASS(job->aio_sent == 0 || !(so->so_state & SS_NBIO));

		/*
		 * Don't block if there is too little room in the socket
		 * buffer.  Instead, requeue the request.
		 */
		if (!aio_set_cancel_function(job, t4_aiotx_cancel)) {
			SOCKBUF_UNLOCK(sb);
			sbunlock(sb);
			error = ECANCELED;
			goto out;
		}
		TAILQ_INSERT_HEAD(&toep->aiotx_jobq, job, list);
		SOCKBUF_UNLOCK(sb);
		sbunlock(sb);
		goto out;
	}

	/*
	 * Write as much data as the socket permits, but no more than a
	 * single sndbuf at a time.
	 */
	m->m_len = sbspace(sb);
	if (m->m_len > ab->ps.len - job->aio_sent) {
		m->m_len = ab->ps.len - job->aio_sent;
		moretocome = false;
	} else
		moretocome = true;
	if (m->m_len > sc->tt.sndbuf) {
		m->m_len = sc->tt.sndbuf;
		sendmore = true;
	} else
		sendmore = false;

	if (!TAILQ_EMPTY(&toep->aiotx_jobq))
		moretocome = true;
	SOCKBUF_UNLOCK(sb);
	MPASS(m->m_len != 0);

	/* Inlined tcp_usr_send(). */

	inp = toep->inp;
	INP_WLOCK(inp);
	if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
		INP_WUNLOCK(inp);
		sbunlock(sb);
		error = ECONNRESET;
		goto out;
	}

	refcount_acquire(&ab->refcount);
	m_extadd(m, NULL, ab->ps.len, t4_aiotx_mbuf_free, ab,
	    (void *)(uintptr_t)job->aio_sent, 0, EXT_NET_DRV);
	m->m_ext.ext_flags |= EXT_FLAG_AIOTX;
	job->aio_sent += m->m_len;

	sbappendstream(sb, m, 0);
	m = NULL;

	if (!(inp->inp_flags & INP_DROPPED)) {
		tp = intotcpcb(inp);
		if (moretocome)
			tp->t_flags |= TF_MORETOCOME;
		error = tp->t_fb->tfb_tcp_output(tp);
		if (moretocome)
			tp->t_flags &= ~TF_MORETOCOME;
	}

	INP_WUNLOCK(inp);
	if (sendmore)
		goto sendanother;
	sbunlock(sb);

	if (error)
		goto out;

	/*
	 * If this is a blocking socket and the request has not been
	 * fully completed, requeue it until the socket is ready
	 * again.
	 */
	if (job->aio_sent < job->uaiocb.aio_nbytes &&
	    !(so->so_state & SS_NBIO)) {
		SOCKBUF_LOCK(sb);
		if (!aio_set_cancel_function(job, t4_aiotx_cancel)) {
			SOCKBUF_UNLOCK(sb);
			error = ECANCELED;
			goto out;
		}
		TAILQ_INSERT_HEAD(&toep->aiotx_jobq, job, list);
		return;
	}

	/*
	 * If the request will not be requeued, drop a reference on
	 * the aiotx buffer.  Any mbufs in flight should still
	 * contain a reference, but this drops the reference that the
	 * job owns while it is waiting to queue mbufs to the socket.
	 */
	free_aiotx_buffer(ab);

out:
	if (error) {
		if (ab != NULL) {
			job->aio_error = error;
			free_aiotx_buffer(ab);
		} else {
			MPASS(job->aio_sent == 0);
			aio_complete(job, -1, error);
		}
	}
	if (m != NULL)
		m_free(m);
	SOCKBUF_LOCK(sb);
}

static void
t4_aiotx_task(void *context, int pending)
{
	struct toepcb *toep = context;
	struct inpcb *inp = toep->inp;
	struct socket *so = inp->inp_socket;
	struct kaiocb *job;

	CURVNET_SET(toep->vnet);
	SOCKBUF_LOCK(&so->so_snd);
	while (!TAILQ_EMPTY(&toep->aiotx_jobq) && sowriteable(so)) {
		job = TAILQ_FIRST(&toep->aiotx_jobq);
		TAILQ_REMOVE(&toep->aiotx_jobq, job, list);
		if (!aio_clear_cancel_function(job))
			continue;

		t4_aiotx_process_job(toep, so, job);
	}
	toep->aiotx_task_active = false;
	SOCKBUF_UNLOCK(&so->so_snd);
	CURVNET_RESTORE();

	free_toepcb(toep);
}
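
/*
 * Scheduling of the aiotx task: t4_aio_queue_aiotx() (the AIO entry point
 * for zero-copy writes) and do_fw4_ack() (when transmit credits are
 * returned and jobs are pending) both call t4_aiotx_queue_toep() below with
 * the so_snd lock held.  That routine schedules toep->aiotx_task at most
 * once via soaio_enqueue(), taking a hold on the toepcb that
 * t4_aiotx_task() above releases when it finishes.
 */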
"true" : "false"); 2264 #endif 2265 if (toep->aiotx_task_active) 2266 return; 2267 toep->aiotx_task_active = true; 2268 hold_toepcb(toep); 2269 soaio_enqueue(&toep->aiotx_task); 2270 } 2271 2272 static void 2273 t4_aiotx_cancel(struct kaiocb *job) 2274 { 2275 struct aiotx_buffer *ab; 2276 struct socket *so; 2277 struct sockbuf *sb; 2278 struct tcpcb *tp; 2279 struct toepcb *toep; 2280 2281 so = job->fd_file->f_data; 2282 tp = so_sototcpcb(so); 2283 toep = tp->t_toe; 2284 MPASS(job->uaiocb.aio_lio_opcode == LIO_WRITE); 2285 sb = &so->so_snd; 2286 2287 SOCKBUF_LOCK(sb); 2288 if (!aio_cancel_cleared(job)) 2289 TAILQ_REMOVE(&toep->aiotx_jobq, job, list); 2290 SOCKBUF_UNLOCK(sb); 2291 2292 ab = job->backend1; 2293 if (ab != NULL) 2294 free_aiotx_buffer(ab); 2295 else 2296 aio_cancel(job); 2297 } 2298 2299 int 2300 t4_aio_queue_aiotx(struct socket *so, struct kaiocb *job) 2301 { 2302 struct tcpcb *tp = so_sototcpcb(so); 2303 struct toepcb *toep = tp->t_toe; 2304 struct adapter *sc = td_adapter(toep->td); 2305 2306 /* This only handles writes. */ 2307 if (job->uaiocb.aio_lio_opcode != LIO_WRITE) 2308 return (EOPNOTSUPP); 2309 2310 if (!sc->tt.tx_zcopy) 2311 return (EOPNOTSUPP); 2312 2313 SOCKBUF_LOCK(&so->so_snd); 2314 #ifdef VERBOSE_TRACES 2315 CTR2(KTR_CXGBE, "%s: queueing %p", __func__, job); 2316 #endif 2317 if (!aio_set_cancel_function(job, t4_aiotx_cancel)) 2318 panic("new job was cancelled"); 2319 TAILQ_INSERT_TAIL(&toep->aiotx_jobq, job, list); 2320 if (sowriteable(so)) 2321 t4_aiotx_queue_toep(toep); 2322 SOCKBUF_UNLOCK(&so->so_snd); 2323 return (0); 2324 } 2325 2326 void 2327 aiotx_init_toep(struct toepcb *toep) 2328 { 2329 2330 TAILQ_INIT(&toep->aiotx_jobq); 2331 TASK_INIT(&toep->aiotx_task, 0, t4_aiotx_task, toep); 2332 } 2333 #endif 2334