/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2012 Chelsio Communications, Inc.
 * All rights reserved.
 * Written by: Navdeep Parhar <np@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ratelimit.h"

#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/limits.h>
#include <sys/module.h>
#include <sys/protosw.h>
#include <sys/domain.h>
#include <sys/refcount.h>
#include <sys/rmlock.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/taskqueue.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_types.h>
#include <net/if_vlan_var.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet6/scope6_var.h>
#define TCPSTATES
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/toecore.h>

#ifdef TCP_OFFLOAD
#include "common/common.h"
#include "common/t4_msg.h"
#include "common/t4_regs.h"
#include "common/t4_regs_values.h"
#include "common/t4_tcb.h"
#include "tom/t4_tom_l2t.h"
#include "tom/t4_tom.h"
#include "tom/t4_tls.h"

static struct protosw toe_protosw;
static struct pr_usrreqs toe_usrreqs;

static struct protosw toe6_protosw;
static struct pr_usrreqs toe6_usrreqs;

/* Module ops */
static int t4_tom_mod_load(void);
static int t4_tom_mod_unload(void);
static int t4_tom_modevent(module_t, int, void *);

/* ULD ops and helpers */
static int t4_tom_activate(struct adapter *);
static int t4_tom_deactivate(struct adapter *);

static struct uld_info tom_uld_info = {
	.uld_id = ULD_TOM,
	.activate = t4_tom_activate,
	.deactivate = t4_tom_deactivate,
};

static void release_offload_resources(struct toepcb *);
static int alloc_tid_tabs(struct tid_info *);
static void free_tid_tabs(struct tid_info *);
static int add_lip(struct adapter *, struct in6_addr *);
static int delete_lip(struct adapter *, struct in6_addr *);
static struct clip_entry *search_lip(struct tom_data *, struct in6_addr *);
static void init_clip_table(struct adapter *, struct tom_data *);
static void update_clip(struct adapter *, void *);
static void t4_clip_task(void *, int);
static void update_clip_table(struct adapter *, struct tom_data *);
static void destroy_clip_table(struct adapter *, struct tom_data *);
static void free_tom_data(struct adapter *, struct tom_data *);
static void reclaim_wr_resources(void *, int);

static int in6_ifaddr_gen;
static eventhandler_tag ifaddr_evhandler;
static struct timeout_task clip_task;

struct toepcb *
alloc_toepcb(struct vi_info *vi, int txqid, int rxqid, int flags)
{
	struct port_info *pi = vi->pi;
	struct adapter *sc = pi->adapter;
	struct toepcb *toep;
	int tx_credits, txsd_total, len;

	/*
	 * The firmware counts tx work request credits in units of 16 bytes
	 * each.  Reserve room for an ABORT_REQ so the driver never has to
	 * worry about tx credits if it wants to abort a connection.
	 */
	tx_credits = sc->params.ofldq_wr_cred;
	tx_credits -= howmany(sizeof(struct cpl_abort_req), 16);

	/*
	 * Shortest possible tx work request is a fw_ofld_tx_data_wr + 1 byte
	 * immediate payload, and firmware counts tx work request credits in
	 * units of 16 bytes.  Calculate the maximum work requests possible.
	 */
	txsd_total = tx_credits /
	    howmany(sizeof(struct fw_ofld_tx_data_wr) + 1, 16);
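
	/*
	 * Worked example of the arithmetic above, assuming a 16 byte
	 * fw_ofld_tx_data_wr (the actual size comes from the shared firmware
	 * headers): the shortest WR would be 16 + 1 bytes, which rounds up
	 * to 2 credits, so txsd_total would be tx_credits / 2.
	 */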

	KASSERT(txqid >= vi->first_ofld_txq &&
	    txqid < vi->first_ofld_txq + vi->nofldtxq,
	    ("%s: txqid %d for vi %p (first %d, n %d)", __func__, txqid, vi,
	    vi->first_ofld_txq, vi->nofldtxq));

	KASSERT(rxqid >= vi->first_ofld_rxq &&
	    rxqid < vi->first_ofld_rxq + vi->nofldrxq,
	    ("%s: rxqid %d for vi %p (first %d, n %d)", __func__, rxqid, vi,
	    vi->first_ofld_rxq, vi->nofldrxq));

	len = offsetof(struct toepcb, txsd) +
	    txsd_total * sizeof(struct ofld_tx_sdesc);

	toep = malloc(len, M_CXGBE, M_ZERO | flags);
	if (toep == NULL)
		return (NULL);

	refcount_init(&toep->refcount, 1);
	toep->td = sc->tom_softc;
	toep->vi = vi;
	toep->tc_idx = -1;
	toep->tx_total = tx_credits;
	toep->tx_credits = tx_credits;
	toep->ofld_txq = &sc->sge.ofld_txq[txqid];
	toep->ofld_rxq = &sc->sge.ofld_rxq[rxqid];
	toep->ctrlq = &sc->sge.ctrlq[pi->port_id];
	mbufq_init(&toep->ulp_pduq, INT_MAX);
	mbufq_init(&toep->ulp_pdu_reclaimq, INT_MAX);
	toep->txsd_total = txsd_total;
	toep->txsd_avail = txsd_total;
	toep->txsd_pidx = 0;
	toep->txsd_cidx = 0;
	aiotx_init_toep(toep);

	return (toep);
}

struct toepcb *
hold_toepcb(struct toepcb *toep)
{

	refcount_acquire(&toep->refcount);
	return (toep);
}

void
free_toepcb(struct toepcb *toep)
{

	if (refcount_release(&toep->refcount) == 0)
		return;

	KASSERT(!(toep->flags & TPF_ATTACHED),
	    ("%s: attached to an inpcb", __func__));
	KASSERT(!(toep->flags & TPF_CPL_PENDING),
	    ("%s: CPL pending", __func__));

	if (toep->ulp_mode == ULP_MODE_TCPDDP)
		ddp_uninit_toep(toep);
	tls_uninit_toep(toep);
	free(toep, M_CXGBE);
}
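
/*
 * Note on toepcb lifetime: a toepcb is held from two directions -- by the
 * kernel's TCP PCB while TPF_ATTACHED is set (dropped in t4_pcb_detach) and
 * by the hardware tid while TPF_CPL_PENDING is set (dropped in
 * final_cpl_received).  Whichever side lets go last calls
 * release_offload_resources(), which in turn drops the reference taken by
 * alloc_toepcb() via free_toepcb().
 */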

/*
 * Set up the socket for TCP offload.
 */
void
offload_socket(struct socket *so, struct toepcb *toep)
{
	struct tom_data *td = toep->td;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp = intotcpcb(inp);
	struct sockbuf *sb;

	INP_WLOCK_ASSERT(inp);

	/* Update socket */
	sb = &so->so_snd;
	SOCKBUF_LOCK(sb);
	sb->sb_flags |= SB_NOCOALESCE;
	SOCKBUF_UNLOCK(sb);
	sb = &so->so_rcv;
	SOCKBUF_LOCK(sb);
	sb->sb_flags |= SB_NOCOALESCE;
	if (inp->inp_vflag & INP_IPV6)
		so->so_proto = &toe6_protosw;
	else
		so->so_proto = &toe_protosw;
	SOCKBUF_UNLOCK(sb);

	/* Update TCP PCB */
	tp->tod = &td->tod;
	tp->t_toe = toep;
	tp->t_flags |= TF_TOE;

	/* Install an extra hold on inp */
	toep->inp = inp;
	toep->flags |= TPF_ATTACHED;
	in_pcbref(inp);

	/* Add the TOE PCB to the active list */
	mtx_lock(&td->toep_list_lock);
	TAILQ_INSERT_HEAD(&td->toep_list, toep, link);
	mtx_unlock(&td->toep_list_lock);
}

/* This is _not_ the normal way to "unoffload" a socket. */
void
undo_offload_socket(struct socket *so)
{
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp = intotcpcb(inp);
	struct toepcb *toep = tp->t_toe;
	struct tom_data *td = toep->td;
	struct sockbuf *sb;

	INP_WLOCK_ASSERT(inp);

	sb = &so->so_snd;
	SOCKBUF_LOCK(sb);
	sb->sb_flags &= ~SB_NOCOALESCE;
	SOCKBUF_UNLOCK(sb);
	sb = &so->so_rcv;
	SOCKBUF_LOCK(sb);
	sb->sb_flags &= ~SB_NOCOALESCE;
	SOCKBUF_UNLOCK(sb);

	tp->tod = NULL;
	tp->t_toe = NULL;
	tp->t_flags &= ~TF_TOE;

	toep->inp = NULL;
	toep->flags &= ~TPF_ATTACHED;
	if (in_pcbrele_wlocked(inp))
		panic("%s: inp freed.", __func__);

	mtx_lock(&td->toep_list_lock);
	TAILQ_REMOVE(&td->toep_list, toep, link);
	mtx_unlock(&td->toep_list_lock);
}

static void
release_offload_resources(struct toepcb *toep)
{
	struct tom_data *td = toep->td;
	struct adapter *sc = td_adapter(td);
	int tid = toep->tid;

	KASSERT(!(toep->flags & TPF_CPL_PENDING),
	    ("%s: %p has CPL pending.", __func__, toep));
	KASSERT(!(toep->flags & TPF_ATTACHED),
	    ("%s: %p is still attached.", __func__, toep));

	CTR5(KTR_CXGBE, "%s: toep %p (tid %d, l2te %p, ce %p)",
	    __func__, toep, tid, toep->l2te, toep->ce);

	/*
	 * These queues should have been emptied at approximately the same time
	 * that a normal connection's socket's so_snd would have been purged or
	 * drained.  Do _not_ clean up here.
	 */
	MPASS(mbufq_len(&toep->ulp_pduq) == 0);
	MPASS(mbufq_len(&toep->ulp_pdu_reclaimq) == 0);
#ifdef INVARIANTS
	if (toep->ulp_mode == ULP_MODE_TCPDDP)
		ddp_assert_empty(toep);
#endif

	if (toep->l2te)
		t4_l2t_release(toep->l2te);
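
	/*
	 * toep->ce is only set for offloaded IPv6 connections, which occupy
	 * two entries in the hardware tid space; that is why the in-use
	 * accounting below removes 2 tids instead of 1 in that case.
	 */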
	if (tid >= 0) {
		remove_tid(sc, tid, toep->ce ? 2 : 1);
		release_tid(sc, tid, toep->ctrlq);
	}

	if (toep->ce)
		release_lip(td, toep->ce);

	if (toep->tc_idx != -1)
		t4_release_cl_rl(sc, toep->vi->pi->port_id, toep->tc_idx);

	mtx_lock(&td->toep_list_lock);
	TAILQ_REMOVE(&td->toep_list, toep, link);
	mtx_unlock(&td->toep_list_lock);

	free_toepcb(toep);
}

/*
 * The kernel is done with the TCP PCB and this is our opportunity to unhook
 * the toepcb hanging off of it.  If the TOE driver is also done with the
 * toepcb (no pending CPL) then it is time to release all resources tied to
 * the toepcb.
 *
 * Also gets called when an offloaded active open fails and the TOM wants the
 * kernel to take the TCP PCB back.
 */
static void
t4_pcb_detach(struct toedev *tod __unused, struct tcpcb *tp)
{
#if defined(KTR) || defined(INVARIANTS)
	struct inpcb *inp = tp->t_inpcb;
#endif
	struct toepcb *toep = tp->t_toe;

	INP_WLOCK_ASSERT(inp);

	KASSERT(toep != NULL, ("%s: toep is NULL", __func__));
	KASSERT(toep->flags & TPF_ATTACHED,
	    ("%s: not attached", __func__));

#ifdef KTR
	if (tp->t_state == TCPS_SYN_SENT) {
		CTR6(KTR_CXGBE, "%s: atid %d, toep %p (0x%x), inp %p (0x%x)",
		    __func__, toep->tid, toep, toep->flags, inp,
		    inp->inp_flags);
	} else {
		CTR6(KTR_CXGBE,
		    "t4_pcb_detach: tid %d (%s), toep %p (0x%x), inp %p (0x%x)",
		    toep->tid, tcpstates[tp->t_state], toep, toep->flags, inp,
		    inp->inp_flags);
	}
#endif

	tp->t_toe = NULL;
	tp->t_flags &= ~TF_TOE;
	toep->flags &= ~TPF_ATTACHED;

	if (!(toep->flags & TPF_CPL_PENDING))
		release_offload_resources(toep);
}

/*
 * setsockopt handler.
 */
static void
t4_ctloutput(struct toedev *tod, struct tcpcb *tp, int dir, int name)
{
	struct adapter *sc = tod->tod_softc;
	struct toepcb *toep = tp->t_toe;

	if (dir == SOPT_GET)
		return;

	CTR4(KTR_CXGBE, "%s: tp %p, dir %u, name %u", __func__, tp, dir, name);

	switch (name) {
	case TCP_NODELAY:
		if (tp->t_state != TCPS_ESTABLISHED)
			break;
		t4_set_tcb_field(sc, toep->ctrlq, toep, W_TCB_T_FLAGS,
		    V_TF_NAGLE(1), V_TF_NAGLE(tp->t_flags & TF_NODELAY ? 0 : 1),
		    0, 0);
		break;
	default:
		break;
	}
}

static inline int
get_tcb_bit(u_char *tcb, int bit)
{
	int ix, shift;

	ix = 127 - (bit >> 3);
	shift = bit & 0x7;

	return ((tcb[ix] >> shift) & 1);
}

static inline uint64_t
get_tcb_bits(u_char *tcb, int hi, int lo)
{
	uint64_t rc = 0;

	while (hi >= lo) {
		rc = (rc << 1) | get_tcb_bit(tcb, hi);
		--hi;
	}

	return (rc);
}
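
/*
 * The helpers above follow the hardware TCB layout: the 128-byte TCB is
 * treated as a single big-endian 1024-bit value whose bit 0 is in the last
 * byte, so bit N is found in tcb[127 - N / 8] at shift N % 8.  For example,
 * get_tcb_bits(tcb, 115, 112) (used below for tcpi_state) reads bits 3..0 of
 * tcb[113].
 */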

/*
 * Called by the kernel to allow the TOE driver to "refine" values filled in
 * the tcp_info for an offloaded connection.
 */
static void
t4_tcp_info(struct toedev *tod, struct tcpcb *tp, struct tcp_info *ti)
{
	int i, j, k, rc;
	struct adapter *sc = tod->tod_softc;
	struct toepcb *toep = tp->t_toe;
	uint32_t addr, v;
	uint32_t buf[TCB_SIZE / sizeof(uint32_t)];
	u_char *tcb, tmp;

	INP_WLOCK_ASSERT(tp->t_inpcb);
	MPASS(ti != NULL);

	ti->tcpi_toe_tid = toep->tid;

	addr = t4_read_reg(sc, A_TP_CMM_TCB_BASE) + toep->tid * TCB_SIZE;
	rc = read_via_memwin(sc, 2, addr, &buf[0], TCB_SIZE);
	if (rc != 0)
		return;
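
	/*
	 * The TCB just read has its 16-byte chunks in the opposite order
	 * from the bit numbering that get_tcb_bits() expects, so swap the
	 * chunks end for end (0 <-> 112, 16 <-> 96, and so on) before
	 * extracting any fields.
	 */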
	tcb = (u_char *)&buf[0];
	for (i = 0, j = TCB_SIZE - 16; i < j; i += 16, j -= 16) {
		for (k = 0; k < 16; k++) {
			tmp = tcb[i + k];
			tcb[i + k] = tcb[j + k];
			tcb[j + k] = tmp;
		}
	}

	ti->tcpi_state = get_tcb_bits(tcb, 115, 112);

	v = get_tcb_bits(tcb, 271, 256);
	ti->tcpi_rtt = tcp_ticks_to_us(sc, v);

	v = get_tcb_bits(tcb, 287, 272);
	ti->tcpi_rttvar = tcp_ticks_to_us(sc, v);

	ti->tcpi_snd_ssthresh = get_tcb_bits(tcb, 487, 460);
	ti->tcpi_snd_cwnd = get_tcb_bits(tcb, 459, 432);
	ti->tcpi_rcv_nxt = get_tcb_bits(tcb, 553, 522);

	ti->tcpi_snd_nxt = get_tcb_bits(tcb, 319, 288) -
	    get_tcb_bits(tcb, 375, 348);

	/* Receive window being advertised by us. */
	ti->tcpi_rcv_space = get_tcb_bits(tcb, 581, 554);

	/* Send window ceiling. */
	v = get_tcb_bits(tcb, 159, 144) << get_tcb_bits(tcb, 131, 128);
	ti->tcpi_snd_wnd = min(v, ti->tcpi_snd_cwnd);
}

/*
 * The TOE driver will not receive any more CPLs for the tid associated with
 * the toepcb; release the hold on the inpcb.
 */
void
final_cpl_received(struct toepcb *toep)
{
	struct inpcb *inp = toep->inp;

	KASSERT(inp != NULL, ("%s: inp is NULL", __func__));
	INP_WLOCK_ASSERT(inp);
	KASSERT(toep->flags & TPF_CPL_PENDING,
	    ("%s: CPL not pending already?", __func__));

	CTR6(KTR_CXGBE, "%s: tid %d, toep %p (0x%x), inp %p (0x%x)",
	    __func__, toep->tid, toep, toep->flags, inp, inp->inp_flags);

	if (toep->ulp_mode == ULP_MODE_TCPDDP)
		release_ddp_resources(toep);
	toep->inp = NULL;
	toep->flags &= ~TPF_CPL_PENDING;
	mbufq_drain(&toep->ulp_pdu_reclaimq);

	if (!(toep->flags & TPF_ATTACHED))
		release_offload_resources(toep);

	if (!in_pcbrele_wlocked(inp))
		INP_WUNLOCK(inp);
}

void
insert_tid(struct adapter *sc, int tid, void *ctx, int ntids)
{
	struct tid_info *t = &sc->tids;

	MPASS(tid >= t->tid_base);
	MPASS(tid - t->tid_base < t->ntids);

	t->tid_tab[tid - t->tid_base] = ctx;
	atomic_add_int(&t->tids_in_use, ntids);
}

void *
lookup_tid(struct adapter *sc, int tid)
{
	struct tid_info *t = &sc->tids;

	return (t->tid_tab[tid - t->tid_base]);
}

void
update_tid(struct adapter *sc, int tid, void *ctx)
{
	struct tid_info *t = &sc->tids;

	t->tid_tab[tid - t->tid_base] = ctx;
}

void
remove_tid(struct adapter *sc, int tid, int ntids)
{
	struct tid_info *t = &sc->tids;

	t->tid_tab[tid - t->tid_base] = NULL;
	atomic_subtract_int(&t->tids_in_use, ntids);
}

/*
 * What mtu_idx to use, given a 4-tuple.  Note that both s->mss and tcp_mssopt
 * have the MSS that we should advertise in our SYN.  Advertised MSS doesn't
 * account for any TCP options so the effective MSS (only payload, no headers
 * or options) could be different.  We fill in tp->t_maxseg with the effective
 * MSS at the end of the 3-way handshake.
 */
int
find_best_mtu_idx(struct adapter *sc, struct in_conninfo *inc,
    struct offload_settings *s)
{
	unsigned short *mtus = &sc->params.mtus[0];
	int i, mss, mtu;

	MPASS(inc != NULL);

	mss = s->mss > 0 ? s->mss : tcp_mssopt(inc);
	if (inc->inc_flags & INC_ISIPV6)
		mtu = mss + sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
	else
		mtu = mss + sizeof(struct ip) + sizeof(struct tcphdr);

	for (i = 0; i < NMTUS - 1 && mtus[i + 1] <= mtu; i++)
		continue;

	return (i);
}
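
/*
 * Example: a plain IPv4 connection advertising an MSS of 1460 yields mtu =
 * 1460 + 20 + 20 = 1500, and the loop returns the index of the largest MTU
 * table entry that does not exceed 1500 (the firmware's default table
 * typically contains 1500 itself).
 */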

/*
 * Determine the receive window size for a socket.
 */
u_long
select_rcv_wnd(struct socket *so)
{
	unsigned long wnd;

	SOCKBUF_LOCK_ASSERT(&so->so_rcv);

	wnd = sbspace(&so->so_rcv);
	if (wnd < MIN_RCV_WND)
		wnd = MIN_RCV_WND;

	return min(wnd, MAX_RCV_WND);
}

int
select_rcv_wscale(void)
{
	int wscale = 0;
	unsigned long space = sb_max;

	if (space > MAX_RCV_WND)
		space = MAX_RCV_WND;

	while (wscale < TCP_MAX_WINSHIFT && (TCP_MAXWIN << wscale) < space)
		wscale++;

	return (wscale);
}
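
/*
 * Example: with the default kern.ipc.maxsockbuf of 2 MB (sb_max = 2097152)
 * this returns 6, because 65535 << 5 is still smaller than sb_max while
 * 65535 << 6 is not (TCP_MAXWIN is 65535 and TCP_MAX_WINSHIFT is 14).
 */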

/*
 * socket so could be a listening socket too.
 */
uint64_t
calc_opt0(struct socket *so, struct vi_info *vi, struct l2t_entry *e,
    int mtu_idx, int rscale, int rx_credits, int ulp_mode,
    struct offload_settings *s)
{
	int keepalive;
	uint64_t opt0;

	MPASS(so != NULL);
	MPASS(vi != NULL);
	KASSERT(rx_credits <= M_RCV_BUFSIZ,
	    ("%s: rcv_bufsiz too high", __func__));

	opt0 = F_TCAM_BYPASS | V_WND_SCALE(rscale) | V_MSS_IDX(mtu_idx) |
	    V_ULP_MODE(ulp_mode) | V_RCV_BUFSIZ(rx_credits) |
	    V_L2T_IDX(e->idx) | V_SMAC_SEL(vi->smt_idx) |
	    V_TX_CHAN(vi->pi->tx_chan);

	keepalive = tcp_always_keepalive || so_options_get(so) & SO_KEEPALIVE;
	opt0 |= V_KEEP_ALIVE(keepalive != 0);

	if (s->nagle < 0) {
		struct inpcb *inp = sotoinpcb(so);
		struct tcpcb *tp = intotcpcb(inp);

		opt0 |= V_NAGLE((tp->t_flags & TF_NODELAY) == 0);
	} else
		opt0 |= V_NAGLE(s->nagle != 0);

	return htobe64(opt0);
}
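
/*
 * The ntuple below is the compressed filter tuple that accompanies an active
 * open request.  Its layout is chip-configuration dependent: the shift for
 * each component (VLAN, physical port, protocol, VNIC id) comes from the TP
 * parameters read at attach time, and a negative shift means the chip does
 * not match on that field at all.  T4 uses a 32-bit tuple; T5 and later use
 * a 64-bit one.
 */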

uint64_t
select_ntuple(struct vi_info *vi, struct l2t_entry *e)
{
	struct adapter *sc = vi->pi->adapter;
	struct tp_params *tp = &sc->params.tp;
	uint16_t viid = vi->viid;
	uint64_t ntuple = 0;

	/*
	 * Initialize each of the fields which we care about which are present
	 * in the Compressed Filter Tuple.
	 */
	if (tp->vlan_shift >= 0 && EVL_VLANOFTAG(e->vlan) != CPL_L2T_VLAN_NONE)
		ntuple |= (uint64_t)(F_FT_VLAN_VLD | e->vlan) << tp->vlan_shift;

	if (tp->port_shift >= 0)
		ntuple |= (uint64_t)e->lport << tp->port_shift;

	if (tp->protocol_shift >= 0)
		ntuple |= (uint64_t)IPPROTO_TCP << tp->protocol_shift;

	if (tp->vnic_shift >= 0 && tp->ingress_config & F_VNIC) {
		uint32_t vf = G_FW_VIID_VIN(viid);
		uint32_t pf = G_FW_VIID_PFN(viid);
		uint32_t vld = G_FW_VIID_VIVLD(viid);

		ntuple |= (uint64_t)(V_FT_VNID_ID_VF(vf) | V_FT_VNID_ID_PF(pf) |
		    V_FT_VNID_ID_VLD(vld)) << tp->vnic_shift;
	}

	if (is_t4(sc))
		return (htobe32((uint32_t)ntuple));
	else
		return (htobe64(V_FILTER_TUPLE(ntuple)));
}

static int
is_tls_sock(struct socket *so, struct adapter *sc)
{
	struct inpcb *inp = sotoinpcb(so);
	int i, rc;

	/* XXX: Eventually add a SO_WANT_TLS socket option perhaps? */
	rc = 0;
	ADAPTER_LOCK(sc);
	for (i = 0; i < sc->tt.num_tls_rx_ports; i++) {
		if (inp->inp_lport == htons(sc->tt.tls_rx_ports[i]) ||
		    inp->inp_fport == htons(sc->tt.tls_rx_ports[i])) {
			rc = 1;
			break;
		}
	}
	ADAPTER_UNLOCK(sc);
	return (rc);
}

int
select_ulp_mode(struct socket *so, struct adapter *sc,
    struct offload_settings *s)
{

	if (can_tls_offload(sc) &&
	    (s->tls > 0 || (s->tls < 0 && is_tls_sock(so, sc))))
		return (ULP_MODE_TLS);
	else if (s->ddp > 0 ||
	    (s->ddp < 0 && sc->tt.ddp && (so->so_options & SO_NO_DDP) == 0))
		return (ULP_MODE_TCPDDP);
	else
		return (ULP_MODE_NONE);
}

void
set_ulp_mode(struct toepcb *toep, int ulp_mode)
{

	CTR4(KTR_CXGBE, "%s: toep %p (tid %d) ulp_mode %d",
	    __func__, toep, toep->tid, ulp_mode);
	toep->ulp_mode = ulp_mode;
	tls_init_toep(toep);
	if (toep->ulp_mode == ULP_MODE_TCPDDP)
		ddp_init_toep(toep);
}

int
negative_advice(int status)
{

	return (status == CPL_ERR_RTX_NEG_ADVICE ||
	    status == CPL_ERR_PERSIST_NEG_ADVICE ||
	    status == CPL_ERR_KEEPALV_NEG_ADVICE);
}

static int
alloc_tid_tab(struct tid_info *t, int flags)
{

	MPASS(t->ntids > 0);
	MPASS(t->tid_tab == NULL);

	t->tid_tab = malloc(t->ntids * sizeof(*t->tid_tab), M_CXGBE,
	    M_ZERO | flags);
	if (t->tid_tab == NULL)
		return (ENOMEM);
	atomic_store_rel_int(&t->tids_in_use, 0);

	return (0);
}

static void
free_tid_tab(struct tid_info *t)
{

	KASSERT(t->tids_in_use == 0,
	    ("%s: %d tids still in use.", __func__, t->tids_in_use));

	free(t->tid_tab, M_CXGBE);
	t->tid_tab = NULL;
}

static int
alloc_stid_tab(struct tid_info *t, int flags)
{

	MPASS(t->nstids > 0);
	MPASS(t->stid_tab == NULL);

	t->stid_tab = malloc(t->nstids * sizeof(*t->stid_tab), M_CXGBE,
	    M_ZERO | flags);
	if (t->stid_tab == NULL)
		return (ENOMEM);
	mtx_init(&t->stid_lock, "stid lock", NULL, MTX_DEF);
	t->stids_in_use = 0;
	TAILQ_INIT(&t->stids);
	t->nstids_free_head = t->nstids;

	return (0);
}

static void
free_stid_tab(struct tid_info *t)
{

	KASSERT(t->stids_in_use == 0,
	    ("%s: %d tids still in use.", __func__, t->stids_in_use));

	if (mtx_initialized(&t->stid_lock))
		mtx_destroy(&t->stid_lock);
	free(t->stid_tab, M_CXGBE);
	t->stid_tab = NULL;
}

static void
free_tid_tabs(struct tid_info *t)
{

	free_tid_tab(t);
	free_atid_tab(t);
	free_stid_tab(t);
}

static int
alloc_tid_tabs(struct tid_info *t)
{
	int rc;

	rc = alloc_tid_tab(t, M_NOWAIT);
	if (rc != 0)
		goto failed;

	rc = alloc_atid_tab(t, M_NOWAIT);
	if (rc != 0)
		goto failed;

	rc = alloc_stid_tab(t, M_NOWAIT);
	if (rc != 0)
		goto failed;

	return (0);
failed:
	free_tid_tabs(t);
	return (rc);
}
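
/*
 * CLIP (Compressed Local IP) table support.  Instead of storing a full
 * 128-bit local address per offloaded IPv6 connection, the hardware matches
 * against an on-chip table of local IPv6 addresses.  add_lip/delete_lip
 * install and remove entries with FW_CLIP_CMD mailbox commands, and the
 * driver mirrors the table in td->clip_table with a refcount per entry.
 */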

static int
add_lip(struct adapter *sc, struct in6_addr *lip)
{
	struct fw_clip_cmd c;

	ASSERT_SYNCHRONIZED_OP(sc);
	/* mtx_assert(&td->clip_table_lock, MA_OWNED); */

	memset(&c, 0, sizeof(c));
	c.op_to_write = htonl(V_FW_CMD_OP(FW_CLIP_CMD) | F_FW_CMD_REQUEST |
	    F_FW_CMD_WRITE);
	c.alloc_to_len16 = htonl(F_FW_CLIP_CMD_ALLOC | FW_LEN16(c));
	c.ip_hi = *(uint64_t *)&lip->s6_addr[0];
	c.ip_lo = *(uint64_t *)&lip->s6_addr[8];

	return (-t4_wr_mbox_ns(sc, sc->mbox, &c, sizeof(c), &c));
}

static int
delete_lip(struct adapter *sc, struct in6_addr *lip)
{
	struct fw_clip_cmd c;

	ASSERT_SYNCHRONIZED_OP(sc);
	/* mtx_assert(&td->clip_table_lock, MA_OWNED); */

	memset(&c, 0, sizeof(c));
	c.op_to_write = htonl(V_FW_CMD_OP(FW_CLIP_CMD) | F_FW_CMD_REQUEST |
	    F_FW_CMD_READ);
	c.alloc_to_len16 = htonl(F_FW_CLIP_CMD_FREE | FW_LEN16(c));
	c.ip_hi = *(uint64_t *)&lip->s6_addr[0];
	c.ip_lo = *(uint64_t *)&lip->s6_addr[8];

	return (-t4_wr_mbox_ns(sc, sc->mbox, &c, sizeof(c), &c));
}

static struct clip_entry *
search_lip(struct tom_data *td, struct in6_addr *lip)
{
	struct clip_entry *ce;

	mtx_assert(&td->clip_table_lock, MA_OWNED);

	TAILQ_FOREACH(ce, &td->clip_table, link) {
		if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip))
			return (ce);
	}

	return (NULL);
}

struct clip_entry *
hold_lip(struct tom_data *td, struct in6_addr *lip, struct clip_entry *ce)
{

	mtx_lock(&td->clip_table_lock);
	if (ce == NULL)
		ce = search_lip(td, lip);
	if (ce != NULL)
		ce->refcount++;
	mtx_unlock(&td->clip_table_lock);

	return (ce);
}

void
release_lip(struct tom_data *td, struct clip_entry *ce)
{

	mtx_lock(&td->clip_table_lock);
	KASSERT(search_lip(td, &ce->lip) == ce,
	    ("%s: CLIP entry %p not in CLIP table.", __func__, ce));
	KASSERT(ce->refcount > 0,
	    ("%s: CLIP entry %p has refcount 0", __func__, ce));
	--ce->refcount;
	mtx_unlock(&td->clip_table_lock);
}

static void
init_clip_table(struct adapter *sc, struct tom_data *td)
{

	ASSERT_SYNCHRONIZED_OP(sc);

	mtx_init(&td->clip_table_lock, "CLIP table lock", NULL, MTX_DEF);
	TAILQ_INIT(&td->clip_table);
	td->clip_gen = -1;

	update_clip_table(sc, td);
}

static void
update_clip(struct adapter *sc, void *arg __unused)
{

	if (begin_synchronized_op(sc, NULL, HOLD_LOCK, "t4tomuc"))
		return;

	if (uld_active(sc, ULD_TOM))
		update_clip_table(sc, sc->tom_softc);

	end_synchronized_op(sc, LOCK_HELD);
}

static void
t4_clip_task(void *arg, int count)
{

	t4_iterate(update_clip, NULL);
}
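
/*
 * Resync the driver's CLIP table with the kernel's list of local IPv6
 * addresses: move everything currently in the table to a "stale" list, move
 * back (or freshly add) the addresses that are still configured, and then
 * delete from the hardware any stale entry that is no longer referenced.
 */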
static void
update_clip_table(struct adapter *sc, struct tom_data *td)
{
	struct rm_priotracker in6_ifa_tracker;
	struct in6_ifaddr *ia;
	struct in6_addr *lip, tlip;
	struct clip_head stale;
	struct clip_entry *ce, *ce_temp;
	struct vi_info *vi;
	int rc, gen, i, j;
	uintptr_t last_vnet;

	ASSERT_SYNCHRONIZED_OP(sc);

	IN6_IFADDR_RLOCK(&in6_ifa_tracker);
	mtx_lock(&td->clip_table_lock);

	gen = atomic_load_acq_int(&in6_ifaddr_gen);
	if (gen == td->clip_gen)
		goto done;

	TAILQ_INIT(&stale);
	TAILQ_CONCAT(&stale, &td->clip_table, link);

	/*
	 * last_vnet optimizes the common cases where all if_vnet = NULL (no
	 * VIMAGE) or all if_vnet = vnet0.
	 */
	last_vnet = (uintptr_t)(-1);
	for_each_port(sc, i)
	for_each_vi(sc->port[i], j, vi) {
		if (last_vnet == (uintptr_t)vi->ifp->if_vnet)
			continue;

		/* XXX: races with if_vmove */
		CURVNET_SET(vi->ifp->if_vnet);
		CK_STAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
			lip = &ia->ia_addr.sin6_addr;

			KASSERT(!IN6_IS_ADDR_MULTICAST(lip),
			    ("%s: mcast address in in6_ifaddr list", __func__));

			if (IN6_IS_ADDR_LOOPBACK(lip))
				continue;
			if (IN6_IS_SCOPE_EMBED(lip)) {
				/* Remove the embedded scope */
				tlip = *lip;
				lip = &tlip;
				in6_clearscope(lip);
			}
			/*
			 * XXX: how to weed out the link local address for the
			 * loopback interface?  It's fe80::1 usually (always?).
			 */

			/*
			 * If it's in the main list then we already know it's
			 * not stale.
			 */
			TAILQ_FOREACH(ce, &td->clip_table, link) {
				if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip))
					goto next;
			}

			/*
			 * If it's in the stale list we should move it to the
			 * main list.
			 */
			TAILQ_FOREACH(ce, &stale, link) {
				if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip)) {
					TAILQ_REMOVE(&stale, ce, link);
					TAILQ_INSERT_TAIL(&td->clip_table, ce,
					    link);
					goto next;
				}
			}

			/* A new IP6 address; add it to the CLIP table */
			ce = malloc(sizeof(*ce), M_CXGBE, M_NOWAIT);
			if (ce == NULL)
				goto next;	/* retried on next update */
			memcpy(&ce->lip, lip, sizeof(ce->lip));
			ce->refcount = 0;
			rc = add_lip(sc, lip);
			if (rc == 0)
				TAILQ_INSERT_TAIL(&td->clip_table, ce, link);
			else {
				char ip[INET6_ADDRSTRLEN];

				inet_ntop(AF_INET6, &ce->lip, &ip[0],
				    sizeof(ip));
				log(LOG_ERR, "%s: could not add %s (%d)\n",
				    __func__, ip, rc);
				free(ce, M_CXGBE);
			}
next:
			continue;
		}
		CURVNET_RESTORE();
		last_vnet = (uintptr_t)vi->ifp->if_vnet;
	}

	/*
	 * Remove stale addresses (those no longer in V_in6_ifaddrhead) that
	 * are no longer referenced by the driver.
	 */
	TAILQ_FOREACH_SAFE(ce, &stale, link, ce_temp) {
		if (ce->refcount == 0) {
			rc = delete_lip(sc, &ce->lip);
			if (rc == 0) {
				TAILQ_REMOVE(&stale, ce, link);
				free(ce, M_CXGBE);
			} else {
				char ip[INET6_ADDRSTRLEN];

				inet_ntop(AF_INET6, &ce->lip, &ip[0],
				    sizeof(ip));
				log(LOG_ERR, "%s: could not delete %s (%d)\n",
				    __func__, ip, rc);
			}
		}
	}
	/* The ones that are still referenced need to stay in the CLIP table */
	TAILQ_CONCAT(&td->clip_table, &stale, link);

	td->clip_gen = gen;
done:
	mtx_unlock(&td->clip_table_lock);
	IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
}

static void
destroy_clip_table(struct adapter *sc, struct tom_data *td)
{
	struct clip_entry *ce, *ce_temp;

	if (mtx_initialized(&td->clip_table_lock)) {
		mtx_lock(&td->clip_table_lock);
		TAILQ_FOREACH_SAFE(ce, &td->clip_table, link, ce_temp) {
			KASSERT(ce->refcount == 0,
			    ("%s: CLIP entry %p still in use (%d)", __func__,
			    ce, ce->refcount));
			TAILQ_REMOVE(&td->clip_table, ce, link);
			delete_lip(sc, &ce->lip);
			free(ce, M_CXGBE);
		}
		mtx_unlock(&td->clip_table_lock);
		mtx_destroy(&td->clip_table_lock);
	}
}

static void
free_tom_data(struct adapter *sc, struct tom_data *td)
{

	ASSERT_SYNCHRONIZED_OP(sc);

	KASSERT(TAILQ_EMPTY(&td->toep_list),
	    ("%s: TOE PCB list is not empty.", __func__));
	KASSERT(td->lctx_count == 0,
	    ("%s: lctx hash table is not empty.", __func__));

	t4_free_ppod_region(&td->pr);
	destroy_clip_table(sc, td);

	if (td->listen_mask != 0)
		hashdestroy(td->listen_hash, M_CXGBE, td->listen_mask);

	if (mtx_initialized(&td->unsent_wr_lock))
		mtx_destroy(&td->unsent_wr_lock);
	if (mtx_initialized(&td->lctx_hash_lock))
		mtx_destroy(&td->lctx_hash_lock);
	if (mtx_initialized(&td->toep_list_lock))
		mtx_destroy(&td->toep_list_lock);

	free_tid_tabs(&sc->tids);
	free(td, M_CXGBE);
}
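
/*
 * Build a headers-only template packet (Ethernet or VLAN header, IP or IPv6
 * header, and TCP header) from an inpcb, so that the BPF programs in the
 * offload policy can be run against active opens and listens the same way
 * they are run against the payload of a CPL_PASS_ACCEPT_REQ.  For a listen
 * the destination fields are just copies of the source fields.
 */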
static char *
prepare_pkt(int open_type, uint16_t vtag, struct inpcb *inp, int *pktlen,
    int *buflen)
{
	char *pkt;
	struct tcphdr *th;
	int ipv6, len;
	const int maxlen =
	    max(sizeof(struct ether_header), sizeof(struct ether_vlan_header)) +
	    max(sizeof(struct ip), sizeof(struct ip6_hdr)) +
	    sizeof(struct tcphdr);

	MPASS(open_type == OPEN_TYPE_ACTIVE || open_type == OPEN_TYPE_LISTEN);

	pkt = malloc(maxlen, M_CXGBE, M_ZERO | M_NOWAIT);
	if (pkt == NULL)
		return (NULL);

	ipv6 = inp->inp_vflag & INP_IPV6;
	len = 0;

	if (EVL_VLANOFTAG(vtag) == 0xfff) {
		struct ether_header *eh = (void *)pkt;

		if (ipv6)
			eh->ether_type = htons(ETHERTYPE_IPV6);
		else
			eh->ether_type = htons(ETHERTYPE_IP);

		len += sizeof(*eh);
	} else {
		struct ether_vlan_header *evh = (void *)pkt;

		evh->evl_encap_proto = htons(ETHERTYPE_VLAN);
		evh->evl_tag = htons(vtag);
		if (ipv6)
			evh->evl_proto = htons(ETHERTYPE_IPV6);
		else
			evh->evl_proto = htons(ETHERTYPE_IP);

		len += sizeof(*evh);
	}

	if (ipv6) {
		struct ip6_hdr *ip6 = (void *)&pkt[len];

		ip6->ip6_vfc = IPV6_VERSION;
		ip6->ip6_plen = htons(sizeof(struct tcphdr));
		ip6->ip6_nxt = IPPROTO_TCP;
		if (open_type == OPEN_TYPE_ACTIVE) {
			ip6->ip6_src = inp->in6p_laddr;
			ip6->ip6_dst = inp->in6p_faddr;
		} else if (open_type == OPEN_TYPE_LISTEN) {
			ip6->ip6_src = inp->in6p_laddr;
			ip6->ip6_dst = ip6->ip6_src;
		}

		len += sizeof(*ip6);
	} else {
		struct ip *ip = (void *)&pkt[len];

		ip->ip_v = IPVERSION;
		ip->ip_hl = sizeof(*ip) >> 2;
		ip->ip_tos = inp->inp_ip_tos;
		ip->ip_len = htons(sizeof(struct ip) + sizeof(struct tcphdr));
		ip->ip_ttl = inp->inp_ip_ttl;
		ip->ip_p = IPPROTO_TCP;
		if (open_type == OPEN_TYPE_ACTIVE) {
			ip->ip_src = inp->inp_laddr;
			ip->ip_dst = inp->inp_faddr;
		} else if (open_type == OPEN_TYPE_LISTEN) {
			ip->ip_src = inp->inp_laddr;
			ip->ip_dst = ip->ip_src;
		}

		len += sizeof(*ip);
	}

	th = (void *)&pkt[len];
	if (open_type == OPEN_TYPE_ACTIVE) {
		th->th_sport = inp->inp_lport;	/* network byte order already */
		th->th_dport = inp->inp_fport;	/* ditto */
	} else if (open_type == OPEN_TYPE_LISTEN) {
		th->th_sport = inp->inp_lport;	/* network byte order already */
		th->th_dport = th->th_sport;
	}
	len += sizeof(*th);

	*pktlen = *buflen = len;
	return (pkt);
}

const struct offload_settings *
lookup_offload_policy(struct adapter *sc, int open_type, struct mbuf *m,
    uint16_t vtag, struct inpcb *inp)
{
	const struct t4_offload_policy *op;
	char *pkt;
	struct offload_rule *r;
	int i, matched, pktlen, buflen;
	static const struct offload_settings allow_offloading_settings = {
		.offload = 1,
		.rx_coalesce = -1,
		.cong_algo = -1,
		.sched_class = -1,
		.tstamp = -1,
		.sack = -1,
		.nagle = -1,
		.ecn = -1,
		.ddp = -1,
		.tls = -1,
		.txq = -1,
		.rxq = -1,
		.mss = -1,
	};
	static const struct offload_settings disallow_offloading_settings = {
		.offload = 0,
		/* rest is irrelevant when offload is off. */
	};

	rw_assert(&sc->policy_lock, RA_LOCKED);

	/*
	 * If there's no Connection Offloading Policy attached to the device
	 * then we need to return a default static policy.  If
	 * "cop_managed_offloading" is true, then we need to disallow
	 * offloading until a COP is attached to the device.  Otherwise we
	 * allow offloading.
	 */
	op = sc->policy;
	if (op == NULL) {
		if (sc->tt.cop_managed_offloading)
			return (&disallow_offloading_settings);
		else
			return (&allow_offloading_settings);
	}

	switch (open_type) {
	case OPEN_TYPE_ACTIVE:
	case OPEN_TYPE_LISTEN:
		pkt = prepare_pkt(open_type, vtag, inp, &pktlen, &buflen);
		break;
	case OPEN_TYPE_PASSIVE:
		MPASS(m != NULL);
		pkt = mtod(m, char *);
		MPASS(*pkt == CPL_PASS_ACCEPT_REQ);
		pkt += sizeof(struct cpl_pass_accept_req);
		pktlen = m->m_pkthdr.len - sizeof(struct cpl_pass_accept_req);
		buflen = m->m_len - sizeof(struct cpl_pass_accept_req);
		break;
	default:
		MPASS(0);
		return (&disallow_offloading_settings);
	}

	if (pkt == NULL || pktlen == 0 || buflen == 0)
		return (&disallow_offloading_settings);

	matched = 0;
	r = &op->rule[0];
	for (i = 0; i < op->nrules; i++, r++) {
		if (r->open_type != open_type &&
		    r->open_type != OPEN_TYPE_DONTCARE) {
			continue;
		}
		matched = bpf_filter(r->bpf_prog.bf_insns, pkt, pktlen, buflen);
		if (matched)
			break;
	}

	if (open_type == OPEN_TYPE_ACTIVE || open_type == OPEN_TYPE_LISTEN)
		free(pkt, M_CXGBE);

	return (matched ? &r->settings : &disallow_offloading_settings);
}
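
/*
 * Dispose of work requests that were never sent because L2 resolution
 * failed.  Active open requests are the expected case and are unwound with
 * EHOSTUNREACH; anything else on the list is logged and deliberately leaked
 * so it can be examined post-mortem.
 */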
static void
reclaim_wr_resources(void *arg, int count)
{
	struct tom_data *td = arg;
	STAILQ_HEAD(, wrqe) twr_list = STAILQ_HEAD_INITIALIZER(twr_list);
	struct cpl_act_open_req *cpl;
	u_int opcode, atid;
	struct wrqe *wr;
	struct adapter *sc;

	mtx_lock(&td->unsent_wr_lock);
	STAILQ_SWAP(&td->unsent_wr_list, &twr_list, wrqe);
	mtx_unlock(&td->unsent_wr_lock);

	while ((wr = STAILQ_FIRST(&twr_list)) != NULL) {
		STAILQ_REMOVE_HEAD(&twr_list, link);

		cpl = wrtod(wr);
		opcode = GET_OPCODE(cpl);

		switch (opcode) {
		case CPL_ACT_OPEN_REQ:
		case CPL_ACT_OPEN_REQ6:
			atid = G_TID_TID(be32toh(OPCODE_TID(cpl)));
			sc = td_adapter(td);

			CTR2(KTR_CXGBE, "%s: atid %u ", __func__, atid);
			act_open_failure_cleanup(sc, atid, EHOSTUNREACH);
			free(wr, M_CXGBE);
			break;
		default:
			log(LOG_ERR, "%s: leaked work request %p, wr_len %d, "
			    "opcode %x\n", __func__, wr, wr->wr_len, opcode);
			/* WR not freed here; go look at it with a debugger. */
		}
	}
}

/*
 * Ground control to Major TOM
 * Commencing countdown, engines on
 */
static int
t4_tom_activate(struct adapter *sc)
{
	struct tom_data *td;
	struct toedev *tod;
	struct vi_info *vi;
	int i, rc, v;

	ASSERT_SYNCHRONIZED_OP(sc);

	/* per-adapter softc for TOM */
	td = malloc(sizeof(*td), M_CXGBE, M_ZERO | M_NOWAIT);
	if (td == NULL)
		return (ENOMEM);

	/* List of TOE PCBs and associated lock */
	mtx_init(&td->toep_list_lock, "PCB list lock", NULL, MTX_DEF);
	TAILQ_INIT(&td->toep_list);

	/* Listen context */
	mtx_init(&td->lctx_hash_lock, "lctx hash lock", NULL, MTX_DEF);
	td->listen_hash = hashinit_flags(LISTEN_HASH_SIZE, M_CXGBE,
	    &td->listen_mask, HASH_NOWAIT);

	/* List of WRs for which L2 resolution failed */
	mtx_init(&td->unsent_wr_lock, "Unsent WR list lock", NULL, MTX_DEF);
	STAILQ_INIT(&td->unsent_wr_list);
	TASK_INIT(&td->reclaim_wr_resources, 0, reclaim_wr_resources, td);

	/* TID tables */
	rc = alloc_tid_tabs(&sc->tids);
	if (rc != 0)
		goto done;

	rc = t4_init_ppod_region(&td->pr, &sc->vres.ddp,
	    t4_read_reg(sc, A_ULP_RX_TDDP_PSZ), "TDDP page pods");
	if (rc != 0)
		goto done;
	t4_set_reg_field(sc, A_ULP_RX_TDDP_TAGMASK,
	    V_TDDPTAGMASK(M_TDDPTAGMASK), td->pr.pr_tag_mask);

	/* CLIP table for IPv6 offload */
	init_clip_table(sc, td);

	/* toedev ops */
	tod = &td->tod;
	init_toedev(tod);
	tod->tod_softc = sc;
	tod->tod_connect = t4_connect;
	tod->tod_listen_start = t4_listen_start;
	tod->tod_listen_stop = t4_listen_stop;
	tod->tod_rcvd = t4_rcvd;
	tod->tod_output = t4_tod_output;
	tod->tod_send_rst = t4_send_rst;
	tod->tod_send_fin = t4_send_fin;
	tod->tod_pcb_detach = t4_pcb_detach;
	tod->tod_l2_update = t4_l2_update;
	tod->tod_syncache_added = t4_syncache_added;
	tod->tod_syncache_removed = t4_syncache_removed;
	tod->tod_syncache_respond = t4_syncache_respond;
	tod->tod_offload_socket = t4_offload_socket;
	tod->tod_ctloutput = t4_ctloutput;
	tod->tod_tcp_info = t4_tcp_info;

	for_each_port(sc, i) {
		for_each_vi(sc->port[i], v, vi) {
			TOEDEV(vi->ifp) = &td->tod;
		}
	}

	sc->tom_softc = td;
	register_toedev(sc->tom_softc);

done:
	if (rc != 0)
		free_tom_data(sc, td);
	return (rc);
}

static int
t4_tom_deactivate(struct adapter *sc)
{
	int rc = 0;
	struct tom_data *td = sc->tom_softc;

	ASSERT_SYNCHRONIZED_OP(sc);

	if (td == NULL)
		return (0);	/* XXX. KASSERT? */

	if (sc->offload_map != 0)
		return (EBUSY);	/* at least one port has IFCAP_TOE enabled */

	if (uld_active(sc, ULD_IWARP) || uld_active(sc, ULD_ISCSI))
		return (EBUSY);	/* both iWARP and iSCSI rely on the TOE. */

	mtx_lock(&td->toep_list_lock);
	if (!TAILQ_EMPTY(&td->toep_list))
		rc = EBUSY;
	mtx_unlock(&td->toep_list_lock);

	mtx_lock(&td->lctx_hash_lock);
	if (td->lctx_count > 0)
		rc = EBUSY;
	mtx_unlock(&td->lctx_hash_lock);

	taskqueue_drain(taskqueue_thread, &td->reclaim_wr_resources);
	mtx_lock(&td->unsent_wr_lock);
	if (!STAILQ_EMPTY(&td->unsent_wr_list))
		rc = EBUSY;
	mtx_unlock(&td->unsent_wr_lock);

	if (rc == 0) {
		unregister_toedev(sc->tom_softc);
		free_tom_data(sc, td);
		sc->tom_softc = NULL;
	}

	return (rc);
}

static void
t4_tom_ifaddr_event(void *arg __unused, struct ifnet *ifp)
{

	atomic_add_rel_int(&in6_ifaddr_gen, 1);
	taskqueue_enqueue_timeout(taskqueue_thread, &clip_task, -hz / 4);
}

static int
t4_aio_queue_tom(struct socket *so, struct kaiocb *job)
{
	struct tcpcb *tp = so_sototcpcb(so);
	struct toepcb *toep = tp->t_toe;
	int error;

	if (toep->ulp_mode == ULP_MODE_TCPDDP) {
		error = t4_aio_queue_ddp(so, job);
		if (error != EOPNOTSUPP)
			return (error);
	}

	return (t4_aio_queue_aiotx(so, job));
}

static int
t4_ctloutput_tom(struct socket *so, struct sockopt *sopt)
{

	if (sopt->sopt_level != IPPROTO_TCP)
		return (tcp_ctloutput(so, sopt));

	switch (sopt->sopt_name) {
	case TCP_TLSOM_SET_TLS_CONTEXT:
	case TCP_TLSOM_GET_TLS_TOM:
	case TCP_TLSOM_CLR_TLS_TOM:
	case TCP_TLSOM_CLR_QUIES:
		return (t4_ctloutput_tls(so, sopt));
	default:
		return (tcp_ctloutput(so, sopt));
	}
}
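
/*
 * Module load clones the kernel's TCP protosw for each address family and
 * overrides only pr_ctloutput and pru_aio_queue; an offloaded socket
 * otherwise behaves like a regular TCP socket.  offload_socket() switches a
 * connection over to these protosws.
 */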
static int
t4_tom_mod_load(void)
{
	struct protosw *tcp_protosw, *tcp6_protosw;

	/* CPL handlers */
	t4_register_shared_cpl_handler(CPL_L2T_WRITE_RPL, do_l2t_write_rpl2,
	    CPL_COOKIE_TOM);
	t4_init_connect_cpl_handlers();
	t4_init_listen_cpl_handlers();
	t4_init_cpl_io_handlers();

	t4_ddp_mod_load();
	t4_tls_mod_load();

	tcp_protosw = pffindproto(PF_INET, IPPROTO_TCP, SOCK_STREAM);
	if (tcp_protosw == NULL)
		return (ENOPROTOOPT);
	bcopy(tcp_protosw, &toe_protosw, sizeof(toe_protosw));
	bcopy(tcp_protosw->pr_usrreqs, &toe_usrreqs, sizeof(toe_usrreqs));
	toe_usrreqs.pru_aio_queue = t4_aio_queue_tom;
	toe_protosw.pr_ctloutput = t4_ctloutput_tom;
	toe_protosw.pr_usrreqs = &toe_usrreqs;

	tcp6_protosw = pffindproto(PF_INET6, IPPROTO_TCP, SOCK_STREAM);
	if (tcp6_protosw == NULL)
		return (ENOPROTOOPT);
	bcopy(tcp6_protosw, &toe6_protosw, sizeof(toe6_protosw));
	bcopy(tcp6_protosw->pr_usrreqs, &toe6_usrreqs, sizeof(toe6_usrreqs));
	toe6_usrreqs.pru_aio_queue = t4_aio_queue_tom;
	toe6_protosw.pr_ctloutput = t4_ctloutput_tom;
	toe6_protosw.pr_usrreqs = &toe6_usrreqs;

	TIMEOUT_TASK_INIT(taskqueue_thread, &clip_task, 0, t4_clip_task, NULL);
	ifaddr_evhandler = EVENTHANDLER_REGISTER(ifaddr_event,
	    t4_tom_ifaddr_event, NULL, EVENTHANDLER_PRI_ANY);

	return (t4_register_uld(&tom_uld_info));
}

static void
tom_uninit(struct adapter *sc, void *arg __unused)
{
	if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4tomun"))
		return;

	/* Try to free resources (works only if no port has IFCAP_TOE) */
	if (uld_active(sc, ULD_TOM))
		t4_deactivate_uld(sc, ULD_TOM);

	end_synchronized_op(sc, 0);
}

static int
t4_tom_mod_unload(void)
{
	t4_iterate(tom_uninit, NULL);

	if (t4_unregister_uld(&tom_uld_info) == EBUSY)
		return (EBUSY);

	if (ifaddr_evhandler) {
		EVENTHANDLER_DEREGISTER(ifaddr_event, ifaddr_evhandler);
		taskqueue_cancel_timeout(taskqueue_thread, &clip_task, NULL);
	}

	t4_tls_mod_unload();
	t4_ddp_mod_unload();

	t4_uninit_connect_cpl_handlers();
	t4_uninit_listen_cpl_handlers();
	t4_uninit_cpl_io_handlers();
	t4_register_shared_cpl_handler(CPL_L2T_WRITE_RPL, NULL, CPL_COOKIE_TOM);

	return (0);
}
#endif	/* TCP_OFFLOAD */

static int
t4_tom_modevent(module_t mod, int cmd, void *arg)
{
	int rc = 0;

#ifdef TCP_OFFLOAD
	switch (cmd) {
	case MOD_LOAD:
		rc = t4_tom_mod_load();
		break;

	case MOD_UNLOAD:
		rc = t4_tom_mod_unload();
		break;

	default:
		rc = EINVAL;
	}
#else
	printf("t4_tom: compiled without TCP_OFFLOAD support.\n");
	rc = EOPNOTSUPP;
#endif
	return (rc);
}

static moduledata_t t4_tom_moddata = {
	"t4_tom",
	t4_tom_modevent,
	0
};

MODULE_VERSION(t4_tom, 1);
MODULE_DEPEND(t4_tom, toecore, 1, 1, 1);
MODULE_DEPEND(t4_tom, t4nex, 1, 1, 1);
DECLARE_MODULE(t4_tom, t4_tom_moddata, SI_SUB_EXEC, SI_ORDER_ANY);