/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2012 Chelsio Communications, Inc.
 * All rights reserved.
 * Written by: Navdeep Parhar <np@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ratelimit.h"

#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/limits.h>
#include <sys/module.h>
#include <sys/protosw.h>
#include <sys/domain.h>
#include <sys/refcount.h>
#include <sys/rmlock.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/taskqueue.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_types.h>
#include <net/if_vlan_var.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet6/scope6_var.h>
#define TCPSTATES
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/toecore.h>

#ifdef TCP_OFFLOAD
#include "common/common.h"
#include "common/t4_msg.h"
#include "common/t4_regs.h"
#include "common/t4_regs_values.h"
#include "common/t4_tcb.h"
#include "tom/t4_tom_l2t.h"
#include "tom/t4_tom.h"
#include "tom/t4_tls.h"

static struct protosw toe_protosw;
static struct pr_usrreqs toe_usrreqs;

static struct protosw toe6_protosw;
static struct pr_usrreqs toe6_usrreqs;

/* Module ops */
static int t4_tom_mod_load(void);
static int t4_tom_mod_unload(void);
static int t4_tom_modevent(module_t, int, void *);

/* ULD ops and helpers */
static int t4_tom_activate(struct adapter *);
static int t4_tom_deactivate(struct adapter *);

static struct uld_info tom_uld_info = {
	.uld_id = ULD_TOM,
	.activate = t4_tom_activate,
	.deactivate = t4_tom_deactivate,
};
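
/* Internal helpers: tid management, CLIP table, and teardown */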
static void queue_tid_release(struct adapter *, int);
static void release_offload_resources(struct toepcb *);
static int alloc_tid_tabs(struct tid_info *);
static void free_tid_tabs(struct tid_info *);
static int add_lip(struct adapter *, struct in6_addr *);
static int delete_lip(struct adapter *, struct in6_addr *);
static struct clip_entry *search_lip(struct tom_data *, struct in6_addr *);
static void init_clip_table(struct adapter *, struct tom_data *);
static void update_clip(struct adapter *, void *);
static void t4_clip_task(void *, int);
static void update_clip_table(struct adapter *, struct tom_data *);
static void destroy_clip_table(struct adapter *, struct tom_data *);
static void free_tom_data(struct adapter *, struct tom_data *);
static void reclaim_wr_resources(void *, int);

static int in6_ifaddr_gen;
static eventhandler_tag ifaddr_evhandler;
static struct timeout_task clip_task;

struct toepcb *
alloc_toepcb(struct vi_info *vi, int txqid, int rxqid, int flags)
{
	struct port_info *pi = vi->pi;
	struct adapter *sc = pi->adapter;
	struct toepcb *toep;
	int tx_credits, txsd_total, len;

	/*
	 * The firmware counts tx work request credits in units of 16 bytes
	 * each.  Reserve room for an ABORT_REQ so the driver never has to worry
	 * about tx credits if it wants to abort a connection.
	 */
	tx_credits = sc->params.ofldq_wr_cred;
	tx_credits -= howmany(sizeof(struct cpl_abort_req), 16);

	/*
	 * Shortest possible tx work request is a fw_ofld_tx_data_wr + 1 byte
	 * immediate payload, and firmware counts tx work request credits in
	 * units of 16 bytes.  Calculate the maximum work requests possible.
	 */
	txsd_total = tx_credits /
	    howmany(sizeof(struct fw_ofld_tx_data_wr) + 1, 16);

	KASSERT(txqid >= vi->first_ofld_txq &&
	    txqid < vi->first_ofld_txq + vi->nofldtxq,
	    ("%s: txqid %d for vi %p (first %d, n %d)", __func__, txqid, vi,
	    vi->first_ofld_txq, vi->nofldtxq));

	KASSERT(rxqid >= vi->first_ofld_rxq &&
	    rxqid < vi->first_ofld_rxq + vi->nofldrxq,
	    ("%s: rxqid %d for vi %p (first %d, n %d)", __func__, rxqid, vi,
	    vi->first_ofld_rxq, vi->nofldrxq));

	len = offsetof(struct toepcb, txsd) +
	    txsd_total * sizeof(struct ofld_tx_sdesc);

	toep = malloc(len, M_CXGBE, M_ZERO | flags);
	if (toep == NULL)
		return (NULL);

	refcount_init(&toep->refcount, 1);
	toep->td = sc->tom_softc;
	toep->vi = vi;
	toep->tc_idx = -1;
	toep->tx_total = tx_credits;
	toep->tx_credits = tx_credits;
	toep->ofld_txq = &sc->sge.ofld_txq[txqid];
	toep->ofld_rxq = &sc->sge.ofld_rxq[rxqid];
	toep->ctrlq = &sc->sge.ctrlq[pi->port_id];
	mbufq_init(&toep->ulp_pduq, INT_MAX);
	mbufq_init(&toep->ulp_pdu_reclaimq, INT_MAX);
	toep->txsd_total = txsd_total;
	toep->txsd_avail = txsd_total;
	toep->txsd_pidx = 0;
	toep->txsd_cidx = 0;
	aiotx_init_toep(toep);

	return (toep);
}

struct toepcb *
hold_toepcb(struct toepcb *toep)
{

	refcount_acquire(&toep->refcount);
	return (toep);
}

void
free_toepcb(struct toepcb *toep)
{

	if (refcount_release(&toep->refcount) == 0)
		return;

	KASSERT(!(toep->flags & TPF_ATTACHED),
	    ("%s: attached to an inpcb", __func__));
	KASSERT(!(toep->flags & TPF_CPL_PENDING),
	    ("%s: CPL pending", __func__));

	if (toep->ulp_mode == ULP_MODE_TCPDDP)
		ddp_uninit_toep(toep);
	tls_uninit_toep(toep);
	free(toep, M_CXGBE);
}

/*
 * Set up the socket for TCP offload.
 */
void
offload_socket(struct socket *so, struct toepcb *toep)
{
	struct tom_data *td = toep->td;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp = intotcpcb(inp);
	struct sockbuf *sb;

	INP_WLOCK_ASSERT(inp);

	/* Update socket */
	sb = &so->so_snd;
	SOCKBUF_LOCK(sb);
	sb->sb_flags |= SB_NOCOALESCE;
	SOCKBUF_UNLOCK(sb);
	sb = &so->so_rcv;
	SOCKBUF_LOCK(sb);
	sb->sb_flags |= SB_NOCOALESCE;
	if (inp->inp_vflag & INP_IPV6)
		so->so_proto = &toe6_protosw;
	else
		so->so_proto = &toe_protosw;
	SOCKBUF_UNLOCK(sb);

	/* Update TCP PCB */
	tp->tod = &td->tod;
	tp->t_toe = toep;
	tp->t_flags |= TF_TOE;

	/* Install an extra hold on inp */
	toep->inp = inp;
	toep->flags |= TPF_ATTACHED;
	in_pcbref(inp);

	/* Add the TOE PCB to the active list */
	mtx_lock(&td->toep_list_lock);
	TAILQ_INSERT_HEAD(&td->toep_list, toep, link);
	mtx_unlock(&td->toep_list_lock);
}

/* This is _not_ the normal way to "unoffload" a socket. */
void
undo_offload_socket(struct socket *so)
{
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp = intotcpcb(inp);
	struct toepcb *toep = tp->t_toe;
	struct tom_data *td = toep->td;
	struct sockbuf *sb;

	INP_WLOCK_ASSERT(inp);

	sb = &so->so_snd;
	SOCKBUF_LOCK(sb);
	sb->sb_flags &= ~SB_NOCOALESCE;
	SOCKBUF_UNLOCK(sb);
	sb = &so->so_rcv;
	SOCKBUF_LOCK(sb);
	sb->sb_flags &= ~SB_NOCOALESCE;
	SOCKBUF_UNLOCK(sb);

	tp->tod = NULL;
	tp->t_toe = NULL;
	tp->t_flags &= ~TF_TOE;

	toep->inp = NULL;
	toep->flags &= ~TPF_ATTACHED;
	if (in_pcbrele_wlocked(inp))
		panic("%s: inp freed.", __func__);

	mtx_lock(&td->toep_list_lock);
	TAILQ_REMOVE(&td->toep_list, toep, link);
	mtx_unlock(&td->toep_list_lock);
}

static void
release_offload_resources(struct toepcb *toep)
{
	struct tom_data *td = toep->td;
	struct adapter *sc = td_adapter(td);
	int tid = toep->tid;

	KASSERT(!(toep->flags & TPF_CPL_PENDING),
	    ("%s: %p has CPL pending.", __func__, toep));
	KASSERT(!(toep->flags & TPF_ATTACHED),
	    ("%s: %p is still attached.", __func__, toep));

	CTR5(KTR_CXGBE, "%s: toep %p (tid %d, l2te %p, ce %p)",
	    __func__, toep, tid, toep->l2te, toep->ce);

	/*
	 * These queues should have been emptied at approximately the same time
	 * that a normal connection's socket's so_snd would have been purged or
	 * drained.  Do _not_ clean up here.
	 */
	MPASS(mbufq_len(&toep->ulp_pduq) == 0);
	MPASS(mbufq_len(&toep->ulp_pdu_reclaimq) == 0);
#ifdef INVARIANTS
	if (toep->ulp_mode == ULP_MODE_TCPDDP)
		ddp_assert_empty(toep);
#endif

	if (toep->l2te)
		t4_l2t_release(toep->l2te);

	if (tid >= 0) {
		remove_tid(sc, tid, toep->ce ? 2 : 1);
		release_tid(sc, tid, toep->ctrlq);
	}

	if (toep->ce)
		release_lip(td, toep->ce);

#ifdef RATELIMIT
	if (toep->tc_idx != -1)
		t4_release_cl_rl_kbps(sc, toep->vi->pi->port_id, toep->tc_idx);
#endif
	mtx_lock(&td->toep_list_lock);
	TAILQ_REMOVE(&td->toep_list, toep, link);
	mtx_unlock(&td->toep_list_lock);

	free_toepcb(toep);
}

/*
 * The kernel is done with the TCP PCB and this is our opportunity to unhook the
 * toepcb hanging off of it.  If the TOE driver is also done with the toepcb (no
 * pending CPL) then it is time to release all resources tied to the toepcb.
 *
 * Also gets called when an offloaded active open fails and the TOM wants the
 * kernel to take the TCP PCB back.
 */
static void
t4_pcb_detach(struct toedev *tod __unused, struct tcpcb *tp)
{
#if defined(KTR) || defined(INVARIANTS)
	struct inpcb *inp = tp->t_inpcb;
#endif
	struct toepcb *toep = tp->t_toe;

	INP_WLOCK_ASSERT(inp);

	KASSERT(toep != NULL, ("%s: toep is NULL", __func__));
	KASSERT(toep->flags & TPF_ATTACHED,
	    ("%s: not attached", __func__));

#ifdef KTR
	if (tp->t_state == TCPS_SYN_SENT) {
		CTR6(KTR_CXGBE, "%s: atid %d, toep %p (0x%x), inp %p (0x%x)",
		    __func__, toep->tid, toep, toep->flags, inp,
		    inp->inp_flags);
	} else {
		CTR6(KTR_CXGBE,
		    "t4_pcb_detach: tid %d (%s), toep %p (0x%x), inp %p (0x%x)",
		    toep->tid, tcpstates[tp->t_state], toep, toep->flags, inp,
		    inp->inp_flags);
	}
#endif

	tp->t_toe = NULL;
	tp->t_flags &= ~TF_TOE;
	toep->flags &= ~TPF_ATTACHED;

	if (!(toep->flags & TPF_CPL_PENDING))
		release_offload_resources(toep);
}

/*
 * setsockopt handler.
 */
static void
t4_ctloutput(struct toedev *tod, struct tcpcb *tp, int dir, int name)
{
	struct adapter *sc = tod->tod_softc;
	struct toepcb *toep = tp->t_toe;

	if (dir == SOPT_GET)
		return;

	CTR4(KTR_CXGBE, "%s: tp %p, dir %u, name %u", __func__, tp, dir, name);

	switch (name) {
	case TCP_NODELAY:
		if (tp->t_state != TCPS_ESTABLISHED)
			break;
		t4_set_tcb_field(sc, toep->ctrlq, toep, W_TCB_T_FLAGS,
		    V_TF_NAGLE(1), V_TF_NAGLE(tp->t_flags & TF_NODELAY ? 0 : 1),
		    0, 0);
		break;
	default:
		break;
	}
}
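
/*
 * Helpers to extract a single bit or a bit field from a raw TCB image.  Bit
 * numbers follow the TCB field layout; the buffer is expected to have been
 * rearranged the way t4_tcp_info() below does it (the 16-byte words of the
 * TCB reversed end to end), so bit 0 lands in the last byte of the buffer.
 */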
static inline int
get_tcb_bit(u_char *tcb, int bit)
{
	int ix, shift;

	ix = 127 - (bit >> 3);
	shift = bit & 0x7;

	return ((tcb[ix] >> shift) & 1);
}

static inline uint64_t
get_tcb_bits(u_char *tcb, int hi, int lo)
{
	uint64_t rc = 0;

	while (hi >= lo) {
		rc = (rc << 1) | get_tcb_bit(tcb, hi);
		--hi;
	}

	return (rc);
}

/*
 * Called by the kernel to allow the TOE driver to "refine" values filled up in
 * the tcp_info for an offloaded connection.
 */
static void
t4_tcp_info(struct toedev *tod, struct tcpcb *tp, struct tcp_info *ti)
{
	int i, j, k, rc;
	struct adapter *sc = tod->tod_softc;
	struct toepcb *toep = tp->t_toe;
	uint32_t addr, v;
	uint32_t buf[TCB_SIZE / sizeof(uint32_t)];
	u_char *tcb, tmp;

	INP_WLOCK_ASSERT(tp->t_inpcb);
	MPASS(ti != NULL);

	addr = t4_read_reg(sc, A_TP_CMM_TCB_BASE) + toep->tid * TCB_SIZE;
	rc = read_via_memwin(sc, 2, addr, &buf[0], TCB_SIZE);
	if (rc != 0)
		return;

	tcb = (u_char *)&buf[0];
	for (i = 0, j = TCB_SIZE - 16; i < j; i += 16, j -= 16) {
		for (k = 0; k < 16; k++) {
			tmp = tcb[i + k];
			tcb[i + k] = tcb[j + k];
			tcb[j + k] = tmp;
		}
	}

	ti->tcpi_state = get_tcb_bits(tcb, 115, 112);

	v = get_tcb_bits(tcb, 271, 256);
	ti->tcpi_rtt = tcp_ticks_to_us(sc, v);

	v = get_tcb_bits(tcb, 287, 272);
	ti->tcpi_rttvar = tcp_ticks_to_us(sc, v);

	ti->tcpi_snd_ssthresh = get_tcb_bits(tcb, 487, 460);
	ti->tcpi_snd_cwnd = get_tcb_bits(tcb, 459, 432);
	ti->tcpi_rcv_nxt = get_tcb_bits(tcb, 553, 522);

	ti->tcpi_snd_nxt = get_tcb_bits(tcb, 319, 288) -
	    get_tcb_bits(tcb, 375, 348);

	/* Receive window being advertised by us. */
	ti->tcpi_rcv_space = get_tcb_bits(tcb, 581, 554);

	/* Send window ceiling. */
	v = get_tcb_bits(tcb, 159, 144) << get_tcb_bits(tcb, 131, 128);
	ti->tcpi_snd_wnd = min(v, ti->tcpi_snd_cwnd);
}

/*
 * The TOE driver will not receive any more CPLs for the tid associated with the
 * toepcb; release the hold on the inpcb.
 */
void
final_cpl_received(struct toepcb *toep)
{
	struct inpcb *inp = toep->inp;

	KASSERT(inp != NULL, ("%s: inp is NULL", __func__));
	INP_WLOCK_ASSERT(inp);
	KASSERT(toep->flags & TPF_CPL_PENDING,
	    ("%s: CPL not pending already?", __func__));

	CTR6(KTR_CXGBE, "%s: tid %d, toep %p (0x%x), inp %p (0x%x)",
	    __func__, toep->tid, toep, toep->flags, inp, inp->inp_flags);

	if (toep->ulp_mode == ULP_MODE_TCPDDP)
		release_ddp_resources(toep);
	toep->inp = NULL;
	toep->flags &= ~TPF_CPL_PENDING;
	mbufq_drain(&toep->ulp_pdu_reclaimq);

	if (!(toep->flags & TPF_ATTACHED))
		release_offload_resources(toep);

	if (!in_pcbrele_wlocked(inp))
		INP_WUNLOCK(inp);
}

void
insert_tid(struct adapter *sc, int tid, void *ctx, int ntids)
{
	struct tid_info *t = &sc->tids;

	t->tid_tab[tid] = ctx;
	atomic_add_int(&t->tids_in_use, ntids);
}

void *
lookup_tid(struct adapter *sc, int tid)
{
	struct tid_info *t = &sc->tids;

	return (t->tid_tab[tid]);
}

void
update_tid(struct adapter *sc, int tid, void *ctx)
{
	struct tid_info *t = &sc->tids;

	t->tid_tab[tid] = ctx;
}

void
remove_tid(struct adapter *sc, int tid, int ntids)
{
	struct tid_info *t = &sc->tids;

	t->tid_tab[tid] = NULL;
	atomic_subtract_int(&t->tids_in_use, ntids);
}

void
release_tid(struct adapter *sc, int tid, struct sge_wrq *ctrlq)
{
	struct wrqe *wr;
	struct cpl_tid_release *req;

	wr = alloc_wrqe(sizeof(*req), ctrlq);
	if (wr == NULL) {
		queue_tid_release(sc, tid);	/* defer */
		return;
	}
	req = wrtod(wr);

	INIT_TP_WR_MIT_CPL(req, CPL_TID_RELEASE, tid);

	t4_wrq_tx(sc, wr);
}

static void
queue_tid_release(struct adapter *sc, int tid)
{

	CXGBE_UNIMPLEMENTED("deferred tid release");
}

/*
 * What mtu_idx to use, given a 4-tuple.  Note that both s->mss and tcp_mssopt
 * have the MSS that we should advertise in our SYN.  Advertised MSS doesn't
 * account for any TCP options so the effective MSS (only payload, no headers or
 * options) could be different.  We fill up tp->t_maxseg with the effective MSS
 * at the end of the 3-way handshake.
 */
int
find_best_mtu_idx(struct adapter *sc, struct in_conninfo *inc,
    struct offload_settings *s)
{
	unsigned short *mtus = &sc->params.mtus[0];
	int i, mss, mtu;

	MPASS(inc != NULL);

	mss = s->mss > 0 ? s->mss : tcp_mssopt(inc);
	if (inc->inc_flags & INC_ISIPV6)
		mtu = mss + sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
	else
		mtu = mss + sizeof(struct ip) + sizeof(struct tcphdr);

	for (i = 0; i < NMTUS - 1 && mtus[i + 1] <= mtu; i++)
		continue;

	return (i);
}

/*
 * Determine the receive window size for a socket.
 */
u_long
select_rcv_wnd(struct socket *so)
{
	unsigned long wnd;

	SOCKBUF_LOCK_ASSERT(&so->so_rcv);

	wnd = sbspace(&so->so_rcv);
	if (wnd < MIN_RCV_WND)
		wnd = MIN_RCV_WND;

	return min(wnd, MAX_RCV_WND);
}

int
select_rcv_wscale(void)
{
	int wscale = 0;
	unsigned long space = sb_max;

	if (space > MAX_RCV_WND)
		space = MAX_RCV_WND;

	while (wscale < TCP_MAX_WINSHIFT && (TCP_MAXWIN << wscale) < space)
		wscale++;

	return (wscale);
}

/*
 * socket so could be a listening socket too.
 */
uint64_t
calc_opt0(struct socket *so, struct vi_info *vi, struct l2t_entry *e,
    int mtu_idx, int rscale, int rx_credits, int ulp_mode,
    struct offload_settings *s)
{
	int keepalive;
	uint64_t opt0;

	MPASS(so != NULL);
	MPASS(vi != NULL);
	KASSERT(rx_credits <= M_RCV_BUFSIZ,
	    ("%s: rcv_bufsiz too high", __func__));

	opt0 = F_TCAM_BYPASS | V_WND_SCALE(rscale) | V_MSS_IDX(mtu_idx) |
	    V_ULP_MODE(ulp_mode) | V_RCV_BUFSIZ(rx_credits) |
	    V_L2T_IDX(e->idx) | V_SMAC_SEL(vi->smt_idx) |
	    V_TX_CHAN(vi->pi->tx_chan);

	keepalive = tcp_always_keepalive || so_options_get(so) & SO_KEEPALIVE;
	opt0 |= V_KEEP_ALIVE(keepalive != 0);

	if (s->nagle < 0) {
		struct inpcb *inp = sotoinpcb(so);
		struct tcpcb *tp = intotcpcb(inp);

		opt0 |= V_NAGLE((tp->t_flags & TF_NODELAY) == 0);
	} else
		opt0 |= V_NAGLE(s->nagle != 0);

	return htobe64(opt0);
}

uint64_t
select_ntuple(struct vi_info *vi, struct l2t_entry *e)
{
	struct adapter *sc = vi->pi->adapter;
	struct tp_params *tp = &sc->params.tp;
	uint16_t viid = vi->viid;
	uint64_t ntuple = 0;

	/*
	 * Initialize each of the fields which we care about which are present
	 * in the Compressed Filter Tuple.
	 */
	if (tp->vlan_shift >= 0 && e->vlan != CPL_L2T_VLAN_NONE)
		ntuple |= (uint64_t)(F_FT_VLAN_VLD | e->vlan) << tp->vlan_shift;

	if (tp->port_shift >= 0)
		ntuple |= (uint64_t)e->lport << tp->port_shift;

	if (tp->protocol_shift >= 0)
		ntuple |= (uint64_t)IPPROTO_TCP << tp->protocol_shift;

	if (tp->vnic_shift >= 0) {
		uint32_t vf = G_FW_VIID_VIN(viid);
		uint32_t pf = G_FW_VIID_PFN(viid);
		uint32_t vld = G_FW_VIID_VIVLD(viid);

		ntuple |= (uint64_t)(V_FT_VNID_ID_VF(vf) | V_FT_VNID_ID_PF(pf) |
		    V_FT_VNID_ID_VLD(vld)) << tp->vnic_shift;
	}

	if (is_t4(sc))
		return (htobe32((uint32_t)ntuple));
	else
		return (htobe64(V_FILTER_TUPLE(ntuple)));
}

static int
is_tls_sock(struct socket *so, struct adapter *sc)
{
	struct inpcb *inp = sotoinpcb(so);
	int i, rc;

	/* XXX: Eventually add a SO_WANT_TLS socket option perhaps? */
	rc = 0;
	ADAPTER_LOCK(sc);
	for (i = 0; i < sc->tt.num_tls_rx_ports; i++) {
		if (inp->inp_lport == htons(sc->tt.tls_rx_ports[i]) ||
		    inp->inp_fport == htons(sc->tt.tls_rx_ports[i])) {
			rc = 1;
			break;
		}
	}
	ADAPTER_UNLOCK(sc);
	return (rc);
}

int
select_ulp_mode(struct socket *so, struct adapter *sc,
    struct offload_settings *s)
{

	if (can_tls_offload(sc) &&
	    (s->tls > 0 || (s->tls < 0 && is_tls_sock(so, sc))))
		return (ULP_MODE_TLS);
	else if (s->ddp > 0 ||
	    (s->ddp < 0 && sc->tt.ddp && (so->so_options & SO_NO_DDP) == 0))
		return (ULP_MODE_TCPDDP);
	else
		return (ULP_MODE_NONE);
}

void
set_ulp_mode(struct toepcb *toep, int ulp_mode)
{

	CTR4(KTR_CXGBE, "%s: toep %p (tid %d) ulp_mode %d",
	    __func__, toep, toep->tid, ulp_mode);
	toep->ulp_mode = ulp_mode;
	tls_init_toep(toep);
	if (toep->ulp_mode == ULP_MODE_TCPDDP)
		ddp_init_toep(toep);
}

int
negative_advice(int status)
{

	return (status == CPL_ERR_RTX_NEG_ADVICE ||
	    status == CPL_ERR_PERSIST_NEG_ADVICE ||
	    status == CPL_ERR_KEEPALV_NEG_ADVICE);
}
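
/*
 * The tid, atid, and stid tables are carved out of one contiguous allocation;
 * atid entries are threaded onto a free list via their 'next' pointers.
 */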
static int
alloc_tid_tabs(struct tid_info *t)
{
	size_t size;
	unsigned int i;

	size = t->ntids * sizeof(*t->tid_tab) +
	    t->natids * sizeof(*t->atid_tab) +
	    t->nstids * sizeof(*t->stid_tab);

	t->tid_tab = malloc(size, M_CXGBE, M_ZERO | M_NOWAIT);
	if (t->tid_tab == NULL)
		return (ENOMEM);

	mtx_init(&t->atid_lock, "atid lock", NULL, MTX_DEF);
	t->atid_tab = (union aopen_entry *)&t->tid_tab[t->ntids];
	t->afree = t->atid_tab;
	t->atids_in_use = 0;
	for (i = 1; i < t->natids; i++)
		t->atid_tab[i - 1].next = &t->atid_tab[i];
	t->atid_tab[t->natids - 1].next = NULL;

	mtx_init(&t->stid_lock, "stid lock", NULL, MTX_DEF);
	t->stid_tab = (struct listen_ctx **)&t->atid_tab[t->natids];
	t->stids_in_use = 0;
	TAILQ_INIT(&t->stids);
	t->nstids_free_head = t->nstids;

	atomic_store_rel_int(&t->tids_in_use, 0);

	return (0);
}

static void
free_tid_tabs(struct tid_info *t)
{
	KASSERT(t->tids_in_use == 0,
	    ("%s: %d tids still in use.", __func__, t->tids_in_use));
	KASSERT(t->atids_in_use == 0,
	    ("%s: %d atids still in use.", __func__, t->atids_in_use));
	KASSERT(t->stids_in_use == 0,
	    ("%s: %d stids still in use.", __func__, t->stids_in_use));

	free(t->tid_tab, M_CXGBE);
	t->tid_tab = NULL;

	if (mtx_initialized(&t->atid_lock))
		mtx_destroy(&t->atid_lock);
	if (mtx_initialized(&t->stid_lock))
		mtx_destroy(&t->stid_lock);
}
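
/*
 * IPv6 CLIP (local IP) table handling.  add_lip/delete_lip use FW_CLIP_CMD to
 * add or remove a local IPv6 address in the firmware's CLIP table, and the
 * routines that follow keep the driver's shadow list of entries in sync with
 * the kernel's IPv6 address list.
 */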
static int
add_lip(struct adapter *sc, struct in6_addr *lip)
{
	struct fw_clip_cmd c;

	ASSERT_SYNCHRONIZED_OP(sc);
	/* mtx_assert(&td->clip_table_lock, MA_OWNED); */

	memset(&c, 0, sizeof(c));
	c.op_to_write = htonl(V_FW_CMD_OP(FW_CLIP_CMD) | F_FW_CMD_REQUEST |
	    F_FW_CMD_WRITE);
	c.alloc_to_len16 = htonl(F_FW_CLIP_CMD_ALLOC | FW_LEN16(c));
	c.ip_hi = *(uint64_t *)&lip->s6_addr[0];
	c.ip_lo = *(uint64_t *)&lip->s6_addr[8];

	return (-t4_wr_mbox_ns(sc, sc->mbox, &c, sizeof(c), &c));
}

static int
delete_lip(struct adapter *sc, struct in6_addr *lip)
{
	struct fw_clip_cmd c;

	ASSERT_SYNCHRONIZED_OP(sc);
	/* mtx_assert(&td->clip_table_lock, MA_OWNED); */

	memset(&c, 0, sizeof(c));
	c.op_to_write = htonl(V_FW_CMD_OP(FW_CLIP_CMD) | F_FW_CMD_REQUEST |
	    F_FW_CMD_READ);
	c.alloc_to_len16 = htonl(F_FW_CLIP_CMD_FREE | FW_LEN16(c));
	c.ip_hi = *(uint64_t *)&lip->s6_addr[0];
	c.ip_lo = *(uint64_t *)&lip->s6_addr[8];

	return (-t4_wr_mbox_ns(sc, sc->mbox, &c, sizeof(c), &c));
}

static struct clip_entry *
search_lip(struct tom_data *td, struct in6_addr *lip)
{
	struct clip_entry *ce;

	mtx_assert(&td->clip_table_lock, MA_OWNED);

	TAILQ_FOREACH(ce, &td->clip_table, link) {
		if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip))
			return (ce);
	}

	return (NULL);
}

struct clip_entry *
hold_lip(struct tom_data *td, struct in6_addr *lip, struct clip_entry *ce)
{

	mtx_lock(&td->clip_table_lock);
	if (ce == NULL)
		ce = search_lip(td, lip);
	if (ce != NULL)
		ce->refcount++;
	mtx_unlock(&td->clip_table_lock);

	return (ce);
}

void
release_lip(struct tom_data *td, struct clip_entry *ce)
{

	mtx_lock(&td->clip_table_lock);
	KASSERT(search_lip(td, &ce->lip) == ce,
	    ("%s: CLIP entry %p not in CLIP table.", __func__, ce));
	KASSERT(ce->refcount > 0,
	    ("%s: CLIP entry %p has refcount 0", __func__, ce));
	--ce->refcount;
	mtx_unlock(&td->clip_table_lock);
}

static void
init_clip_table(struct adapter *sc, struct tom_data *td)
{

	ASSERT_SYNCHRONIZED_OP(sc);

	mtx_init(&td->clip_table_lock, "CLIP table lock", NULL, MTX_DEF);
	TAILQ_INIT(&td->clip_table);
	td->clip_gen = -1;

	update_clip_table(sc, td);
}

static void
update_clip(struct adapter *sc, void *arg __unused)
{

	if (begin_synchronized_op(sc, NULL, HOLD_LOCK, "t4tomuc"))
		return;

	if (uld_active(sc, ULD_TOM))
		update_clip_table(sc, sc->tom_softc);

	end_synchronized_op(sc, LOCK_HELD);
}

static void
t4_clip_task(void *arg, int count)
{

	t4_iterate(update_clip, NULL);
}

static void
update_clip_table(struct adapter *sc, struct tom_data *td)
{
	struct rm_priotracker in6_ifa_tracker;
	struct in6_ifaddr *ia;
	struct in6_addr *lip, tlip;
	struct clip_head stale;
	struct clip_entry *ce, *ce_temp;
	struct vi_info *vi;
	int rc, gen, i, j;
	uintptr_t last_vnet;

	ASSERT_SYNCHRONIZED_OP(sc);

	IN6_IFADDR_RLOCK(&in6_ifa_tracker);
	mtx_lock(&td->clip_table_lock);

	gen = atomic_load_acq_int(&in6_ifaddr_gen);
	if (gen == td->clip_gen)
		goto done;

	TAILQ_INIT(&stale);
	TAILQ_CONCAT(&stale, &td->clip_table, link);

	/*
	 * last_vnet optimizes the common cases where all if_vnet = NULL (no
	 * VIMAGE) or all if_vnet = vnet0.
	 */
	last_vnet = (uintptr_t)(-1);
	for_each_port(sc, i)
	for_each_vi(sc->port[i], j, vi) {
		if (last_vnet == (uintptr_t)vi->ifp->if_vnet)
			continue;

		/* XXX: races with if_vmove */
		CURVNET_SET(vi->ifp->if_vnet);
		TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
			lip = &ia->ia_addr.sin6_addr;

			KASSERT(!IN6_IS_ADDR_MULTICAST(lip),
			    ("%s: mcast address in in6_ifaddr list", __func__));

			if (IN6_IS_ADDR_LOOPBACK(lip))
				continue;
			if (IN6_IS_SCOPE_EMBED(lip)) {
				/* Remove the embedded scope */
				tlip = *lip;
				lip = &tlip;
				in6_clearscope(lip);
			}
			/*
			 * XXX: how to weed out the link local address for the
			 * loopback interface?  It's fe80::1 usually (always?).
			 */

			/*
			 * If it's in the main list then we already know it's
			 * not stale.
			 */
			TAILQ_FOREACH(ce, &td->clip_table, link) {
				if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip))
					goto next;
			}

			/*
			 * If it's in the stale list we should move it to the
			 * main list.
			 */
			TAILQ_FOREACH(ce, &stale, link) {
				if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip)) {
					TAILQ_REMOVE(&stale, ce, link);
					TAILQ_INSERT_TAIL(&td->clip_table, ce,
					    link);
					goto next;
				}
			}

			/* A new IP6 address; add it to the CLIP table */
			ce = malloc(sizeof(*ce), M_CXGBE, M_NOWAIT);
			if (ce == NULL)
				goto next;	/* M_NOWAIT failed; skip it */
			memcpy(&ce->lip, lip, sizeof(ce->lip));
			ce->refcount = 0;
			rc = add_lip(sc, lip);
			if (rc == 0)
				TAILQ_INSERT_TAIL(&td->clip_table, ce, link);
			else {
				char ip[INET6_ADDRSTRLEN];

				inet_ntop(AF_INET6, &ce->lip, &ip[0],
				    sizeof(ip));
				log(LOG_ERR, "%s: could not add %s (%d)\n",
				    __func__, ip, rc);
				free(ce, M_CXGBE);
			}
next:
			continue;
		}
		CURVNET_RESTORE();
		last_vnet = (uintptr_t)vi->ifp->if_vnet;
	}

	/*
	 * Remove stale addresses (those no longer in V_in6_ifaddrhead) that are
	 * no longer referenced by the driver.
	 */
	TAILQ_FOREACH_SAFE(ce, &stale, link, ce_temp) {
		if (ce->refcount == 0) {
			rc = delete_lip(sc, &ce->lip);
			if (rc == 0) {
				TAILQ_REMOVE(&stale, ce, link);
				free(ce, M_CXGBE);
			} else {
				char ip[INET6_ADDRSTRLEN];

				inet_ntop(AF_INET6, &ce->lip, &ip[0],
				    sizeof(ip));
				log(LOG_ERR, "%s: could not delete %s (%d)\n",
				    __func__, ip, rc);
			}
		}
	}
	/* The ones that are still referenced need to stay in the CLIP table */
	TAILQ_CONCAT(&td->clip_table, &stale, link);

	td->clip_gen = gen;
done:
	mtx_unlock(&td->clip_table_lock);
	IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
}

static void
destroy_clip_table(struct adapter *sc, struct tom_data *td)
{
	struct clip_entry *ce, *ce_temp;

	if (mtx_initialized(&td->clip_table_lock)) {
		mtx_lock(&td->clip_table_lock);
		TAILQ_FOREACH_SAFE(ce, &td->clip_table, link, ce_temp) {
			KASSERT(ce->refcount == 0,
			    ("%s: CLIP entry %p still in use (%d)", __func__,
			    ce, ce->refcount));
			TAILQ_REMOVE(&td->clip_table, ce, link);
			delete_lip(sc, &ce->lip);
			free(ce, M_CXGBE);
		}
		mtx_unlock(&td->clip_table_lock);
		mtx_destroy(&td->clip_table_lock);
	}
}

static void
free_tom_data(struct adapter *sc, struct tom_data *td)
{

	ASSERT_SYNCHRONIZED_OP(sc);

	KASSERT(TAILQ_EMPTY(&td->toep_list),
	    ("%s: TOE PCB list is not empty.", __func__));
	KASSERT(td->lctx_count == 0,
	    ("%s: lctx hash table is not empty.", __func__));

	tls_free_kmap(td);
	t4_free_ppod_region(&td->pr);
	destroy_clip_table(sc, td);

	if (td->listen_mask != 0)
		hashdestroy(td->listen_hash, M_CXGBE, td->listen_mask);

	if (mtx_initialized(&td->unsent_wr_lock))
		mtx_destroy(&td->unsent_wr_lock);
	if (mtx_initialized(&td->lctx_hash_lock))
		mtx_destroy(&td->lctx_hash_lock);
	if (mtx_initialized(&td->toep_list_lock))
		mtx_destroy(&td->toep_list_lock);

	free_tid_tabs(&sc->tids);
	free(td, M_CXGBE);
}
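
/*
 * Construct a minimal Ethernet + IP/IPv6 + TCP header for the connection
 * described by the inpcb.  lookup_offload_policy() runs the offload policy's
 * BPF programs against this for active opens and listens; passive opens are
 * matched against the packet that arrived with the CPL_PASS_ACCEPT_REQ.
 */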
static char *
prepare_pkt(int open_type, uint16_t vtag, struct inpcb *inp, int *pktlen,
    int *buflen)
{
	char *pkt;
	struct tcphdr *th;
	int ipv6, len;
	const int maxlen =
	    max(sizeof(struct ether_header), sizeof(struct ether_vlan_header)) +
	    max(sizeof(struct ip), sizeof(struct ip6_hdr)) +
	    sizeof(struct tcphdr);

	MPASS(open_type == OPEN_TYPE_ACTIVE || open_type == OPEN_TYPE_LISTEN);

	pkt = malloc(maxlen, M_CXGBE, M_ZERO | M_NOWAIT);
	if (pkt == NULL)
		return (NULL);

	ipv6 = inp->inp_vflag & INP_IPV6;
	len = 0;

	if (vtag == 0xffff) {
		struct ether_header *eh = (void *)pkt;

		if (ipv6)
			eh->ether_type = htons(ETHERTYPE_IPV6);
		else
			eh->ether_type = htons(ETHERTYPE_IP);

		len += sizeof(*eh);
	} else {
		struct ether_vlan_header *evh = (void *)pkt;

		evh->evl_encap_proto = htons(ETHERTYPE_VLAN);
		evh->evl_tag = htons(vtag);
		if (ipv6)
			evh->evl_proto = htons(ETHERTYPE_IPV6);
		else
			evh->evl_proto = htons(ETHERTYPE_IP);

		len += sizeof(*evh);
	}

	if (ipv6) {
		struct ip6_hdr *ip6 = (void *)&pkt[len];

		ip6->ip6_vfc = IPV6_VERSION;
		ip6->ip6_plen = htons(sizeof(struct tcphdr));
		ip6->ip6_nxt = IPPROTO_TCP;
		if (open_type == OPEN_TYPE_ACTIVE) {
			ip6->ip6_src = inp->in6p_laddr;
			ip6->ip6_dst = inp->in6p_faddr;
		} else if (open_type == OPEN_TYPE_LISTEN) {
			ip6->ip6_src = inp->in6p_laddr;
			ip6->ip6_dst = ip6->ip6_src;
		}

		len += sizeof(*ip6);
	} else {
		struct ip *ip = (void *)&pkt[len];

		ip->ip_v = IPVERSION;
		ip->ip_hl = sizeof(*ip) >> 2;
		ip->ip_tos = inp->inp_ip_tos;
		ip->ip_len = htons(sizeof(struct ip) + sizeof(struct tcphdr));
		ip->ip_ttl = inp->inp_ip_ttl;
		ip->ip_p = IPPROTO_TCP;
		if (open_type == OPEN_TYPE_ACTIVE) {
			ip->ip_src = inp->inp_laddr;
			ip->ip_dst = inp->inp_faddr;
		} else if (open_type == OPEN_TYPE_LISTEN) {
			ip->ip_src = inp->inp_laddr;
			ip->ip_dst = ip->ip_src;
		}

		len += sizeof(*ip);
	}

	th = (void *)&pkt[len];
	if (open_type == OPEN_TYPE_ACTIVE) {
		th->th_sport = inp->inp_lport;	/* network byte order already */
		th->th_dport = inp->inp_fport;	/* ditto */
	} else if (open_type == OPEN_TYPE_LISTEN) {
		th->th_sport = inp->inp_lport;	/* network byte order already */
		th->th_dport = th->th_sport;
	}
	len += sizeof(*th);

	*pktlen = *buflen = len;
	return (pkt);
}
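
/*
 * Pick the offload settings that apply to a connection attempt: with no COP
 * attached a static allow/disallow policy is returned, otherwise the rules
 * are walked in order and the first one whose BPF program matches wins.
 */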
const struct offload_settings *
lookup_offload_policy(struct adapter *sc, int open_type, struct mbuf *m,
    uint16_t vtag, struct inpcb *inp)
{
	const struct t4_offload_policy *op;
	char *pkt;
	struct offload_rule *r;
	int i, matched, pktlen, buflen;
	static const struct offload_settings allow_offloading_settings = {
		.offload = 1,
		.rx_coalesce = -1,
		.cong_algo = -1,
		.sched_class = -1,
		.tstamp = -1,
		.sack = -1,
		.nagle = -1,
		.ecn = -1,
		.ddp = -1,
		.tls = -1,
		.txq = -1,
		.rxq = -1,
		.mss = -1,
	};
	static const struct offload_settings disallow_offloading_settings = {
		.offload = 0,
		/* rest is irrelevant when offload is off. */
	};

	rw_assert(&sc->policy_lock, RA_LOCKED);

	/*
	 * If there's no Connection Offloading Policy attached to the device
	 * then we need to return a default static policy.  If
	 * "cop_managed_offloading" is true, then we need to disallow
	 * offloading until a COP is attached to the device.  Otherwise we
	 * allow offloading ...
	 */
	op = sc->policy;
	if (op == NULL) {
		if (sc->tt.cop_managed_offloading)
			return (&disallow_offloading_settings);
		else
			return (&allow_offloading_settings);
	}

	switch (open_type) {
	case OPEN_TYPE_ACTIVE:
	case OPEN_TYPE_LISTEN:
		pkt = prepare_pkt(open_type, 0xffff, inp, &pktlen, &buflen);
		break;
	case OPEN_TYPE_PASSIVE:
		MPASS(m != NULL);
		pkt = mtod(m, char *);
		MPASS(*pkt == CPL_PASS_ACCEPT_REQ);
		pkt += sizeof(struct cpl_pass_accept_req);
		pktlen = m->m_pkthdr.len - sizeof(struct cpl_pass_accept_req);
		buflen = m->m_len - sizeof(struct cpl_pass_accept_req);
		break;
	default:
		MPASS(0);
		return (&disallow_offloading_settings);
	}

	if (pkt == NULL || pktlen == 0 || buflen == 0)
		return (&disallow_offloading_settings);

	matched = 0;
	r = &op->rule[0];
	for (i = 0; i < op->nrules; i++, r++) {
		if (r->open_type != open_type &&
		    r->open_type != OPEN_TYPE_DONTCARE) {
			continue;
		}
		matched = bpf_filter(r->bpf_prog.bf_insns, pkt, pktlen, buflen);
		if (matched)
			break;
	}

	if (open_type == OPEN_TYPE_ACTIVE || open_type == OPEN_TYPE_LISTEN)
		free(pkt, M_CXGBE);

	return (matched ? &r->settings : &disallow_offloading_settings);
}
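
/*
 * Task that disposes of work requests on the unsent list (those for which L2
 * resolution never completed).  Active opens are failed back to the stack
 * with EHOSTUNREACH; anything else is logged and intentionally not freed so
 * that it can be examined with a debugger.
 */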
static void
reclaim_wr_resources(void *arg, int count)
{
	struct tom_data *td = arg;
	STAILQ_HEAD(, wrqe) twr_list = STAILQ_HEAD_INITIALIZER(twr_list);
	struct cpl_act_open_req *cpl;
	u_int opcode, atid;
	struct wrqe *wr;
	struct adapter *sc;

	mtx_lock(&td->unsent_wr_lock);
	STAILQ_SWAP(&td->unsent_wr_list, &twr_list, wrqe);
	mtx_unlock(&td->unsent_wr_lock);

	while ((wr = STAILQ_FIRST(&twr_list)) != NULL) {
		STAILQ_REMOVE_HEAD(&twr_list, link);

		cpl = wrtod(wr);
		opcode = GET_OPCODE(cpl);

		switch (opcode) {
		case CPL_ACT_OPEN_REQ:
		case CPL_ACT_OPEN_REQ6:
			atid = G_TID_TID(be32toh(OPCODE_TID(cpl)));
			sc = td_adapter(td);

			CTR2(KTR_CXGBE, "%s: atid %u ", __func__, atid);
			act_open_failure_cleanup(sc, atid, EHOSTUNREACH);
			free(wr, M_CXGBE);
			break;
		default:
			log(LOG_ERR, "%s: leaked work request %p, wr_len %d, "
			    "opcode %x\n", __func__, wr, wr->wr_len, opcode);
			/* WR not freed here; go look at it with a debugger. */
		}
	}
}

/*
 * Ground control to Major TOM
 * Commencing countdown, engines on
 */
static int
t4_tom_activate(struct adapter *sc)
{
	struct tom_data *td;
	struct toedev *tod;
	struct vi_info *vi;
	struct sge_ofld_rxq *ofld_rxq;
	int i, j, rc, v;

	ASSERT_SYNCHRONIZED_OP(sc);

	/* per-adapter softc for TOM */
	td = malloc(sizeof(*td), M_CXGBE, M_ZERO | M_NOWAIT);
	if (td == NULL)
		return (ENOMEM);

	/* List of TOE PCBs and associated lock */
	mtx_init(&td->toep_list_lock, "PCB list lock", NULL, MTX_DEF);
	TAILQ_INIT(&td->toep_list);

	/* Listen context */
	mtx_init(&td->lctx_hash_lock, "lctx hash lock", NULL, MTX_DEF);
	td->listen_hash = hashinit_flags(LISTEN_HASH_SIZE, M_CXGBE,
	    &td->listen_mask, HASH_NOWAIT);

	/* List of WRs for which L2 resolution failed */
	mtx_init(&td->unsent_wr_lock, "Unsent WR list lock", NULL, MTX_DEF);
	STAILQ_INIT(&td->unsent_wr_list);
	TASK_INIT(&td->reclaim_wr_resources, 0, reclaim_wr_resources, td);

	/* TID tables */
	rc = alloc_tid_tabs(&sc->tids);
	if (rc != 0)
		goto done;

	rc = t4_init_ppod_region(&td->pr, &sc->vres.ddp,
	    t4_read_reg(sc, A_ULP_RX_TDDP_PSZ), "TDDP page pods");
	if (rc != 0)
		goto done;
	t4_set_reg_field(sc, A_ULP_RX_TDDP_TAGMASK,
	    V_TDDPTAGMASK(M_TDDPTAGMASK), td->pr.pr_tag_mask);

	/* CLIP table for IPv6 offload */
	init_clip_table(sc, td);

	if (sc->vres.key.size != 0) {
		rc = tls_init_kmap(sc, td);
		if (rc != 0)
			goto done;
	}

	/* toedev ops */
	tod = &td->tod;
	init_toedev(tod);
	tod->tod_softc = sc;
	tod->tod_connect = t4_connect;
	tod->tod_listen_start = t4_listen_start;
	tod->tod_listen_stop = t4_listen_stop;
	tod->tod_rcvd = t4_rcvd;
	tod->tod_output = t4_tod_output;
	tod->tod_send_rst = t4_send_rst;
	tod->tod_send_fin = t4_send_fin;
	tod->tod_pcb_detach = t4_pcb_detach;
	tod->tod_l2_update = t4_l2_update;
	tod->tod_syncache_added = t4_syncache_added;
	tod->tod_syncache_removed = t4_syncache_removed;
	tod->tod_syncache_respond = t4_syncache_respond;
	tod->tod_offload_socket = t4_offload_socket;
	tod->tod_ctloutput = t4_ctloutput;
	tod->tod_tcp_info = t4_tcp_info;

	for_each_port(sc, i) {
		for_each_vi(sc->port[i], v, vi) {
			TOEDEV(vi->ifp) = &td->tod;
			for_each_ofld_rxq(vi, j, ofld_rxq) {
				ofld_rxq->iq.set_tcb_rpl = do_set_tcb_rpl;
				ofld_rxq->iq.l2t_write_rpl = do_l2t_write_rpl2;
			}
		}
	}

	sc->tom_softc = td;
	register_toedev(sc->tom_softc);

done:
	if (rc != 0)
		free_tom_data(sc, td);
	return (rc);
}
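
/*
 * Undo t4_tom_activate().  Returns EBUSY while offload is still in use: a
 * port has IFCAP_TOE enabled, iWARP or iSCSI is active, or there are
 * offloaded connections, listeners, or unsent work requests outstanding.
 */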
static int
t4_tom_deactivate(struct adapter *sc)
{
	int rc = 0;
	struct tom_data *td = sc->tom_softc;

	ASSERT_SYNCHRONIZED_OP(sc);

	if (td == NULL)
		return (0);	/* XXX. KASSERT? */

	if (sc->offload_map != 0)
		return (EBUSY);	/* at least one port has IFCAP_TOE enabled */

	if (uld_active(sc, ULD_IWARP) || uld_active(sc, ULD_ISCSI))
		return (EBUSY);	/* both iWARP and iSCSI rely on the TOE. */

	mtx_lock(&td->toep_list_lock);
	if (!TAILQ_EMPTY(&td->toep_list))
		rc = EBUSY;
	mtx_unlock(&td->toep_list_lock);

	mtx_lock(&td->lctx_hash_lock);
	if (td->lctx_count > 0)
		rc = EBUSY;
	mtx_unlock(&td->lctx_hash_lock);

	taskqueue_drain(taskqueue_thread, &td->reclaim_wr_resources);
	mtx_lock(&td->unsent_wr_lock);
	if (!STAILQ_EMPTY(&td->unsent_wr_list))
		rc = EBUSY;
	mtx_unlock(&td->unsent_wr_lock);

	if (rc == 0) {
		unregister_toedev(sc->tom_softc);
		free_tom_data(sc, td);
		sc->tom_softc = NULL;
	}

	return (rc);
}

static void
t4_tom_ifaddr_event(void *arg __unused, struct ifnet *ifp)
{

	atomic_add_rel_int(&in6_ifaddr_gen, 1);
	taskqueue_enqueue_timeout(taskqueue_thread, &clip_task, -hz / 4);
}

static int
t4_aio_queue_tom(struct socket *so, struct kaiocb *job)
{
	struct tcpcb *tp = so_sototcpcb(so);
	struct toepcb *toep = tp->t_toe;
	int error;

	if (toep->ulp_mode == ULP_MODE_TCPDDP) {
		error = t4_aio_queue_ddp(so, job);
		if (error != EOPNOTSUPP)
			return (error);
	}

	return (t4_aio_queue_aiotx(so, job));
}

static int
t4_ctloutput_tom(struct socket *so, struct sockopt *sopt)
{

	if (sopt->sopt_level != IPPROTO_TCP)
		return (tcp_ctloutput(so, sopt));

	switch (sopt->sopt_name) {
	case TCP_TLSOM_SET_TLS_CONTEXT:
	case TCP_TLSOM_GET_TLS_TOM:
	case TCP_TLSOM_CLR_TLS_TOM:
	case TCP_TLSOM_CLR_QUIES:
		return (t4_ctloutput_tls(so, sopt));
	default:
		return (tcp_ctloutput(so, sopt));
	}
}

static int
t4_tom_mod_load(void)
{
	struct protosw *tcp_protosw, *tcp6_protosw;

	/* CPL handlers */
	t4_init_connect_cpl_handlers();
	t4_init_listen_cpl_handlers();
	t4_init_cpl_io_handlers();

	t4_ddp_mod_load();
	t4_tls_mod_load();

	tcp_protosw = pffindproto(PF_INET, IPPROTO_TCP, SOCK_STREAM);
	if (tcp_protosw == NULL)
		return (ENOPROTOOPT);
	bcopy(tcp_protosw, &toe_protosw, sizeof(toe_protosw));
	bcopy(tcp_protosw->pr_usrreqs, &toe_usrreqs, sizeof(toe_usrreqs));
	toe_usrreqs.pru_aio_queue = t4_aio_queue_tom;
	toe_protosw.pr_ctloutput = t4_ctloutput_tom;
	toe_protosw.pr_usrreqs = &toe_usrreqs;

	tcp6_protosw = pffindproto(PF_INET6, IPPROTO_TCP, SOCK_STREAM);
	if (tcp6_protosw == NULL)
		return (ENOPROTOOPT);
	bcopy(tcp6_protosw, &toe6_protosw, sizeof(toe6_protosw));
	bcopy(tcp6_protosw->pr_usrreqs, &toe6_usrreqs, sizeof(toe6_usrreqs));
	toe6_usrreqs.pru_aio_queue = t4_aio_queue_tom;
	toe6_protosw.pr_ctloutput = t4_ctloutput_tom;
	toe6_protosw.pr_usrreqs = &toe6_usrreqs;

	TIMEOUT_TASK_INIT(taskqueue_thread, &clip_task, 0, t4_clip_task, NULL);
	ifaddr_evhandler = EVENTHANDLER_REGISTER(ifaddr_event,
	    t4_tom_ifaddr_event, NULL, EVENTHANDLER_PRI_ANY);

	return (t4_register_uld(&tom_uld_info));
}

static void
tom_uninit(struct adapter *sc, void *arg __unused)
{
	if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4tomun"))
		return;

	/* Try to free resources (works only if no port has IFCAP_TOE) */
	if (uld_active(sc, ULD_TOM))
		t4_deactivate_uld(sc, ULD_TOM);

	end_synchronized_op(sc, 0);
}

static int
t4_tom_mod_unload(void)
{
	t4_iterate(tom_uninit, NULL);

	if (t4_unregister_uld(&tom_uld_info) == EBUSY)
		return (EBUSY);

	if (ifaddr_evhandler) {
		EVENTHANDLER_DEREGISTER(ifaddr_event, ifaddr_evhandler);
		taskqueue_cancel_timeout(taskqueue_thread, &clip_task, NULL);
	}

	t4_tls_mod_unload();
	t4_ddp_mod_unload();

	t4_uninit_connect_cpl_handlers();
	t4_uninit_listen_cpl_handlers();
	t4_uninit_cpl_io_handlers();

	return (0);
}
#endif	/* TCP_OFFLOAD */

static int
t4_tom_modevent(module_t mod, int cmd, void *arg)
{
	int rc = 0;

#ifdef TCP_OFFLOAD
	switch (cmd) {
	case MOD_LOAD:
		rc = t4_tom_mod_load();
		break;

	case MOD_UNLOAD:
		rc = t4_tom_mod_unload();
		break;

	default:
		rc = EINVAL;
	}
#else
	printf("t4_tom: compiled without TCP_OFFLOAD support.\n");
	rc = EOPNOTSUPP;
#endif
	return (rc);
}

static moduledata_t t4_tom_moddata = {
	"t4_tom",
	t4_tom_modevent,
	0
};

MODULE_VERSION(t4_tom, 1);
MODULE_DEPEND(t4_tom, toecore, 1, 1, 1);
MODULE_DEPEND(t4_tom, t4nex, 1, 1, 1);
DECLARE_MODULE(t4_tom, t4_tom_moddata, SI_SUB_EXEC, SI_ORDER_ANY);