1 /*- 2 * Copyright (c) 2012 Chelsio Communications, Inc. 3 * All rights reserved. 4 * Written by: Navdeep Parhar <np@FreeBSD.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 #include "opt_inet.h" 32 #include "opt_inet6.h" 33 34 #include <sys/param.h> 35 #include <sys/types.h> 36 #include <sys/systm.h> 37 #include <sys/kernel.h> 38 #include <sys/ktr.h> 39 #include <sys/lock.h> 40 #include <sys/limits.h> 41 #include <sys/module.h> 42 #include <sys/protosw.h> 43 #include <sys/domain.h> 44 #include <sys/refcount.h> 45 #include <sys/rmlock.h> 46 #include <sys/socket.h> 47 #include <sys/socketvar.h> 48 #include <sys/taskqueue.h> 49 #include <net/if.h> 50 #include <net/if_var.h> 51 #include <netinet/in.h> 52 #include <netinet/in_pcb.h> 53 #include <netinet/in_var.h> 54 #include <netinet/ip.h> 55 #include <netinet/ip6.h> 56 #include <netinet6/scope6_var.h> 57 #define TCPSTATES 58 #include <netinet/tcp_fsm.h> 59 #include <netinet/tcp_var.h> 60 #include <netinet/toecore.h> 61 62 #ifdef TCP_OFFLOAD 63 #include "common/common.h" 64 #include "common/t4_msg.h" 65 #include "common/t4_regs.h" 66 #include "common/t4_regs_values.h" 67 #include "common/t4_tcb.h" 68 #include "tom/t4_tom_l2t.h" 69 #include "tom/t4_tom.h" 70 71 static struct protosw ddp_protosw; 72 static struct pr_usrreqs ddp_usrreqs; 73 74 static struct protosw ddp6_protosw; 75 static struct pr_usrreqs ddp6_usrreqs; 76 77 /* Module ops */ 78 static int t4_tom_mod_load(void); 79 static int t4_tom_mod_unload(void); 80 static int t4_tom_modevent(module_t, int, void *); 81 82 /* ULD ops and helpers */ 83 static int t4_tom_activate(struct adapter *); 84 static int t4_tom_deactivate(struct adapter *); 85 86 static struct uld_info tom_uld_info = { 87 .uld_id = ULD_TOM, 88 .activate = t4_tom_activate, 89 .deactivate = t4_tom_deactivate, 90 }; 91 92 static void queue_tid_release(struct adapter *, int); 93 static void release_offload_resources(struct toepcb *); 94 static int alloc_tid_tabs(struct tid_info *); 95 static void free_tid_tabs(struct tid_info *); 96 static int add_lip(struct adapter *, struct in6_addr *); 97 static int delete_lip(struct adapter *, struct in6_addr *); 98 static struct clip_entry *search_lip(struct tom_data *, struct in6_addr *); 99 static void init_clip_table(struct adapter *, struct tom_data *); 100 static void update_clip(struct adapter *, void *); 101 static void t4_clip_task(void *, int); 102 static void update_clip_table(struct adapter *, struct tom_data *); 103 static void destroy_clip_table(struct adapter *, struct tom_data *); 104 static void free_tom_data(struct adapter *, struct tom_data *); 105 static void reclaim_wr_resources(void *, int); 106 107 static int in6_ifaddr_gen; 108 static eventhandler_tag ifaddr_evhandler; 109 static struct timeout_task clip_task; 110 111 struct toepcb * 112 alloc_toepcb(struct vi_info *vi, int txqid, int rxqid, int flags) 113 { 114 struct port_info *pi = vi->pi; 115 struct adapter *sc = pi->adapter; 116 struct toepcb *toep; 117 int tx_credits, txsd_total, len; 118 119 /* 120 * The firmware counts tx work request credits in units of 16 bytes 121 * each. Reserve room for an ABORT_REQ so the driver never has to worry 122 * about tx credits if it wants to abort a connection. 123 */ 124 tx_credits = sc->params.ofldq_wr_cred; 125 tx_credits -= howmany(sizeof(struct cpl_abort_req), 16); 126 127 /* 128 * Shortest possible tx work request is a fw_ofld_tx_data_wr + 1 byte 129 * immediate payload, and firmware counts tx work request credits in 130 * units of 16 byte. Calculate the maximum work requests possible. 131 */ 132 txsd_total = tx_credits / 133 howmany(sizeof(struct fw_ofld_tx_data_wr) + 1, 16); 134 135 if (txqid < 0) 136 txqid = (arc4random() % vi->nofldtxq) + vi->first_ofld_txq; 137 KASSERT(txqid >= vi->first_ofld_txq && 138 txqid < vi->first_ofld_txq + vi->nofldtxq, 139 ("%s: txqid %d for vi %p (first %d, n %d)", __func__, txqid, vi, 140 vi->first_ofld_txq, vi->nofldtxq)); 141 142 if (rxqid < 0) 143 rxqid = (arc4random() % vi->nofldrxq) + vi->first_ofld_rxq; 144 KASSERT(rxqid >= vi->first_ofld_rxq && 145 rxqid < vi->first_ofld_rxq + vi->nofldrxq, 146 ("%s: rxqid %d for vi %p (first %d, n %d)", __func__, rxqid, vi, 147 vi->first_ofld_rxq, vi->nofldrxq)); 148 149 len = offsetof(struct toepcb, txsd) + 150 txsd_total * sizeof(struct ofld_tx_sdesc); 151 152 toep = malloc(len, M_CXGBE, M_ZERO | flags); 153 if (toep == NULL) 154 return (NULL); 155 156 refcount_init(&toep->refcount, 1); 157 toep->td = sc->tom_softc; 158 toep->vi = vi; 159 toep->tx_total = tx_credits; 160 toep->tx_credits = tx_credits; 161 toep->ofld_txq = &sc->sge.ofld_txq[txqid]; 162 toep->ofld_rxq = &sc->sge.ofld_rxq[rxqid]; 163 toep->ctrlq = &sc->sge.ctrlq[pi->port_id]; 164 mbufq_init(&toep->ulp_pduq, INT_MAX); 165 mbufq_init(&toep->ulp_pdu_reclaimq, INT_MAX); 166 toep->txsd_total = txsd_total; 167 toep->txsd_avail = txsd_total; 168 toep->txsd_pidx = 0; 169 toep->txsd_cidx = 0; 170 ddp_init_toep(toep); 171 172 return (toep); 173 } 174 175 struct toepcb * 176 hold_toepcb(struct toepcb *toep) 177 { 178 179 refcount_acquire(&toep->refcount); 180 return (toep); 181 } 182 183 void 184 free_toepcb(struct toepcb *toep) 185 { 186 187 if (refcount_release(&toep->refcount) == 0) 188 return; 189 190 KASSERT(!(toep->flags & TPF_ATTACHED), 191 ("%s: attached to an inpcb", __func__)); 192 KASSERT(!(toep->flags & TPF_CPL_PENDING), 193 ("%s: CPL pending", __func__)); 194 195 ddp_uninit_toep(toep); 196 free(toep, M_CXGBE); 197 } 198 199 /* 200 * Set up the socket for TCP offload. 201 */ 202 void 203 offload_socket(struct socket *so, struct toepcb *toep) 204 { 205 struct tom_data *td = toep->td; 206 struct inpcb *inp = sotoinpcb(so); 207 struct tcpcb *tp = intotcpcb(inp); 208 struct sockbuf *sb; 209 210 INP_WLOCK_ASSERT(inp); 211 212 /* Update socket */ 213 sb = &so->so_snd; 214 SOCKBUF_LOCK(sb); 215 sb->sb_flags |= SB_NOCOALESCE; 216 SOCKBUF_UNLOCK(sb); 217 sb = &so->so_rcv; 218 SOCKBUF_LOCK(sb); 219 sb->sb_flags |= SB_NOCOALESCE; 220 if (toep->ulp_mode == ULP_MODE_TCPDDP) { 221 if (inp->inp_vflag & INP_IPV6) 222 so->so_proto = &ddp6_protosw; 223 else 224 so->so_proto = &ddp_protosw; 225 } 226 SOCKBUF_UNLOCK(sb); 227 228 /* Update TCP PCB */ 229 tp->tod = &td->tod; 230 tp->t_toe = toep; 231 tp->t_flags |= TF_TOE; 232 233 /* Install an extra hold on inp */ 234 toep->inp = inp; 235 toep->flags |= TPF_ATTACHED; 236 in_pcbref(inp); 237 238 /* Add the TOE PCB to the active list */ 239 mtx_lock(&td->toep_list_lock); 240 TAILQ_INSERT_HEAD(&td->toep_list, toep, link); 241 mtx_unlock(&td->toep_list_lock); 242 } 243 244 /* This is _not_ the normal way to "unoffload" a socket. */ 245 void 246 undo_offload_socket(struct socket *so) 247 { 248 struct inpcb *inp = sotoinpcb(so); 249 struct tcpcb *tp = intotcpcb(inp); 250 struct toepcb *toep = tp->t_toe; 251 struct tom_data *td = toep->td; 252 struct sockbuf *sb; 253 254 INP_WLOCK_ASSERT(inp); 255 256 sb = &so->so_snd; 257 SOCKBUF_LOCK(sb); 258 sb->sb_flags &= ~SB_NOCOALESCE; 259 SOCKBUF_UNLOCK(sb); 260 sb = &so->so_rcv; 261 SOCKBUF_LOCK(sb); 262 sb->sb_flags &= ~SB_NOCOALESCE; 263 SOCKBUF_UNLOCK(sb); 264 265 tp->tod = NULL; 266 tp->t_toe = NULL; 267 tp->t_flags &= ~TF_TOE; 268 269 toep->inp = NULL; 270 toep->flags &= ~TPF_ATTACHED; 271 if (in_pcbrele_wlocked(inp)) 272 panic("%s: inp freed.", __func__); 273 274 mtx_lock(&td->toep_list_lock); 275 TAILQ_REMOVE(&td->toep_list, toep, link); 276 mtx_unlock(&td->toep_list_lock); 277 278 free_toepcb(toep); 279 } 280 281 static void 282 release_offload_resources(struct toepcb *toep) 283 { 284 struct tom_data *td = toep->td; 285 struct adapter *sc = td_adapter(td); 286 int tid = toep->tid; 287 288 KASSERT(!(toep->flags & TPF_CPL_PENDING), 289 ("%s: %p has CPL pending.", __func__, toep)); 290 KASSERT(!(toep->flags & TPF_ATTACHED), 291 ("%s: %p is still attached.", __func__, toep)); 292 293 CTR5(KTR_CXGBE, "%s: toep %p (tid %d, l2te %p, ce %p)", 294 __func__, toep, tid, toep->l2te, toep->ce); 295 296 /* 297 * These queues should have been emptied at approximately the same time 298 * that a normal connection's socket's so_snd would have been purged or 299 * drained. Do _not_ clean up here. 300 */ 301 MPASS(mbufq_len(&toep->ulp_pduq) == 0); 302 MPASS(mbufq_len(&toep->ulp_pdu_reclaimq) == 0); 303 #ifdef INVARIANTS 304 ddp_assert_empty(toep); 305 #endif 306 307 if (toep->l2te) 308 t4_l2t_release(toep->l2te); 309 310 if (tid >= 0) { 311 remove_tid(sc, tid); 312 release_tid(sc, tid, toep->ctrlq); 313 } 314 315 if (toep->ce) 316 release_lip(td, toep->ce); 317 318 mtx_lock(&td->toep_list_lock); 319 TAILQ_REMOVE(&td->toep_list, toep, link); 320 mtx_unlock(&td->toep_list_lock); 321 322 free_toepcb(toep); 323 } 324 325 /* 326 * The kernel is done with the TCP PCB and this is our opportunity to unhook the 327 * toepcb hanging off of it. If the TOE driver is also done with the toepcb (no 328 * pending CPL) then it is time to release all resources tied to the toepcb. 329 * 330 * Also gets called when an offloaded active open fails and the TOM wants the 331 * kernel to take the TCP PCB back. 332 */ 333 static void 334 t4_pcb_detach(struct toedev *tod __unused, struct tcpcb *tp) 335 { 336 #if defined(KTR) || defined(INVARIANTS) 337 struct inpcb *inp = tp->t_inpcb; 338 #endif 339 struct toepcb *toep = tp->t_toe; 340 341 INP_WLOCK_ASSERT(inp); 342 343 KASSERT(toep != NULL, ("%s: toep is NULL", __func__)); 344 KASSERT(toep->flags & TPF_ATTACHED, 345 ("%s: not attached", __func__)); 346 347 #ifdef KTR 348 if (tp->t_state == TCPS_SYN_SENT) { 349 CTR6(KTR_CXGBE, "%s: atid %d, toep %p (0x%x), inp %p (0x%x)", 350 __func__, toep->tid, toep, toep->flags, inp, 351 inp->inp_flags); 352 } else { 353 CTR6(KTR_CXGBE, 354 "t4_pcb_detach: tid %d (%s), toep %p (0x%x), inp %p (0x%x)", 355 toep->tid, tcpstates[tp->t_state], toep, toep->flags, inp, 356 inp->inp_flags); 357 } 358 #endif 359 360 tp->t_toe = NULL; 361 tp->t_flags &= ~TF_TOE; 362 toep->flags &= ~TPF_ATTACHED; 363 364 if (!(toep->flags & TPF_CPL_PENDING)) 365 release_offload_resources(toep); 366 } 367 368 /* 369 * setsockopt handler. 370 */ 371 static void 372 t4_ctloutput(struct toedev *tod, struct tcpcb *tp, int dir, int name) 373 { 374 struct adapter *sc = tod->tod_softc; 375 struct toepcb *toep = tp->t_toe; 376 377 if (dir == SOPT_GET) 378 return; 379 380 CTR4(KTR_CXGBE, "%s: tp %p, dir %u, name %u", __func__, tp, dir, name); 381 382 switch (name) { 383 case TCP_NODELAY: 384 t4_set_tcb_field(sc, toep->ctrlq, toep->tid, W_TCB_T_FLAGS, 385 V_TF_NAGLE(1), V_TF_NAGLE(tp->t_flags & TF_NODELAY ? 0 : 1), 386 0, 0, toep->ofld_rxq->iq.abs_id); 387 break; 388 default: 389 break; 390 } 391 } 392 393 /* 394 * The TOE driver will not receive any more CPLs for the tid associated with the 395 * toepcb; release the hold on the inpcb. 396 */ 397 void 398 final_cpl_received(struct toepcb *toep) 399 { 400 struct inpcb *inp = toep->inp; 401 402 KASSERT(inp != NULL, ("%s: inp is NULL", __func__)); 403 INP_WLOCK_ASSERT(inp); 404 KASSERT(toep->flags & TPF_CPL_PENDING, 405 ("%s: CPL not pending already?", __func__)); 406 407 CTR6(KTR_CXGBE, "%s: tid %d, toep %p (0x%x), inp %p (0x%x)", 408 __func__, toep->tid, toep, toep->flags, inp, inp->inp_flags); 409 410 if (toep->ulp_mode == ULP_MODE_TCPDDP) 411 release_ddp_resources(toep); 412 toep->inp = NULL; 413 toep->flags &= ~TPF_CPL_PENDING; 414 mbufq_drain(&toep->ulp_pdu_reclaimq); 415 416 if (!(toep->flags & TPF_ATTACHED)) 417 release_offload_resources(toep); 418 419 if (!in_pcbrele_wlocked(inp)) 420 INP_WUNLOCK(inp); 421 } 422 423 void 424 insert_tid(struct adapter *sc, int tid, void *ctx) 425 { 426 struct tid_info *t = &sc->tids; 427 428 t->tid_tab[tid] = ctx; 429 atomic_add_int(&t->tids_in_use, 1); 430 } 431 432 void * 433 lookup_tid(struct adapter *sc, int tid) 434 { 435 struct tid_info *t = &sc->tids; 436 437 return (t->tid_tab[tid]); 438 } 439 440 void 441 update_tid(struct adapter *sc, int tid, void *ctx) 442 { 443 struct tid_info *t = &sc->tids; 444 445 t->tid_tab[tid] = ctx; 446 } 447 448 void 449 remove_tid(struct adapter *sc, int tid) 450 { 451 struct tid_info *t = &sc->tids; 452 453 t->tid_tab[tid] = NULL; 454 atomic_subtract_int(&t->tids_in_use, 1); 455 } 456 457 void 458 release_tid(struct adapter *sc, int tid, struct sge_wrq *ctrlq) 459 { 460 struct wrqe *wr; 461 struct cpl_tid_release *req; 462 463 wr = alloc_wrqe(sizeof(*req), ctrlq); 464 if (wr == NULL) { 465 queue_tid_release(sc, tid); /* defer */ 466 return; 467 } 468 req = wrtod(wr); 469 470 INIT_TP_WR_MIT_CPL(req, CPL_TID_RELEASE, tid); 471 472 t4_wrq_tx(sc, wr); 473 } 474 475 static void 476 queue_tid_release(struct adapter *sc, int tid) 477 { 478 479 CXGBE_UNIMPLEMENTED("deferred tid release"); 480 } 481 482 /* 483 * What mtu_idx to use, given a 4-tuple and/or an MSS cap 484 */ 485 int 486 find_best_mtu_idx(struct adapter *sc, struct in_conninfo *inc, int pmss) 487 { 488 unsigned short *mtus = &sc->params.mtus[0]; 489 int i, mss, n; 490 491 KASSERT(inc != NULL || pmss > 0, 492 ("%s: at least one of inc/pmss must be specified", __func__)); 493 494 mss = inc ? tcp_mssopt(inc) : pmss; 495 if (pmss > 0 && mss > pmss) 496 mss = pmss; 497 498 if (inc->inc_flags & INC_ISIPV6) 499 n = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); 500 else 501 n = sizeof(struct ip) + sizeof(struct tcphdr); 502 503 for (i = 0; i < NMTUS - 1 && mtus[i + 1] <= mss + n; i++) 504 continue; 505 506 return (i); 507 } 508 509 /* 510 * Determine the receive window size for a socket. 511 */ 512 u_long 513 select_rcv_wnd(struct socket *so) 514 { 515 unsigned long wnd; 516 517 SOCKBUF_LOCK_ASSERT(&so->so_rcv); 518 519 wnd = sbspace(&so->so_rcv); 520 if (wnd < MIN_RCV_WND) 521 wnd = MIN_RCV_WND; 522 523 return min(wnd, MAX_RCV_WND); 524 } 525 526 int 527 select_rcv_wscale(void) 528 { 529 int wscale = 0; 530 unsigned long space = sb_max; 531 532 if (space > MAX_RCV_WND) 533 space = MAX_RCV_WND; 534 535 while (wscale < TCP_MAX_WINSHIFT && (TCP_MAXWIN << wscale) < space) 536 wscale++; 537 538 return (wscale); 539 } 540 541 extern int always_keepalive; 542 #define VIID_SMACIDX(v) (((unsigned int)(v) & 0x7f) << 1) 543 544 /* 545 * socket so could be a listening socket too. 546 */ 547 uint64_t 548 calc_opt0(struct socket *so, struct vi_info *vi, struct l2t_entry *e, 549 int mtu_idx, int rscale, int rx_credits, int ulp_mode) 550 { 551 uint64_t opt0; 552 553 KASSERT(rx_credits <= M_RCV_BUFSIZ, 554 ("%s: rcv_bufsiz too high", __func__)); 555 556 opt0 = F_TCAM_BYPASS | V_WND_SCALE(rscale) | V_MSS_IDX(mtu_idx) | 557 V_ULP_MODE(ulp_mode) | V_RCV_BUFSIZ(rx_credits); 558 559 if (so != NULL) { 560 struct inpcb *inp = sotoinpcb(so); 561 struct tcpcb *tp = intotcpcb(inp); 562 int keepalive = always_keepalive || 563 so_options_get(so) & SO_KEEPALIVE; 564 565 opt0 |= V_NAGLE((tp->t_flags & TF_NODELAY) == 0); 566 opt0 |= V_KEEP_ALIVE(keepalive != 0); 567 } 568 569 if (e != NULL) 570 opt0 |= V_L2T_IDX(e->idx); 571 572 if (vi != NULL) { 573 opt0 |= V_SMAC_SEL(VIID_SMACIDX(vi->viid)); 574 opt0 |= V_TX_CHAN(vi->pi->tx_chan); 575 } 576 577 return htobe64(opt0); 578 } 579 580 uint64_t 581 select_ntuple(struct vi_info *vi, struct l2t_entry *e) 582 { 583 struct adapter *sc = vi->pi->adapter; 584 struct tp_params *tp = &sc->params.tp; 585 uint16_t viid = vi->viid; 586 uint64_t ntuple = 0; 587 588 /* 589 * Initialize each of the fields which we care about which are present 590 * in the Compressed Filter Tuple. 591 */ 592 if (tp->vlan_shift >= 0 && e->vlan != CPL_L2T_VLAN_NONE) 593 ntuple |= (uint64_t)(F_FT_VLAN_VLD | e->vlan) << tp->vlan_shift; 594 595 if (tp->port_shift >= 0) 596 ntuple |= (uint64_t)e->lport << tp->port_shift; 597 598 if (tp->protocol_shift >= 0) 599 ntuple |= (uint64_t)IPPROTO_TCP << tp->protocol_shift; 600 601 if (tp->vnic_shift >= 0) { 602 uint32_t vf = G_FW_VIID_VIN(viid); 603 uint32_t pf = G_FW_VIID_PFN(viid); 604 uint32_t vld = G_FW_VIID_VIVLD(viid); 605 606 ntuple |= (uint64_t)(V_FT_VNID_ID_VF(vf) | V_FT_VNID_ID_PF(pf) | 607 V_FT_VNID_ID_VLD(vld)) << tp->vnic_shift; 608 } 609 610 if (is_t4(sc)) 611 return (htobe32((uint32_t)ntuple)); 612 else 613 return (htobe64(V_FILTER_TUPLE(ntuple))); 614 } 615 616 void 617 set_tcpddp_ulp_mode(struct toepcb *toep) 618 { 619 620 toep->ulp_mode = ULP_MODE_TCPDDP; 621 toep->ddp_flags = DDP_OK; 622 } 623 624 int 625 negative_advice(int status) 626 { 627 628 return (status == CPL_ERR_RTX_NEG_ADVICE || 629 status == CPL_ERR_PERSIST_NEG_ADVICE || 630 status == CPL_ERR_KEEPALV_NEG_ADVICE); 631 } 632 633 static int 634 alloc_tid_tabs(struct tid_info *t) 635 { 636 size_t size; 637 unsigned int i; 638 639 size = t->ntids * sizeof(*t->tid_tab) + 640 t->natids * sizeof(*t->atid_tab) + 641 t->nstids * sizeof(*t->stid_tab); 642 643 t->tid_tab = malloc(size, M_CXGBE, M_ZERO | M_NOWAIT); 644 if (t->tid_tab == NULL) 645 return (ENOMEM); 646 647 mtx_init(&t->atid_lock, "atid lock", NULL, MTX_DEF); 648 t->atid_tab = (union aopen_entry *)&t->tid_tab[t->ntids]; 649 t->afree = t->atid_tab; 650 t->atids_in_use = 0; 651 for (i = 1; i < t->natids; i++) 652 t->atid_tab[i - 1].next = &t->atid_tab[i]; 653 t->atid_tab[t->natids - 1].next = NULL; 654 655 mtx_init(&t->stid_lock, "stid lock", NULL, MTX_DEF); 656 t->stid_tab = (struct listen_ctx **)&t->atid_tab[t->natids]; 657 t->stids_in_use = 0; 658 TAILQ_INIT(&t->stids); 659 t->nstids_free_head = t->nstids; 660 661 atomic_store_rel_int(&t->tids_in_use, 0); 662 663 return (0); 664 } 665 666 static void 667 free_tid_tabs(struct tid_info *t) 668 { 669 KASSERT(t->tids_in_use == 0, 670 ("%s: %d tids still in use.", __func__, t->tids_in_use)); 671 KASSERT(t->atids_in_use == 0, 672 ("%s: %d atids still in use.", __func__, t->atids_in_use)); 673 KASSERT(t->stids_in_use == 0, 674 ("%s: %d tids still in use.", __func__, t->stids_in_use)); 675 676 free(t->tid_tab, M_CXGBE); 677 t->tid_tab = NULL; 678 679 if (mtx_initialized(&t->atid_lock)) 680 mtx_destroy(&t->atid_lock); 681 if (mtx_initialized(&t->stid_lock)) 682 mtx_destroy(&t->stid_lock); 683 } 684 685 static int 686 add_lip(struct adapter *sc, struct in6_addr *lip) 687 { 688 struct fw_clip_cmd c; 689 690 ASSERT_SYNCHRONIZED_OP(sc); 691 /* mtx_assert(&td->clip_table_lock, MA_OWNED); */ 692 693 memset(&c, 0, sizeof(c)); 694 c.op_to_write = htonl(V_FW_CMD_OP(FW_CLIP_CMD) | F_FW_CMD_REQUEST | 695 F_FW_CMD_WRITE); 696 c.alloc_to_len16 = htonl(F_FW_CLIP_CMD_ALLOC | FW_LEN16(c)); 697 c.ip_hi = *(uint64_t *)&lip->s6_addr[0]; 698 c.ip_lo = *(uint64_t *)&lip->s6_addr[8]; 699 700 return (-t4_wr_mbox_ns(sc, sc->mbox, &c, sizeof(c), &c)); 701 } 702 703 static int 704 delete_lip(struct adapter *sc, struct in6_addr *lip) 705 { 706 struct fw_clip_cmd c; 707 708 ASSERT_SYNCHRONIZED_OP(sc); 709 /* mtx_assert(&td->clip_table_lock, MA_OWNED); */ 710 711 memset(&c, 0, sizeof(c)); 712 c.op_to_write = htonl(V_FW_CMD_OP(FW_CLIP_CMD) | F_FW_CMD_REQUEST | 713 F_FW_CMD_READ); 714 c.alloc_to_len16 = htonl(F_FW_CLIP_CMD_FREE | FW_LEN16(c)); 715 c.ip_hi = *(uint64_t *)&lip->s6_addr[0]; 716 c.ip_lo = *(uint64_t *)&lip->s6_addr[8]; 717 718 return (-t4_wr_mbox_ns(sc, sc->mbox, &c, sizeof(c), &c)); 719 } 720 721 static struct clip_entry * 722 search_lip(struct tom_data *td, struct in6_addr *lip) 723 { 724 struct clip_entry *ce; 725 726 mtx_assert(&td->clip_table_lock, MA_OWNED); 727 728 TAILQ_FOREACH(ce, &td->clip_table, link) { 729 if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip)) 730 return (ce); 731 } 732 733 return (NULL); 734 } 735 736 struct clip_entry * 737 hold_lip(struct tom_data *td, struct in6_addr *lip) 738 { 739 struct clip_entry *ce; 740 741 mtx_lock(&td->clip_table_lock); 742 ce = search_lip(td, lip); 743 if (ce != NULL) 744 ce->refcount++; 745 mtx_unlock(&td->clip_table_lock); 746 747 return (ce); 748 } 749 750 void 751 release_lip(struct tom_data *td, struct clip_entry *ce) 752 { 753 754 mtx_lock(&td->clip_table_lock); 755 KASSERT(search_lip(td, &ce->lip) == ce, 756 ("%s: CLIP entry %p p not in CLIP table.", __func__, ce)); 757 KASSERT(ce->refcount > 0, 758 ("%s: CLIP entry %p has refcount 0", __func__, ce)); 759 --ce->refcount; 760 mtx_unlock(&td->clip_table_lock); 761 } 762 763 static void 764 init_clip_table(struct adapter *sc, struct tom_data *td) 765 { 766 767 ASSERT_SYNCHRONIZED_OP(sc); 768 769 mtx_init(&td->clip_table_lock, "CLIP table lock", NULL, MTX_DEF); 770 TAILQ_INIT(&td->clip_table); 771 td->clip_gen = -1; 772 773 update_clip_table(sc, td); 774 } 775 776 static void 777 update_clip(struct adapter *sc, void *arg __unused) 778 { 779 780 if (begin_synchronized_op(sc, NULL, HOLD_LOCK, "t4tomuc")) 781 return; 782 783 if (uld_active(sc, ULD_TOM)) 784 update_clip_table(sc, sc->tom_softc); 785 786 end_synchronized_op(sc, LOCK_HELD); 787 } 788 789 static void 790 t4_clip_task(void *arg, int count) 791 { 792 793 t4_iterate(update_clip, NULL); 794 } 795 796 static void 797 update_clip_table(struct adapter *sc, struct tom_data *td) 798 { 799 struct rm_priotracker in6_ifa_tracker; 800 struct in6_ifaddr *ia; 801 struct in6_addr *lip, tlip; 802 struct clip_head stale; 803 struct clip_entry *ce, *ce_temp; 804 int rc, gen = atomic_load_acq_int(&in6_ifaddr_gen); 805 806 ASSERT_SYNCHRONIZED_OP(sc); 807 808 IN6_IFADDR_RLOCK(&in6_ifa_tracker); 809 mtx_lock(&td->clip_table_lock); 810 811 if (gen == td->clip_gen) 812 goto done; 813 814 TAILQ_INIT(&stale); 815 TAILQ_CONCAT(&stale, &td->clip_table, link); 816 817 TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) { 818 lip = &ia->ia_addr.sin6_addr; 819 820 KASSERT(!IN6_IS_ADDR_MULTICAST(lip), 821 ("%s: mcast address in in6_ifaddr list", __func__)); 822 823 if (IN6_IS_ADDR_LOOPBACK(lip)) 824 continue; 825 if (IN6_IS_SCOPE_EMBED(lip)) { 826 /* Remove the embedded scope */ 827 tlip = *lip; 828 lip = &tlip; 829 in6_clearscope(lip); 830 } 831 /* 832 * XXX: how to weed out the link local address for the loopback 833 * interface? It's fe80::1 usually (always?). 834 */ 835 836 /* 837 * If it's in the main list then we already know it's not stale. 838 */ 839 TAILQ_FOREACH(ce, &td->clip_table, link) { 840 if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip)) 841 goto next; 842 } 843 844 /* 845 * If it's in the stale list we should move it to the main list. 846 */ 847 TAILQ_FOREACH(ce, &stale, link) { 848 if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip)) { 849 TAILQ_REMOVE(&stale, ce, link); 850 TAILQ_INSERT_TAIL(&td->clip_table, ce, link); 851 goto next; 852 } 853 } 854 855 /* A new IP6 address; add it to the CLIP table */ 856 ce = malloc(sizeof(*ce), M_CXGBE, M_NOWAIT); 857 memcpy(&ce->lip, lip, sizeof(ce->lip)); 858 ce->refcount = 0; 859 rc = add_lip(sc, lip); 860 if (rc == 0) 861 TAILQ_INSERT_TAIL(&td->clip_table, ce, link); 862 else { 863 char ip[INET6_ADDRSTRLEN]; 864 865 inet_ntop(AF_INET6, &ce->lip, &ip[0], sizeof(ip)); 866 log(LOG_ERR, "%s: could not add %s (%d)\n", 867 __func__, ip, rc); 868 free(ce, M_CXGBE); 869 } 870 next: 871 continue; 872 } 873 874 /* 875 * Remove stale addresses (those no longer in V_in6_ifaddrhead) that are 876 * no longer referenced by the driver. 877 */ 878 TAILQ_FOREACH_SAFE(ce, &stale, link, ce_temp) { 879 if (ce->refcount == 0) { 880 rc = delete_lip(sc, &ce->lip); 881 if (rc == 0) { 882 TAILQ_REMOVE(&stale, ce, link); 883 free(ce, M_CXGBE); 884 } else { 885 char ip[INET6_ADDRSTRLEN]; 886 887 inet_ntop(AF_INET6, &ce->lip, &ip[0], 888 sizeof(ip)); 889 log(LOG_ERR, "%s: could not delete %s (%d)\n", 890 __func__, ip, rc); 891 } 892 } 893 } 894 /* The ones that are still referenced need to stay in the CLIP table */ 895 TAILQ_CONCAT(&td->clip_table, &stale, link); 896 897 td->clip_gen = gen; 898 done: 899 mtx_unlock(&td->clip_table_lock); 900 IN6_IFADDR_RUNLOCK(&in6_ifa_tracker); 901 } 902 903 static void 904 destroy_clip_table(struct adapter *sc, struct tom_data *td) 905 { 906 struct clip_entry *ce, *ce_temp; 907 908 if (mtx_initialized(&td->clip_table_lock)) { 909 mtx_lock(&td->clip_table_lock); 910 TAILQ_FOREACH_SAFE(ce, &td->clip_table, link, ce_temp) { 911 KASSERT(ce->refcount == 0, 912 ("%s: CLIP entry %p still in use (%d)", __func__, 913 ce, ce->refcount)); 914 TAILQ_REMOVE(&td->clip_table, ce, link); 915 delete_lip(sc, &ce->lip); 916 free(ce, M_CXGBE); 917 } 918 mtx_unlock(&td->clip_table_lock); 919 mtx_destroy(&td->clip_table_lock); 920 } 921 } 922 923 static void 924 free_tom_data(struct adapter *sc, struct tom_data *td) 925 { 926 927 ASSERT_SYNCHRONIZED_OP(sc); 928 929 KASSERT(TAILQ_EMPTY(&td->toep_list), 930 ("%s: TOE PCB list is not empty.", __func__)); 931 KASSERT(td->lctx_count == 0, 932 ("%s: lctx hash table is not empty.", __func__)); 933 934 t4_uninit_ddp(sc, td); 935 destroy_clip_table(sc, td); 936 937 if (td->listen_mask != 0) 938 hashdestroy(td->listen_hash, M_CXGBE, td->listen_mask); 939 940 if (mtx_initialized(&td->unsent_wr_lock)) 941 mtx_destroy(&td->unsent_wr_lock); 942 if (mtx_initialized(&td->lctx_hash_lock)) 943 mtx_destroy(&td->lctx_hash_lock); 944 if (mtx_initialized(&td->toep_list_lock)) 945 mtx_destroy(&td->toep_list_lock); 946 947 free_tid_tabs(&sc->tids); 948 free(td, M_CXGBE); 949 } 950 951 static void 952 reclaim_wr_resources(void *arg, int count) 953 { 954 struct tom_data *td = arg; 955 STAILQ_HEAD(, wrqe) twr_list = STAILQ_HEAD_INITIALIZER(twr_list); 956 struct cpl_act_open_req *cpl; 957 u_int opcode, atid; 958 struct wrqe *wr; 959 struct adapter *sc; 960 961 mtx_lock(&td->unsent_wr_lock); 962 STAILQ_SWAP(&td->unsent_wr_list, &twr_list, wrqe); 963 mtx_unlock(&td->unsent_wr_lock); 964 965 while ((wr = STAILQ_FIRST(&twr_list)) != NULL) { 966 STAILQ_REMOVE_HEAD(&twr_list, link); 967 968 cpl = wrtod(wr); 969 opcode = GET_OPCODE(cpl); 970 971 switch (opcode) { 972 case CPL_ACT_OPEN_REQ: 973 case CPL_ACT_OPEN_REQ6: 974 atid = G_TID_TID(be32toh(OPCODE_TID(cpl))); 975 sc = td_adapter(td); 976 977 CTR2(KTR_CXGBE, "%s: atid %u ", __func__, atid); 978 act_open_failure_cleanup(sc, atid, EHOSTUNREACH); 979 free(wr, M_CXGBE); 980 break; 981 default: 982 log(LOG_ERR, "%s: leaked work request %p, wr_len %d, " 983 "opcode %x\n", __func__, wr, wr->wr_len, opcode); 984 /* WR not freed here; go look at it with a debugger. */ 985 } 986 } 987 } 988 989 /* 990 * Ground control to Major TOM 991 * Commencing countdown, engines on 992 */ 993 static int 994 t4_tom_activate(struct adapter *sc) 995 { 996 struct tom_data *td; 997 struct toedev *tod; 998 struct vi_info *vi; 999 struct sge_ofld_rxq *ofld_rxq; 1000 int i, j, rc, v; 1001 1002 ASSERT_SYNCHRONIZED_OP(sc); 1003 1004 /* per-adapter softc for TOM */ 1005 td = malloc(sizeof(*td), M_CXGBE, M_ZERO | M_NOWAIT); 1006 if (td == NULL) 1007 return (ENOMEM); 1008 1009 /* List of TOE PCBs and associated lock */ 1010 mtx_init(&td->toep_list_lock, "PCB list lock", NULL, MTX_DEF); 1011 TAILQ_INIT(&td->toep_list); 1012 1013 /* Listen context */ 1014 mtx_init(&td->lctx_hash_lock, "lctx hash lock", NULL, MTX_DEF); 1015 td->listen_hash = hashinit_flags(LISTEN_HASH_SIZE, M_CXGBE, 1016 &td->listen_mask, HASH_NOWAIT); 1017 1018 /* List of WRs for which L2 resolution failed */ 1019 mtx_init(&td->unsent_wr_lock, "Unsent WR list lock", NULL, MTX_DEF); 1020 STAILQ_INIT(&td->unsent_wr_list); 1021 TASK_INIT(&td->reclaim_wr_resources, 0, reclaim_wr_resources, td); 1022 1023 /* TID tables */ 1024 rc = alloc_tid_tabs(&sc->tids); 1025 if (rc != 0) 1026 goto done; 1027 1028 /* DDP page pods and CPL handlers */ 1029 t4_init_ddp(sc, td); 1030 1031 /* CLIP table for IPv6 offload */ 1032 init_clip_table(sc, td); 1033 1034 /* toedev ops */ 1035 tod = &td->tod; 1036 init_toedev(tod); 1037 tod->tod_softc = sc; 1038 tod->tod_connect = t4_connect; 1039 tod->tod_listen_start = t4_listen_start; 1040 tod->tod_listen_stop = t4_listen_stop; 1041 tod->tod_rcvd = t4_rcvd; 1042 tod->tod_output = t4_tod_output; 1043 tod->tod_send_rst = t4_send_rst; 1044 tod->tod_send_fin = t4_send_fin; 1045 tod->tod_pcb_detach = t4_pcb_detach; 1046 tod->tod_l2_update = t4_l2_update; 1047 tod->tod_syncache_added = t4_syncache_added; 1048 tod->tod_syncache_removed = t4_syncache_removed; 1049 tod->tod_syncache_respond = t4_syncache_respond; 1050 tod->tod_offload_socket = t4_offload_socket; 1051 tod->tod_ctloutput = t4_ctloutput; 1052 1053 for_each_port(sc, i) { 1054 for_each_vi(sc->port[i], v, vi) { 1055 TOEDEV(vi->ifp) = &td->tod; 1056 for_each_ofld_rxq(vi, j, ofld_rxq) { 1057 ofld_rxq->iq.set_tcb_rpl = do_set_tcb_rpl; 1058 ofld_rxq->iq.l2t_write_rpl = do_l2t_write_rpl2; 1059 } 1060 } 1061 } 1062 1063 sc->tom_softc = td; 1064 register_toedev(sc->tom_softc); 1065 1066 done: 1067 if (rc != 0) 1068 free_tom_data(sc, td); 1069 return (rc); 1070 } 1071 1072 static int 1073 t4_tom_deactivate(struct adapter *sc) 1074 { 1075 int rc = 0; 1076 struct tom_data *td = sc->tom_softc; 1077 1078 ASSERT_SYNCHRONIZED_OP(sc); 1079 1080 if (td == NULL) 1081 return (0); /* XXX. KASSERT? */ 1082 1083 if (sc->offload_map != 0) 1084 return (EBUSY); /* at least one port has IFCAP_TOE enabled */ 1085 1086 if (uld_active(sc, ULD_IWARP) || uld_active(sc, ULD_ISCSI)) 1087 return (EBUSY); /* both iWARP and iSCSI rely on the TOE. */ 1088 1089 mtx_lock(&td->toep_list_lock); 1090 if (!TAILQ_EMPTY(&td->toep_list)) 1091 rc = EBUSY; 1092 mtx_unlock(&td->toep_list_lock); 1093 1094 mtx_lock(&td->lctx_hash_lock); 1095 if (td->lctx_count > 0) 1096 rc = EBUSY; 1097 mtx_unlock(&td->lctx_hash_lock); 1098 1099 taskqueue_drain(taskqueue_thread, &td->reclaim_wr_resources); 1100 mtx_lock(&td->unsent_wr_lock); 1101 if (!STAILQ_EMPTY(&td->unsent_wr_list)) 1102 rc = EBUSY; 1103 mtx_unlock(&td->unsent_wr_lock); 1104 1105 if (rc == 0) { 1106 unregister_toedev(sc->tom_softc); 1107 free_tom_data(sc, td); 1108 sc->tom_softc = NULL; 1109 } 1110 1111 return (rc); 1112 } 1113 1114 static void 1115 t4_tom_ifaddr_event(void *arg __unused, struct ifnet *ifp) 1116 { 1117 1118 atomic_add_rel_int(&in6_ifaddr_gen, 1); 1119 taskqueue_enqueue_timeout(taskqueue_thread, &clip_task, -hz / 4); 1120 } 1121 1122 static int 1123 t4_tom_mod_load(void) 1124 { 1125 int rc; 1126 struct protosw *tcp_protosw, *tcp6_protosw; 1127 1128 /* CPL handlers */ 1129 t4_init_connect_cpl_handlers(); 1130 t4_init_listen_cpl_handlers(); 1131 t4_init_cpl_io_handlers(); 1132 1133 rc = t4_ddp_mod_load(); 1134 if (rc != 0) 1135 return (rc); 1136 1137 tcp_protosw = pffindproto(PF_INET, IPPROTO_TCP, SOCK_STREAM); 1138 if (tcp_protosw == NULL) 1139 return (ENOPROTOOPT); 1140 bcopy(tcp_protosw, &ddp_protosw, sizeof(ddp_protosw)); 1141 bcopy(tcp_protosw->pr_usrreqs, &ddp_usrreqs, sizeof(ddp_usrreqs)); 1142 ddp_usrreqs.pru_aio_queue = t4_aio_queue_ddp; 1143 ddp_protosw.pr_usrreqs = &ddp_usrreqs; 1144 1145 tcp6_protosw = pffindproto(PF_INET6, IPPROTO_TCP, SOCK_STREAM); 1146 if (tcp6_protosw == NULL) 1147 return (ENOPROTOOPT); 1148 bcopy(tcp6_protosw, &ddp6_protosw, sizeof(ddp6_protosw)); 1149 bcopy(tcp6_protosw->pr_usrreqs, &ddp6_usrreqs, sizeof(ddp6_usrreqs)); 1150 ddp6_usrreqs.pru_aio_queue = t4_aio_queue_ddp; 1151 ddp6_protosw.pr_usrreqs = &ddp6_usrreqs; 1152 1153 TIMEOUT_TASK_INIT(taskqueue_thread, &clip_task, 0, t4_clip_task, NULL); 1154 ifaddr_evhandler = EVENTHANDLER_REGISTER(ifaddr_event, 1155 t4_tom_ifaddr_event, NULL, EVENTHANDLER_PRI_ANY); 1156 1157 rc = t4_register_uld(&tom_uld_info); 1158 if (rc != 0) 1159 t4_tom_mod_unload(); 1160 1161 return (rc); 1162 } 1163 1164 static void 1165 tom_uninit(struct adapter *sc, void *arg __unused) 1166 { 1167 if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4tomun")) 1168 return; 1169 1170 /* Try to free resources (works only if no port has IFCAP_TOE) */ 1171 if (uld_active(sc, ULD_TOM)) 1172 t4_deactivate_uld(sc, ULD_TOM); 1173 1174 end_synchronized_op(sc, 0); 1175 } 1176 1177 static int 1178 t4_tom_mod_unload(void) 1179 { 1180 t4_iterate(tom_uninit, NULL); 1181 1182 if (t4_unregister_uld(&tom_uld_info) == EBUSY) 1183 return (EBUSY); 1184 1185 if (ifaddr_evhandler) { 1186 EVENTHANDLER_DEREGISTER(ifaddr_event, ifaddr_evhandler); 1187 taskqueue_cancel_timeout(taskqueue_thread, &clip_task, NULL); 1188 } 1189 1190 t4_ddp_mod_unload(); 1191 1192 return (0); 1193 } 1194 #endif /* TCP_OFFLOAD */ 1195 1196 static int 1197 t4_tom_modevent(module_t mod, int cmd, void *arg) 1198 { 1199 int rc = 0; 1200 1201 #ifdef TCP_OFFLOAD 1202 switch (cmd) { 1203 case MOD_LOAD: 1204 rc = t4_tom_mod_load(); 1205 break; 1206 1207 case MOD_UNLOAD: 1208 rc = t4_tom_mod_unload(); 1209 break; 1210 1211 default: 1212 rc = EINVAL; 1213 } 1214 #else 1215 printf("t4_tom: compiled without TCP_OFFLOAD support.\n"); 1216 rc = EOPNOTSUPP; 1217 #endif 1218 return (rc); 1219 } 1220 1221 static moduledata_t t4_tom_moddata= { 1222 "t4_tom", 1223 t4_tom_modevent, 1224 0 1225 }; 1226 1227 MODULE_VERSION(t4_tom, 1); 1228 MODULE_DEPEND(t4_tom, toecore, 1, 1, 1); 1229 MODULE_DEPEND(t4_tom, t4nex, 1, 1, 1); 1230 DECLARE_MODULE(t4_tom, t4_tom_moddata, SI_SUB_EXEC, SI_ORDER_ANY); 1231