1 /*- 2 * Copyright (c) 2012 Chelsio Communications, Inc. 3 * All rights reserved. 4 * Written by: Navdeep Parhar <np@FreeBSD.org> 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 #include "opt_inet.h" 32 #include "opt_inet6.h" 33 34 #include <sys/param.h> 35 #include <sys/types.h> 36 #include <sys/systm.h> 37 #include <sys/kernel.h> 38 #include <sys/ktr.h> 39 #include <sys/lock.h> 40 #include <sys/limits.h> 41 #include <sys/module.h> 42 #include <sys/protosw.h> 43 #include <sys/domain.h> 44 #include <sys/refcount.h> 45 #include <sys/rmlock.h> 46 #include <sys/socket.h> 47 #include <sys/socketvar.h> 48 #include <sys/taskqueue.h> 49 #include <net/if.h> 50 #include <net/if_var.h> 51 #include <netinet/in.h> 52 #include <netinet/in_pcb.h> 53 #include <netinet/in_var.h> 54 #include <netinet/ip.h> 55 #include <netinet/ip6.h> 56 #include <netinet6/scope6_var.h> 57 #define TCPSTATES 58 #include <netinet/tcp_fsm.h> 59 #include <netinet/tcp_var.h> 60 #include <netinet/toecore.h> 61 62 #ifdef TCP_OFFLOAD 63 #include "common/common.h" 64 #include "common/t4_msg.h" 65 #include "common/t4_regs.h" 66 #include "common/t4_regs_values.h" 67 #include "common/t4_tcb.h" 68 #include "tom/t4_tom_l2t.h" 69 #include "tom/t4_tom.h" 70 71 static struct protosw toe_protosw; 72 static struct pr_usrreqs toe_usrreqs; 73 74 static struct protosw toe6_protosw; 75 static struct pr_usrreqs toe6_usrreqs; 76 77 /* Module ops */ 78 static int t4_tom_mod_load(void); 79 static int t4_tom_mod_unload(void); 80 static int t4_tom_modevent(module_t, int, void *); 81 82 /* ULD ops and helpers */ 83 static int t4_tom_activate(struct adapter *); 84 static int t4_tom_deactivate(struct adapter *); 85 86 static struct uld_info tom_uld_info = { 87 .uld_id = ULD_TOM, 88 .activate = t4_tom_activate, 89 .deactivate = t4_tom_deactivate, 90 }; 91 92 static void queue_tid_release(struct adapter *, int); 93 static void release_offload_resources(struct toepcb *); 94 static int alloc_tid_tabs(struct tid_info *); 95 static void free_tid_tabs(struct tid_info *); 96 static int add_lip(struct adapter *, struct in6_addr *); 97 static int delete_lip(struct adapter *, struct in6_addr *); 98 static struct clip_entry *search_lip(struct tom_data *, struct in6_addr *); 99 static void init_clip_table(struct adapter *, struct tom_data *); 100 static void update_clip(struct adapter *, void *); 101 static void t4_clip_task(void *, int); 102 static void update_clip_table(struct adapter *, struct tom_data *); 103 static void destroy_clip_table(struct adapter *, struct tom_data *); 104 static void free_tom_data(struct adapter *, struct tom_data *); 105 static void reclaim_wr_resources(void *, int); 106 107 static int in6_ifaddr_gen; 108 static eventhandler_tag ifaddr_evhandler; 109 static struct timeout_task clip_task; 110 111 struct toepcb * 112 alloc_toepcb(struct vi_info *vi, int txqid, int rxqid, int flags) 113 { 114 struct port_info *pi = vi->pi; 115 struct adapter *sc = pi->adapter; 116 struct toepcb *toep; 117 int tx_credits, txsd_total, len; 118 119 /* 120 * The firmware counts tx work request credits in units of 16 bytes 121 * each. Reserve room for an ABORT_REQ so the driver never has to worry 122 * about tx credits if it wants to abort a connection. 123 */ 124 tx_credits = sc->params.ofldq_wr_cred; 125 tx_credits -= howmany(sizeof(struct cpl_abort_req), 16); 126 127 /* 128 * Shortest possible tx work request is a fw_ofld_tx_data_wr + 1 byte 129 * immediate payload, and firmware counts tx work request credits in 130 * units of 16 byte. Calculate the maximum work requests possible. 131 */ 132 txsd_total = tx_credits / 133 howmany(sizeof(struct fw_ofld_tx_data_wr) + 1, 16); 134 135 if (txqid < 0) 136 txqid = (arc4random() % vi->nofldtxq) + vi->first_ofld_txq; 137 KASSERT(txqid >= vi->first_ofld_txq && 138 txqid < vi->first_ofld_txq + vi->nofldtxq, 139 ("%s: txqid %d for vi %p (first %d, n %d)", __func__, txqid, vi, 140 vi->first_ofld_txq, vi->nofldtxq)); 141 142 if (rxqid < 0) 143 rxqid = (arc4random() % vi->nofldrxq) + vi->first_ofld_rxq; 144 KASSERT(rxqid >= vi->first_ofld_rxq && 145 rxqid < vi->first_ofld_rxq + vi->nofldrxq, 146 ("%s: rxqid %d for vi %p (first %d, n %d)", __func__, rxqid, vi, 147 vi->first_ofld_rxq, vi->nofldrxq)); 148 149 len = offsetof(struct toepcb, txsd) + 150 txsd_total * sizeof(struct ofld_tx_sdesc); 151 152 toep = malloc(len, M_CXGBE, M_ZERO | flags); 153 if (toep == NULL) 154 return (NULL); 155 156 refcount_init(&toep->refcount, 1); 157 toep->td = sc->tom_softc; 158 toep->vi = vi; 159 toep->tx_total = tx_credits; 160 toep->tx_credits = tx_credits; 161 toep->ofld_txq = &sc->sge.ofld_txq[txqid]; 162 toep->ofld_rxq = &sc->sge.ofld_rxq[rxqid]; 163 toep->ctrlq = &sc->sge.ctrlq[pi->port_id]; 164 mbufq_init(&toep->ulp_pduq, INT_MAX); 165 mbufq_init(&toep->ulp_pdu_reclaimq, INT_MAX); 166 toep->txsd_total = txsd_total; 167 toep->txsd_avail = txsd_total; 168 toep->txsd_pidx = 0; 169 toep->txsd_cidx = 0; 170 aiotx_init_toep(toep); 171 ddp_init_toep(toep); 172 173 return (toep); 174 } 175 176 struct toepcb * 177 hold_toepcb(struct toepcb *toep) 178 { 179 180 refcount_acquire(&toep->refcount); 181 return (toep); 182 } 183 184 void 185 free_toepcb(struct toepcb *toep) 186 { 187 188 if (refcount_release(&toep->refcount) == 0) 189 return; 190 191 KASSERT(!(toep->flags & TPF_ATTACHED), 192 ("%s: attached to an inpcb", __func__)); 193 KASSERT(!(toep->flags & TPF_CPL_PENDING), 194 ("%s: CPL pending", __func__)); 195 196 ddp_uninit_toep(toep); 197 free(toep, M_CXGBE); 198 } 199 200 /* 201 * Set up the socket for TCP offload. 202 */ 203 void 204 offload_socket(struct socket *so, struct toepcb *toep) 205 { 206 struct tom_data *td = toep->td; 207 struct inpcb *inp = sotoinpcb(so); 208 struct tcpcb *tp = intotcpcb(inp); 209 struct sockbuf *sb; 210 211 INP_WLOCK_ASSERT(inp); 212 213 /* Update socket */ 214 sb = &so->so_snd; 215 SOCKBUF_LOCK(sb); 216 sb->sb_flags |= SB_NOCOALESCE; 217 SOCKBUF_UNLOCK(sb); 218 sb = &so->so_rcv; 219 SOCKBUF_LOCK(sb); 220 sb->sb_flags |= SB_NOCOALESCE; 221 if (inp->inp_vflag & INP_IPV6) 222 so->so_proto = &toe6_protosw; 223 else 224 so->so_proto = &toe_protosw; 225 SOCKBUF_UNLOCK(sb); 226 227 /* Update TCP PCB */ 228 tp->tod = &td->tod; 229 tp->t_toe = toep; 230 tp->t_flags |= TF_TOE; 231 232 /* Install an extra hold on inp */ 233 toep->inp = inp; 234 toep->flags |= TPF_ATTACHED; 235 in_pcbref(inp); 236 237 /* Add the TOE PCB to the active list */ 238 mtx_lock(&td->toep_list_lock); 239 TAILQ_INSERT_HEAD(&td->toep_list, toep, link); 240 mtx_unlock(&td->toep_list_lock); 241 } 242 243 /* This is _not_ the normal way to "unoffload" a socket. */ 244 void 245 undo_offload_socket(struct socket *so) 246 { 247 struct inpcb *inp = sotoinpcb(so); 248 struct tcpcb *tp = intotcpcb(inp); 249 struct toepcb *toep = tp->t_toe; 250 struct tom_data *td = toep->td; 251 struct sockbuf *sb; 252 253 INP_WLOCK_ASSERT(inp); 254 255 sb = &so->so_snd; 256 SOCKBUF_LOCK(sb); 257 sb->sb_flags &= ~SB_NOCOALESCE; 258 SOCKBUF_UNLOCK(sb); 259 sb = &so->so_rcv; 260 SOCKBUF_LOCK(sb); 261 sb->sb_flags &= ~SB_NOCOALESCE; 262 SOCKBUF_UNLOCK(sb); 263 264 tp->tod = NULL; 265 tp->t_toe = NULL; 266 tp->t_flags &= ~TF_TOE; 267 268 toep->inp = NULL; 269 toep->flags &= ~TPF_ATTACHED; 270 if (in_pcbrele_wlocked(inp)) 271 panic("%s: inp freed.", __func__); 272 273 mtx_lock(&td->toep_list_lock); 274 TAILQ_REMOVE(&td->toep_list, toep, link); 275 mtx_unlock(&td->toep_list_lock); 276 } 277 278 static void 279 release_offload_resources(struct toepcb *toep) 280 { 281 struct tom_data *td = toep->td; 282 struct adapter *sc = td_adapter(td); 283 int tid = toep->tid; 284 285 KASSERT(!(toep->flags & TPF_CPL_PENDING), 286 ("%s: %p has CPL pending.", __func__, toep)); 287 KASSERT(!(toep->flags & TPF_ATTACHED), 288 ("%s: %p is still attached.", __func__, toep)); 289 290 CTR5(KTR_CXGBE, "%s: toep %p (tid %d, l2te %p, ce %p)", 291 __func__, toep, tid, toep->l2te, toep->ce); 292 293 /* 294 * These queues should have been emptied at approximately the same time 295 * that a normal connection's socket's so_snd would have been purged or 296 * drained. Do _not_ clean up here. 297 */ 298 MPASS(mbufq_len(&toep->ulp_pduq) == 0); 299 MPASS(mbufq_len(&toep->ulp_pdu_reclaimq) == 0); 300 #ifdef INVARIANTS 301 ddp_assert_empty(toep); 302 #endif 303 304 if (toep->l2te) 305 t4_l2t_release(toep->l2te); 306 307 if (tid >= 0) { 308 remove_tid(sc, tid, toep->ce ? 2 : 1); 309 release_tid(sc, tid, toep->ctrlq); 310 } 311 312 if (toep->ce) 313 release_lip(td, toep->ce); 314 315 mtx_lock(&td->toep_list_lock); 316 TAILQ_REMOVE(&td->toep_list, toep, link); 317 mtx_unlock(&td->toep_list_lock); 318 319 free_toepcb(toep); 320 } 321 322 /* 323 * The kernel is done with the TCP PCB and this is our opportunity to unhook the 324 * toepcb hanging off of it. If the TOE driver is also done with the toepcb (no 325 * pending CPL) then it is time to release all resources tied to the toepcb. 326 * 327 * Also gets called when an offloaded active open fails and the TOM wants the 328 * kernel to take the TCP PCB back. 329 */ 330 static void 331 t4_pcb_detach(struct toedev *tod __unused, struct tcpcb *tp) 332 { 333 #if defined(KTR) || defined(INVARIANTS) 334 struct inpcb *inp = tp->t_inpcb; 335 #endif 336 struct toepcb *toep = tp->t_toe; 337 338 INP_WLOCK_ASSERT(inp); 339 340 KASSERT(toep != NULL, ("%s: toep is NULL", __func__)); 341 KASSERT(toep->flags & TPF_ATTACHED, 342 ("%s: not attached", __func__)); 343 344 #ifdef KTR 345 if (tp->t_state == TCPS_SYN_SENT) { 346 CTR6(KTR_CXGBE, "%s: atid %d, toep %p (0x%x), inp %p (0x%x)", 347 __func__, toep->tid, toep, toep->flags, inp, 348 inp->inp_flags); 349 } else { 350 CTR6(KTR_CXGBE, 351 "t4_pcb_detach: tid %d (%s), toep %p (0x%x), inp %p (0x%x)", 352 toep->tid, tcpstates[tp->t_state], toep, toep->flags, inp, 353 inp->inp_flags); 354 } 355 #endif 356 357 tp->t_toe = NULL; 358 tp->t_flags &= ~TF_TOE; 359 toep->flags &= ~TPF_ATTACHED; 360 361 if (!(toep->flags & TPF_CPL_PENDING)) 362 release_offload_resources(toep); 363 } 364 365 /* 366 * setsockopt handler. 367 */ 368 static void 369 t4_ctloutput(struct toedev *tod, struct tcpcb *tp, int dir, int name) 370 { 371 struct adapter *sc = tod->tod_softc; 372 struct toepcb *toep = tp->t_toe; 373 374 if (dir == SOPT_GET) 375 return; 376 377 CTR4(KTR_CXGBE, "%s: tp %p, dir %u, name %u", __func__, tp, dir, name); 378 379 switch (name) { 380 case TCP_NODELAY: 381 t4_set_tcb_field(sc, toep->ctrlq, toep->tid, W_TCB_T_FLAGS, 382 V_TF_NAGLE(1), V_TF_NAGLE(tp->t_flags & TF_NODELAY ? 0 : 1), 383 0, 0, toep->ofld_rxq->iq.abs_id); 384 break; 385 default: 386 break; 387 } 388 } 389 390 /* 391 * The TOE driver will not receive any more CPLs for the tid associated with the 392 * toepcb; release the hold on the inpcb. 393 */ 394 void 395 final_cpl_received(struct toepcb *toep) 396 { 397 struct inpcb *inp = toep->inp; 398 399 KASSERT(inp != NULL, ("%s: inp is NULL", __func__)); 400 INP_WLOCK_ASSERT(inp); 401 KASSERT(toep->flags & TPF_CPL_PENDING, 402 ("%s: CPL not pending already?", __func__)); 403 404 CTR6(KTR_CXGBE, "%s: tid %d, toep %p (0x%x), inp %p (0x%x)", 405 __func__, toep->tid, toep, toep->flags, inp, inp->inp_flags); 406 407 if (toep->ulp_mode == ULP_MODE_TCPDDP) 408 release_ddp_resources(toep); 409 toep->inp = NULL; 410 toep->flags &= ~TPF_CPL_PENDING; 411 mbufq_drain(&toep->ulp_pdu_reclaimq); 412 413 if (!(toep->flags & TPF_ATTACHED)) 414 release_offload_resources(toep); 415 416 if (!in_pcbrele_wlocked(inp)) 417 INP_WUNLOCK(inp); 418 } 419 420 void 421 insert_tid(struct adapter *sc, int tid, void *ctx, int ntids) 422 { 423 struct tid_info *t = &sc->tids; 424 425 t->tid_tab[tid] = ctx; 426 atomic_add_int(&t->tids_in_use, ntids); 427 } 428 429 void * 430 lookup_tid(struct adapter *sc, int tid) 431 { 432 struct tid_info *t = &sc->tids; 433 434 return (t->tid_tab[tid]); 435 } 436 437 void 438 update_tid(struct adapter *sc, int tid, void *ctx) 439 { 440 struct tid_info *t = &sc->tids; 441 442 t->tid_tab[tid] = ctx; 443 } 444 445 void 446 remove_tid(struct adapter *sc, int tid, int ntids) 447 { 448 struct tid_info *t = &sc->tids; 449 450 t->tid_tab[tid] = NULL; 451 atomic_subtract_int(&t->tids_in_use, ntids); 452 } 453 454 void 455 release_tid(struct adapter *sc, int tid, struct sge_wrq *ctrlq) 456 { 457 struct wrqe *wr; 458 struct cpl_tid_release *req; 459 460 wr = alloc_wrqe(sizeof(*req), ctrlq); 461 if (wr == NULL) { 462 queue_tid_release(sc, tid); /* defer */ 463 return; 464 } 465 req = wrtod(wr); 466 467 INIT_TP_WR_MIT_CPL(req, CPL_TID_RELEASE, tid); 468 469 t4_wrq_tx(sc, wr); 470 } 471 472 static void 473 queue_tid_release(struct adapter *sc, int tid) 474 { 475 476 CXGBE_UNIMPLEMENTED("deferred tid release"); 477 } 478 479 /* 480 * What mtu_idx to use, given a 4-tuple and/or an MSS cap 481 */ 482 int 483 find_best_mtu_idx(struct adapter *sc, struct in_conninfo *inc, int pmss) 484 { 485 unsigned short *mtus = &sc->params.mtus[0]; 486 int i, mss, n; 487 488 KASSERT(inc != NULL || pmss > 0, 489 ("%s: at least one of inc/pmss must be specified", __func__)); 490 491 mss = inc ? tcp_mssopt(inc) : pmss; 492 if (pmss > 0 && mss > pmss) 493 mss = pmss; 494 495 if (inc->inc_flags & INC_ISIPV6) 496 n = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); 497 else 498 n = sizeof(struct ip) + sizeof(struct tcphdr); 499 500 for (i = 0; i < NMTUS - 1 && mtus[i + 1] <= mss + n; i++) 501 continue; 502 503 return (i); 504 } 505 506 /* 507 * Determine the receive window size for a socket. 508 */ 509 u_long 510 select_rcv_wnd(struct socket *so) 511 { 512 unsigned long wnd; 513 514 SOCKBUF_LOCK_ASSERT(&so->so_rcv); 515 516 wnd = sbspace(&so->so_rcv); 517 if (wnd < MIN_RCV_WND) 518 wnd = MIN_RCV_WND; 519 520 return min(wnd, MAX_RCV_WND); 521 } 522 523 int 524 select_rcv_wscale(void) 525 { 526 int wscale = 0; 527 unsigned long space = sb_max; 528 529 if (space > MAX_RCV_WND) 530 space = MAX_RCV_WND; 531 532 while (wscale < TCP_MAX_WINSHIFT && (TCP_MAXWIN << wscale) < space) 533 wscale++; 534 535 return (wscale); 536 } 537 538 extern int always_keepalive; 539 540 /* 541 * socket so could be a listening socket too. 542 */ 543 uint64_t 544 calc_opt0(struct socket *so, struct vi_info *vi, struct l2t_entry *e, 545 int mtu_idx, int rscale, int rx_credits, int ulp_mode) 546 { 547 uint64_t opt0; 548 549 KASSERT(rx_credits <= M_RCV_BUFSIZ, 550 ("%s: rcv_bufsiz too high", __func__)); 551 552 opt0 = F_TCAM_BYPASS | V_WND_SCALE(rscale) | V_MSS_IDX(mtu_idx) | 553 V_ULP_MODE(ulp_mode) | V_RCV_BUFSIZ(rx_credits); 554 555 if (so != NULL) { 556 struct inpcb *inp = sotoinpcb(so); 557 struct tcpcb *tp = intotcpcb(inp); 558 int keepalive = always_keepalive || 559 so_options_get(so) & SO_KEEPALIVE; 560 561 opt0 |= V_NAGLE((tp->t_flags & TF_NODELAY) == 0); 562 opt0 |= V_KEEP_ALIVE(keepalive != 0); 563 } 564 565 if (e != NULL) 566 opt0 |= V_L2T_IDX(e->idx); 567 568 if (vi != NULL) { 569 opt0 |= V_SMAC_SEL(vi->smt_idx); 570 opt0 |= V_TX_CHAN(vi->pi->tx_chan); 571 } 572 573 return htobe64(opt0); 574 } 575 576 uint64_t 577 select_ntuple(struct vi_info *vi, struct l2t_entry *e) 578 { 579 struct adapter *sc = vi->pi->adapter; 580 struct tp_params *tp = &sc->params.tp; 581 uint16_t viid = vi->viid; 582 uint64_t ntuple = 0; 583 584 /* 585 * Initialize each of the fields which we care about which are present 586 * in the Compressed Filter Tuple. 587 */ 588 if (tp->vlan_shift >= 0 && e->vlan != CPL_L2T_VLAN_NONE) 589 ntuple |= (uint64_t)(F_FT_VLAN_VLD | e->vlan) << tp->vlan_shift; 590 591 if (tp->port_shift >= 0) 592 ntuple |= (uint64_t)e->lport << tp->port_shift; 593 594 if (tp->protocol_shift >= 0) 595 ntuple |= (uint64_t)IPPROTO_TCP << tp->protocol_shift; 596 597 if (tp->vnic_shift >= 0) { 598 uint32_t vf = G_FW_VIID_VIN(viid); 599 uint32_t pf = G_FW_VIID_PFN(viid); 600 uint32_t vld = G_FW_VIID_VIVLD(viid); 601 602 ntuple |= (uint64_t)(V_FT_VNID_ID_VF(vf) | V_FT_VNID_ID_PF(pf) | 603 V_FT_VNID_ID_VLD(vld)) << tp->vnic_shift; 604 } 605 606 if (is_t4(sc)) 607 return (htobe32((uint32_t)ntuple)); 608 else 609 return (htobe64(V_FILTER_TUPLE(ntuple))); 610 } 611 612 void 613 set_tcpddp_ulp_mode(struct toepcb *toep) 614 { 615 616 toep->ulp_mode = ULP_MODE_TCPDDP; 617 toep->ddp_flags = DDP_OK; 618 } 619 620 int 621 negative_advice(int status) 622 { 623 624 return (status == CPL_ERR_RTX_NEG_ADVICE || 625 status == CPL_ERR_PERSIST_NEG_ADVICE || 626 status == CPL_ERR_KEEPALV_NEG_ADVICE); 627 } 628 629 static int 630 alloc_tid_tabs(struct tid_info *t) 631 { 632 size_t size; 633 unsigned int i; 634 635 size = t->ntids * sizeof(*t->tid_tab) + 636 t->natids * sizeof(*t->atid_tab) + 637 t->nstids * sizeof(*t->stid_tab); 638 639 t->tid_tab = malloc(size, M_CXGBE, M_ZERO | M_NOWAIT); 640 if (t->tid_tab == NULL) 641 return (ENOMEM); 642 643 mtx_init(&t->atid_lock, "atid lock", NULL, MTX_DEF); 644 t->atid_tab = (union aopen_entry *)&t->tid_tab[t->ntids]; 645 t->afree = t->atid_tab; 646 t->atids_in_use = 0; 647 for (i = 1; i < t->natids; i++) 648 t->atid_tab[i - 1].next = &t->atid_tab[i]; 649 t->atid_tab[t->natids - 1].next = NULL; 650 651 mtx_init(&t->stid_lock, "stid lock", NULL, MTX_DEF); 652 t->stid_tab = (struct listen_ctx **)&t->atid_tab[t->natids]; 653 t->stids_in_use = 0; 654 TAILQ_INIT(&t->stids); 655 t->nstids_free_head = t->nstids; 656 657 atomic_store_rel_int(&t->tids_in_use, 0); 658 659 return (0); 660 } 661 662 static void 663 free_tid_tabs(struct tid_info *t) 664 { 665 KASSERT(t->tids_in_use == 0, 666 ("%s: %d tids still in use.", __func__, t->tids_in_use)); 667 KASSERT(t->atids_in_use == 0, 668 ("%s: %d atids still in use.", __func__, t->atids_in_use)); 669 KASSERT(t->stids_in_use == 0, 670 ("%s: %d tids still in use.", __func__, t->stids_in_use)); 671 672 free(t->tid_tab, M_CXGBE); 673 t->tid_tab = NULL; 674 675 if (mtx_initialized(&t->atid_lock)) 676 mtx_destroy(&t->atid_lock); 677 if (mtx_initialized(&t->stid_lock)) 678 mtx_destroy(&t->stid_lock); 679 } 680 681 static int 682 add_lip(struct adapter *sc, struct in6_addr *lip) 683 { 684 struct fw_clip_cmd c; 685 686 ASSERT_SYNCHRONIZED_OP(sc); 687 /* mtx_assert(&td->clip_table_lock, MA_OWNED); */ 688 689 memset(&c, 0, sizeof(c)); 690 c.op_to_write = htonl(V_FW_CMD_OP(FW_CLIP_CMD) | F_FW_CMD_REQUEST | 691 F_FW_CMD_WRITE); 692 c.alloc_to_len16 = htonl(F_FW_CLIP_CMD_ALLOC | FW_LEN16(c)); 693 c.ip_hi = *(uint64_t *)&lip->s6_addr[0]; 694 c.ip_lo = *(uint64_t *)&lip->s6_addr[8]; 695 696 return (-t4_wr_mbox_ns(sc, sc->mbox, &c, sizeof(c), &c)); 697 } 698 699 static int 700 delete_lip(struct adapter *sc, struct in6_addr *lip) 701 { 702 struct fw_clip_cmd c; 703 704 ASSERT_SYNCHRONIZED_OP(sc); 705 /* mtx_assert(&td->clip_table_lock, MA_OWNED); */ 706 707 memset(&c, 0, sizeof(c)); 708 c.op_to_write = htonl(V_FW_CMD_OP(FW_CLIP_CMD) | F_FW_CMD_REQUEST | 709 F_FW_CMD_READ); 710 c.alloc_to_len16 = htonl(F_FW_CLIP_CMD_FREE | FW_LEN16(c)); 711 c.ip_hi = *(uint64_t *)&lip->s6_addr[0]; 712 c.ip_lo = *(uint64_t *)&lip->s6_addr[8]; 713 714 return (-t4_wr_mbox_ns(sc, sc->mbox, &c, sizeof(c), &c)); 715 } 716 717 static struct clip_entry * 718 search_lip(struct tom_data *td, struct in6_addr *lip) 719 { 720 struct clip_entry *ce; 721 722 mtx_assert(&td->clip_table_lock, MA_OWNED); 723 724 TAILQ_FOREACH(ce, &td->clip_table, link) { 725 if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip)) 726 return (ce); 727 } 728 729 return (NULL); 730 } 731 732 struct clip_entry * 733 hold_lip(struct tom_data *td, struct in6_addr *lip, struct clip_entry *ce) 734 { 735 736 mtx_lock(&td->clip_table_lock); 737 if (ce == NULL) 738 ce = search_lip(td, lip); 739 if (ce != NULL) 740 ce->refcount++; 741 mtx_unlock(&td->clip_table_lock); 742 743 return (ce); 744 } 745 746 void 747 release_lip(struct tom_data *td, struct clip_entry *ce) 748 { 749 750 mtx_lock(&td->clip_table_lock); 751 KASSERT(search_lip(td, &ce->lip) == ce, 752 ("%s: CLIP entry %p p not in CLIP table.", __func__, ce)); 753 KASSERT(ce->refcount > 0, 754 ("%s: CLIP entry %p has refcount 0", __func__, ce)); 755 --ce->refcount; 756 mtx_unlock(&td->clip_table_lock); 757 } 758 759 static void 760 init_clip_table(struct adapter *sc, struct tom_data *td) 761 { 762 763 ASSERT_SYNCHRONIZED_OP(sc); 764 765 mtx_init(&td->clip_table_lock, "CLIP table lock", NULL, MTX_DEF); 766 TAILQ_INIT(&td->clip_table); 767 td->clip_gen = -1; 768 769 update_clip_table(sc, td); 770 } 771 772 static void 773 update_clip(struct adapter *sc, void *arg __unused) 774 { 775 776 if (begin_synchronized_op(sc, NULL, HOLD_LOCK, "t4tomuc")) 777 return; 778 779 if (uld_active(sc, ULD_TOM)) 780 update_clip_table(sc, sc->tom_softc); 781 782 end_synchronized_op(sc, LOCK_HELD); 783 } 784 785 static void 786 t4_clip_task(void *arg, int count) 787 { 788 789 t4_iterate(update_clip, NULL); 790 } 791 792 static void 793 update_clip_table(struct adapter *sc, struct tom_data *td) 794 { 795 struct rm_priotracker in6_ifa_tracker; 796 struct in6_ifaddr *ia; 797 struct in6_addr *lip, tlip; 798 struct clip_head stale; 799 struct clip_entry *ce, *ce_temp; 800 struct vi_info *vi; 801 int rc, gen, i, j; 802 uintptr_t last_vnet; 803 804 ASSERT_SYNCHRONIZED_OP(sc); 805 806 IN6_IFADDR_RLOCK(&in6_ifa_tracker); 807 mtx_lock(&td->clip_table_lock); 808 809 gen = atomic_load_acq_int(&in6_ifaddr_gen); 810 if (gen == td->clip_gen) 811 goto done; 812 813 TAILQ_INIT(&stale); 814 TAILQ_CONCAT(&stale, &td->clip_table, link); 815 816 /* 817 * last_vnet optimizes the common cases where all if_vnet = NULL (no 818 * VIMAGE) or all if_vnet = vnet0. 819 */ 820 last_vnet = (uintptr_t)(-1); 821 for_each_port(sc, i) 822 for_each_vi(sc->port[i], j, vi) { 823 if (last_vnet == (uintptr_t)vi->ifp->if_vnet) 824 continue; 825 826 /* XXX: races with if_vmove */ 827 CURVNET_SET(vi->ifp->if_vnet); 828 TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) { 829 lip = &ia->ia_addr.sin6_addr; 830 831 KASSERT(!IN6_IS_ADDR_MULTICAST(lip), 832 ("%s: mcast address in in6_ifaddr list", __func__)); 833 834 if (IN6_IS_ADDR_LOOPBACK(lip)) 835 continue; 836 if (IN6_IS_SCOPE_EMBED(lip)) { 837 /* Remove the embedded scope */ 838 tlip = *lip; 839 lip = &tlip; 840 in6_clearscope(lip); 841 } 842 /* 843 * XXX: how to weed out the link local address for the 844 * loopback interface? It's fe80::1 usually (always?). 845 */ 846 847 /* 848 * If it's in the main list then we already know it's 849 * not stale. 850 */ 851 TAILQ_FOREACH(ce, &td->clip_table, link) { 852 if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip)) 853 goto next; 854 } 855 856 /* 857 * If it's in the stale list we should move it to the 858 * main list. 859 */ 860 TAILQ_FOREACH(ce, &stale, link) { 861 if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip)) { 862 TAILQ_REMOVE(&stale, ce, link); 863 TAILQ_INSERT_TAIL(&td->clip_table, ce, 864 link); 865 goto next; 866 } 867 } 868 869 /* A new IP6 address; add it to the CLIP table */ 870 ce = malloc(sizeof(*ce), M_CXGBE, M_NOWAIT); 871 memcpy(&ce->lip, lip, sizeof(ce->lip)); 872 ce->refcount = 0; 873 rc = add_lip(sc, lip); 874 if (rc == 0) 875 TAILQ_INSERT_TAIL(&td->clip_table, ce, link); 876 else { 877 char ip[INET6_ADDRSTRLEN]; 878 879 inet_ntop(AF_INET6, &ce->lip, &ip[0], 880 sizeof(ip)); 881 log(LOG_ERR, "%s: could not add %s (%d)\n", 882 __func__, ip, rc); 883 free(ce, M_CXGBE); 884 } 885 next: 886 continue; 887 } 888 CURVNET_RESTORE(); 889 last_vnet = (uintptr_t)vi->ifp->if_vnet; 890 } 891 892 /* 893 * Remove stale addresses (those no longer in V_in6_ifaddrhead) that are 894 * no longer referenced by the driver. 895 */ 896 TAILQ_FOREACH_SAFE(ce, &stale, link, ce_temp) { 897 if (ce->refcount == 0) { 898 rc = delete_lip(sc, &ce->lip); 899 if (rc == 0) { 900 TAILQ_REMOVE(&stale, ce, link); 901 free(ce, M_CXGBE); 902 } else { 903 char ip[INET6_ADDRSTRLEN]; 904 905 inet_ntop(AF_INET6, &ce->lip, &ip[0], 906 sizeof(ip)); 907 log(LOG_ERR, "%s: could not delete %s (%d)\n", 908 __func__, ip, rc); 909 } 910 } 911 } 912 /* The ones that are still referenced need to stay in the CLIP table */ 913 TAILQ_CONCAT(&td->clip_table, &stale, link); 914 915 td->clip_gen = gen; 916 done: 917 mtx_unlock(&td->clip_table_lock); 918 IN6_IFADDR_RUNLOCK(&in6_ifa_tracker); 919 } 920 921 static void 922 destroy_clip_table(struct adapter *sc, struct tom_data *td) 923 { 924 struct clip_entry *ce, *ce_temp; 925 926 if (mtx_initialized(&td->clip_table_lock)) { 927 mtx_lock(&td->clip_table_lock); 928 TAILQ_FOREACH_SAFE(ce, &td->clip_table, link, ce_temp) { 929 KASSERT(ce->refcount == 0, 930 ("%s: CLIP entry %p still in use (%d)", __func__, 931 ce, ce->refcount)); 932 TAILQ_REMOVE(&td->clip_table, ce, link); 933 delete_lip(sc, &ce->lip); 934 free(ce, M_CXGBE); 935 } 936 mtx_unlock(&td->clip_table_lock); 937 mtx_destroy(&td->clip_table_lock); 938 } 939 } 940 941 static void 942 free_tom_data(struct adapter *sc, struct tom_data *td) 943 { 944 945 ASSERT_SYNCHRONIZED_OP(sc); 946 947 KASSERT(TAILQ_EMPTY(&td->toep_list), 948 ("%s: TOE PCB list is not empty.", __func__)); 949 KASSERT(td->lctx_count == 0, 950 ("%s: lctx hash table is not empty.", __func__)); 951 952 t4_free_ppod_region(&td->pr); 953 destroy_clip_table(sc, td); 954 955 if (td->listen_mask != 0) 956 hashdestroy(td->listen_hash, M_CXGBE, td->listen_mask); 957 958 if (mtx_initialized(&td->unsent_wr_lock)) 959 mtx_destroy(&td->unsent_wr_lock); 960 if (mtx_initialized(&td->lctx_hash_lock)) 961 mtx_destroy(&td->lctx_hash_lock); 962 if (mtx_initialized(&td->toep_list_lock)) 963 mtx_destroy(&td->toep_list_lock); 964 965 free_tid_tabs(&sc->tids); 966 free(td, M_CXGBE); 967 } 968 969 static void 970 reclaim_wr_resources(void *arg, int count) 971 { 972 struct tom_data *td = arg; 973 STAILQ_HEAD(, wrqe) twr_list = STAILQ_HEAD_INITIALIZER(twr_list); 974 struct cpl_act_open_req *cpl; 975 u_int opcode, atid; 976 struct wrqe *wr; 977 struct adapter *sc; 978 979 mtx_lock(&td->unsent_wr_lock); 980 STAILQ_SWAP(&td->unsent_wr_list, &twr_list, wrqe); 981 mtx_unlock(&td->unsent_wr_lock); 982 983 while ((wr = STAILQ_FIRST(&twr_list)) != NULL) { 984 STAILQ_REMOVE_HEAD(&twr_list, link); 985 986 cpl = wrtod(wr); 987 opcode = GET_OPCODE(cpl); 988 989 switch (opcode) { 990 case CPL_ACT_OPEN_REQ: 991 case CPL_ACT_OPEN_REQ6: 992 atid = G_TID_TID(be32toh(OPCODE_TID(cpl))); 993 sc = td_adapter(td); 994 995 CTR2(KTR_CXGBE, "%s: atid %u ", __func__, atid); 996 act_open_failure_cleanup(sc, atid, EHOSTUNREACH); 997 free(wr, M_CXGBE); 998 break; 999 default: 1000 log(LOG_ERR, "%s: leaked work request %p, wr_len %d, " 1001 "opcode %x\n", __func__, wr, wr->wr_len, opcode); 1002 /* WR not freed here; go look at it with a debugger. */ 1003 } 1004 } 1005 } 1006 1007 /* 1008 * Ground control to Major TOM 1009 * Commencing countdown, engines on 1010 */ 1011 static int 1012 t4_tom_activate(struct adapter *sc) 1013 { 1014 struct tom_data *td; 1015 struct toedev *tod; 1016 struct vi_info *vi; 1017 struct sge_ofld_rxq *ofld_rxq; 1018 int i, j, rc, v; 1019 1020 ASSERT_SYNCHRONIZED_OP(sc); 1021 1022 /* per-adapter softc for TOM */ 1023 td = malloc(sizeof(*td), M_CXGBE, M_ZERO | M_NOWAIT); 1024 if (td == NULL) 1025 return (ENOMEM); 1026 1027 /* List of TOE PCBs and associated lock */ 1028 mtx_init(&td->toep_list_lock, "PCB list lock", NULL, MTX_DEF); 1029 TAILQ_INIT(&td->toep_list); 1030 1031 /* Listen context */ 1032 mtx_init(&td->lctx_hash_lock, "lctx hash lock", NULL, MTX_DEF); 1033 td->listen_hash = hashinit_flags(LISTEN_HASH_SIZE, M_CXGBE, 1034 &td->listen_mask, HASH_NOWAIT); 1035 1036 /* List of WRs for which L2 resolution failed */ 1037 mtx_init(&td->unsent_wr_lock, "Unsent WR list lock", NULL, MTX_DEF); 1038 STAILQ_INIT(&td->unsent_wr_list); 1039 TASK_INIT(&td->reclaim_wr_resources, 0, reclaim_wr_resources, td); 1040 1041 /* TID tables */ 1042 rc = alloc_tid_tabs(&sc->tids); 1043 if (rc != 0) 1044 goto done; 1045 1046 rc = t4_init_ppod_region(&td->pr, &sc->vres.ddp, 1047 t4_read_reg(sc, A_ULP_RX_TDDP_PSZ), "TDDP page pods"); 1048 if (rc != 0) 1049 goto done; 1050 t4_set_reg_field(sc, A_ULP_RX_TDDP_TAGMASK, 1051 V_TDDPTAGMASK(M_TDDPTAGMASK), td->pr.pr_tag_mask); 1052 1053 /* CLIP table for IPv6 offload */ 1054 init_clip_table(sc, td); 1055 1056 /* toedev ops */ 1057 tod = &td->tod; 1058 init_toedev(tod); 1059 tod->tod_softc = sc; 1060 tod->tod_connect = t4_connect; 1061 tod->tod_listen_start = t4_listen_start; 1062 tod->tod_listen_stop = t4_listen_stop; 1063 tod->tod_rcvd = t4_rcvd; 1064 tod->tod_output = t4_tod_output; 1065 tod->tod_send_rst = t4_send_rst; 1066 tod->tod_send_fin = t4_send_fin; 1067 tod->tod_pcb_detach = t4_pcb_detach; 1068 tod->tod_l2_update = t4_l2_update; 1069 tod->tod_syncache_added = t4_syncache_added; 1070 tod->tod_syncache_removed = t4_syncache_removed; 1071 tod->tod_syncache_respond = t4_syncache_respond; 1072 tod->tod_offload_socket = t4_offload_socket; 1073 tod->tod_ctloutput = t4_ctloutput; 1074 1075 for_each_port(sc, i) { 1076 for_each_vi(sc->port[i], v, vi) { 1077 TOEDEV(vi->ifp) = &td->tod; 1078 for_each_ofld_rxq(vi, j, ofld_rxq) { 1079 ofld_rxq->iq.set_tcb_rpl = do_set_tcb_rpl; 1080 ofld_rxq->iq.l2t_write_rpl = do_l2t_write_rpl2; 1081 } 1082 } 1083 } 1084 1085 sc->tom_softc = td; 1086 register_toedev(sc->tom_softc); 1087 1088 done: 1089 if (rc != 0) 1090 free_tom_data(sc, td); 1091 return (rc); 1092 } 1093 1094 static int 1095 t4_tom_deactivate(struct adapter *sc) 1096 { 1097 int rc = 0; 1098 struct tom_data *td = sc->tom_softc; 1099 1100 ASSERT_SYNCHRONIZED_OP(sc); 1101 1102 if (td == NULL) 1103 return (0); /* XXX. KASSERT? */ 1104 1105 if (sc->offload_map != 0) 1106 return (EBUSY); /* at least one port has IFCAP_TOE enabled */ 1107 1108 if (uld_active(sc, ULD_IWARP) || uld_active(sc, ULD_ISCSI)) 1109 return (EBUSY); /* both iWARP and iSCSI rely on the TOE. */ 1110 1111 mtx_lock(&td->toep_list_lock); 1112 if (!TAILQ_EMPTY(&td->toep_list)) 1113 rc = EBUSY; 1114 mtx_unlock(&td->toep_list_lock); 1115 1116 mtx_lock(&td->lctx_hash_lock); 1117 if (td->lctx_count > 0) 1118 rc = EBUSY; 1119 mtx_unlock(&td->lctx_hash_lock); 1120 1121 taskqueue_drain(taskqueue_thread, &td->reclaim_wr_resources); 1122 mtx_lock(&td->unsent_wr_lock); 1123 if (!STAILQ_EMPTY(&td->unsent_wr_list)) 1124 rc = EBUSY; 1125 mtx_unlock(&td->unsent_wr_lock); 1126 1127 if (rc == 0) { 1128 unregister_toedev(sc->tom_softc); 1129 free_tom_data(sc, td); 1130 sc->tom_softc = NULL; 1131 } 1132 1133 return (rc); 1134 } 1135 1136 static void 1137 t4_tom_ifaddr_event(void *arg __unused, struct ifnet *ifp) 1138 { 1139 1140 atomic_add_rel_int(&in6_ifaddr_gen, 1); 1141 taskqueue_enqueue_timeout(taskqueue_thread, &clip_task, -hz / 4); 1142 } 1143 1144 static int 1145 t4_aio_queue_tom(struct socket *so, struct kaiocb *job) 1146 { 1147 struct tcpcb *tp = so_sototcpcb(so); 1148 struct toepcb *toep = tp->t_toe; 1149 int error; 1150 1151 if (toep->ulp_mode == ULP_MODE_TCPDDP) { 1152 error = t4_aio_queue_ddp(so, job); 1153 if (error != EOPNOTSUPP) 1154 return (error); 1155 } 1156 1157 return (t4_aio_queue_aiotx(so, job)); 1158 } 1159 1160 static int 1161 t4_tom_mod_load(void) 1162 { 1163 int rc; 1164 struct protosw *tcp_protosw, *tcp6_protosw; 1165 1166 /* CPL handlers */ 1167 t4_init_connect_cpl_handlers(); 1168 t4_init_listen_cpl_handlers(); 1169 t4_init_cpl_io_handlers(); 1170 1171 rc = t4_ddp_mod_load(); 1172 if (rc != 0) 1173 return (rc); 1174 1175 tcp_protosw = pffindproto(PF_INET, IPPROTO_TCP, SOCK_STREAM); 1176 if (tcp_protosw == NULL) 1177 return (ENOPROTOOPT); 1178 bcopy(tcp_protosw, &toe_protosw, sizeof(toe_protosw)); 1179 bcopy(tcp_protosw->pr_usrreqs, &toe_usrreqs, sizeof(toe_usrreqs)); 1180 toe_usrreqs.pru_aio_queue = t4_aio_queue_tom; 1181 toe_protosw.pr_usrreqs = &toe_usrreqs; 1182 1183 tcp6_protosw = pffindproto(PF_INET6, IPPROTO_TCP, SOCK_STREAM); 1184 if (tcp6_protosw == NULL) 1185 return (ENOPROTOOPT); 1186 bcopy(tcp6_protosw, &toe6_protosw, sizeof(toe6_protosw)); 1187 bcopy(tcp6_protosw->pr_usrreqs, &toe6_usrreqs, sizeof(toe6_usrreqs)); 1188 toe6_usrreqs.pru_aio_queue = t4_aio_queue_tom; 1189 toe6_protosw.pr_usrreqs = &toe6_usrreqs; 1190 1191 TIMEOUT_TASK_INIT(taskqueue_thread, &clip_task, 0, t4_clip_task, NULL); 1192 ifaddr_evhandler = EVENTHANDLER_REGISTER(ifaddr_event, 1193 t4_tom_ifaddr_event, NULL, EVENTHANDLER_PRI_ANY); 1194 1195 rc = t4_register_uld(&tom_uld_info); 1196 if (rc != 0) 1197 t4_tom_mod_unload(); 1198 1199 return (rc); 1200 } 1201 1202 static void 1203 tom_uninit(struct adapter *sc, void *arg __unused) 1204 { 1205 if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4tomun")) 1206 return; 1207 1208 /* Try to free resources (works only if no port has IFCAP_TOE) */ 1209 if (uld_active(sc, ULD_TOM)) 1210 t4_deactivate_uld(sc, ULD_TOM); 1211 1212 end_synchronized_op(sc, 0); 1213 } 1214 1215 static int 1216 t4_tom_mod_unload(void) 1217 { 1218 t4_iterate(tom_uninit, NULL); 1219 1220 if (t4_unregister_uld(&tom_uld_info) == EBUSY) 1221 return (EBUSY); 1222 1223 if (ifaddr_evhandler) { 1224 EVENTHANDLER_DEREGISTER(ifaddr_event, ifaddr_evhandler); 1225 taskqueue_cancel_timeout(taskqueue_thread, &clip_task, NULL); 1226 } 1227 1228 t4_ddp_mod_unload(); 1229 1230 t4_uninit_connect_cpl_handlers(); 1231 t4_uninit_listen_cpl_handlers(); 1232 t4_uninit_cpl_io_handlers(); 1233 1234 return (0); 1235 } 1236 #endif /* TCP_OFFLOAD */ 1237 1238 static int 1239 t4_tom_modevent(module_t mod, int cmd, void *arg) 1240 { 1241 int rc = 0; 1242 1243 #ifdef TCP_OFFLOAD 1244 switch (cmd) { 1245 case MOD_LOAD: 1246 rc = t4_tom_mod_load(); 1247 break; 1248 1249 case MOD_UNLOAD: 1250 rc = t4_tom_mod_unload(); 1251 break; 1252 1253 default: 1254 rc = EINVAL; 1255 } 1256 #else 1257 printf("t4_tom: compiled without TCP_OFFLOAD support.\n"); 1258 rc = EOPNOTSUPP; 1259 #endif 1260 return (rc); 1261 } 1262 1263 static moduledata_t t4_tom_moddata= { 1264 "t4_tom", 1265 t4_tom_modevent, 1266 0 1267 }; 1268 1269 MODULE_VERSION(t4_tom, 1); 1270 MODULE_DEPEND(t4_tom, toecore, 1, 1, 1); 1271 MODULE_DEPEND(t4_tom, t4nex, 1, 1, 1); 1272 DECLARE_MODULE(t4_tom, t4_tom_moddata, SI_SUB_EXEC, SI_ORDER_ANY); 1273