/*-
 * Copyright (c) 2012 Chelsio Communications, Inc.
 * All rights reserved.
 * Written by: Navdeep Parhar <np@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/module.h>
#include <sys/protosw.h>
#include <sys/domain.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/taskqueue.h>
#include <net/if.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp_var.h>
#include <netinet6/scope6_var.h>
#define TCPSTATES
#include <netinet/tcp_fsm.h>
#include <netinet/toecore.h>

#ifdef TCP_OFFLOAD
#include "common/common.h"
#include "common/t4_msg.h"
#include "common/t4_regs.h"
#include "tom/t4_tom_l2t.h"
#include "tom/t4_tom.h"

static struct protosw ddp_protosw;
static struct pr_usrreqs ddp_usrreqs;

static struct protosw ddp6_protosw;
static struct pr_usrreqs ddp6_usrreqs;

/* Module ops */
static int t4_tom_mod_load(void);
static int t4_tom_mod_unload(void);
static int t4_tom_modevent(module_t, int, void *);

/* ULD ops and helpers */
static int t4_tom_activate(struct adapter *);
static int t4_tom_deactivate(struct adapter *);

static struct uld_info tom_uld_info = {
	.uld_id = ULD_TOM,
	.activate = t4_tom_activate,
	.deactivate = t4_tom_deactivate,
};

static void queue_tid_release(struct adapter *, int);
static void release_offload_resources(struct toepcb *);
static int alloc_tid_tabs(struct tid_info *);
static void free_tid_tabs(struct tid_info *);
static int add_lip(struct adapter *, struct in6_addr *);
static int delete_lip(struct adapter *, struct in6_addr *);
static struct clip_entry *search_lip(struct tom_data *, struct in6_addr *);
static void init_clip_table(struct adapter *, struct tom_data *);
static void update_clip(struct adapter *, void *);
static void t4_clip_task(void *, int);
static void update_clip_table(struct adapter *, struct tom_data *);
static void destroy_clip_table(struct adapter *, struct tom_data *);
static void free_tom_data(struct adapter *, struct tom_data *);

static int in6_ifaddr_gen;
static eventhandler_tag ifaddr_evhandler;
static struct timeout_task clip_task;

struct toepcb *
alloc_toepcb(struct port_info *pi, int txqid, int rxqid, int flags)
{
	struct adapter *sc = pi->adapter;
	struct toepcb *toep;
	int tx_credits, txsd_total, len;

	/*
	 * The firmware counts tx work request credits in units of 16 bytes
	 * each.  Reserve room for an ABORT_REQ so the driver never has to
	 * worry about tx credits if it wants to abort a connection.
	 */
	tx_credits = sc->params.ofldq_wr_cred;
	tx_credits -= howmany(sizeof(struct cpl_abort_req), 16);

	/*
	 * The shortest possible tx work request is a fw_ofld_tx_data_wr + 1
	 * byte of immediate payload, and the firmware counts tx work request
	 * credits in units of 16 bytes.  Calculate the maximum number of
	 * work requests possible.
	 */
	txsd_total = tx_credits /
	    howmany(sizeof(struct fw_ofld_tx_data_wr) + 1, 16);

	if (txqid < 0)
		txqid = (arc4random() % pi->nofldtxq) + pi->first_ofld_txq;
	KASSERT(txqid >= pi->first_ofld_txq &&
	    txqid < pi->first_ofld_txq + pi->nofldtxq,
	    ("%s: txqid %d for port %p (first %d, n %d)", __func__, txqid, pi,
	    pi->first_ofld_txq, pi->nofldtxq));

	if (rxqid < 0)
		rxqid = (arc4random() % pi->nofldrxq) + pi->first_ofld_rxq;
	KASSERT(rxqid >= pi->first_ofld_rxq &&
	    rxqid < pi->first_ofld_rxq + pi->nofldrxq,
	    ("%s: rxqid %d for port %p (first %d, n %d)", __func__, rxqid, pi,
	    pi->first_ofld_rxq, pi->nofldrxq));

	len = offsetof(struct toepcb, txsd) +
	    txsd_total * sizeof(struct ofld_tx_sdesc);

	toep = malloc(len, M_CXGBE, M_ZERO | flags);
	if (toep == NULL)
		return (NULL);

	toep->td = sc->tom_softc;
	toep->port = pi;
	toep->tx_credits = tx_credits;
	toep->ofld_txq = &sc->sge.ofld_txq[txqid];
	toep->ofld_rxq = &sc->sge.ofld_rxq[rxqid];
	toep->ctrlq = &sc->sge.ctrlq[pi->port_id];
	toep->txsd_total = txsd_total;
	toep->txsd_avail = txsd_total;
	toep->txsd_pidx = 0;
	toep->txsd_cidx = 0;

	return (toep);
}
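
/*
 * Worked example (illustrative only; the real numbers depend on the
 * firmware configuration and on the structure sizes): if the firmware
 * hands out ofldq_wr_cred = 512 credits and cpl_abort_req fits in one
 * credit, then tx_credits = 512 - 1 = 511.  If fw_ofld_tx_data_wr plus one
 * byte of immediate payload rounds up to 2 credits, txsd_total =
 * 511 / 2 = 255 software tx descriptors, and the toepcb above is sized to
 * hold that many ofld_tx_sdesc entries in its trailing txsd[] array.
 */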

void
free_toepcb(struct toepcb *toep)
{

	KASSERT(!(toep->flags & TPF_ATTACHED),
	    ("%s: attached to an inpcb", __func__));
	KASSERT(!(toep->flags & TPF_CPL_PENDING),
	    ("%s: CPL pending", __func__));

	free(toep, M_CXGBE);
}

/*
 * Set up the socket for TCP offload.
 */
void
offload_socket(struct socket *so, struct toepcb *toep)
{
	struct tom_data *td = toep->td;
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp = intotcpcb(inp);
	struct sockbuf *sb;

	INP_WLOCK_ASSERT(inp);

	/* Update socket */
	sb = &so->so_snd;
	SOCKBUF_LOCK(sb);
	sb->sb_flags |= SB_NOCOALESCE;
	SOCKBUF_UNLOCK(sb);
	sb = &so->so_rcv;
	SOCKBUF_LOCK(sb);
	sb->sb_flags |= SB_NOCOALESCE;
	if (toep->ulp_mode == ULP_MODE_TCPDDP) {
		if (inp->inp_vflag & INP_IPV6)
			so->so_proto = &ddp6_protosw;
		else
			so->so_proto = &ddp_protosw;
	}
	SOCKBUF_UNLOCK(sb);

	/* Update TCP PCB */
	tp->tod = &td->tod;
	tp->t_toe = toep;
	tp->t_flags |= TF_TOE;

	/* Install an extra hold on inp */
	toep->inp = inp;
	toep->flags |= TPF_ATTACHED;
	in_pcbref(inp);

	/* Add the TOE PCB to the active list */
	mtx_lock(&td->toep_list_lock);
	TAILQ_INSERT_HEAD(&td->toep_list, toep, link);
	mtx_unlock(&td->toep_list_lock);
}
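
/*
 * Annotation (not in the original source): SB_NOCOALESCE stops sbcompress()
 * from merging incoming mbufs in the socket buffers, leaving TOE-delivered
 * chains intact.  For connections in ULP_MODE_TCPDDP the protosw is swapped
 * for a copy whose pru_soreceive is t4_soreceive_ddp (set up in
 * t4_tom_mod_load below) so that receives can use direct data placement.
 */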

/* This is _not_ the normal way to "unoffload" a socket. */
void
undo_offload_socket(struct socket *so)
{
	struct inpcb *inp = sotoinpcb(so);
	struct tcpcb *tp = intotcpcb(inp);
	struct toepcb *toep = tp->t_toe;
	struct tom_data *td = toep->td;
	struct sockbuf *sb;

	INP_WLOCK_ASSERT(inp);

	sb = &so->so_snd;
	SOCKBUF_LOCK(sb);
	sb->sb_flags &= ~SB_NOCOALESCE;
	SOCKBUF_UNLOCK(sb);
	sb = &so->so_rcv;
	SOCKBUF_LOCK(sb);
	sb->sb_flags &= ~SB_NOCOALESCE;
	SOCKBUF_UNLOCK(sb);

	tp->tod = NULL;
	tp->t_toe = NULL;
	tp->t_flags &= ~TF_TOE;

	toep->inp = NULL;
	toep->flags &= ~TPF_ATTACHED;
	if (in_pcbrele_wlocked(inp))
		panic("%s: inp freed.", __func__);

	mtx_lock(&td->toep_list_lock);
	TAILQ_REMOVE(&td->toep_list, toep, link);
	mtx_unlock(&td->toep_list_lock);
}

static void
release_offload_resources(struct toepcb *toep)
{
	struct tom_data *td = toep->td;
	struct adapter *sc = td_adapter(td);
	int tid = toep->tid;

	KASSERT(!(toep->flags & TPF_CPL_PENDING),
	    ("%s: %p has CPL pending.", __func__, toep));
	KASSERT(!(toep->flags & TPF_ATTACHED),
	    ("%s: %p is still attached.", __func__, toep));

	CTR5(KTR_CXGBE, "%s: toep %p (tid %d, l2te %p, ce %p)",
	    __func__, toep, tid, toep->l2te, toep->ce);

	if (toep->ulp_mode == ULP_MODE_TCPDDP)
		release_ddp_resources(toep);

	if (toep->l2te)
		t4_l2t_release(toep->l2te);

	if (tid >= 0) {
		remove_tid(sc, tid);
		release_tid(sc, tid, toep->ctrlq);
	}

	if (toep->ce)
		release_lip(td, toep->ce);

	mtx_lock(&td->toep_list_lock);
	TAILQ_REMOVE(&td->toep_list, toep, link);
	mtx_unlock(&td->toep_list_lock);

	free_toepcb(toep);
}

/*
 * The kernel is done with the TCP PCB and this is our opportunity to unhook
 * the toepcb hanging off of it.  If the TOE driver is also done with the
 * toepcb (no pending CPL) then it is time to release all resources tied to
 * the toepcb.
 *
 * Also gets called when an offloaded active open fails and the TOM wants
 * the kernel to take the TCP PCB back.
 */
static void
t4_pcb_detach(struct toedev *tod __unused, struct tcpcb *tp)
{
#if defined(KTR) || defined(INVARIANTS)
	struct inpcb *inp = tp->t_inpcb;
#endif
	struct toepcb *toep = tp->t_toe;

	INP_WLOCK_ASSERT(inp);

	KASSERT(toep != NULL, ("%s: toep is NULL", __func__));
	KASSERT(toep->flags & TPF_ATTACHED,
	    ("%s: not attached", __func__));

#ifdef KTR
	if (tp->t_state == TCPS_SYN_SENT) {
		CTR6(KTR_CXGBE, "%s: atid %d, toep %p (0x%x), inp %p (0x%x)",
		    __func__, toep->tid, toep, toep->flags, inp,
		    inp->inp_flags);
	} else {
		CTR6(KTR_CXGBE,
		    "t4_pcb_detach: tid %d (%s), toep %p (0x%x), inp %p (0x%x)",
		    toep->tid, tcpstates[tp->t_state], toep, toep->flags, inp,
		    inp->inp_flags);
	}
#endif

	tp->t_toe = NULL;
	tp->t_flags &= ~TF_TOE;
	toep->flags &= ~TPF_ATTACHED;

	if (!(toep->flags & TPF_CPL_PENDING))
		release_offload_resources(toep);
}
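
/*
 * Annotation (not in the original source): a toepcb stays alive until both
 * of its owners let go.  TPF_ATTACHED means the kernel's TCP PCB still
 * points at it (cleared in t4_pcb_detach above); TPF_CPL_PENDING means the
 * hardware may still deliver CPL messages for the tid (cleared in
 * final_cpl_received below).  Whichever side finishes last calls
 * release_offload_resources(), so the teardown order does not matter.
 */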

/*
 * The TOE driver will not receive any more CPLs for the tid associated with
 * the toepcb; release the hold on the inpcb.
 */
void
final_cpl_received(struct toepcb *toep)
{
	struct inpcb *inp = toep->inp;

	KASSERT(inp != NULL, ("%s: inp is NULL", __func__));
	INP_WLOCK_ASSERT(inp);
	KASSERT(toep->flags & TPF_CPL_PENDING,
	    ("%s: CPL not pending already?", __func__));

	CTR6(KTR_CXGBE, "%s: tid %d, toep %p (0x%x), inp %p (0x%x)",
	    __func__, toep->tid, toep, toep->flags, inp, inp->inp_flags);

	toep->inp = NULL;
	toep->flags &= ~TPF_CPL_PENDING;

	if (!(toep->flags & TPF_ATTACHED))
		release_offload_resources(toep);

	if (!in_pcbrele_wlocked(inp))
		INP_WUNLOCK(inp);
}

void
insert_tid(struct adapter *sc, int tid, void *ctx)
{
	struct tid_info *t = &sc->tids;

	t->tid_tab[tid] = ctx;
	atomic_add_int(&t->tids_in_use, 1);
}

void *
lookup_tid(struct adapter *sc, int tid)
{
	struct tid_info *t = &sc->tids;

	return (t->tid_tab[tid]);
}

void
update_tid(struct adapter *sc, int tid, void *ctx)
{
	struct tid_info *t = &sc->tids;

	t->tid_tab[tid] = ctx;
}

void
remove_tid(struct adapter *sc, int tid)
{
	struct tid_info *t = &sc->tids;

	t->tid_tab[tid] = NULL;
	atomic_subtract_int(&t->tids_in_use, 1);
}

void
release_tid(struct adapter *sc, int tid, struct sge_wrq *ctrlq)
{
	struct wrqe *wr;
	struct cpl_tid_release *req;

	wr = alloc_wrqe(sizeof(*req), ctrlq);
	if (wr == NULL) {
		queue_tid_release(sc, tid);	/* defer */
		return;
	}
	req = wrtod(wr);

	INIT_TP_WR_MIT_CPL(req, CPL_TID_RELEASE, tid);

	t4_wrq_tx(sc, wr);
}

static void
queue_tid_release(struct adapter *sc, int tid)
{

	CXGBE_UNIMPLEMENTED("deferred tid release");
}

/*
 * What mtu_idx to use, given a 4-tuple and/or an MSS cap.
 */
int
find_best_mtu_idx(struct adapter *sc, struct in_conninfo *inc, int pmss)
{
	unsigned short *mtus = &sc->params.mtus[0];
	int i, mss, n;

	KASSERT(inc != NULL || pmss > 0,
	    ("%s: at least one of inc/pmss must be specified", __func__));

	mss = inc ? tcp_mssopt(inc) : pmss;
	if (pmss > 0 && mss > pmss)
		mss = pmss;

	/* inc may be NULL (pmss alone was given); assume IPv4 then. */
	if (inc != NULL && (inc->inc_flags & INC_ISIPV6))
		n = sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
	else
		n = sizeof(struct ip) + sizeof(struct tcphdr);

	for (i = 0; i < NMTUS - 1 && mtus[i + 1] <= mss + n; i++)
		continue;

	return (i);
}
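
/*
 * Worked example (illustrative; assumes a hypothetical MTU table): with
 * mtus[] = { 576, 1500, 9000, ... } and an IPv4 peer advertising an MSS of
 * 1460, mss + n = 1460 + 40 = 1500, so the loop stops at the largest index
 * whose MTU is still <= 1500 and the function returns the index of the
 * 1500 byte entry.  The returned mtu_idx is what gets programmed into opt0
 * via V_MSS_IDX() below.
 */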

/*
 * Determine the receive window size for a socket.
 */
u_long
select_rcv_wnd(struct socket *so)
{
	unsigned long wnd;

	SOCKBUF_LOCK_ASSERT(&so->so_rcv);

	wnd = sbspace(&so->so_rcv);
	if (wnd < MIN_RCV_WND)
		wnd = MIN_RCV_WND;

	return min(wnd, MAX_RCV_WND);
}

int
select_rcv_wscale(void)
{
	int wscale = 0;
	unsigned long space = sb_max;

	if (space > MAX_RCV_WND)
		space = MAX_RCV_WND;

	while (wscale < TCP_MAX_WINSHIFT && (TCP_MAXWIN << wscale) < space)
		wscale++;

	return (wscale);
}
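
/*
 * Worked example (illustrative): TCP_MAXWIN is 65535, so if sb_max (or
 * MAX_RCV_WND, whichever is smaller) works out to 1 MB, the loop above runs
 * until 65535 << wscale reaches 1048576; 65535 << 4 = 1048560 is still
 * short, so the function returns a window scale of 5.
 */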

extern int always_keepalive;
#define	VIID_SMACIDX(v)	(((unsigned int)(v) & 0x7f) << 1)

/*
 * socket so could be a listening socket too.
 */
uint64_t
calc_opt0(struct socket *so, struct port_info *pi, struct l2t_entry *e,
    int mtu_idx, int rscale, int rx_credits, int ulp_mode)
{
	uint64_t opt0;

	KASSERT(rx_credits <= M_RCV_BUFSIZ,
	    ("%s: rcv_bufsiz too high", __func__));

	opt0 = F_TCAM_BYPASS | V_WND_SCALE(rscale) | V_MSS_IDX(mtu_idx) |
	    V_ULP_MODE(ulp_mode) | V_RCV_BUFSIZ(rx_credits);

	if (so != NULL) {
		struct inpcb *inp = sotoinpcb(so);
		struct tcpcb *tp = intotcpcb(inp);
		int keepalive = always_keepalive ||
		    so_options_get(so) & SO_KEEPALIVE;

		opt0 |= V_NAGLE((tp->t_flags & TF_NODELAY) == 0);
		opt0 |= V_KEEP_ALIVE(keepalive != 0);
	}

	if (e != NULL)
		opt0 |= V_L2T_IDX(e->idx);

	if (pi != NULL) {
		opt0 |= V_SMAC_SEL(VIID_SMACIDX(pi->viid));
		opt0 |= V_TX_CHAN(pi->tx_chan);
	}

	return htobe64(opt0);
}

#define	FILTER_SEL_WIDTH_P_FC (3 + 1)
#define	FILTER_SEL_WIDTH_VIN_P_FC (6 + 7 + FILTER_SEL_WIDTH_P_FC)
#define	FILTER_SEL_WIDTH_TAG_P_FC (3 + FILTER_SEL_WIDTH_VIN_P_FC)
#define	FILTER_SEL_WIDTH_VLD_TAG_P_FC (1 + FILTER_SEL_WIDTH_TAG_P_FC)
#define	VLAN_NONE 0xfff
#define	FILTER_SEL_VLAN_NONE 0xffff

uint64_t
select_ntuple(struct port_info *pi, struct l2t_entry *e, uint32_t filter_mode)
{
	uint16_t viid = pi->viid;
	uint32_t ntuple = 0;

	if (filter_mode == HW_TPL_FR_MT_PR_IV_P_FC) {
		if (e->vlan == VLAN_NONE)
			ntuple |= FILTER_SEL_VLAN_NONE << FILTER_SEL_WIDTH_P_FC;
		else {
			ntuple |= e->vlan << FILTER_SEL_WIDTH_P_FC;
			ntuple |= 1 << FILTER_SEL_WIDTH_VLD_TAG_P_FC;
		}
		ntuple |= e->lport << S_PORT;
		ntuple |= IPPROTO_TCP << FILTER_SEL_WIDTH_VLD_TAG_P_FC;
	} else if (filter_mode == HW_TPL_FR_MT_PR_OV_P_FC) {
		ntuple |= G_FW_VIID_VIN(viid) << FILTER_SEL_WIDTH_P_FC;
		ntuple |= G_FW_VIID_PFN(viid) << FILTER_SEL_WIDTH_VIN_P_FC;
		ntuple |= G_FW_VIID_VIVLD(viid) << FILTER_SEL_WIDTH_TAG_P_FC;
		ntuple |= e->lport << S_PORT;
		ntuple |= IPPROTO_TCP << FILTER_SEL_WIDTH_VLD_TAG_P_FC;
	}

	if (is_t4(pi->adapter))
		return (htobe32(ntuple));
	else
		return (htobe64(V_FILTER_TUPLE(ntuple)));
}

void
set_tcpddp_ulp_mode(struct toepcb *toep)
{

	toep->ulp_mode = ULP_MODE_TCPDDP;
	toep->ddp_flags = DDP_OK;
	toep->ddp_score = DDP_LOW_SCORE;
}

int
negative_advice(int status)
{

	return (status == CPL_ERR_RTX_NEG_ADVICE ||
	    status == CPL_ERR_PERSIST_NEG_ADVICE ||
	    status == CPL_ERR_KEEPALV_NEG_ADVICE);
}
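
/*
 * Annotation (not in the original source): the three tid tables below live
 * in one malloc'd block.  tid_tab (ntids context pointers) comes first, the
 * atid union array (natids entries, threaded into a free list) follows it,
 * and the stid table (nstids listen context pointers) comes last, so alloc
 * and free must treat the block as a single allocation.
 */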

static int
alloc_tid_tabs(struct tid_info *t)
{
	size_t size;
	unsigned int i;

	size = t->ntids * sizeof(*t->tid_tab) +
	    t->natids * sizeof(*t->atid_tab) +
	    t->nstids * sizeof(*t->stid_tab);

	t->tid_tab = malloc(size, M_CXGBE, M_ZERO | M_NOWAIT);
	if (t->tid_tab == NULL)
		return (ENOMEM);

	mtx_init(&t->atid_lock, "atid lock", NULL, MTX_DEF);
	t->atid_tab = (union aopen_entry *)&t->tid_tab[t->ntids];
	t->afree = t->atid_tab;
	t->atids_in_use = 0;
	for (i = 1; i < t->natids; i++)
		t->atid_tab[i - 1].next = &t->atid_tab[i];
	t->atid_tab[t->natids - 1].next = NULL;

	mtx_init(&t->stid_lock, "stid lock", NULL, MTX_DEF);
	t->stid_tab = (struct listen_ctx **)&t->atid_tab[t->natids];
	t->stids_in_use = 0;
	TAILQ_INIT(&t->stids);
	t->nstids_free_head = t->nstids;

	atomic_store_rel_int(&t->tids_in_use, 0);

	return (0);
}

static void
free_tid_tabs(struct tid_info *t)
{
	KASSERT(t->tids_in_use == 0,
	    ("%s: %d tids still in use.", __func__, t->tids_in_use));
	KASSERT(t->atids_in_use == 0,
	    ("%s: %d atids still in use.", __func__, t->atids_in_use));
	KASSERT(t->stids_in_use == 0,
	    ("%s: %d stids still in use.", __func__, t->stids_in_use));

	free(t->tid_tab, M_CXGBE);
	t->tid_tab = NULL;

	if (mtx_initialized(&t->atid_lock))
		mtx_destroy(&t->atid_lock);
	if (mtx_initialized(&t->stid_lock))
		mtx_destroy(&t->stid_lock);
}
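
/*
 * Annotation (not in the original source): the functions below maintain the
 * CLIP (local IPv6 address) table.  The hardware must be told about every
 * local IPv6 address before it can offload connections to or from it;
 * add_lip/delete_lip issue the FW_CLIP_CMD mailbox commands, and the
 * clip_entry list mirrors what has been programmed, with a refcount per
 * address so that entries in use by offloaded connections are never
 * deleted.
 */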

static int
add_lip(struct adapter *sc, struct in6_addr *lip)
{
	struct fw_clip_cmd c;

	ASSERT_SYNCHRONIZED_OP(sc);
	/* mtx_assert(&td->clip_table_lock, MA_OWNED); */

	memset(&c, 0, sizeof(c));
	c.op_to_write = htonl(V_FW_CMD_OP(FW_CLIP_CMD) | F_FW_CMD_REQUEST |
	    F_FW_CMD_WRITE);
	c.alloc_to_len16 = htonl(F_FW_CLIP_CMD_ALLOC | FW_LEN16(c));
	c.ip_hi = *(uint64_t *)&lip->s6_addr[0];
	c.ip_lo = *(uint64_t *)&lip->s6_addr[8];

	return (-t4_wr_mbox_ns(sc, sc->mbox, &c, sizeof(c), &c));
}

static int
delete_lip(struct adapter *sc, struct in6_addr *lip)
{
	struct fw_clip_cmd c;

	ASSERT_SYNCHRONIZED_OP(sc);
	/* mtx_assert(&td->clip_table_lock, MA_OWNED); */

	memset(&c, 0, sizeof(c));
	c.op_to_write = htonl(V_FW_CMD_OP(FW_CLIP_CMD) | F_FW_CMD_REQUEST |
	    F_FW_CMD_READ);
	c.alloc_to_len16 = htonl(F_FW_CLIP_CMD_FREE | FW_LEN16(c));
	c.ip_hi = *(uint64_t *)&lip->s6_addr[0];
	c.ip_lo = *(uint64_t *)&lip->s6_addr[8];

	return (-t4_wr_mbox_ns(sc, sc->mbox, &c, sizeof(c), &c));
}

static struct clip_entry *
search_lip(struct tom_data *td, struct in6_addr *lip)
{
	struct clip_entry *ce;

	mtx_assert(&td->clip_table_lock, MA_OWNED);

	TAILQ_FOREACH(ce, &td->clip_table, link) {
		if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip))
			return (ce);
	}

	return (NULL);
}

struct clip_entry *
hold_lip(struct tom_data *td, struct in6_addr *lip)
{
	struct clip_entry *ce;

	mtx_lock(&td->clip_table_lock);
	ce = search_lip(td, lip);
	if (ce != NULL)
		ce->refcount++;
	mtx_unlock(&td->clip_table_lock);

	return (ce);
}

void
release_lip(struct tom_data *td, struct clip_entry *ce)
{

	mtx_lock(&td->clip_table_lock);
	KASSERT(search_lip(td, &ce->lip) == ce,
	    ("%s: CLIP entry %p not in CLIP table.", __func__, ce));
	KASSERT(ce->refcount > 0,
	    ("%s: CLIP entry %p has refcount 0", __func__, ce));
	--ce->refcount;
	mtx_unlock(&td->clip_table_lock);
}
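
/*
 * Annotation (not in the original source): hold_lip() and release_lip()
 * come in pairs.  An offloaded IPv6 connection takes a hold on its local
 * address (stored as toep->ce) when it is set up, and
 * release_offload_resources() drops it, which is what allows
 * update_clip_table() to delete stale addresses only once their refcount
 * reaches zero.
 */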

static void
init_clip_table(struct adapter *sc, struct tom_data *td)
{

	ASSERT_SYNCHRONIZED_OP(sc);

	mtx_init(&td->clip_table_lock, "CLIP table lock", NULL, MTX_DEF);
	TAILQ_INIT(&td->clip_table);
	td->clip_gen = -1;

	update_clip_table(sc, td);
}

static void
update_clip(struct adapter *sc, void *arg __unused)
{

	if (begin_synchronized_op(sc, NULL, HOLD_LOCK, "t4tomuc"))
		return;

	if (sc->flags & TOM_INIT_DONE)
		update_clip_table(sc, sc->tom_softc);

	end_synchronized_op(sc, LOCK_HELD);
}

static void
t4_clip_task(void *arg, int count)
{

	t4_iterate(update_clip, NULL);
}

static void
update_clip_table(struct adapter *sc, struct tom_data *td)
{
	struct in6_ifaddr *ia;
	struct in6_addr *lip, tlip;
	struct clip_head stale;
	struct clip_entry *ce, *ce_temp;
	int rc, gen = atomic_load_acq_int(&in6_ifaddr_gen);

	ASSERT_SYNCHRONIZED_OP(sc);

	IN6_IFADDR_RLOCK();
	mtx_lock(&td->clip_table_lock);

	if (gen == td->clip_gen)
		goto done;

	TAILQ_INIT(&stale);
	TAILQ_CONCAT(&stale, &td->clip_table, link);

	TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) {
		lip = &ia->ia_addr.sin6_addr;

		KASSERT(!IN6_IS_ADDR_MULTICAST(lip),
		    ("%s: mcast address in in6_ifaddr list", __func__));

		if (IN6_IS_ADDR_LOOPBACK(lip))
			continue;
		if (IN6_IS_SCOPE_EMBED(lip)) {
			/* Remove the embedded scope */
			tlip = *lip;
			lip = &tlip;
			in6_clearscope(lip);
		}
		/*
		 * XXX: how to weed out the link local address for the
		 * loopback interface?  It's fe80::1 usually (always?).
		 */

		/*
		 * If it's in the main list then we already know it's not
		 * stale.
		 */
		TAILQ_FOREACH(ce, &td->clip_table, link) {
			if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip))
				goto next;
		}

		/*
		 * If it's in the stale list we should move it to the main
		 * list.
		 */
		TAILQ_FOREACH(ce, &stale, link) {
			if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip)) {
				TAILQ_REMOVE(&stale, ce, link);
				TAILQ_INSERT_TAIL(&td->clip_table, ce, link);
				goto next;
			}
		}

		/* A new IP6 address; add it to the CLIP table. */
		ce = malloc(sizeof(*ce), M_CXGBE, M_NOWAIT);
		if (ce == NULL) {
			/* XXX: skipped; not offloadable until next update. */
			continue;
		}
		memcpy(&ce->lip, lip, sizeof(ce->lip));
		ce->refcount = 0;
		rc = add_lip(sc, lip);
		if (rc == 0)
			TAILQ_INSERT_TAIL(&td->clip_table, ce, link);
		else {
			char ip[INET6_ADDRSTRLEN];

			inet_ntop(AF_INET6, &ce->lip, &ip[0], sizeof(ip));
			log(LOG_ERR, "%s: could not add %s (%d)\n",
			    __func__, ip, rc);
			free(ce, M_CXGBE);
		}
next:
		continue;
	}

	/*
	 * Delete the stale addresses (those no longer in V_in6_ifaddrhead)
	 * that are not referenced by the driver any more.
	 */
	TAILQ_FOREACH_SAFE(ce, &stale, link, ce_temp) {
		if (ce->refcount == 0) {
			rc = delete_lip(sc, &ce->lip);
			if (rc == 0) {
				TAILQ_REMOVE(&stale, ce, link);
				free(ce, M_CXGBE);
			} else {
				char ip[INET6_ADDRSTRLEN];

				inet_ntop(AF_INET6, &ce->lip, &ip[0],
				    sizeof(ip));
				log(LOG_ERR, "%s: could not delete %s (%d)\n",
				    __func__, ip, rc);
			}
		}
	}
	/* The ones still referenced need to stay in the CLIP table. */
	TAILQ_CONCAT(&td->clip_table, &stale, link);

	td->clip_gen = gen;
done:
	mtx_unlock(&td->clip_table_lock);
	IN6_IFADDR_RUNLOCK();
}

static void
destroy_clip_table(struct adapter *sc, struct tom_data *td)
{
	struct clip_entry *ce, *ce_temp;

	if (mtx_initialized(&td->clip_table_lock)) {
		mtx_lock(&td->clip_table_lock);
		TAILQ_FOREACH_SAFE(ce, &td->clip_table, link, ce_temp) {
			KASSERT(ce->refcount == 0,
			    ("%s: CLIP entry %p still in use (%d)", __func__,
			    ce, ce->refcount));
			TAILQ_REMOVE(&td->clip_table, ce, link);
			delete_lip(sc, &ce->lip);
			free(ce, M_CXGBE);
		}
		mtx_unlock(&td->clip_table_lock);
		mtx_destroy(&td->clip_table_lock);
	}
}

static void
free_tom_data(struct adapter *sc, struct tom_data *td)
{

	ASSERT_SYNCHRONIZED_OP(sc);

	KASSERT(TAILQ_EMPTY(&td->toep_list),
	    ("%s: TOE PCB list is not empty.", __func__));
	KASSERT(td->lctx_count == 0,
	    ("%s: lctx hash table is not empty.", __func__));

	t4_uninit_l2t_cpl_handlers(sc);
	t4_uninit_cpl_io_handlers(sc);
	t4_uninit_ddp(sc, td);
	destroy_clip_table(sc, td);

	if (td->listen_mask != 0)
		hashdestroy(td->listen_hash, M_CXGBE, td->listen_mask);

	if (mtx_initialized(&td->lctx_hash_lock))
		mtx_destroy(&td->lctx_hash_lock);
	if (mtx_initialized(&td->toep_list_lock))
		mtx_destroy(&td->toep_list_lock);

	free_tid_tabs(&sc->tids);
	free(td, M_CXGBE);
}

/*
 * Ground control to Major TOM
 * Commencing countdown, engines on
 */
static int
t4_tom_activate(struct adapter *sc)
{
	struct tom_data *td;
	struct toedev *tod;
	int i, rc;

	ASSERT_SYNCHRONIZED_OP(sc);

	/* per-adapter softc for TOM */
	td = malloc(sizeof(*td), M_CXGBE, M_ZERO | M_NOWAIT);
	if (td == NULL)
		return (ENOMEM);

	/* List of TOE PCBs and associated lock */
	mtx_init(&td->toep_list_lock, "PCB list lock", NULL, MTX_DEF);
	TAILQ_INIT(&td->toep_list);

	/* Listen context */
	mtx_init(&td->lctx_hash_lock, "lctx hash lock", NULL, MTX_DEF);
	td->listen_hash = hashinit_flags(LISTEN_HASH_SIZE, M_CXGBE,
	    &td->listen_mask, HASH_NOWAIT);
	if (td->listen_hash == NULL) {
		rc = ENOMEM;
		goto done;
	}

	/* TID tables */
	rc = alloc_tid_tabs(&sc->tids);
	if (rc != 0)
		goto done;

	/* DDP page pods and CPL handlers */
	t4_init_ddp(sc, td);

	/* CLIP table for IPv6 offload */
	init_clip_table(sc, td);

	/* CPL handlers */
	t4_init_connect_cpl_handlers(sc);
	t4_init_l2t_cpl_handlers(sc);
	t4_init_listen_cpl_handlers(sc);
	t4_init_cpl_io_handlers(sc);

	/* toedev ops */
	tod = &td->tod;
	init_toedev(tod);
	tod->tod_softc = sc;
	tod->tod_connect = t4_connect;
	tod->tod_listen_start = t4_listen_start;
	tod->tod_listen_stop = t4_listen_stop;
	tod->tod_rcvd = t4_rcvd;
	tod->tod_output = t4_tod_output;
	tod->tod_send_rst = t4_send_rst;
	tod->tod_send_fin = t4_send_fin;
	tod->tod_pcb_detach = t4_pcb_detach;
	tod->tod_l2_update = t4_l2_update;
	tod->tod_syncache_added = t4_syncache_added;
	tod->tod_syncache_removed = t4_syncache_removed;
	tod->tod_syncache_respond = t4_syncache_respond;
	tod->tod_offload_socket = t4_offload_socket;

	for_each_port(sc, i)
		TOEDEV(sc->port[i]->ifp) = &td->tod;

	sc->tom_softc = td;
	sc->flags |= TOM_INIT_DONE;
	register_toedev(sc->tom_softc);

done:
	if (rc != 0)
		free_tom_data(sc, td);
	return (rc);
}
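
/*
 * Annotation (not in the original source): deactivation is the mirror image
 * of t4_tom_activate() and only succeeds when nothing is in flight: every
 * port must have IFCAP_TOE off (offload_map == 0), the active toepcb list
 * must be empty, and there must be no live listen contexts.  Otherwise it
 * returns EBUSY and the ULD stays registered.
 */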

static int
t4_tom_deactivate(struct adapter *sc)
{
	int rc = 0;
	struct tom_data *td = sc->tom_softc;

	ASSERT_SYNCHRONIZED_OP(sc);

	if (td == NULL)
		return (0);	/* XXX. KASSERT? */

	if (sc->offload_map != 0)
		return (EBUSY);	/* at least one port has IFCAP_TOE enabled */

	mtx_lock(&td->toep_list_lock);
	if (!TAILQ_EMPTY(&td->toep_list))
		rc = EBUSY;
	mtx_unlock(&td->toep_list_lock);

	mtx_lock(&td->lctx_hash_lock);
	if (td->lctx_count > 0)
		rc = EBUSY;
	mtx_unlock(&td->lctx_hash_lock);

	if (rc == 0) {
		unregister_toedev(sc->tom_softc);
		free_tom_data(sc, td);
		sc->tom_softc = NULL;
		sc->flags &= ~TOM_INIT_DONE;
	}

	return (rc);
}

static void
t4_tom_ifaddr_event(void *arg __unused, struct ifnet *ifp)
{

	atomic_add_rel_int(&in6_ifaddr_gen, 1);
	taskqueue_enqueue_timeout(taskqueue_thread, &clip_task, -hz / 4);
}

static int
t4_tom_mod_load(void)
{
	int rc;
	struct protosw *tcp_protosw, *tcp6_protosw;

	tcp_protosw = pffindproto(PF_INET, IPPROTO_TCP, SOCK_STREAM);
	if (tcp_protosw == NULL)
		return (ENOPROTOOPT);
	bcopy(tcp_protosw, &ddp_protosw, sizeof(ddp_protosw));
	bcopy(tcp_protosw->pr_usrreqs, &ddp_usrreqs, sizeof(ddp_usrreqs));
	ddp_usrreqs.pru_soreceive = t4_soreceive_ddp;
	ddp_protosw.pr_usrreqs = &ddp_usrreqs;

	tcp6_protosw = pffindproto(PF_INET6, IPPROTO_TCP, SOCK_STREAM);
	if (tcp6_protosw == NULL)
		return (ENOPROTOOPT);
	bcopy(tcp6_protosw, &ddp6_protosw, sizeof(ddp6_protosw));
	bcopy(tcp6_protosw->pr_usrreqs, &ddp6_usrreqs, sizeof(ddp6_usrreqs));
	ddp6_usrreqs.pru_soreceive = t4_soreceive_ddp;
	ddp6_protosw.pr_usrreqs = &ddp6_usrreqs;

	TIMEOUT_TASK_INIT(taskqueue_thread, &clip_task, 0, t4_clip_task, NULL);
	ifaddr_evhandler = EVENTHANDLER_REGISTER(ifaddr_event,
	    t4_tom_ifaddr_event, NULL, EVENTHANDLER_PRI_ANY);

	rc = t4_register_uld(&tom_uld_info);
	if (rc != 0)
		t4_tom_mod_unload();

	return (rc);
}

static void
tom_uninit(struct adapter *sc, void *arg __unused)
{
	if (begin_synchronized_op(sc, NULL, HOLD_LOCK, "t4tomun"))
		return;

	/* Try to free resources (works only if no port has IFCAP_TOE) */
	if (sc->flags & TOM_INIT_DONE)
		t4_deactivate_uld(sc, ULD_TOM);

	end_synchronized_op(sc, LOCK_HELD);
}

static int
t4_tom_mod_unload(void)
{
	t4_iterate(tom_uninit, NULL);

	if (t4_unregister_uld(&tom_uld_info) == EBUSY)
		return (EBUSY);

	if (ifaddr_evhandler) {
		EVENTHANDLER_DEREGISTER(ifaddr_event, ifaddr_evhandler);
		taskqueue_cancel_timeout(taskqueue_thread, &clip_task, NULL);
	}

	return (0);
}
#endif	/* TCP_OFFLOAD */

static int
t4_tom_modevent(module_t mod, int cmd, void *arg)
{
	int rc = 0;

#ifdef TCP_OFFLOAD
	switch (cmd) {
	case MOD_LOAD:
		rc = t4_tom_mod_load();
		break;

	case MOD_UNLOAD:
		rc = t4_tom_mod_unload();
		break;

	default:
		rc = EINVAL;
	}
#else
	printf("t4_tom: compiled without TCP_OFFLOAD support.\n");
	rc = EOPNOTSUPP;
#endif
	return (rc);
}

static moduledata_t t4_tom_moddata = {
	"t4_tom",
	t4_tom_modevent,
	0
};

MODULE_VERSION(t4_tom, 1);
MODULE_DEPEND(t4_tom, toecore, 1, 1, 1);
MODULE_DEPEND(t4_tom, t4nex, 1, 1, 1);
DECLARE_MODULE(t4_tom, t4_tom_moddata, SI_SUB_EXEC, SI_ORDER_ANY);
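
/*
 * Annotation (not in the original source): this module sits on top of the
 * t4nex nexus driver and the generic toecore layer (see the MODULE_DEPEND
 * lines above) and is typically brought in with "kldload t4_tom" on a
 * system where cxgbe(4) is already attached.  TCP offload is then enabled
 * per port with ifconfig's "toe" capability flag, which is what ultimately
 * drives t4_tom_activate() via the ULD activate hook.
 */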