1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2012 Chelsio Communications, Inc.
5 * All rights reserved.
6 * Written by: Navdeep Parhar <np@FreeBSD.org>
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 #include <sys/cdefs.h>
31 #include "opt_inet.h"
32 #include "opt_inet6.h"
33
34 #ifdef TCP_OFFLOAD
35 #include <sys/param.h>
36 #include <sys/types.h>
37 #include <sys/kernel.h>
38 #include <sys/ktr.h>
39 #include <sys/module.h>
40 #include <sys/protosw.h>
41 #include <sys/refcount.h>
42 #include <sys/domain.h>
43 #include <sys/fnv_hash.h>
44 #include <sys/socket.h>
45 #include <sys/socketvar.h>
46 #include <sys/sysctl.h>
47 #include <net/ethernet.h>
48 #include <net/if.h>
49 #include <net/if_types.h>
50 #include <net/if_vlan_var.h>
51 #include <net/route.h>
52 #include <net/route/nhop.h>
53 #include <netinet/in.h>
54 #include <netinet/in_fib.h>
55 #include <netinet/in_pcb.h>
56 #include <netinet/ip.h>
57 #include <netinet/ip6.h>
58 #include <netinet6/in6_fib.h>
59 #include <netinet6/scope6_var.h>
60 #include <netinet/tcp_timer.h>
61 #define TCPSTATES
62 #include <netinet/tcp_fsm.h>
63 #include <netinet/tcp_var.h>
64 #include <netinet/toecore.h>
65 #include <netinet/cc/cc.h>
66
67 #include "common/common.h"
68 #include "common/t4_msg.h"
69 #include "common/t4_regs.h"
70 #include "t4_clip.h"
71 #include "tom/t4_tom_l2t.h"
72 #include "tom/t4_tom.h"
73
74 /* stid services */
75 static int alloc_stid(struct adapter *, bool, void *);
76 static struct listen_ctx *lookup_stid(struct adapter *, int);
77 static void free_stid(struct adapter *, int , bool);
78
79 /* lctx services */
80 static struct listen_ctx *alloc_lctx(struct adapter *, struct inpcb *,
81 struct vi_info *);
82 static int free_lctx(struct adapter *, struct listen_ctx *);
83 static void hold_lctx(struct listen_ctx *);
84 static void listen_hash_add(struct adapter *, struct listen_ctx *);
85 static struct listen_ctx *listen_hash_find(struct adapter *, struct inpcb *);
86 static struct listen_ctx *listen_hash_del(struct adapter *, struct inpcb *);
87 static struct inpcb *release_lctx(struct adapter *, struct listen_ctx *);
88
89 static void send_abort_rpl_synqe(struct toedev *, struct synq_entry *, int);
90
91 static int create_server6(struct adapter *, struct listen_ctx *);
92 static int create_server(struct adapter *, struct listen_ctx *);
93
94 int
alloc_stid_tab(struct adapter * sc)95 alloc_stid_tab(struct adapter *sc)
96 {
97 struct tid_info *t = &sc->tids;
98
99 MPASS(t->nstids > 0);
100 MPASS(t->stid_tab == NULL);
101
102 t->stid_tab = malloc(t->nstids * sizeof(*t->stid_tab), M_CXGBE,
103 M_ZERO | M_NOWAIT);
104 if (t->stid_tab == NULL)
105 return (ENOMEM);
106 t->stid_bitmap = bit_alloc(t->nstids, M_CXGBE, M_NOWAIT);
107 if (t->stid_bitmap == NULL) {
108 free(t->stid_tab, M_CXGBE);
109 t->stid_tab = NULL;
110 return (ENOMEM);
111 }
112 mtx_init(&t->stid_lock, "stid lock", NULL, MTX_DEF);
113 t->stids_in_use = 0;
114
115 return (0);
116 }
117
118 void
free_stid_tab(struct adapter * sc)119 free_stid_tab(struct adapter *sc)
120 {
121 struct tid_info *t = &sc->tids;
122
123 KASSERT(t->stids_in_use == 0,
124 ("%s: %d tids still in use.", __func__, t->stids_in_use));
125
126 if (mtx_initialized(&t->stid_lock))
127 mtx_destroy(&t->stid_lock);
128 free(t->stid_tab, M_CXGBE);
129 t->stid_tab = NULL;
130 free(t->stid_bitmap, M_CXGBE);
131 t->stid_bitmap = NULL;
132 }
133
134 void
stop_stid_tab(struct adapter * sc)135 stop_stid_tab(struct adapter *sc)
136 {
137 struct tid_info *t = &sc->tids;
138 struct tom_data *td = sc->tom_softc;
139 struct listen_ctx *lctx;
140 struct synq_entry *synqe;
141 int i, ntids;
142
143 mtx_lock(&t->stid_lock);
144 t->stid_tab_stopped = true;
145 mtx_unlock(&t->stid_lock);
146
147 mtx_lock(&td->lctx_hash_lock);
148 for (i = 0; i <= td->listen_mask; i++) {
149 LIST_FOREACH(lctx, &td->listen_hash[i], link)
150 lctx->flags &= ~(LCTX_RPL_PENDING | LCTX_SETUP_IN_HW);
151 }
152 mtx_unlock(&td->lctx_hash_lock);
153
154 mtx_lock(&td->toep_list_lock);
155 TAILQ_FOREACH(synqe, &td->synqe_list, link) {
156 MPASS(sc->incarnation == synqe->incarnation);
157 MPASS(synqe->tid >= 0);
158 MPASS(synqe == lookup_tid(sc, synqe->tid));
159 /* Remove tid from the lookup table immediately. */
160 CTR(KTR_CXGBE, "%s: tid %d@%d STRANDED, removed from table",
161 __func__, synqe->tid, synqe->incarnation);
162 ntids = synqe->lctx->inp->inp_vflag & INP_IPV6 ? 2 : 1;
163 remove_tid(sc, synqe->tid, ntids);
164 #if 0
165 /* synqe->tid is stale now but left alone for debug. */
166 synqe->tid = -1;
167 #endif
168 }
169 MPASS(TAILQ_EMPTY(&td->stranded_synqe));
170 TAILQ_CONCAT(&td->stranded_synqe, &td->synqe_list, link);
171 MPASS(TAILQ_EMPTY(&td->synqe_list));
172 mtx_unlock(&td->toep_list_lock);
173 }
174
175 void
restart_stid_tab(struct adapter * sc)176 restart_stid_tab(struct adapter *sc)
177 {
178 struct tid_info *t = &sc->tids;
179 struct tom_data *td = sc->tom_softc;
180 struct listen_ctx *lctx;
181 int i;
182
183 mtx_lock(&td->lctx_hash_lock);
184 for (i = 0; i <= td->listen_mask; i++) {
185 LIST_FOREACH(lctx, &td->listen_hash[i], link) {
186 MPASS((lctx->flags & (LCTX_RPL_PENDING | LCTX_SETUP_IN_HW)) == 0);
187 lctx->flags |= LCTX_RPL_PENDING;
188 if (lctx->inp->inp_vflag & INP_IPV6)
189 create_server6(sc, lctx);
190 else
191 create_server(sc, lctx);
192 }
193 }
194 mtx_unlock(&td->lctx_hash_lock);
195
196 mtx_lock(&t->stid_lock);
197 t->stid_tab_stopped = false;
198 mtx_unlock(&t->stid_lock);
199
200 }
201
202 static int
alloc_stid(struct adapter * sc,bool isipv6,void * ctx)203 alloc_stid(struct adapter *sc, bool isipv6, void *ctx)
204 {
205 struct tid_info *t = &sc->tids;
206 const u_int n = isipv6 ? 2 : 1;
207 int stid, pair_stid;
208 u_int i;
209 ssize_t val;
210
211 mtx_lock(&t->stid_lock);
212 MPASS(t->stids_in_use <= t->nstids);
213 if (n > t->nstids - t->stids_in_use || t->stid_tab_stopped) {
214 mtx_unlock(&t->stid_lock);
215 return (-1);
216 }
217
218 stid = -1;
219 if (isipv6) {
220 /*
221 * An IPv6 server needs 2 naturally aligned stids (1 stid = 4
222 * cells) in the TCAM. We know that the start of the stid
223 * region is properly aligned already (the chip requires each
224 * region to be 128-cell aligned).
225 */
226 for (i = 0; i + 1 < t->nstids; i = roundup2(val + 1, 2)) {
227 bit_ffc_area_at(t->stid_bitmap, i, t->nstids, 2, &val);
228 if (val == -1)
229 break;
230 if ((val & 1) == 0) {
231 stid = val;
232 break;
233 }
234 }
235 } else {
236 /*
237 * An IPv4 server needs one stid without any alignment
238 * requirements. But we try extra hard to find an available
239 * stid adjacent to a used stid so that free "stid-pairs" are
240 * left intact for IPv6.
241 */
242 bit_ffc_at(t->stid_bitmap, 0, t->nstids, &val);
243 while (val != -1) {
244 if (stid == -1) {
245 /*
246 * First usable stid. Look no further if it's
247 * an ideal fit.
248 */
249 stid = val;
250 if (val & 1 || bit_test(t->stid_bitmap, val + 1))
251 break;
252 } else {
253 /*
254 * We have an unused stid already but are now
255 * looking for in-use stids because we'd prefer
256 * to grab an unused stid adjacent to one that's
257 * in use.
258 *
259 * Odd stids pair with the previous stid and
260 * even ones pair with the next stid.
261 */
262 pair_stid = val & 1 ? val - 1 : val + 1;
263 if (bit_test(t->stid_bitmap, pair_stid) == 0) {
264 stid = pair_stid;
265 break;
266 }
267 }
268 val = roundup2(val + 1, 2);
269 if (val >= t->nstids)
270 break;
271 bit_ffs_at(t->stid_bitmap, val, t->nstids, &val);
272 }
273 }
274
275 if (stid >= 0) {
276 MPASS(stid + n - 1 < t->nstids);
277 MPASS(bit_ntest(t->stid_bitmap, stid, stid + n - 1, 0));
278 bit_nset(t->stid_bitmap, stid, stid + n - 1);
279 t->stids_in_use += n;
280 t->stid_tab[stid] = ctx;
281 #ifdef INVARIANTS
282 if (n == 2) {
283 MPASS((stid & 1) == 0);
284 t->stid_tab[stid + 1] = NULL;
285 }
286 #endif
287 stid += t->stid_base;
288 }
289 mtx_unlock(&t->stid_lock);
290 return (stid);
291 }
292
293 static struct listen_ctx *
lookup_stid(struct adapter * sc,int stid)294 lookup_stid(struct adapter *sc, int stid)
295 {
296 struct tid_info *t = &sc->tids;
297
298 return (t->stid_tab[stid - t->stid_base]);
299 }
300
301 static void
free_stid(struct adapter * sc,int stid,bool isipv6)302 free_stid(struct adapter *sc, int stid, bool isipv6)
303 {
304 struct tid_info *t = &sc->tids;
305 const u_int n = isipv6 ? 2 : 1;
306
307 mtx_lock(&t->stid_lock);
308 MPASS(stid >= t->stid_base);
309 stid -= t->stid_base;
310 MPASS(stid + n - 1 < t->nstids);
311 MPASS(t->stids_in_use <= t->nstids);
312 MPASS(t->stids_in_use >= n);
313 MPASS(t->stid_tab[stid] != NULL);
314 #ifdef INVARIANTS
315 if (n == 2) {
316 MPASS((stid & 1) == 0);
317 MPASS(t->stid_tab[stid + 1] == NULL);
318 }
319 #endif
320 MPASS(bit_ntest(t->stid_bitmap, stid, stid + n - 1, 1));
321 bit_nclear(t->stid_bitmap, stid, stid + n - 1);
322 t->stid_tab[stid] = NULL;
323 t->stids_in_use -= n;
324 mtx_unlock(&t->stid_lock);
325 }
326
327 static struct listen_ctx *
alloc_lctx(struct adapter * sc,struct inpcb * inp,struct vi_info * vi)328 alloc_lctx(struct adapter *sc, struct inpcb *inp, struct vi_info *vi)
329 {
330 struct listen_ctx *lctx;
331
332 INP_WLOCK_ASSERT(inp);
333
334 lctx = malloc(sizeof(struct listen_ctx), M_CXGBE, M_NOWAIT | M_ZERO);
335 if (lctx == NULL)
336 return (NULL);
337
338 lctx->isipv6 = inp->inp_vflag & INP_IPV6;
339 lctx->stid = alloc_stid(sc, lctx->isipv6, lctx);
340 if (lctx->stid < 0) {
341 free(lctx, M_CXGBE);
342 return (NULL);
343 }
344
345 if (lctx->isipv6 &&
346 !IN6_ARE_ADDR_EQUAL(&in6addr_any, &inp->in6p_laddr)) {
347 lctx->ce = t4_get_clip_entry(sc, &inp->in6p_laddr, true);
348 if (lctx->ce == NULL) {
349 free(lctx, M_CXGBE);
350 return (NULL);
351 }
352 }
353
354 lctx->ctrlq = &sc->sge.ctrlq[vi->pi->port_id];
355 lctx->ofld_rxq = &sc->sge.ofld_rxq[vi->first_ofld_rxq];
356 refcount_init(&lctx->refcount, 1);
357
358 lctx->inp = inp;
359 lctx->vnet = inp->inp_socket->so_vnet;
360 in_pcbref(inp);
361
362 return (lctx);
363 }
364
365 /* Don't call this directly, use release_lctx instead */
366 static int
free_lctx(struct adapter * sc,struct listen_ctx * lctx)367 free_lctx(struct adapter *sc, struct listen_ctx *lctx)
368 {
369 struct inpcb *inp = lctx->inp;
370
371 INP_WLOCK_ASSERT(inp);
372 KASSERT(lctx->refcount == 0,
373 ("%s: refcount %d", __func__, lctx->refcount));
374 KASSERT(lctx->stid >= 0, ("%s: bad stid %d.", __func__, lctx->stid));
375
376 CTR4(KTR_CXGBE, "%s: stid %u, lctx %p, inp %p",
377 __func__, lctx->stid, lctx, lctx->inp);
378
379 if (lctx->ce)
380 t4_release_clip_entry(sc, lctx->ce);
381 free_stid(sc, lctx->stid, lctx->isipv6);
382 free(lctx, M_CXGBE);
383
384 return (in_pcbrele_wlocked(inp));
385 }
386
387 static void
hold_lctx(struct listen_ctx * lctx)388 hold_lctx(struct listen_ctx *lctx)
389 {
390
391 refcount_acquire(&lctx->refcount);
392 }
393
394 static inline uint32_t
listen_hashfn(void * key,u_long mask)395 listen_hashfn(void *key, u_long mask)
396 {
397
398 return (fnv_32_buf(&key, sizeof(key), FNV1_32_INIT) & mask);
399 }
400
401 /*
402 * Add a listen_ctx entry to the listen hash table.
403 */
404 static void
listen_hash_add(struct adapter * sc,struct listen_ctx * lctx)405 listen_hash_add(struct adapter *sc, struct listen_ctx *lctx)
406 {
407 struct tom_data *td = sc->tom_softc;
408 int bucket = listen_hashfn(lctx->inp, td->listen_mask);
409
410 mtx_lock(&td->lctx_hash_lock);
411 LIST_INSERT_HEAD(&td->listen_hash[bucket], lctx, link);
412 td->lctx_count++;
413 mtx_unlock(&td->lctx_hash_lock);
414 }
415
416 /*
417 * Look for the listening socket's context entry in the hash and return it.
418 */
419 static struct listen_ctx *
listen_hash_find(struct adapter * sc,struct inpcb * inp)420 listen_hash_find(struct adapter *sc, struct inpcb *inp)
421 {
422 struct tom_data *td = sc->tom_softc;
423 int bucket = listen_hashfn(inp, td->listen_mask);
424 struct listen_ctx *lctx;
425
426 mtx_lock(&td->lctx_hash_lock);
427 LIST_FOREACH(lctx, &td->listen_hash[bucket], link) {
428 if (lctx->inp == inp)
429 break;
430 }
431 mtx_unlock(&td->lctx_hash_lock);
432
433 return (lctx);
434 }
435
436 /*
437 * Removes the listen_ctx structure for inp from the hash and returns it.
438 */
439 static struct listen_ctx *
listen_hash_del(struct adapter * sc,struct inpcb * inp)440 listen_hash_del(struct adapter *sc, struct inpcb *inp)
441 {
442 struct tom_data *td = sc->tom_softc;
443 int bucket = listen_hashfn(inp, td->listen_mask);
444 struct listen_ctx *lctx, *l;
445
446 mtx_lock(&td->lctx_hash_lock);
447 LIST_FOREACH_SAFE(lctx, &td->listen_hash[bucket], link, l) {
448 if (lctx->inp == inp) {
449 LIST_REMOVE(lctx, link);
450 td->lctx_count--;
451 break;
452 }
453 }
454 mtx_unlock(&td->lctx_hash_lock);
455
456 return (lctx);
457 }
458
459 /*
460 * Releases a hold on the lctx. Must be called with the listening socket's inp
461 * locked. The inp may be freed by this function and it returns NULL to
462 * indicate this.
463 */
464 static struct inpcb *
release_lctx(struct adapter * sc,struct listen_ctx * lctx)465 release_lctx(struct adapter *sc, struct listen_ctx *lctx)
466 {
467 struct inpcb *inp = lctx->inp;
468 int inp_freed = 0;
469
470 INP_WLOCK_ASSERT(inp);
471 if (refcount_release(&lctx->refcount))
472 inp_freed = free_lctx(sc, lctx);
473
474 return (inp_freed ? NULL : inp);
475 }
476
477 static void
send_flowc_wr_synqe(struct adapter * sc,struct synq_entry * synqe)478 send_flowc_wr_synqe(struct adapter *sc, struct synq_entry *synqe)
479 {
480 struct mbuf *m = synqe->syn;
481 if_t ifp = m->m_pkthdr.rcvif;
482 struct vi_info *vi = if_getsoftc(ifp);
483 struct port_info *pi = vi->pi;
484 struct wrqe *wr;
485 struct fw_flowc_wr *flowc;
486 struct sge_ofld_txq *ofld_txq;
487 struct sge_ofld_rxq *ofld_rxq;
488 const int nparams = 6;
489 const int flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval);
490 const u_int pfvf = sc->pf << S_FW_VIID_PFN;
491
492 INP_WLOCK_ASSERT(synqe->lctx->inp);
493 MPASS((synqe->flags & TPF_FLOWC_WR_SENT) == 0);
494
495 ofld_txq = &sc->sge.ofld_txq[synqe->params.txq_idx];
496 ofld_rxq = &sc->sge.ofld_rxq[synqe->params.rxq_idx];
497
498 wr = alloc_wrqe(roundup2(flowclen, 16), &ofld_txq->wrq);
499 if (wr == NULL) {
500 /* XXX */
501 panic("%s: allocation failure.", __func__);
502 }
503 flowc = wrtod(wr);
504 memset(flowc, 0, wr->wr_len);
505 flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) |
506 V_FW_FLOWC_WR_NPARAMS(nparams));
507 flowc->flowid_len16 = htonl(V_FW_WR_LEN16(howmany(flowclen, 16)) |
508 V_FW_WR_FLOWID(synqe->tid));
509 flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN;
510 flowc->mnemval[0].val = htobe32(pfvf);
511 /* Firmware expects hw port and will translate to channel itself. */
512 flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH;
513 flowc->mnemval[1].val = htobe32(pi->hw_port);
514 flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT;
515 flowc->mnemval[2].val = htobe32(pi->hw_port);
516 flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID;
517 flowc->mnemval[3].val = htobe32(ofld_rxq->iq.abs_id);
518 flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDBUF;
519 flowc->mnemval[4].val = htobe32(512);
520 flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_MSS;
521 flowc->mnemval[5].val = htobe32(512);
522
523 synqe->flags |= TPF_FLOWC_WR_SENT;
524 t4_wrq_tx(sc, wr);
525 }
526
527 static void
send_abort_rpl_synqe(struct toedev * tod,struct synq_entry * synqe,int rst_status)528 send_abort_rpl_synqe(struct toedev *tod, struct synq_entry *synqe,
529 int rst_status)
530 {
531 struct adapter *sc = tod->tod_softc;
532 struct wrqe *wr;
533 struct cpl_abort_req *req;
534
535 INP_WLOCK_ASSERT(synqe->lctx->inp);
536
537 CTR5(KTR_CXGBE, "%s: synqe %p (0x%x), tid %d%s",
538 __func__, synqe, synqe->flags, synqe->tid,
539 synqe->flags & TPF_ABORT_SHUTDOWN ?
540 " (abort already in progress)" : "");
541 if (synqe->flags & TPF_ABORT_SHUTDOWN)
542 return; /* abort already in progress */
543 synqe->flags |= TPF_ABORT_SHUTDOWN;
544
545 if (!(synqe->flags & TPF_FLOWC_WR_SENT))
546 send_flowc_wr_synqe(sc, synqe);
547
548 wr = alloc_wrqe(sizeof(*req),
549 &sc->sge.ofld_txq[synqe->params.txq_idx].wrq);
550 if (wr == NULL) {
551 /* XXX */
552 panic("%s: allocation failure.", __func__);
553 }
554 req = wrtod(wr);
555 INIT_TP_WR_MIT_CPL(req, CPL_ABORT_REQ, synqe->tid);
556 req->rsvd0 = 0; /* don't have a snd_nxt */
557 req->rsvd1 = 1; /* no data sent yet */
558 req->cmd = rst_status;
559
560 t4_l2t_send(sc, wr, &sc->l2t->l2tab[synqe->params.l2t_idx]);
561 }
562
563 static int
create_server(struct adapter * sc,struct listen_ctx * lctx)564 create_server(struct adapter *sc, struct listen_ctx *lctx)
565 {
566 struct wrqe *wr;
567 struct cpl_pass_open_req *req;
568 struct inpcb *inp = lctx->inp;
569
570 wr = alloc_wrqe(sizeof(*req), lctx->ctrlq);
571 if (wr == NULL) {
572 log(LOG_ERR, "%s: allocation failure", __func__);
573 return (ENOMEM);
574 }
575 req = wrtod(wr);
576
577 INIT_TP_WR(req, 0);
578 OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, lctx->stid));
579 req->local_port = inp->inp_lport;
580 req->peer_port = 0;
581 req->local_ip = inp->inp_laddr.s_addr;
582 req->peer_ip = 0;
583 req->opt0 = htobe64(V_TX_CHAN(lctx->ctrlq->eq.tx_chan));
584 req->opt1 = htobe64(V_CONN_POLICY(CPL_CONN_POLICY_ASK) |
585 F_SYN_RSS_ENABLE | V_SYN_RSS_QUEUE(lctx->ofld_rxq->iq.abs_id));
586
587 t4_wrq_tx(sc, wr);
588 return (0);
589 }
590
591 static int
create_server6(struct adapter * sc,struct listen_ctx * lctx)592 create_server6(struct adapter *sc, struct listen_ctx *lctx)
593 {
594 struct wrqe *wr;
595 struct cpl_pass_open_req6 *req;
596 struct inpcb *inp = lctx->inp;
597
598 wr = alloc_wrqe(sizeof(*req), lctx->ctrlq);
599 if (wr == NULL) {
600 log(LOG_ERR, "%s: allocation failure", __func__);
601 return (ENOMEM);
602 }
603 req = wrtod(wr);
604
605 INIT_TP_WR(req, 0);
606 OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_PASS_OPEN_REQ6, lctx->stid));
607 req->local_port = inp->inp_lport;
608 req->peer_port = 0;
609 req->local_ip_hi = *(uint64_t *)&inp->in6p_laddr.s6_addr[0];
610 req->local_ip_lo = *(uint64_t *)&inp->in6p_laddr.s6_addr[8];
611 req->peer_ip_hi = 0;
612 req->peer_ip_lo = 0;
613 req->opt0 = htobe64(V_TX_CHAN(lctx->ctrlq->eq.tx_chan));
614 req->opt1 = htobe64(V_CONN_POLICY(CPL_CONN_POLICY_ASK) |
615 F_SYN_RSS_ENABLE | V_SYN_RSS_QUEUE(lctx->ofld_rxq->iq.abs_id));
616
617 t4_wrq_tx(sc, wr);
618 return (0);
619 }
620
621 static int
destroy_server(struct adapter * sc,struct listen_ctx * lctx)622 destroy_server(struct adapter *sc, struct listen_ctx *lctx)
623 {
624 struct wrqe *wr;
625 struct cpl_close_listsvr_req *req;
626
627 wr = alloc_wrqe(sizeof(*req), lctx->ctrlq);
628 if (wr == NULL) {
629 /* XXX */
630 panic("%s: allocation failure.", __func__);
631 }
632 req = wrtod(wr);
633
634 INIT_TP_WR(req, 0);
635 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ,
636 lctx->stid));
637 req->reply_ctrl = htobe16(lctx->ofld_rxq->iq.abs_id);
638 req->rsvd = htobe16(0);
639
640 t4_wrq_tx(sc, wr);
641 return (0);
642 }
643
644 /*
645 * Start a listening server by sending a passive open request to HW.
646 *
647 * Can't take adapter lock here and access to sc->flags,
648 * sc->offload_map, if_capenable are all race prone.
649 */
650 int
t4_listen_start(struct toedev * tod,struct tcpcb * tp)651 t4_listen_start(struct toedev *tod, struct tcpcb *tp)
652 {
653 struct adapter *sc = tod->tod_softc;
654 struct vi_info *vi;
655 struct port_info *pi;
656 struct inpcb *inp = tptoinpcb(tp);
657 struct listen_ctx *lctx;
658 int i, rc, v;
659 struct offload_settings settings;
660
661 INP_WLOCK_ASSERT(inp);
662
663 rw_rlock(&sc->policy_lock);
664 settings = *lookup_offload_policy(sc, OPEN_TYPE_LISTEN, NULL,
665 EVL_MAKETAG(0xfff, 0, 0), inp);
666 rw_runlock(&sc->policy_lock);
667 if (!settings.offload)
668 return (0);
669
670 /* Don't start a hardware listener for any loopback address. */
671 if (inp->inp_vflag & INP_IPV6 && IN6_IS_ADDR_LOOPBACK(&inp->in6p_laddr))
672 return (0);
673 if (!(inp->inp_vflag & INP_IPV6) &&
674 IN_LOOPBACK(ntohl(inp->inp_laddr.s_addr)))
675 return (0);
676 if (sc->flags & KERN_TLS_ON)
677 return (0);
678 #if 0
679 ADAPTER_LOCK(sc);
680 if (IS_BUSY(sc)) {
681 log(LOG_ERR, "%s: listen request ignored, %s is busy",
682 __func__, device_get_nameunit(sc->dev));
683 goto done;
684 }
685
686 KASSERT(uld_active(sc, ULD_TOM),
687 ("%s: TOM not initialized", __func__));
688 #endif
689
690 /*
691 * Find an initialized VI with IFCAP_TOE (4 or 6). We'll use the first
692 * such VI's queues to send the passive open and receive the reply to
693 * it.
694 *
695 * XXX: need a way to mark a port in use by offload. if_cxgbe should
696 * then reject any attempt to bring down such a port (and maybe reject
697 * attempts to disable IFCAP_TOE on that port too?).
698 */
699 for_each_port(sc, i) {
700 pi = sc->port[i];
701 for_each_vi(pi, v, vi) {
702 if (vi->flags & VI_INIT_DONE &&
703 if_getcapenable(vi->ifp) & IFCAP_TOE)
704 goto found;
705 }
706 }
707 goto done; /* no port that's UP with IFCAP_TOE enabled */
708 found:
709
710 if (listen_hash_find(sc, inp) != NULL)
711 goto done; /* already setup */
712
713 lctx = alloc_lctx(sc, inp, vi);
714 if (lctx == NULL) {
715 log(LOG_ERR,
716 "%s: listen request ignored, %s couldn't allocate lctx\n",
717 __func__, device_get_nameunit(sc->dev));
718 goto done;
719 }
720 listen_hash_add(sc, lctx);
721
722 CTR6(KTR_CXGBE, "%s: stid %u (%s), lctx %p, inp %p vflag 0x%x",
723 __func__, lctx->stid, tcpstates[tp->t_state], lctx, inp,
724 inp->inp_vflag);
725
726 if (inp->inp_vflag & INP_IPV6)
727 rc = create_server6(sc, lctx);
728 else
729 rc = create_server(sc, lctx);
730 if (rc != 0) {
731 log(LOG_ERR, "%s: %s failed to create hw listener: %d.\n",
732 __func__, device_get_nameunit(sc->dev), rc);
733 (void) listen_hash_del(sc, inp);
734 inp = release_lctx(sc, lctx);
735 /* can't be freed, host stack has a reference */
736 KASSERT(inp != NULL, ("%s: inp freed", __func__));
737 goto done;
738 }
739 lctx->flags |= LCTX_RPL_PENDING;
740 done:
741 #if 0
742 ADAPTER_UNLOCK(sc);
743 #endif
744 return (0);
745 }
746
747 int
t4_listen_stop(struct toedev * tod,struct tcpcb * tp)748 t4_listen_stop(struct toedev *tod, struct tcpcb *tp)
749 {
750 struct listen_ctx *lctx;
751 struct adapter *sc = tod->tod_softc;
752 struct inpcb *inp = tptoinpcb(tp);
753
754 INP_WLOCK_ASSERT(inp);
755
756 lctx = listen_hash_del(sc, inp);
757 if (lctx == NULL)
758 return (ENOENT); /* no hardware listener for this inp */
759
760 CTR4(KTR_CXGBE, "%s: stid %u, lctx %p, flags %x", __func__, lctx->stid,
761 lctx, lctx->flags);
762
763 /*
764 * If the reply to the PASS_OPEN is still pending we'll wait for it to
765 * arrive and clean up when it does.
766 */
767 if (lctx->flags & LCTX_RPL_PENDING) {
768 return (EINPROGRESS);
769 }
770
771 if (lctx->flags & LCTX_SETUP_IN_HW)
772 destroy_server(sc, lctx);
773 else
774 inp = release_lctx(sc, lctx);
775 return (0);
776 }
777
778 static inline struct synq_entry *
alloc_synqe(struct adapter * sc,struct listen_ctx * lctx,int flags)779 alloc_synqe(struct adapter *sc, struct listen_ctx *lctx, int flags)
780 {
781 struct synq_entry *synqe;
782
783 INP_RLOCK_ASSERT(lctx->inp);
784 MPASS(flags == M_WAITOK || flags == M_NOWAIT);
785
786 synqe = malloc(sizeof(*synqe), M_CXGBE, flags);
787 if (__predict_true(synqe != NULL)) {
788 synqe->flags = TPF_SYNQE;
789 synqe->incarnation = sc->incarnation;
790 refcount_init(&synqe->refcnt, 1);
791 synqe->lctx = lctx;
792 hold_lctx(lctx); /* Every synqe has a ref on its lctx. */
793 synqe->syn = NULL;
794 }
795
796 return (synqe);
797 }
798
799 static inline void
hold_synqe(struct synq_entry * synqe)800 hold_synqe(struct synq_entry *synqe)
801 {
802
803 refcount_acquire(&synqe->refcnt);
804 }
805
806 static inline struct inpcb *
release_synqe(struct adapter * sc,struct synq_entry * synqe)807 release_synqe(struct adapter *sc, struct synq_entry *synqe)
808 {
809 struct inpcb *inp;
810
811 MPASS(synqe->flags & TPF_SYNQE);
812 MPASS(synqe->lctx != NULL);
813
814 inp = synqe->lctx->inp;
815 MPASS(inp != NULL);
816 INP_WLOCK_ASSERT(inp);
817
818 if (refcount_release(&synqe->refcnt)) {
819 inp = release_lctx(sc, synqe->lctx);
820 m_freem(synqe->syn);
821 free(synqe, M_CXGBE);
822 }
823
824 return (inp);
825 }
826
827 void
t4_syncache_added(struct toedev * tod __unused,void * arg)828 t4_syncache_added(struct toedev *tod __unused, void *arg)
829 {
830 struct synq_entry *synqe = arg;
831
832 hold_synqe(synqe);
833 }
834
835 void
t4_syncache_removed(struct toedev * tod,void * arg)836 t4_syncache_removed(struct toedev *tod, void *arg)
837 {
838 struct adapter *sc = tod->tod_softc;
839 struct synq_entry *synqe = arg;
840 struct inpcb *inp = synqe->lctx->inp;
841
842 /*
843 * XXX: this is a LOR but harmless when running from the softclock.
844 */
845 INP_WLOCK(inp);
846 inp = release_synqe(sc, synqe);
847 if (inp != NULL)
848 INP_WUNLOCK(inp);
849 }
850
851 int
t4_syncache_respond(struct toedev * tod,void * arg,struct mbuf * m)852 t4_syncache_respond(struct toedev *tod, void *arg, struct mbuf *m)
853 {
854 struct synq_entry *synqe = arg;
855
856 if (atomic_fetchadd_int(&synqe->ok_to_respond, 1) == 0) {
857 struct tcpopt to;
858 struct ip *ip = mtod(m, struct ip *);
859 struct tcphdr *th;
860
861 if (ip->ip_v == IPVERSION)
862 th = (void *)(ip + 1);
863 else
864 th = (void *)((struct ip6_hdr *)ip + 1);
865 bzero(&to, sizeof(to));
866 tcp_dooptions(&to, (void *)(th + 1),
867 (th->th_off << 2) - sizeof(*th), TO_SYN);
868
869 /* save these for later */
870 synqe->iss = be32toh(th->th_seq);
871 synqe->irs = be32toh(th->th_ack) - 1;
872 synqe->ts = to.to_tsval;
873 }
874
875 m_freem(m); /* don't need this any more */
876 return (0);
877 }
878
879 static int
do_pass_open_rpl(struct sge_iq * iq,const struct rss_header * rss,struct mbuf * m)880 do_pass_open_rpl(struct sge_iq *iq, const struct rss_header *rss,
881 struct mbuf *m)
882 {
883 struct adapter *sc = iq->adapter;
884 const struct cpl_pass_open_rpl *cpl = (const void *)(rss + 1);
885 int stid = GET_TID(cpl);
886 unsigned int status = cpl->status;
887 struct listen_ctx *lctx = lookup_stid(sc, stid);
888 struct inpcb *inp = lctx->inp;
889 struct tcpcb *tp = intotcpcb(inp);
890 #ifdef INVARIANTS
891 unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
892 #endif
893
894 KASSERT(opcode == CPL_PASS_OPEN_RPL,
895 ("%s: unexpected opcode 0x%x", __func__, opcode));
896 KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
897 KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));
898
899 INP_WLOCK(inp);
900
901 CTR4(KTR_CXGBE, "%s: stid %d, status %u, flags 0x%x",
902 __func__, stid, status, lctx->flags);
903
904 lctx->flags &= ~LCTX_RPL_PENDING;
905 if (status == CPL_ERR_NONE)
906 lctx->flags |= LCTX_SETUP_IN_HW;
907 else
908 log(LOG_ERR, "listener (stid %u) failed: %d\n", stid, status);
909
910 #ifdef INVARIANTS
911 /*
912 * If the inp has been dropped (listening socket closed) then
913 * listen_stop must have run and taken the inp out of the hash.
914 */
915 if (tp->t_flags & TF_DISCONNECTED) {
916 KASSERT(listen_hash_del(sc, inp) == NULL,
917 ("%s: inp %p still in listen hash", __func__, inp));
918 }
919 #endif
920
921 if (tp->t_flags & TF_DISCONNECTED && status != CPL_ERR_NONE) {
922 if (release_lctx(sc, lctx) != NULL)
923 INP_WUNLOCK(inp);
924 return (status);
925 }
926
927 /*
928 * Listening socket stopped listening earlier and now the chip tells us
929 * it has started the hardware listener. Stop it; the lctx will be
930 * released in do_close_server_rpl.
931 */
932 if (tp->t_flags & TF_DISCONNECTED) {
933 destroy_server(sc, lctx);
934 INP_WUNLOCK(inp);
935 return (status);
936 }
937
938 /*
939 * Failed to start hardware listener. Take inp out of the hash and
940 * release our reference on it. An error message has been logged
941 * already.
942 */
943 if (status != CPL_ERR_NONE) {
944 listen_hash_del(sc, inp);
945 if (release_lctx(sc, lctx) != NULL)
946 INP_WUNLOCK(inp);
947 return (status);
948 }
949
950 /* hardware listener open for business */
951
952 INP_WUNLOCK(inp);
953 return (status);
954 }
955
956 static int
do_close_server_rpl(struct sge_iq * iq,const struct rss_header * rss,struct mbuf * m)957 do_close_server_rpl(struct sge_iq *iq, const struct rss_header *rss,
958 struct mbuf *m)
959 {
960 struct adapter *sc = iq->adapter;
961 const struct cpl_close_listsvr_rpl *cpl = (const void *)(rss + 1);
962 int stid = GET_TID(cpl);
963 unsigned int status = cpl->status;
964 struct listen_ctx *lctx = lookup_stid(sc, stid);
965 struct inpcb *inp = lctx->inp;
966 #ifdef INVARIANTS
967 unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
968 #endif
969
970 KASSERT(opcode == CPL_CLOSE_LISTSRV_RPL,
971 ("%s: unexpected opcode 0x%x", __func__, opcode));
972 KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
973 KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));
974
975 CTR3(KTR_CXGBE, "%s: stid %u, status %u", __func__, stid, status);
976
977 if (status != CPL_ERR_NONE) {
978 log(LOG_ERR, "%s: failed (%u) to close listener for stid %u\n",
979 __func__, status, stid);
980 return (status);
981 }
982
983 INP_WLOCK(inp);
984 inp = release_lctx(sc, lctx);
985 if (inp != NULL)
986 INP_WUNLOCK(inp);
987
988 return (status);
989 }
990
991 static void
done_with_synqe(struct adapter * sc,struct synq_entry * synqe)992 done_with_synqe(struct adapter *sc, struct synq_entry *synqe)
993 {
994 struct tom_data *td = sc->tom_softc;
995 struct listen_ctx *lctx = synqe->lctx;
996 struct inpcb *inp = lctx->inp;
997 struct l2t_entry *e = &sc->l2t->l2tab[synqe->params.l2t_idx];
998 int ntids;
999
1000 INP_WLOCK_ASSERT(inp);
1001
1002 if (synqe->tid != -1) {
1003 ntids = inp->inp_vflag & INP_IPV6 ? 2 : 1;
1004 remove_tid(sc, synqe->tid, ntids);
1005 mtx_lock(&td->toep_list_lock);
1006 TAILQ_REMOVE(&td->synqe_list, synqe, link);
1007 mtx_unlock(&td->toep_list_lock);
1008 release_tid(sc, synqe->tid, lctx->ctrlq);
1009 }
1010 t4_l2t_release(e);
1011 inp = release_synqe(sc, synqe);
1012 if (inp)
1013 INP_WUNLOCK(inp);
1014 }
1015
1016 void
synack_failure_cleanup(struct adapter * sc,struct synq_entry * synqe)1017 synack_failure_cleanup(struct adapter *sc, struct synq_entry *synqe)
1018 {
1019 INP_WLOCK(synqe->lctx->inp);
1020 done_with_synqe(sc, synqe);
1021 }
1022
1023 int
do_abort_req_synqe(struct sge_iq * iq,const struct rss_header * rss,struct mbuf * m)1024 do_abort_req_synqe(struct sge_iq *iq, const struct rss_header *rss,
1025 struct mbuf *m)
1026 {
1027 struct adapter *sc = iq->adapter;
1028 const struct cpl_abort_req_rss *cpl = (const void *)(rss + 1);
1029 unsigned int tid = GET_TID(cpl);
1030 struct synq_entry *synqe = lookup_tid(sc, tid);
1031 struct listen_ctx *lctx = synqe->lctx;
1032 struct inpcb *inp = lctx->inp;
1033 struct sge_ofld_txq *ofld_txq;
1034 #ifdef INVARIANTS
1035 unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
1036 #endif
1037
1038 KASSERT(opcode == CPL_ABORT_REQ_RSS,
1039 ("%s: unexpected opcode 0x%x", __func__, opcode));
1040 KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
1041 KASSERT(synqe->tid == tid, ("%s: toep tid mismatch", __func__));
1042
1043 CTR6(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x), lctx %p, status %d",
1044 __func__, tid, synqe, synqe->flags, synqe->lctx, cpl->status);
1045
1046 if (negative_advice(cpl->status))
1047 return (0); /* Ignore negative advice */
1048
1049 INP_WLOCK(inp);
1050
1051 ofld_txq = &sc->sge.ofld_txq[synqe->params.txq_idx];
1052
1053 if (!(synqe->flags & TPF_FLOWC_WR_SENT))
1054 send_flowc_wr_synqe(sc, synqe);
1055
1056 /*
1057 * If we'd initiated an abort earlier the reply to it is responsible for
1058 * cleaning up resources. Otherwise we tear everything down right here
1059 * right now. We owe the T4 a CPL_ABORT_RPL no matter what.
1060 */
1061 if (synqe->flags & TPF_ABORT_SHUTDOWN) {
1062 INP_WUNLOCK(inp);
1063 goto done;
1064 }
1065
1066 done_with_synqe(sc, synqe);
1067 /* inp lock released by done_with_synqe */
1068 done:
1069 send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST);
1070 return (0);
1071 }
1072
1073 int
do_abort_rpl_synqe(struct sge_iq * iq,const struct rss_header * rss,struct mbuf * m)1074 do_abort_rpl_synqe(struct sge_iq *iq, const struct rss_header *rss,
1075 struct mbuf *m)
1076 {
1077 struct adapter *sc = iq->adapter;
1078 const struct cpl_abort_rpl_rss *cpl = (const void *)(rss + 1);
1079 unsigned int tid = GET_TID(cpl);
1080 struct synq_entry *synqe = lookup_tid(sc, tid);
1081 struct listen_ctx *lctx = synqe->lctx;
1082 struct inpcb *inp = lctx->inp;
1083 #ifdef INVARIANTS
1084 unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
1085 #endif
1086
1087 KASSERT(opcode == CPL_ABORT_RPL_RSS,
1088 ("%s: unexpected opcode 0x%x", __func__, opcode));
1089 KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
1090 KASSERT(synqe->tid == tid, ("%s: toep tid mismatch", __func__));
1091
1092 CTR6(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x), lctx %p, status %d",
1093 __func__, tid, synqe, synqe->flags, synqe->lctx, cpl->status);
1094
1095 INP_WLOCK(inp);
1096 KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
1097 ("%s: wasn't expecting abort reply for synqe %p (0x%x)",
1098 __func__, synqe, synqe->flags));
1099
1100 done_with_synqe(sc, synqe);
1101 /* inp lock released by done_with_synqe */
1102
1103 return (0);
1104 }
1105
1106 void
t4_offload_socket(struct toedev * tod,void * arg,struct socket * so)1107 t4_offload_socket(struct toedev *tod, void *arg, struct socket *so)
1108 {
1109 struct adapter *sc = tod->tod_softc;
1110 struct tom_data *td = sc->tom_softc;
1111 struct synq_entry *synqe = arg;
1112 struct inpcb *inp = sotoinpcb(so);
1113 struct toepcb *toep = synqe->toep;
1114
1115 NET_EPOCH_ASSERT(); /* prevents bad race with accept() */
1116 INP_WLOCK_ASSERT(inp);
1117 KASSERT(synqe->flags & TPF_SYNQE,
1118 ("%s: %p not a synq_entry?", __func__, arg));
1119 MPASS(toep->tid == synqe->tid);
1120
1121 offload_socket(so, toep);
1122 make_established(toep, synqe->iss, synqe->irs, synqe->tcp_opt);
1123 toep->flags |= TPF_CPL_PENDING;
1124 update_tid(sc, synqe->tid, toep);
1125 synqe->flags |= TPF_SYNQE_EXPANDED;
1126 mtx_lock(&td->toep_list_lock);
1127 /* Remove synqe from its list and add the TOE PCB to the active list. */
1128 TAILQ_REMOVE(&td->synqe_list, synqe, link);
1129 TAILQ_INSERT_TAIL(&td->toep_list, toep, link);
1130 toep->flags |= TPF_IN_TOEP_LIST;
1131 mtx_unlock(&td->toep_list_lock);
1132 inp->inp_flowtype = (inp->inp_vflag & INP_IPV6) ?
1133 M_HASHTYPE_RSS_TCP_IPV6 : M_HASHTYPE_RSS_TCP_IPV4;
1134 inp->inp_flowid = synqe->rss_hash;
1135 }
1136
1137 static void
t4opt_to_tcpopt(const struct tcp_options * t4opt,struct tcpopt * to)1138 t4opt_to_tcpopt(const struct tcp_options *t4opt, struct tcpopt *to)
1139 {
1140 bzero(to, sizeof(*to));
1141
1142 if (t4opt->mss) {
1143 to->to_flags |= TOF_MSS;
1144 to->to_mss = be16toh(t4opt->mss);
1145 }
1146
1147 if (t4opt->wsf > 0 && t4opt->wsf < 15) {
1148 to->to_flags |= TOF_SCALE;
1149 to->to_wscale = t4opt->wsf;
1150 }
1151
1152 if (t4opt->tstamp)
1153 to->to_flags |= TOF_TS;
1154
1155 if (t4opt->sack)
1156 to->to_flags |= TOF_SACKPERM;
1157 }
1158
1159 static bool
encapsulated_syn(struct adapter * sc,const struct cpl_pass_accept_req * cpl)1160 encapsulated_syn(struct adapter *sc, const struct cpl_pass_accept_req *cpl)
1161 {
1162 u_int hlen = be32toh(cpl->hdr_len);
1163
1164 if (chip_id(sc) >= CHELSIO_T6)
1165 return (G_T6_ETH_HDR_LEN(hlen) > sizeof(struct ether_vlan_header));
1166 else
1167 return (G_ETH_HDR_LEN(hlen) > sizeof(struct ether_vlan_header));
1168 }
1169
1170 static void
pass_accept_req_to_protohdrs(struct adapter * sc,const struct mbuf * m,struct in_conninfo * inc,struct tcphdr * th,uint8_t * iptos)1171 pass_accept_req_to_protohdrs(struct adapter *sc, const struct mbuf *m,
1172 struct in_conninfo *inc, struct tcphdr *th, uint8_t *iptos)
1173 {
1174 const struct cpl_pass_accept_req *cpl = mtod(m, const void *);
1175 const struct ether_header *eh;
1176 unsigned int hlen = be32toh(cpl->hdr_len);
1177 uintptr_t l3hdr;
1178 const struct tcphdr *tcp;
1179
1180 eh = (const void *)(cpl + 1);
1181 if (chip_id(sc) >= CHELSIO_T6) {
1182 l3hdr = ((uintptr_t)eh + G_T6_ETH_HDR_LEN(hlen));
1183 tcp = (const void *)(l3hdr + G_T6_IP_HDR_LEN(hlen));
1184 } else {
1185 l3hdr = ((uintptr_t)eh + G_ETH_HDR_LEN(hlen));
1186 tcp = (const void *)(l3hdr + G_IP_HDR_LEN(hlen));
1187 }
1188
1189 /* extract TOS (DiffServ + ECN) byte for AccECN */
1190 if (iptos) {
1191 if (((struct ip *)l3hdr)->ip_v == IPVERSION) {
1192 const struct ip *ip = (const void *)l3hdr;
1193 *iptos = ip->ip_tos;
1194 }
1195 #ifdef INET6
1196 else
1197 if (((struct ip *)l3hdr)->ip_v == (IPV6_VERSION >> 4)) {
1198 const struct ip6_hdr *ip6 = (const void *)l3hdr;
1199 *iptos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
1200 }
1201 #endif /* INET */
1202 }
1203
1204 if (inc) {
1205 bzero(inc, sizeof(*inc));
1206 inc->inc_fport = tcp->th_sport;
1207 inc->inc_lport = tcp->th_dport;
1208 if (((struct ip *)l3hdr)->ip_v == IPVERSION) {
1209 const struct ip *ip = (const void *)l3hdr;
1210
1211 inc->inc_faddr = ip->ip_src;
1212 inc->inc_laddr = ip->ip_dst;
1213 } else {
1214 const struct ip6_hdr *ip6 = (const void *)l3hdr;
1215
1216 inc->inc_flags |= INC_ISIPV6;
1217 inc->inc6_faddr = ip6->ip6_src;
1218 inc->inc6_laddr = ip6->ip6_dst;
1219 }
1220 }
1221
1222 if (th) {
1223 bcopy(tcp, th, sizeof(*th));
1224 tcp_fields_to_host(th); /* just like tcp_input */
1225 }
1226 }
1227
1228 static struct l2t_entry *
get_l2te_for_nexthop(struct port_info * pi,if_t ifp,struct in_conninfo * inc)1229 get_l2te_for_nexthop(struct port_info *pi, if_t ifp,
1230 struct in_conninfo *inc)
1231 {
1232 struct l2t_entry *e;
1233 struct sockaddr_in6 sin6;
1234 struct sockaddr *dst = (void *)&sin6;
1235 struct nhop_object *nh;
1236
1237 if (inc->inc_flags & INC_ISIPV6) {
1238 bzero(dst, sizeof(struct sockaddr_in6));
1239 dst->sa_len = sizeof(struct sockaddr_in6);
1240 dst->sa_family = AF_INET6;
1241
1242 if (IN6_IS_ADDR_LINKLOCAL(&inc->inc6_laddr)) {
1243 /* no need for route lookup */
1244 e = t4_l2t_get(pi, ifp, dst);
1245 return (e);
1246 }
1247
1248 nh = fib6_lookup(RT_DEFAULT_FIB, &inc->inc6_faddr, 0, NHR_NONE, 0);
1249 if (nh == NULL)
1250 return (NULL);
1251 if (nh->nh_ifp != ifp)
1252 return (NULL);
1253 if (nh->nh_flags & NHF_GATEWAY)
1254 ((struct sockaddr_in6 *)dst)->sin6_addr = nh->gw6_sa.sin6_addr;
1255 else
1256 ((struct sockaddr_in6 *)dst)->sin6_addr = inc->inc6_faddr;
1257 } else {
1258 dst->sa_len = sizeof(struct sockaddr_in);
1259 dst->sa_family = AF_INET;
1260
1261 nh = fib4_lookup(RT_DEFAULT_FIB, inc->inc_faddr, 0, NHR_NONE, 0);
1262 if (nh == NULL)
1263 return (NULL);
1264 if (nh->nh_ifp != ifp)
1265 return (NULL);
1266 if (nh->nh_flags & NHF_GATEWAY)
1267 if (nh->gw_sa.sa_family == AF_INET)
1268 ((struct sockaddr_in *)dst)->sin_addr = nh->gw4_sa.sin_addr;
1269 else
1270 *((struct sockaddr_in6 *)dst) = nh->gw6_sa;
1271 else
1272 ((struct sockaddr_in *)dst)->sin_addr = inc->inc_faddr;
1273 }
1274
1275 e = t4_l2t_get(pi, ifp, dst);
1276 return (e);
1277 }
1278
1279 static int
send_synack(struct adapter * sc,struct synq_entry * synqe,uint64_t opt0,uint32_t opt2,int tid)1280 send_synack(struct adapter *sc, struct synq_entry *synqe, uint64_t opt0,
1281 uint32_t opt2, int tid)
1282 {
1283 struct wrqe *wr;
1284 struct cpl_pass_accept_rpl *rpl;
1285 struct l2t_entry *e = &sc->l2t->l2tab[synqe->params.l2t_idx];
1286
1287 wr = alloc_wrqe(is_t4(sc) ? sizeof(struct cpl_pass_accept_rpl) :
1288 sizeof(struct cpl_t5_pass_accept_rpl), &sc->sge.ctrlq[0]);
1289 if (wr == NULL)
1290 return (ENOMEM);
1291 rpl = wrtod(wr);
1292
1293 if (is_t4(sc))
1294 INIT_TP_WR_MIT_CPL(rpl, CPL_PASS_ACCEPT_RPL, tid);
1295 else {
1296 struct cpl_t5_pass_accept_rpl *rpl5 = (void *)rpl;
1297
1298 INIT_TP_WR_MIT_CPL(rpl5, CPL_PASS_ACCEPT_RPL, tid);
1299 rpl5->iss = htobe32(synqe->iss);
1300 }
1301 rpl->opt0 = opt0;
1302 rpl->opt2 = opt2;
1303
1304 return (t4_l2t_send(sc, wr, e));
1305 }
1306
/*
 * Bail out of do_pass_accept_req: record the current source line as the
 * reject reason and jump to that function's "reject" label.
 *
 * If 'tunnel' is false the SYN mbuf is freed first and m is set to NULL.  If
 * it is true, m is left alone so that the reject path can still use it.
 *
 * Only usable where 'm', 'reject_reason' and the 'reject' label are in scope.
 */
#define REJECT_PASS_ACCEPT_REQ(tunnel)	do { \
	if (!tunnel) { \
		m_freem(m); \
		m = NULL; \
	} \
	reject_reason = __LINE__; \
	goto reject; \
} while (0)

/*
 * The context associated with a tid entry via insert_tid could be a synq_entry
 * or a toepcb.  The only way CPL handlers can tell is via a bit in these flags.
 * The assertion below guarantees that 'flags' sits at the same offset in both
 * structures.
 */
CTASSERT(offsetof(struct toepcb, flags) == offsetof(struct synq_entry, flags));
1321
/*
 * Incoming SYN on a listening socket.
 *
 * Handler for CPL_PASS_ACCEPT_REQ.  Runs a series of checks to decide whether
 * the connection can be offloaded.  If so, a synq entry is allocated, the SYN
 * is added to the syncache, and a SYN|ACK is sent via the hardware.
 *
 * Every failed check leaves through REJECT_PASS_ACCEPT_REQ().  With an
 * argument of true the SYN mbuf is kept and the reject path passes it to
 * if_input() on hw_ifp; with false the mbuf is freed and the SYN is dropped.
 *
 * Returns 0 if the SYN|ACK was sent, otherwise the source line number of the
 * check that rejected the SYN.
 *
 * XXX: Every use of ifp in this routine has a bad race with up/down, toe/-toe,
 * etc.
 */
static int
do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	struct tom_data *td = sc->tom_softc;
	struct toedev *tod;
	const struct cpl_pass_accept_req *cpl = mtod(m, const void *);
	unsigned int stid = G_PASS_OPEN_TID(be32toh(cpl->tos_stid));
	unsigned int tid = GET_TID(cpl);
	struct listen_ctx *lctx = lookup_stid(sc, stid);
	struct inpcb *inp;
	struct tcpcb *tp;
	struct socket *so;
	struct in_conninfo inc;
	struct tcphdr th;
	struct tcpopt to;
	struct port_info *pi;
	struct vi_info *vi;
	if_t hw_ifp, ifp;
	struct l2t_entry *e = NULL;
	struct synq_entry *synqe = NULL;
	int reject_reason, v, ntids;
	uint16_t vid, l2info;
	struct epoch_tracker et;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif
	struct offload_settings settings;
	uint8_t iptos;

	KASSERT(opcode == CPL_PASS_ACCEPT_REQ,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));

	CTR4(KTR_CXGBE, "%s: stid %u, tid %u, lctx %p", __func__, stid, tid,
	    lctx);

	/*
	 * Figure out the port the SYN arrived on.  We'll look for an exact VI
	 * match in a bit but in case we don't find any we'll use the main VI as
	 * the incoming ifnet.
	 */
	l2info = be16toh(cpl->l2info);
	pi = sc->port[G_SYN_INTF(l2info)];
	hw_ifp = pi->vi[0].ifp;
	m->m_pkthdr.rcvif = hw_ifp;

	CURVNET_SET(lctx->vnet);	/* before any potential REJECT */

	/*
	 * If VXLAN/NVGRE parsing is enabled then SYNs in the inner traffic will
	 * also hit the listener.  We don't want to offload those.
	 */
	if (encapsulated_syn(sc, cpl)) {
		REJECT_PASS_ACCEPT_REQ(true);
	}

	/*
	 * Use the MAC index to lookup the associated VI.  If this SYN didn't
	 * match a perfect MAC filter, punt.
	 */
	if (!(l2info & F_SYN_XACT_MATCH)) {
		REJECT_PASS_ACCEPT_REQ(true);
	}
	for_each_vi(pi, v, vi) {
		if (vi->xact_addr_filt == G_SYN_MAC_IDX(l2info))
			goto found;
	}
	/* No VI on this port owns the MAC filter that matched. */
	REJECT_PASS_ACCEPT_REQ(true);
found:
	hw_ifp = vi->ifp;	/* the cxgbe ifnet */
	m->m_pkthdr.rcvif = hw_ifp;
	tod = TOEDEV(hw_ifp);

	/*
	 * Don't offload if the peer requested a TCP option that's not known to
	 * the silicon.  Send the SYN to the kernel instead.
	 */
	if (__predict_false(cpl->tcpopt.unknown))
		REJECT_PASS_ACCEPT_REQ(true);

	/*
	 * Figure out if there is a pseudo interface (vlan, lagg, etc.)
	 * involved.  Don't offload if the SYN had a VLAN tag and the vid
	 * doesn't match anything on this interface.
	 *
	 * XXX: lagg support, lagg + vlan support.
	 */
	vid = EVL_VLANOFTAG(be16toh(cpl->vlan));
	if (vid != 0xfff && vid != 0) {
		ifp = VLAN_DEVAT(hw_ifp, vid);
		if (ifp == NULL)
			REJECT_PASS_ACCEPT_REQ(true);
	} else
		ifp = hw_ifp;

	/*
	 * Don't offload if the ifnet that the SYN came in on is not in the same
	 * vnet as the listening socket.
	 */
	if (lctx->vnet != if_getvnet(ifp))
		REJECT_PASS_ACCEPT_REQ(true);

	pass_accept_req_to_protohdrs(sc, m, &inc, &th, &iptos);
	if (inc.inc_flags & INC_ISIPV6) {

		/* Don't offload if the ifcap isn't enabled */
		if ((if_getcapenable(ifp) & IFCAP_TOE6) == 0)
			REJECT_PASS_ACCEPT_REQ(true);

		/*
		 * SYN must be directed to an IP6 address on this ifnet.  This
		 * is more restrictive than in6_localip.
		 *
		 * The epoch entered here stays entered on the way down; every
		 * reject from this point on exits it first.
		 */
		NET_EPOCH_ENTER(et);
		if (!in6_ifhasaddr(ifp, &inc.inc6_laddr)) {
			NET_EPOCH_EXIT(et);
			REJECT_PASS_ACCEPT_REQ(true);
		}

		ntids = 2;
	} else {

		/* Don't offload if the ifcap isn't enabled */
		if ((if_getcapenable(ifp) & IFCAP_TOE4) == 0)
			REJECT_PASS_ACCEPT_REQ(true);

		/*
		 * SYN must be directed to an IP address on this ifnet.  This
		 * is more restrictive than in_localip.
		 *
		 * The epoch entered here stays entered on the way down; every
		 * reject from this point on exits it first.
		 */
		NET_EPOCH_ENTER(et);
		if (!in_ifhasaddr(ifp, inc.inc_laddr)) {
			NET_EPOCH_EXIT(et);
			REJECT_PASS_ACCEPT_REQ(true);
		}

		ntids = 1;
	}

	/* A non-NULL e is released again by the reject path. */
	e = get_l2te_for_nexthop(pi, ifp, &inc);
	if (e == NULL) {
		NET_EPOCH_EXIT(et);
		REJECT_PASS_ACCEPT_REQ(true);
	}

	/* Don't offload if the 4-tuple is already in use */
	if (toe_4tuple_check(&inc, &th, ifp) != 0) {
		NET_EPOCH_EXIT(et);
		REJECT_PASS_ACCEPT_REQ(false);
	}

	inp = lctx->inp;		/* listening socket, not owned by TOE */
	tp = intotcpcb(inp);
	INP_RLOCK(inp);

	/* Don't offload if the listening socket has closed */
	if (__predict_false(tp->t_flags & TF_DISCONNECTED)) {
		INP_RUNLOCK(inp);
		NET_EPOCH_EXIT(et);
		REJECT_PASS_ACCEPT_REQ(false);
	}
	so = inp->inp_socket;
	/* Copy the matching policy's settings while the policy lock is held. */
	rw_rlock(&sc->policy_lock);
	settings = *lookup_offload_policy(sc, OPEN_TYPE_PASSIVE, m,
	    EVL_MAKETAG(0xfff, 0, 0), inp);
	rw_runlock(&sc->policy_lock);
	if (!settings.offload) {
		INP_RUNLOCK(inp);
		NET_EPOCH_EXIT(et);
		REJECT_PASS_ACCEPT_REQ(true);	/* Rejected by COP. */
	}

	synqe = alloc_synqe(sc, lctx, M_NOWAIT);
	if (synqe == NULL) {
		INP_RUNLOCK(inp);
		NET_EPOCH_EXIT(et);
		REJECT_PASS_ACCEPT_REQ(true);
	}
	MPASS(rss->hash_type == RSS_HASH_TCP);
	synqe->rss_hash = be32toh(rss->hash_val);
	/* Cleared here and checked again after toe_syncache_add() below. */
	atomic_store_int(&synqe->ok_to_respond, 0);

	init_conn_params(vi, &settings, &inc, so, &cpl->tcpopt, e->idx,
	    &synqe->params);
	if (sc->params.tid_qid_sel_mask != 0)
		update_tid_qid_sel(vi, &synqe->params, tid);

	/*
	 * If all goes well t4_syncache_respond will get called during
	 * syncache_add.  Note that syncache_add releases the pcb lock.
	 */
	t4opt_to_tcpopt(&cpl->tcpopt, &to);
	toe_syncache_add(&inc, &to, &th, inp, tod, synqe, iptos);

	if (atomic_load_int(&synqe->ok_to_respond) > 0) {
		uint64_t opt0;
		uint32_t opt2;

		opt0 = calc_options0(vi, &synqe->params);
		opt2 = calc_options2(vi, &synqe->params);

		insert_tid(sc, tid, synqe, ntids);
		synqe->tid = tid;
		/* The synqe keeps the SYN; m is cleared so it isn't reused. */
		synqe->syn = m;
		m = NULL;
		mtx_lock(&td->toep_list_lock);
		TAILQ_INSERT_TAIL(&td->synqe_list, synqe, link);
		mtx_unlock(&td->toep_list_lock);

		if (send_synack(sc, synqe, opt0, opt2, tid) != 0) {
			/*
			 * Undo the steps above and take the SYN back so that
			 * the reject path can pass it up the stack.
			 */
			remove_tid(sc, tid, ntids);
			m = synqe->syn;
			synqe->syn = NULL;
			mtx_lock(&td->toep_list_lock);
			TAILQ_REMOVE(&td->synqe_list, synqe, link);
			mtx_unlock(&td->toep_list_lock);
			NET_EPOCH_EXIT(et);
			REJECT_PASS_ACCEPT_REQ(true);
		}
		CTR6(KTR_CXGBE,
		    "%s: stid %u, tid %u, synqe %p, opt0 %#016lx, opt2 %#08x",
		    __func__, stid, tid, synqe, be64toh(opt0), be32toh(opt2));
	} else {
		NET_EPOCH_EXIT(et);
		REJECT_PASS_ACCEPT_REQ(false);
	}

	NET_EPOCH_EXIT(et);
	CURVNET_RESTORE();
	return (0);
reject:
	CURVNET_RESTORE();
	CTR4(KTR_CXGBE, "%s: stid %u, tid %u, REJECT (%d)", __func__, stid, tid,
	    reject_reason);

	/* Give back whatever had been acquired by the time of the reject. */
	if (e)
		t4_l2t_release(e);
	release_tid(sc, tid, lctx->ctrlq);
	if (synqe) {
		inp = synqe->lctx->inp;
		INP_WLOCK(inp);
		inp = release_synqe(sc, synqe);
		if (inp)
			INP_WUNLOCK(inp);
	}

	if (m) {
		/*
		 * The connection request hit a TOE listener but is being passed
		 * on to the kernel sw stack instead of getting offloaded.
		 *
		 * The CPL header is trimmed off the front and the checksum
		 * flags are set before the frame is passed to if_input().
		 */
		m_adj(m, sizeof(*cpl));
		m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID |
		    CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
		m->m_pkthdr.csum_data = 0xffff;
		if_input(hw_ifp, m);
	}

	return (reject_reason);
}
1590
1591 static void
synqe_to_protohdrs(struct adapter * sc,struct synq_entry * synqe,const struct cpl_pass_establish * cpl,struct in_conninfo * inc,struct tcphdr * th,struct tcpopt * to)1592 synqe_to_protohdrs(struct adapter *sc, struct synq_entry *synqe,
1593 const struct cpl_pass_establish *cpl, struct in_conninfo *inc,
1594 struct tcphdr *th, struct tcpopt *to)
1595 {
1596 uint16_t tcp_opt = be16toh(cpl->tcp_opt);
1597 uint8_t iptos;
1598
1599 /* start off with the original SYN */
1600 pass_accept_req_to_protohdrs(sc, synqe->syn, inc, th, &iptos);
1601
1602 /* modify parts to make it look like the ACK to our SYN|ACK */
1603 tcp_set_flags(th, TH_ACK);
1604 th->th_ack = synqe->iss + 1;
1605 th->th_seq = be32toh(cpl->rcv_isn);
1606 bzero(to, sizeof(*to));
1607 if (G_TCPOPT_TSTAMP(tcp_opt)) {
1608 to->to_flags |= TOF_TS;
1609 to->to_tsecr = synqe->ts;
1610 }
1611 }
1612
/*
 * Handler for CPL_PASS_ESTABLISH: the handshake for a synq entry has
 * completed in hardware.
 *
 * A toepcb is allocated and set up from the synqe's parameters, headers that
 * look like the final ACK are fabricated, and toe_syncache_expand() is asked
 * to produce the new socket.  On success the new socket is offloaded (if that
 * has not happened already) and the synqe is released.
 *
 * On failure the connection is aborted via send_abort_rpl_synqe(): with
 * CPL_ABORT_NO_RST if toe_syncache_expand() returned a negative value, and
 * with CPL_ABORT_SEND_RST in all other failure cases (the "reset" label).
 *
 * Always returns 0.
 */
static int
do_pass_establish(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	struct vi_info *vi;
	if_t ifp;
	const struct cpl_pass_establish *cpl = (const void *)(rss + 1);
#if defined(KTR) || defined(INVARIANTS)
	unsigned int stid = G_PASS_OPEN_TID(be32toh(cpl->tos_stid));
#endif
	unsigned int tid = GET_TID(cpl);
	struct synq_entry *synqe = lookup_tid(sc, tid);
	struct listen_ctx *lctx = synqe->lctx;
	struct inpcb *inp = lctx->inp, *new_inp;
	struct tcpcb *tp = intotcpcb(inp);
	struct socket *so;
	struct tcphdr th;
	struct tcpopt to;
	struct in_conninfo inc;
	struct toepcb *toep;
	struct epoch_tracker et;
	int rstreason;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	KASSERT(opcode == CPL_PASS_ESTABLISH,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
	KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));
	KASSERT(synqe->flags & TPF_SYNQE,
	    ("%s: tid %u (ctx %p) not a synqe", __func__, tid, synqe));

	CURVNET_SET(lctx->vnet);
	NET_EPOCH_ENTER(et);	/* for syncache_expand */
	INP_WLOCK(inp);

	CTR6(KTR_CXGBE,
	    "%s: stid %u, tid %u, synqe %p (0x%x), inp_flags 0x%x",
	    __func__, stid, tid, synqe, synqe->flags, inp->inp_flags);

	/* The ifnet is the one recorded in the saved SYN's rcvif. */
	ifp = synqe->syn->m_pkthdr.rcvif;
	vi = if_getsoftc(ifp);
	KASSERT(vi->adapter == sc,
	    ("%s: vi %p, sc %p mismatch", __func__, vi, sc));

	if (__predict_false(tp->t_flags & TF_DISCONNECTED)) {
		/*
		 * Common exit for every failure that should reset the
		 * connection; later code jumps here with the listening inp
		 * still locked.
		 */
reset:
		send_abort_rpl_synqe(TOEDEV(ifp), synqe, CPL_ABORT_SEND_RST);
		INP_WUNLOCK(inp);
		NET_EPOCH_EXIT(et);
		CURVNET_RESTORE();
		return (0);
	}

	KASSERT(synqe->params.rxq_idx == iq_to_ofld_rxq(iq) - &sc->sge.ofld_rxq[0],
	    ("%s: CPL arrived on unexpected rxq.  %d %d", __func__,
	    synqe->params.rxq_idx,
	    (int)(iq_to_ofld_rxq(iq) - &sc->sge.ofld_rxq[0])));

	toep = alloc_toepcb(vi, M_NOWAIT);
	if (toep == NULL)
		goto reset;
	toep->tid = tid;
	toep->l2te = &sc->l2t->l2tab[synqe->params.l2t_idx];
	toep->vnet = lctx->vnet;
	bcopy(&synqe->params, &toep->params, sizeof(toep->params));
	init_toepcb(vi, toep);

	MPASS(be32toh(cpl->snd_isn) - 1 == synqe->iss);
	MPASS(be32toh(cpl->rcv_isn) - 1 == synqe->irs);
	synqe->tcp_opt = cpl->tcp_opt;
	synqe->toep = toep;

	/* Come up with something that syncache_expand should be ok with. */
	synqe_to_protohdrs(sc, synqe, cpl, &inc, &th, &to);
	if (inc.inc_flags & INC_ISIPV6) {
		/*
		 * IPv6 needs a CLIP entry: share the listener's if it has
		 * one, otherwise get one for the local address.
		 */
		if (lctx->ce == NULL) {
			toep->ce = t4_get_clip_entry(sc, &inc.inc6_laddr, true);
			if (toep->ce == NULL) {
				free_toepcb(toep);
				goto reset;	/* RST without a CLIP entry? */
			}
		} else {
			t4_hold_clip_entry(sc, lctx->ce);
			toep->ce = lctx->ce;
		}
	}
	so = inp->inp_socket;
	KASSERT(so != NULL, ("%s: socket is NULL", __func__));

	/* so goes in as the listening socket and comes back as the new one. */
	rstreason = toe_syncache_expand(&inc, &to, &th, &so);
	if (rstreason < 0) {
		/* Negative result: abort the tid without sending a RST. */
		free_toepcb(toep);
		send_abort_rpl_synqe(TOEDEV(ifp), synqe, CPL_ABORT_NO_RST);
		INP_WUNLOCK(inp);
		NET_EPOCH_EXIT(et);
		CURVNET_RESTORE();
		return (0);
	} else if (rstreason == 0 || so == NULL) {
		free_toepcb(toep);
		goto reset;
	}

	/* New connection inpcb is already locked by syncache_expand(). */
	new_inp = sotoinpcb(so);
	INP_WLOCK_ASSERT(new_inp);
	MPASS(so->so_vnet == lctx->vnet);

	/*
	 * This is for expansion from syncookies.
	 *
	 * XXX: we've held the tcbinfo lock throughout so there's no risk of
	 * anyone accept'ing a connection before we've installed our hooks, but
	 * this somewhat defeats the purpose of having a tod_offload_socket :-(
	 */
	if (__predict_false(!(synqe->flags & TPF_SYNQE_EXPANDED))) {
		tcp_timer_activate(intotcpcb(new_inp), TT_KEEP, 0);
		t4_offload_socket(TOEDEV(ifp), synqe, so);
	}

	INP_WUNLOCK(new_inp);

	/* Done with the synqe */
	inp = release_synqe(sc, synqe);
	if (inp != NULL)
		INP_WUNLOCK(inp);
	NET_EPOCH_EXIT(et);
	CURVNET_RESTORE();

	return (0);
}
1746
/*
 * Register the CPL handlers used by the listener code: replies to pass-open
 * and close-listen-server requests, incoming SYNs (CPL_PASS_ACCEPT_REQ), and
 * completed handshakes (CPL_PASS_ESTABLISH).
 */
void
t4_init_listen_cpl_handlers(void)
{

	t4_register_cpl_handler(CPL_PASS_OPEN_RPL, do_pass_open_rpl);
	t4_register_cpl_handler(CPL_CLOSE_LISTSRV_RPL, do_close_server_rpl);
	t4_register_cpl_handler(CPL_PASS_ACCEPT_REQ, do_pass_accept_req);
	t4_register_cpl_handler(CPL_PASS_ESTABLISH, do_pass_establish);
}
1756
/*
 * Counterpart of t4_init_listen_cpl_handlers: registers NULL for each of the
 * four listener-related CPL opcodes.
 */
void
t4_uninit_listen_cpl_handlers(void)
{

	t4_register_cpl_handler(CPL_PASS_OPEN_RPL, NULL);
	t4_register_cpl_handler(CPL_CLOSE_LISTSRV_RPL, NULL);
	t4_register_cpl_handler(CPL_PASS_ACCEPT_REQ, NULL);
	t4_register_cpl_handler(CPL_PASS_ESTABLISH, NULL);
}
1766 #endif
1767