/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2012 Chelsio Communications, Inc.
 * All rights reserved.
 * Written by: Navdeep Parhar <np@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/types.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/module.h>
#include <sys/protosw.h>
#include <sys/refcount.h>
#include <sys/domain.h>
#include <sys/fnv_hash.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_types.h>
#include <net/if_vlan_var.h>
#include <net/route.h>
#include <net/route/nhop.h>
#include <netinet/in.h>
#include <netinet/in_fib.h>
#include <netinet/in_pcb.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet6/in6_fib.h>
#include <netinet6/scope6_var.h>
#include <netinet/tcp_timer.h>
#define TCPSTATES
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_var.h>
#include <netinet/toecore.h>
#include <netinet/cc/cc.h>

#include "common/common.h"
#include "common/t4_msg.h"
#include "common/t4_regs.h"
#include "t4_clip.h"
#include "tom/t4_tom_l2t.h"
#include "tom/t4_tom.h"

/* stid services */
static int alloc_stid(struct adapter *, bool, void *);
static struct listen_ctx *lookup_stid(struct adapter *, int);
static void free_stid(struct adapter *, int, bool);

/* lctx services */
static struct listen_ctx *alloc_lctx(struct adapter *, struct inpcb *,
    struct vi_info *);
static int free_lctx(struct adapter *, struct listen_ctx *);
static void hold_lctx(struct listen_ctx *);
static void listen_hash_add(struct adapter *, struct listen_ctx *);
static struct listen_ctx *listen_hash_find(struct adapter *, struct inpcb *);
static struct listen_ctx *listen_hash_del(struct adapter *, struct inpcb *);
static struct inpcb *release_lctx(struct adapter *, struct listen_ctx *);

static void send_abort_rpl_synqe(struct toedev *, struct synq_entry *, int);

static int create_server6(struct adapter *, struct listen_ctx *);
static int create_server(struct adapter *, struct listen_ctx *);

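/*
 * Allocate the stid lookup table and the allocation bitmap.  The table has
 * one slot per server tid supported by the hardware; the caller must not
 * have allocated it already.
 */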
int
alloc_stid_tab(struct adapter *sc)
{
	struct tid_info *t = &sc->tids;

	MPASS(t->nstids > 0);
	MPASS(t->stid_tab == NULL);

	t->stid_tab = malloc(t->nstids * sizeof(*t->stid_tab), M_CXGBE,
	    M_ZERO | M_NOWAIT);
	if (t->stid_tab == NULL)
		return (ENOMEM);
	t->stid_bitmap = bit_alloc(t->nstids, M_CXGBE, M_NOWAIT);
	if (t->stid_bitmap == NULL) {
		free(t->stid_tab, M_CXGBE);
		t->stid_tab = NULL;
		return (ENOMEM);
	}
	mtx_init(&t->stid_lock, "stid lock", NULL, MTX_DEF);
	t->stids_in_use = 0;

	return (0);
}

void
free_stid_tab(struct adapter *sc)
{
	struct tid_info *t = &sc->tids;

	KASSERT(t->stids_in_use == 0,
	    ("%s: %d tids still in use.", __func__, t->stids_in_use));

	if (mtx_initialized(&t->stid_lock))
		mtx_destroy(&t->stid_lock);
	free(t->stid_tab, M_CXGBE);
	t->stid_tab = NULL;
	free(t->stid_bitmap, M_CXGBE);
	t->stid_bitmap = NULL;
}

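/*
 * Called while the adapter is being stopped (e.g., ahead of a reset).  Block
 * new stid allocations, clear the RPL-pending and setup-in-hw state of every
 * listener, and strand every embryonic connection on the synq after removing
 * its tid from the lookup table.
 */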
void
stop_stid_tab(struct adapter *sc)
{
	struct tid_info *t = &sc->tids;
	struct tom_data *td = sc->tom_softc;
	struct listen_ctx *lctx;
	struct synq_entry *synqe;
	int i, ntids;

	mtx_lock(&t->stid_lock);
	t->stid_tab_stopped = true;
	mtx_unlock(&t->stid_lock);

	mtx_lock(&td->lctx_hash_lock);
	for (i = 0; i <= td->listen_mask; i++) {
		LIST_FOREACH(lctx, &td->listen_hash[i], link)
			lctx->flags &= ~(LCTX_RPL_PENDING | LCTX_SETUP_IN_HW);
	}
	mtx_unlock(&td->lctx_hash_lock);

	mtx_lock(&td->toep_list_lock);
	TAILQ_FOREACH(synqe, &td->synqe_list, link) {
		MPASS(sc->incarnation == synqe->incarnation);
		MPASS(synqe->tid >= 0);
		MPASS(synqe == lookup_tid(sc, synqe->tid));
		/* Remove tid from the lookup table immediately. */
		CTR(KTR_CXGBE, "%s: tid %d@%d STRANDED, removed from table",
		    __func__, synqe->tid, synqe->incarnation);
		ntids = synqe->lctx->inp->inp_vflag & INP_IPV6 ? 2 : 1;
		remove_tid(sc, synqe->tid, ntids);
#if 0
		/* synqe->tid is stale now but left alone for debug. */
		synqe->tid = -1;
#endif
	}
	MPASS(TAILQ_EMPTY(&td->stranded_synqe));
	TAILQ_CONCAT(&td->stranded_synqe, &td->synqe_list, link);
	MPASS(TAILQ_EMPTY(&td->synqe_list));
	mtx_unlock(&td->toep_list_lock);
}

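/*
 * Undo stop_stid_tab when the adapter is restarted: re-send a passive open
 * request for every listener in the hash (each is marked RPL-pending again)
 * and then re-enable stid allocation.
 */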
void
restart_stid_tab(struct adapter *sc)
{
	struct tid_info *t = &sc->tids;
	struct tom_data *td = sc->tom_softc;
	struct listen_ctx *lctx;
	int i;

	mtx_lock(&td->lctx_hash_lock);
	for (i = 0; i <= td->listen_mask; i++) {
		LIST_FOREACH(lctx, &td->listen_hash[i], link) {
			MPASS((lctx->flags &
			    (LCTX_RPL_PENDING | LCTX_SETUP_IN_HW)) == 0);
			lctx->flags |= LCTX_RPL_PENDING;
			if (lctx->inp->inp_vflag & INP_IPV6)
				create_server6(sc, lctx);
			else
				create_server(sc, lctx);
		}
	}
	mtx_unlock(&td->lctx_hash_lock);

	mtx_lock(&t->stid_lock);
	t->stid_tab_stopped = false;
	mtx_unlock(&t->stid_lock);
}

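/*
 * Reserve an stid for the given context and record the context in the stid
 * table.  An IPv6 listener needs an even-aligned pair of stids, an IPv4
 * listener just one; for IPv4 the allocator prefers to complete a partially
 * used pair so that whole pairs stay available for IPv6.  Returns the
 * hardware stid (stid_base included) or -1 if nothing suitable is free.
 */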
static int
alloc_stid(struct adapter *sc, bool isipv6, void *ctx)
{
	struct tid_info *t = &sc->tids;
	const u_int n = isipv6 ? 2 : 1;
	int stid, pair_stid;
	u_int i;
	ssize_t val;

	mtx_lock(&t->stid_lock);
	MPASS(t->stids_in_use <= t->nstids);
	if (n > t->nstids - t->stids_in_use || t->stid_tab_stopped) {
		mtx_unlock(&t->stid_lock);
		return (-1);
	}

	stid = -1;
	if (isipv6) {
		/*
		 * An IPv6 server needs 2 naturally aligned stids (1 stid = 4
		 * cells) in the TCAM.  We know that the start of the stid
		 * region is properly aligned already (the chip requires each
		 * region to be 128-cell aligned).
		 */
		for (i = 0; i + 1 < t->nstids; i = roundup2(val + 1, 2)) {
			bit_ffc_area_at(t->stid_bitmap, i, t->nstids, 2, &val);
			if (val == -1)
				break;
			if ((val & 1) == 0) {
				stid = val;
				break;
			}
		}
	} else {
		/*
		 * An IPv4 server needs one stid without any alignment
		 * requirements.  But we try extra hard to find an available
		 * stid adjacent to a used stid so that free "stid-pairs" are
		 * left intact for IPv6.
		 */
		bit_ffc_at(t->stid_bitmap, 0, t->nstids, &val);
		while (val != -1) {
			if (stid == -1) {
				/*
				 * First usable stid.  Look no further if it's
				 * an ideal fit.
				 */
				stid = val;
				if (val & 1 ||
				    bit_test(t->stid_bitmap, val + 1))
					break;
			} else {
				/*
				 * We have an unused stid already but are now
				 * looking for in-use stids because we'd prefer
				 * to grab an unused stid adjacent to one
				 * that's in use.
				 *
				 * Odd stids pair with the previous stid and
				 * even ones pair with the next stid.
				 */
				pair_stid = val & 1 ? val - 1 : val + 1;
				if (bit_test(t->stid_bitmap, pair_stid) == 0) {
					stid = pair_stid;
					break;
				}
			}
			val = roundup2(val + 1, 2);
			if (val >= t->nstids)
				break;
			bit_ffs_at(t->stid_bitmap, val, t->nstids, &val);
		}
	}

	if (stid >= 0) {
		MPASS(stid + n - 1 < t->nstids);
		MPASS(bit_ntest(t->stid_bitmap, stid, stid + n - 1, 0));
		bit_nset(t->stid_bitmap, stid, stid + n - 1);
		t->stids_in_use += n;
		t->stid_tab[stid] = ctx;
#ifdef INVARIANTS
		if (n == 2) {
			MPASS((stid & 1) == 0);
			t->stid_tab[stid + 1] = NULL;
		}
#endif
		stid += t->stid_base;
	}
	mtx_unlock(&t->stid_lock);
	return (stid);
}

static struct listen_ctx *
lookup_stid(struct adapter *sc, int stid)
{
	struct tid_info *t = &sc->tids;

	return (t->stid_tab[stid - t->stid_base]);
}

static void
free_stid(struct adapter *sc, int stid, bool isipv6)
{
	struct tid_info *t = &sc->tids;
	const u_int n = isipv6 ? 2 : 1;

	mtx_lock(&t->stid_lock);
	MPASS(stid >= t->stid_base);
	stid -= t->stid_base;
	MPASS(stid + n - 1 < t->nstids);
	MPASS(t->stids_in_use <= t->nstids);
	MPASS(t->stids_in_use >= n);
	MPASS(t->stid_tab[stid] != NULL);
#ifdef INVARIANTS
	if (n == 2) {
		MPASS((stid & 1) == 0);
		MPASS(t->stid_tab[stid + 1] == NULL);
	}
#endif
	MPASS(bit_ntest(t->stid_bitmap, stid, stid + n - 1, 1));
	bit_nclear(t->stid_bitmap, stid, stid + n - 1);
	t->stid_tab[stid] = NULL;
	t->stids_in_use -= n;
	mtx_unlock(&t->stid_lock);
}

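/*
 * Set up a listen context for the listening socket's inpcb: reserve an stid,
 * grab a CLIP entry for a non-wildcard IPv6 local address, pick the control
 * and offload rx queues to use, and take a reference on the inp.  Returns
 * NULL on any failure.
 */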
static struct listen_ctx *
alloc_lctx(struct adapter *sc, struct inpcb *inp, struct vi_info *vi)
{
	struct listen_ctx *lctx;

	INP_WLOCK_ASSERT(inp);

	lctx = malloc(sizeof(struct listen_ctx), M_CXGBE, M_NOWAIT | M_ZERO);
	if (lctx == NULL)
		return (NULL);

	lctx->isipv6 = inp->inp_vflag & INP_IPV6;
	lctx->stid = alloc_stid(sc, lctx->isipv6, lctx);
	if (lctx->stid < 0) {
		free(lctx, M_CXGBE);
		return (NULL);
	}

	if (lctx->isipv6 &&
	    !IN6_ARE_ADDR_EQUAL(&in6addr_any, &inp->in6p_laddr)) {
		lctx->ce = t4_get_clip_entry(sc, &inp->in6p_laddr, true);
		if (lctx->ce == NULL) {
			/* Don't leak the stid on the CLIP failure path. */
			free_stid(sc, lctx->stid, lctx->isipv6);
			free(lctx, M_CXGBE);
			return (NULL);
		}
	}

	lctx->ctrlq = &sc->sge.ctrlq[vi->pi->port_id];
	lctx->ofld_rxq = &sc->sge.ofld_rxq[vi->first_ofld_rxq];
	refcount_init(&lctx->refcount, 1);

	lctx->inp = inp;
	lctx->vnet = inp->inp_socket->so_vnet;
	in_pcbref(inp);

	return (lctx);
}

/* Don't call this directly, use release_lctx instead */
static int
free_lctx(struct adapter *sc, struct listen_ctx *lctx)
{
	struct inpcb *inp = lctx->inp;

	INP_WLOCK_ASSERT(inp);
	KASSERT(lctx->refcount == 0,
	    ("%s: refcount %d", __func__, lctx->refcount));
	KASSERT(lctx->stid >= 0, ("%s: bad stid %d.", __func__, lctx->stid));

	CTR4(KTR_CXGBE, "%s: stid %u, lctx %p, inp %p",
	    __func__, lctx->stid, lctx, lctx->inp);

	if (lctx->ce)
		t4_release_clip_entry(sc, lctx->ce);
	free_stid(sc, lctx->stid, lctx->isipv6);
	free(lctx, M_CXGBE);

	return (in_pcbrele_wlocked(inp));
}

static void
hold_lctx(struct listen_ctx *lctx)
{

	refcount_acquire(&lctx->refcount);
}

static inline uint32_t
listen_hashfn(void *key, u_long mask)
{

	return (fnv_32_buf(&key, sizeof(key), FNV1_32_INIT) & mask);
}

/*
 * Add a listen_ctx entry to the listen hash table.
 */
static void
listen_hash_add(struct adapter *sc, struct listen_ctx *lctx)
{
	struct tom_data *td = sc->tom_softc;
	int bucket = listen_hashfn(lctx->inp, td->listen_mask);

	mtx_lock(&td->lctx_hash_lock);
	LIST_INSERT_HEAD(&td->listen_hash[bucket], lctx, link);
	td->lctx_count++;
	mtx_unlock(&td->lctx_hash_lock);
}

/*
 * Look for the listening socket's context entry in the hash and return it.
 */
static struct listen_ctx *
listen_hash_find(struct adapter *sc, struct inpcb *inp)
{
	struct tom_data *td = sc->tom_softc;
	int bucket = listen_hashfn(inp, td->listen_mask);
	struct listen_ctx *lctx;

	mtx_lock(&td->lctx_hash_lock);
	LIST_FOREACH(lctx, &td->listen_hash[bucket], link) {
		if (lctx->inp == inp)
			break;
	}
	mtx_unlock(&td->lctx_hash_lock);

	return (lctx);
}

/*
 * Removes the listen_ctx structure for inp from the hash and returns it.
 */
static struct listen_ctx *
listen_hash_del(struct adapter *sc, struct inpcb *inp)
{
	struct tom_data *td = sc->tom_softc;
	int bucket = listen_hashfn(inp, td->listen_mask);
	struct listen_ctx *lctx, *l;

	mtx_lock(&td->lctx_hash_lock);
	LIST_FOREACH_SAFE(lctx, &td->listen_hash[bucket], link, l) {
		if (lctx->inp == inp) {
			LIST_REMOVE(lctx, link);
			td->lctx_count--;
			break;
		}
	}
	mtx_unlock(&td->lctx_hash_lock);

	return (lctx);
}

/*
 * Releases a hold on the lctx.  Must be called with the listening socket's
 * inp locked.  The inp may be freed by this function; NULL is returned to
 * indicate that.
 */
static struct inpcb *
release_lctx(struct adapter *sc, struct listen_ctx *lctx)
{
	struct inpcb *inp = lctx->inp;
	int inp_freed = 0;

	INP_WLOCK_ASSERT(inp);
	if (refcount_release(&lctx->refcount))
		inp_freed = free_lctx(sc, lctx);

	return (inp_freed ? NULL : inp);
}

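/*
 * Send a FLOWC work request for the embryonic connection's tid.  This
 * programs the flow's parameters (PF/VF, channel, port, ingress queue, send
 * buffer, and MSS); the callers make sure it goes out on the tid before any
 * other work request, such as an abort.
 */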
static void
send_flowc_wr_synqe(struct adapter *sc, struct synq_entry *synqe)
{
	struct mbuf *m = synqe->syn;
	if_t ifp = m->m_pkthdr.rcvif;
	struct vi_info *vi = if_getsoftc(ifp);
	struct port_info *pi = vi->pi;
	struct wrqe *wr;
	struct fw_flowc_wr *flowc;
	struct sge_ofld_txq *ofld_txq;
	struct sge_ofld_rxq *ofld_rxq;
	const int nparams = 6;
	const int flowclen = sizeof(*flowc) +
	    nparams * sizeof(struct fw_flowc_mnemval);
	const u_int pfvf = sc->pf << S_FW_VIID_PFN;

	INP_WLOCK_ASSERT(synqe->lctx->inp);
	MPASS((synqe->flags & TPF_FLOWC_WR_SENT) == 0);

	ofld_txq = &sc->sge.ofld_txq[synqe->params.txq_idx];
	ofld_rxq = &sc->sge.ofld_rxq[synqe->params.rxq_idx];

	wr = alloc_wrqe(roundup2(flowclen, 16), &ofld_txq->wrq);
	if (wr == NULL) {
		/* XXX */
		panic("%s: allocation failure.", __func__);
	}
	flowc = wrtod(wr);
	memset(flowc, 0, wr->wr_len);
	flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) |
	    V_FW_FLOWC_WR_NPARAMS(nparams));
	flowc->flowid_len16 = htonl(V_FW_WR_LEN16(howmany(flowclen, 16)) |
	    V_FW_WR_FLOWID(synqe->tid));
	flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN;
	flowc->mnemval[0].val = htobe32(pfvf);
	flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH;
	flowc->mnemval[1].val = htobe32(pi->tx_chan);
	flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT;
	flowc->mnemval[2].val = htobe32(pi->tx_chan);
	flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID;
	flowc->mnemval[3].val = htobe32(ofld_rxq->iq.abs_id);
	flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDBUF;
	flowc->mnemval[4].val = htobe32(512);
	flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_MSS;
	flowc->mnemval[5].val = htobe32(512);

	synqe->flags |= TPF_FLOWC_WR_SENT;
	t4_wrq_tx(sc, wr);
}

static void
send_abort_rpl_synqe(struct toedev *tod, struct synq_entry *synqe,
    int rst_status)
{
	struct adapter *sc = tod->tod_softc;
	struct wrqe *wr;
	struct cpl_abort_req *req;

	INP_WLOCK_ASSERT(synqe->lctx->inp);

	CTR5(KTR_CXGBE, "%s: synqe %p (0x%x), tid %d%s",
	    __func__, synqe, synqe->flags, synqe->tid,
	    synqe->flags & TPF_ABORT_SHUTDOWN ?
	    " (abort already in progress)" : "");
	if (synqe->flags & TPF_ABORT_SHUTDOWN)
		return;	/* abort already in progress */
	synqe->flags |= TPF_ABORT_SHUTDOWN;

	if (!(synqe->flags & TPF_FLOWC_WR_SENT))
		send_flowc_wr_synqe(sc, synqe);

	wr = alloc_wrqe(sizeof(*req),
	    &sc->sge.ofld_txq[synqe->params.txq_idx].wrq);
	if (wr == NULL) {
		/* XXX */
		panic("%s: allocation failure.", __func__);
	}
	req = wrtod(wr);
	INIT_TP_WR_MIT_CPL(req, CPL_ABORT_REQ, synqe->tid);
	req->rsvd0 = 0;	/* don't have a snd_nxt */
	req->rsvd1 = 1;	/* no data sent yet */
	req->cmd = rst_status;

	t4_l2t_send(sc, wr, &sc->l2t->l2tab[synqe->params.l2t_idx]);
}

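/*
 * Tell the hardware to start listening: send a CPL_PASS_OPEN_REQ for the
 * inp's local IPv4 address and port on the listener's control queue.  The
 * chip answers with a CPL_PASS_OPEN_RPL, handled by do_pass_open_rpl.
 */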
static int
create_server(struct adapter *sc, struct listen_ctx *lctx)
{
	struct wrqe *wr;
	struct cpl_pass_open_req *req;
	struct inpcb *inp = lctx->inp;

	wr = alloc_wrqe(sizeof(*req), lctx->ctrlq);
	if (wr == NULL) {
		log(LOG_ERR, "%s: allocation failure", __func__);
		return (ENOMEM);
	}
	req = wrtod(wr);

	INIT_TP_WR(req, 0);
	OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, lctx->stid));
	req->local_port = inp->inp_lport;
	req->peer_port = 0;
	req->local_ip = inp->inp_laddr.s_addr;
	req->peer_ip = 0;
	req->opt0 = htobe64(V_TX_CHAN(lctx->ctrlq->eq.tx_chan));
	req->opt1 = htobe64(V_CONN_POLICY(CPL_CONN_POLICY_ASK) |
	    F_SYN_RSS_ENABLE | V_SYN_RSS_QUEUE(lctx->ofld_rxq->iq.abs_id));

	t4_wrq_tx(sc, wr);
	return (0);
}

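/* IPv6 flavor of create_server: sends a CPL_PASS_OPEN_REQ6 instead. */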
static int
create_server6(struct adapter *sc, struct listen_ctx *lctx)
{
	struct wrqe *wr;
	struct cpl_pass_open_req6 *req;
	struct inpcb *inp = lctx->inp;

	wr = alloc_wrqe(sizeof(*req), lctx->ctrlq);
	if (wr == NULL) {
		log(LOG_ERR, "%s: allocation failure", __func__);
		return (ENOMEM);
	}
	req = wrtod(wr);

	INIT_TP_WR(req, 0);
	OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_PASS_OPEN_REQ6,
	    lctx->stid));
	req->local_port = inp->inp_lport;
	req->peer_port = 0;
	req->local_ip_hi = *(uint64_t *)&inp->in6p_laddr.s6_addr[0];
	req->local_ip_lo = *(uint64_t *)&inp->in6p_laddr.s6_addr[8];
	req->peer_ip_hi = 0;
	req->peer_ip_lo = 0;
	req->opt0 = htobe64(V_TX_CHAN(lctx->ctrlq->eq.tx_chan));
	req->opt1 = htobe64(V_CONN_POLICY(CPL_CONN_POLICY_ASK) |
	    F_SYN_RSS_ENABLE | V_SYN_RSS_QUEUE(lctx->ofld_rxq->iq.abs_id));

	t4_wrq_tx(sc, wr);
	return (0);
}

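/*
 * Tell the hardware to stop listening on the lctx's stid.  The lctx itself
 * is released when the CPL_CLOSE_LISTSRV_RPL arrives.
 */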
static int
destroy_server(struct adapter *sc, struct listen_ctx *lctx)
{
	struct wrqe *wr;
	struct cpl_close_listsvr_req *req;

	wr = alloc_wrqe(sizeof(*req), lctx->ctrlq);
	if (wr == NULL) {
		/* XXX */
		panic("%s: allocation failure.", __func__);
	}
	req = wrtod(wr);

	INIT_TP_WR(req, 0);
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ,
	    lctx->stid));
	req->reply_ctrl = htobe16(lctx->ofld_rxq->iq.abs_id);
	req->rsvd = htobe16(0);

	t4_wrq_tx(sc, wr);
	return (0);
}

/*
 * Start a listening server by sending a passive open request to HW.
 *
 * We can't take the adapter lock here, so accesses to sc->flags,
 * sc->offload_map, and if_capenable are all race prone.
 */
int
t4_listen_start(struct toedev *tod, struct tcpcb *tp)
{
	struct adapter *sc = tod->tod_softc;
	struct vi_info *vi;
	struct port_info *pi;
	struct inpcb *inp = tptoinpcb(tp);
	struct listen_ctx *lctx;
	int i, rc, v;
	struct offload_settings settings;

	INP_WLOCK_ASSERT(inp);

	rw_rlock(&sc->policy_lock);
	settings = *lookup_offload_policy(sc, OPEN_TYPE_LISTEN, NULL,
	    EVL_MAKETAG(0xfff, 0, 0), inp);
	rw_runlock(&sc->policy_lock);
	if (!settings.offload)
		return (0);

	/* Don't start a hardware listener for any loopback address. */
	if (inp->inp_vflag & INP_IPV6 && IN6_IS_ADDR_LOOPBACK(&inp->in6p_laddr))
		return (0);
	if (!(inp->inp_vflag & INP_IPV6) &&
	    IN_LOOPBACK(ntohl(inp->inp_laddr.s_addr)))
		return (0);
	if (sc->flags & KERN_TLS_ON)
		return (0);
#if 0
	ADAPTER_LOCK(sc);
	if (IS_BUSY(sc)) {
		log(LOG_ERR, "%s: listen request ignored, %s is busy",
		    __func__, device_get_nameunit(sc->dev));
		goto done;
	}

	KASSERT(uld_active(sc, ULD_TOM),
	    ("%s: TOM not initialized", __func__));
#endif

	/*
	 * Find an initialized VI with IFCAP_TOE (4 or 6).  We'll use the first
	 * such VI's queues to send the passive open and receive the reply to
	 * it.
	 *
	 * XXX: need a way to mark a port in use by offload.  if_cxgbe should
	 * then reject any attempt to bring down such a port (and maybe reject
	 * attempts to disable IFCAP_TOE on that port too?).
	 */
	for_each_port(sc, i) {
		pi = sc->port[i];
		for_each_vi(pi, v, vi) {
			if (vi->flags & VI_INIT_DONE &&
			    if_getcapenable(vi->ifp) & IFCAP_TOE)
				goto found;
		}
	}
	goto done;	/* no port that's UP with IFCAP_TOE enabled */
found:

	if (listen_hash_find(sc, inp) != NULL)
		goto done;	/* already setup */

	lctx = alloc_lctx(sc, inp, vi);
	if (lctx == NULL) {
		log(LOG_ERR,
		    "%s: listen request ignored, %s couldn't allocate lctx\n",
		    __func__, device_get_nameunit(sc->dev));
		goto done;
	}
	listen_hash_add(sc, lctx);

	CTR6(KTR_CXGBE, "%s: stid %u (%s), lctx %p, inp %p vflag 0x%x",
	    __func__, lctx->stid, tcpstates[tp->t_state], lctx, inp,
	    inp->inp_vflag);

	if (inp->inp_vflag & INP_IPV6)
		rc = create_server6(sc, lctx);
	else
		rc = create_server(sc, lctx);
	if (rc != 0) {
		log(LOG_ERR, "%s: %s failed to create hw listener: %d.\n",
		    __func__, device_get_nameunit(sc->dev), rc);
		(void) listen_hash_del(sc, inp);
		inp = release_lctx(sc, lctx);
		/* can't be freed, host stack has a reference */
		KASSERT(inp != NULL, ("%s: inp freed", __func__));
		goto done;
	}
	lctx->flags |= LCTX_RPL_PENDING;
done:
#if 0
	ADAPTER_UNLOCK(sc);
#endif
	return (0);
}

int
t4_listen_stop(struct toedev *tod, struct tcpcb *tp)
{
	struct listen_ctx *lctx;
	struct adapter *sc = tod->tod_softc;
	struct inpcb *inp = tptoinpcb(tp);

	INP_WLOCK_ASSERT(inp);

	lctx = listen_hash_del(sc, inp);
	if (lctx == NULL)
		return (ENOENT);	/* no hardware listener for this inp */

	CTR4(KTR_CXGBE, "%s: stid %u, lctx %p, flags %x", __func__,
	    lctx->stid, lctx, lctx->flags);

	/*
	 * If the reply to the PASS_OPEN is still pending we'll wait for it to
	 * arrive and clean up when it does.
	 */
	if (lctx->flags & LCTX_RPL_PENDING) {
		return (EINPROGRESS);
	}

	if (lctx->flags & LCTX_SETUP_IN_HW)
		destroy_server(sc, lctx);
	else
		inp = release_lctx(sc, lctx);
	return (0);
}

static inline struct synq_entry *
alloc_synqe(struct adapter *sc, struct listen_ctx *lctx, int flags)
{
	struct synq_entry *synqe;

	INP_RLOCK_ASSERT(lctx->inp);
	MPASS(flags == M_WAITOK || flags == M_NOWAIT);

	synqe = malloc(sizeof(*synqe), M_CXGBE, flags);
	if (__predict_true(synqe != NULL)) {
		synqe->flags = TPF_SYNQE;
		synqe->incarnation = sc->incarnation;
		refcount_init(&synqe->refcnt, 1);
		synqe->lctx = lctx;
		hold_lctx(lctx);	/* Every synqe has a ref on its lctx. */
		synqe->syn = NULL;
	}

	return (synqe);
}

static inline void
hold_synqe(struct synq_entry *synqe)
{

	refcount_acquire(&synqe->refcnt);
}

static inline struct inpcb *
release_synqe(struct adapter *sc, struct synq_entry *synqe)
{
	struct inpcb *inp;

	MPASS(synqe->flags & TPF_SYNQE);
	MPASS(synqe->lctx != NULL);

	inp = synqe->lctx->inp;
	MPASS(inp != NULL);
	INP_WLOCK_ASSERT(inp);

	if (refcount_release(&synqe->refcnt)) {
		inp = release_lctx(sc, synqe->lctx);
		m_freem(synqe->syn);
		free(synqe, M_CXGBE);
	}

	return (inp);
}

void
t4_syncache_added(struct toedev *tod __unused, void *arg)
{
	struct synq_entry *synqe = arg;

	hold_synqe(synqe);
}

void
t4_syncache_removed(struct toedev *tod, void *arg)
{
	struct adapter *sc = tod->tod_softc;
	struct synq_entry *synqe = arg;
	struct inpcb *inp = synqe->lctx->inp;

	/*
	 * XXX: this is a LOR but harmless when running from the softclock.
	 */
	INP_WLOCK(inp);
	inp = release_synqe(sc, synqe);
	if (inp != NULL)
		INP_WUNLOCK(inp);
}

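/*
 * Called by the kernel syncache in place of transmitting the SYN|ACK that it
 * built.  The first call for a synqe records the ISS, IRS, and timestamp
 * from that packet and marks the synqe ok_to_respond; do_pass_accept_req
 * then sends the real SYN|ACK via the hardware.
 */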
int
t4_syncache_respond(struct toedev *tod, void *arg, struct mbuf *m)
{
	struct synq_entry *synqe = arg;

	if (atomic_fetchadd_int(&synqe->ok_to_respond, 1) == 0) {
		struct tcpopt to;
		struct ip *ip = mtod(m, struct ip *);
		struct tcphdr *th;

		if (ip->ip_v == IPVERSION)
			th = (void *)(ip + 1);
		else
			th = (void *)((struct ip6_hdr *)ip + 1);
		bzero(&to, sizeof(to));
		tcp_dooptions(&to, (void *)(th + 1),
		    (th->th_off << 2) - sizeof(*th), TO_SYN);

		/* save these for later */
		synqe->iss = be32toh(th->th_seq);
		synqe->irs = be32toh(th->th_ack) - 1;
		synqe->ts = to.to_tsval;
	}

	m_freem(m);	/* don't need this any more */
	return (0);
}

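/*
 * The chip's reply to a passive open request (CPL_PASS_OPEN_RPL).  On
 * success the listener is marked as set up in hardware; if the listening
 * socket went away in the meantime the hardware listener is torn back down.
 * On failure the lctx is removed from the hash and released.
 */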
static int
do_pass_open_rpl(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_pass_open_rpl *cpl = (const void *)(rss + 1);
	int stid = GET_TID(cpl);
	unsigned int status = cpl->status;
	struct listen_ctx *lctx = lookup_stid(sc, stid);
	struct inpcb *inp = lctx->inp;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	KASSERT(opcode == CPL_PASS_OPEN_RPL,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
	KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));

	INP_WLOCK(inp);

	CTR4(KTR_CXGBE, "%s: stid %d, status %u, flags 0x%x",
	    __func__, stid, status, lctx->flags);

	lctx->flags &= ~LCTX_RPL_PENDING;
	if (status == CPL_ERR_NONE)
		lctx->flags |= LCTX_SETUP_IN_HW;
	else
		log(LOG_ERR, "listener (stid %u) failed: %d\n", stid, status);

#ifdef INVARIANTS
	/*
	 * If the inp has been dropped (listening socket closed) then
	 * listen_stop must have run and taken the inp out of the hash.
	 */
	if (inp->inp_flags & INP_DROPPED) {
		KASSERT(listen_hash_del(sc, inp) == NULL,
		    ("%s: inp %p still in listen hash", __func__, inp));
	}
#endif

	if (inp->inp_flags & INP_DROPPED && status != CPL_ERR_NONE) {
		if (release_lctx(sc, lctx) != NULL)
			INP_WUNLOCK(inp);
		return (status);
	}

	/*
	 * Listening socket stopped listening earlier and now the chip tells us
	 * it has started the hardware listener.  Stop it; the lctx will be
	 * released in do_close_server_rpl.
	 */
	if (inp->inp_flags & INP_DROPPED) {
		destroy_server(sc, lctx);
		INP_WUNLOCK(inp);
		return (status);
	}

	/*
	 * Failed to start hardware listener.  Take inp out of the hash and
	 * release our reference on it.  An error message has been logged
	 * already.
	 */
	if (status != CPL_ERR_NONE) {
		listen_hash_del(sc, inp);
		if (release_lctx(sc, lctx) != NULL)
			INP_WUNLOCK(inp);
		return (status);
	}

	/* hardware listener open for business */

	INP_WUNLOCK(inp);
	return (status);
}

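/*
 * The chip's reply to CPL_CLOSE_LISTSRV_REQ: the hardware listener is gone,
 * so drop the reference that it held on the lctx.
 */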
static int
do_close_server_rpl(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_close_listsvr_rpl *cpl = (const void *)(rss + 1);
	int stid = GET_TID(cpl);
	unsigned int status = cpl->status;
	struct listen_ctx *lctx = lookup_stid(sc, stid);
	struct inpcb *inp = lctx->inp;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	KASSERT(opcode == CPL_CLOSE_LISTSRV_RPL,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
	KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));

	CTR3(KTR_CXGBE, "%s: stid %u, status %u", __func__, stid, status);

	if (status != CPL_ERR_NONE) {
		log(LOG_ERR, "%s: failed (%u) to close listener for stid %u\n",
		    __func__, status, stid);
		return (status);
	}

	INP_WLOCK(inp);
	inp = release_lctx(sc, lctx);
	if (inp != NULL)
		INP_WUNLOCK(inp);

	return (status);
}

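/*
 * Tear down an embryonic connection: remove its tid from the lookup table
 * and the synq list, return the tid to the hardware, and release the L2T
 * entry and the synqe itself.  The inp lock is dropped unless the inp was
 * freed outright.
 */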
static void
done_with_synqe(struct adapter *sc, struct synq_entry *synqe)
{
	struct tom_data *td = sc->tom_softc;
	struct listen_ctx *lctx = synqe->lctx;
	struct inpcb *inp = lctx->inp;
	struct l2t_entry *e = &sc->l2t->l2tab[synqe->params.l2t_idx];
	int ntids;

	INP_WLOCK_ASSERT(inp);

	if (synqe->tid != -1) {
		ntids = inp->inp_vflag & INP_IPV6 ? 2 : 1;
		remove_tid(sc, synqe->tid, ntids);
		mtx_lock(&td->toep_list_lock);
		TAILQ_REMOVE(&td->synqe_list, synqe, link);
		mtx_unlock(&td->toep_list_lock);
		release_tid(sc, synqe->tid, lctx->ctrlq);
	}
	t4_l2t_release(e);
	inp = release_synqe(sc, synqe);
	if (inp)
		INP_WUNLOCK(inp);
}

void
synack_failure_cleanup(struct adapter *sc, struct synq_entry *synqe)
{
	INP_WLOCK(synqe->lctx->inp);
	done_with_synqe(sc, synqe);
}

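/*
 * Abort request (from the peer or the hardware) for an embryonic connection.
 * Unless an abort was already in progress the synqe is torn down right here;
 * either way the chip is owed a CPL_ABORT_RPL.
 */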
int
do_abort_req_synqe(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_abort_req_rss *cpl = (const void *)(rss + 1);
	unsigned int tid = GET_TID(cpl);
	struct synq_entry *synqe = lookup_tid(sc, tid);
	struct listen_ctx *lctx = synqe->lctx;
	struct inpcb *inp = lctx->inp;
	struct sge_ofld_txq *ofld_txq;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	KASSERT(opcode == CPL_ABORT_REQ_RSS,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
	KASSERT(synqe->tid == tid, ("%s: toep tid mismatch", __func__));

	CTR6(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x), lctx %p, status %d",
	    __func__, tid, synqe, synqe->flags, synqe->lctx, cpl->status);

	if (negative_advice(cpl->status))
		return (0);	/* Ignore negative advice */

	INP_WLOCK(inp);

	ofld_txq = &sc->sge.ofld_txq[synqe->params.txq_idx];

	if (!(synqe->flags & TPF_FLOWC_WR_SENT))
		send_flowc_wr_synqe(sc, synqe);

	/*
	 * If we'd initiated an abort earlier the reply to it is responsible
	 * for cleaning up resources.  Otherwise we tear everything down right
	 * here right now.  We owe the T4 a CPL_ABORT_RPL no matter what.
	 */
	if (synqe->flags & TPF_ABORT_SHUTDOWN) {
		INP_WUNLOCK(inp);
		goto done;
	}

	done_with_synqe(sc, synqe);
	/* inp lock released by done_with_synqe */
done:
	send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST);
	return (0);
}

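/*
 * Reply to an abort that we had initiated for an embryonic connection; this
 * is where its resources are finally released.
 */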
int
do_abort_rpl_synqe(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_abort_rpl_rss *cpl = (const void *)(rss + 1);
	unsigned int tid = GET_TID(cpl);
	struct synq_entry *synqe = lookup_tid(sc, tid);
	struct listen_ctx *lctx = synqe->lctx;
	struct inpcb *inp = lctx->inp;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	KASSERT(opcode == CPL_ABORT_RPL_RSS,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
	KASSERT(synqe->tid == tid, ("%s: toep tid mismatch", __func__));

	CTR6(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x), lctx %p, status %d",
	    __func__, tid, synqe, synqe->flags, synqe->lctx, cpl->status);

	INP_WLOCK(inp);
	KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
	    ("%s: wasn't expecting abort reply for synqe %p (0x%x)",
	    __func__, synqe, synqe->flags));

	done_with_synqe(sc, synqe);
	/* inp lock released by done_with_synqe */

	return (0);
}

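/*
 * The driver's tod_offload_socket hook: attach the toepcb to the socket of a
 * freshly accepted connection, mark the connection established, and move the
 * tid's context from the synqe to the toepcb.
 */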
void
t4_offload_socket(struct toedev *tod, void *arg, struct socket *so)
{
	struct adapter *sc = tod->tod_softc;
	struct tom_data *td = sc->tom_softc;
	struct synq_entry *synqe = arg;
	struct inpcb *inp = sotoinpcb(so);
	struct toepcb *toep = synqe->toep;

	NET_EPOCH_ASSERT();	/* prevents bad race with accept() */
	INP_WLOCK_ASSERT(inp);
	KASSERT(synqe->flags & TPF_SYNQE,
	    ("%s: %p not a synq_entry?", __func__, arg));
	MPASS(toep->tid == synqe->tid);

	offload_socket(so, toep);
	make_established(toep, synqe->iss, synqe->irs, synqe->tcp_opt);
	toep->flags |= TPF_CPL_PENDING;
	update_tid(sc, synqe->tid, toep);
	synqe->flags |= TPF_SYNQE_EXPANDED;
	mtx_lock(&td->toep_list_lock);
	/* Remove synqe from its list and add the TOE PCB to the active list. */
	TAILQ_REMOVE(&td->synqe_list, synqe, link);
	TAILQ_INSERT_TAIL(&td->toep_list, toep, link);
	toep->flags |= TPF_IN_TOEP_LIST;
	mtx_unlock(&td->toep_list_lock);
	inp->inp_flowtype = (inp->inp_vflag & INP_IPV6) ?
	    M_HASHTYPE_RSS_TCP_IPV6 : M_HASHTYPE_RSS_TCP_IPV4;
	inp->inp_flowid = synqe->rss_hash;
}

static void
t4opt_to_tcpopt(const struct tcp_options *t4opt, struct tcpopt *to)
{
	bzero(to, sizeof(*to));

	if (t4opt->mss) {
		to->to_flags |= TOF_MSS;
		to->to_mss = be16toh(t4opt->mss);
	}

	if (t4opt->wsf > 0 && t4opt->wsf < 15) {
		to->to_flags |= TOF_SCALE;
		to->to_wscale = t4opt->wsf;
	}

	if (t4opt->tstamp)
		to->to_flags |= TOF_TS;

	if (t4opt->sack)
		to->to_flags |= TOF_SACKPERM;
}

static bool
encapsulated_syn(struct adapter *sc, const struct cpl_pass_accept_req *cpl)
{
	u_int hlen = be32toh(cpl->hdr_len);

	if (chip_id(sc) >= CHELSIO_T6)
		return (G_T6_ETH_HDR_LEN(hlen) >
		    sizeof(struct ether_vlan_header));
	else
		return (G_ETH_HDR_LEN(hlen) >
		    sizeof(struct ether_vlan_header));
}

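/*
 * Pull the connection info, TCP header, and IP TOS byte out of the SYN that
 * is embedded in a CPL_PASS_ACCEPT_REQ.  Any of the output pointers may be
 * NULL if the caller isn't interested in that part.
 */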
static void
pass_accept_req_to_protohdrs(struct adapter *sc, const struct mbuf *m,
    struct in_conninfo *inc, struct tcphdr *th, uint8_t *iptos)
{
	const struct cpl_pass_accept_req *cpl = mtod(m, const void *);
	const struct ether_header *eh;
	unsigned int hlen = be32toh(cpl->hdr_len);
	uintptr_t l3hdr;
	const struct tcphdr *tcp;

	eh = (const void *)(cpl + 1);
	if (chip_id(sc) >= CHELSIO_T6) {
		l3hdr = ((uintptr_t)eh + G_T6_ETH_HDR_LEN(hlen));
		tcp = (const void *)(l3hdr + G_T6_IP_HDR_LEN(hlen));
	} else {
		l3hdr = ((uintptr_t)eh + G_ETH_HDR_LEN(hlen));
		tcp = (const void *)(l3hdr + G_IP_HDR_LEN(hlen));
	}

	/* extract TOS (DiffServ + ECN) byte for AccECN */
	if (iptos) {
		if (((struct ip *)l3hdr)->ip_v == IPVERSION) {
			const struct ip *ip = (const void *)l3hdr;

			*iptos = ip->ip_tos;
		}
#ifdef INET6
		else if (((struct ip *)l3hdr)->ip_v == (IPV6_VERSION >> 4)) {
			const struct ip6_hdr *ip6 = (const void *)l3hdr;

			*iptos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
		}
#endif /* INET6 */
	}

	if (inc) {
		bzero(inc, sizeof(*inc));
		inc->inc_fport = tcp->th_sport;
		inc->inc_lport = tcp->th_dport;
		if (((struct ip *)l3hdr)->ip_v == IPVERSION) {
			const struct ip *ip = (const void *)l3hdr;

			inc->inc_faddr = ip->ip_src;
			inc->inc_laddr = ip->ip_dst;
		} else {
			const struct ip6_hdr *ip6 = (const void *)l3hdr;

			inc->inc_flags |= INC_ISIPV6;
			inc->inc6_faddr = ip6->ip6_src;
			inc->inc6_laddr = ip6->ip6_dst;
		}
	}

	if (th) {
		bcopy(tcp, th, sizeof(*th));
		tcp_fields_to_host(th);	/* just like tcp_input */
	}
}

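/*
 * Find the L2 next hop for the peer: consult the FIB to see whether the
 * SYN's source is on-link or behind a gateway, verify that the route uses
 * the given ifnet, and resolve the resulting address into an L2T entry.
 * Returns NULL if there's no usable route via this ifnet.
 */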
static struct l2t_entry *
get_l2te_for_nexthop(struct port_info *pi, if_t ifp,
    struct in_conninfo *inc)
{
	struct l2t_entry *e;
	struct sockaddr_in6 sin6;
	struct sockaddr *dst = (void *)&sin6;
	struct nhop_object *nh;

	if (inc->inc_flags & INC_ISIPV6) {
		bzero(dst, sizeof(struct sockaddr_in6));
		dst->sa_len = sizeof(struct sockaddr_in6);
		dst->sa_family = AF_INET6;

		if (IN6_IS_ADDR_LINKLOCAL(&inc->inc6_laddr)) {
			/* no need for route lookup */
			e = t4_l2t_get(pi, ifp, dst);
			return (e);
		}

		nh = fib6_lookup(RT_DEFAULT_FIB, &inc->inc6_faddr, 0,
		    NHR_NONE, 0);
		if (nh == NULL)
			return (NULL);
		if (nh->nh_ifp != ifp)
			return (NULL);
		if (nh->nh_flags & NHF_GATEWAY)
			((struct sockaddr_in6 *)dst)->sin6_addr =
			    nh->gw6_sa.sin6_addr;
		else
			((struct sockaddr_in6 *)dst)->sin6_addr =
			    inc->inc6_faddr;
	} else {
		dst->sa_len = sizeof(struct sockaddr_in);
		dst->sa_family = AF_INET;

		nh = fib4_lookup(RT_DEFAULT_FIB, inc->inc_faddr, 0,
		    NHR_NONE, 0);
		if (nh == NULL)
			return (NULL);
		if (nh->nh_ifp != ifp)
			return (NULL);
		if (nh->nh_flags & NHF_GATEWAY) {
			if (nh->gw_sa.sa_family == AF_INET)
				((struct sockaddr_in *)dst)->sin_addr =
				    nh->gw4_sa.sin_addr;
			else
				*((struct sockaddr_in6 *)dst) = nh->gw6_sa;
		} else
			((struct sockaddr_in *)dst)->sin_addr =
			    inc->inc_faddr;
	}

	e = t4_l2t_get(pi, ifp, dst);
	return (e);
}

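/*
 * Send a CPL_PASS_ACCEPT_RPL, which tells the chip to reply to the SYN with
 * a SYN|ACK using the connection parameters in opt0/opt2.  T5 and later also
 * take the initial send sequence number from this CPL.
 */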
static int
send_synack(struct adapter *sc, struct synq_entry *synqe, uint64_t opt0,
    uint32_t opt2, int tid)
{
	struct wrqe *wr;
	struct cpl_pass_accept_rpl *rpl;
	struct l2t_entry *e = &sc->l2t->l2tab[synqe->params.l2t_idx];

	wr = alloc_wrqe(is_t4(sc) ? sizeof(struct cpl_pass_accept_rpl) :
	    sizeof(struct cpl_t5_pass_accept_rpl), &sc->sge.ctrlq[0]);
	if (wr == NULL)
		return (ENOMEM);
	rpl = wrtod(wr);

	if (is_t4(sc))
		INIT_TP_WR_MIT_CPL(rpl, CPL_PASS_ACCEPT_RPL, tid);
	else {
		struct cpl_t5_pass_accept_rpl *rpl5 = (void *)rpl;

		INIT_TP_WR_MIT_CPL(rpl5, CPL_PASS_ACCEPT_RPL, tid);
		rpl5->iss = htobe32(synqe->iss);
	}
	rpl->opt0 = opt0;
	rpl->opt2 = opt2;

	return (t4_l2t_send(sc, wr, e));
}

#define REJECT_PASS_ACCEPT_REQ(tunnel)	do { \
	if (!tunnel) { \
		m_freem(m); \
		m = NULL; \
	} \
	reject_reason = __LINE__; \
	goto reject; \
} while (0)

/*
 * The context associated with a tid entry via insert_tid could be a synq_entry
 * or a toepcb.  The only way CPL handlers can tell is via a bit in these
 * flags.
 */
CTASSERT(offsetof(struct toepcb, flags) == offsetof(struct synq_entry, flags));

/*
 * Incoming SYN on a listening socket.
 *
 * XXX: Every use of ifp in this routine has a bad race with up/down, toe/-toe,
 * etc.
 */
static int
do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	struct tom_data *td = sc->tom_softc;
	struct toedev *tod;
	const struct cpl_pass_accept_req *cpl = mtod(m, const void *);
	unsigned int stid = G_PASS_OPEN_TID(be32toh(cpl->tos_stid));
	unsigned int tid = GET_TID(cpl);
	struct listen_ctx *lctx = lookup_stid(sc, stid);
	struct inpcb *inp;
	struct socket *so;
	struct in_conninfo inc;
	struct tcphdr th;
	struct tcpopt to;
	struct port_info *pi;
	struct vi_info *vi;
	if_t hw_ifp, ifp;
	struct l2t_entry *e = NULL;
	struct synq_entry *synqe = NULL;
	int reject_reason, v, ntids;
	uint16_t vid, l2info;
	struct epoch_tracker et;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif
	struct offload_settings settings;
	uint8_t iptos;

	KASSERT(opcode == CPL_PASS_ACCEPT_REQ,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));

	CTR4(KTR_CXGBE, "%s: stid %u, tid %u, lctx %p", __func__, stid, tid,
	    lctx);

	/*
	 * Figure out the port the SYN arrived on.  We'll look for an exact VI
	 * match in a bit but in case we don't find any we'll use the main VI
	 * as the incoming ifnet.
	 */
	l2info = be16toh(cpl->l2info);
	pi = sc->port[G_SYN_INTF(l2info)];
	hw_ifp = pi->vi[0].ifp;
	m->m_pkthdr.rcvif = hw_ifp;

	CURVNET_SET(lctx->vnet);	/* before any potential REJECT */

	/*
	 * If VXLAN/NVGRE parsing is enabled then SYNs in the inner traffic
	 * will also hit the listener.  We don't want to offload those.
	 */
	if (encapsulated_syn(sc, cpl)) {
		REJECT_PASS_ACCEPT_REQ(true);
	}

	/*
	 * Use the MAC index to lookup the associated VI.  If this SYN didn't
	 * match a perfect MAC filter, punt.
	 */
	if (!(l2info & F_SYN_XACT_MATCH)) {
		REJECT_PASS_ACCEPT_REQ(true);
	}
	for_each_vi(pi, v, vi) {
		if (vi->xact_addr_filt == G_SYN_MAC_IDX(l2info))
			goto found;
	}
	REJECT_PASS_ACCEPT_REQ(true);
found:
	hw_ifp = vi->ifp;	/* the cxgbe ifnet */
	m->m_pkthdr.rcvif = hw_ifp;
	tod = TOEDEV(hw_ifp);

	/*
	 * Don't offload if the peer requested a TCP option that's not known
	 * to the silicon.  Send the SYN to the kernel instead.
	 */
	if (__predict_false(cpl->tcpopt.unknown))
		REJECT_PASS_ACCEPT_REQ(true);

	/*
	 * Figure out if there is a pseudo interface (vlan, lagg, etc.)
	 * involved.  Don't offload if the SYN had a VLAN tag and the vid
	 * doesn't match anything on this interface.
	 *
	 * XXX: lagg support, lagg + vlan support.
	 */
	vid = EVL_VLANOFTAG(be16toh(cpl->vlan));
	if (vid != 0xfff && vid != 0) {
		ifp = VLAN_DEVAT(hw_ifp, vid);
		if (ifp == NULL)
			REJECT_PASS_ACCEPT_REQ(true);
	} else
		ifp = hw_ifp;

	/*
	 * Don't offload if the ifnet that the SYN came in on is not in the
	 * same vnet as the listening socket.
	 */
	if (lctx->vnet != if_getvnet(ifp))
		REJECT_PASS_ACCEPT_REQ(true);

	pass_accept_req_to_protohdrs(sc, m, &inc, &th, &iptos);
	if (inc.inc_flags & INC_ISIPV6) {

		/* Don't offload if the ifcap isn't enabled */
		if ((if_getcapenable(ifp) & IFCAP_TOE6) == 0)
			REJECT_PASS_ACCEPT_REQ(true);

		/*
		 * SYN must be directed to an IP6 address on this ifnet.
		 * This is more restrictive than in6_localip.
		 */
		NET_EPOCH_ENTER(et);
		if (!in6_ifhasaddr(ifp, &inc.inc6_laddr)) {
			NET_EPOCH_EXIT(et);
			REJECT_PASS_ACCEPT_REQ(true);
		}

		ntids = 2;
	} else {

		/* Don't offload if the ifcap isn't enabled */
		if ((if_getcapenable(ifp) & IFCAP_TOE4) == 0)
			REJECT_PASS_ACCEPT_REQ(true);

		/*
		 * SYN must be directed to an IP address on this ifnet.  This
		 * is more restrictive than in_localip.
		 */
		NET_EPOCH_ENTER(et);
		if (!in_ifhasaddr(ifp, inc.inc_laddr)) {
			NET_EPOCH_EXIT(et);
			REJECT_PASS_ACCEPT_REQ(true);
		}

		ntids = 1;
	}

	e = get_l2te_for_nexthop(pi, ifp, &inc);
	if (e == NULL) {
		NET_EPOCH_EXIT(et);
		REJECT_PASS_ACCEPT_REQ(true);
	}

	/* Don't offload if the 4-tuple is already in use */
	if (toe_4tuple_check(&inc, &th, ifp) != 0) {
		NET_EPOCH_EXIT(et);
		REJECT_PASS_ACCEPT_REQ(false);
	}

	inp = lctx->inp;	/* listening socket, not owned by TOE */
	INP_RLOCK(inp);

	/* Don't offload if the listening socket has closed */
	if (__predict_false(inp->inp_flags & INP_DROPPED)) {
		INP_RUNLOCK(inp);
		NET_EPOCH_EXIT(et);
		REJECT_PASS_ACCEPT_REQ(false);
	}
	so = inp->inp_socket;
	rw_rlock(&sc->policy_lock);
	settings = *lookup_offload_policy(sc, OPEN_TYPE_PASSIVE, m,
	    EVL_MAKETAG(0xfff, 0, 0), inp);
	rw_runlock(&sc->policy_lock);
	if (!settings.offload) {
		INP_RUNLOCK(inp);
		NET_EPOCH_EXIT(et);
		REJECT_PASS_ACCEPT_REQ(true);	/* Rejected by COP. */
	}

	synqe = alloc_synqe(sc, lctx, M_NOWAIT);
	if (synqe == NULL) {
		INP_RUNLOCK(inp);
		NET_EPOCH_EXIT(et);
		REJECT_PASS_ACCEPT_REQ(true);
	}
	MPASS(rss->hash_type == RSS_HASH_TCP);
	synqe->rss_hash = be32toh(rss->hash_val);
	atomic_store_int(&synqe->ok_to_respond, 0);

	init_conn_params(vi, &settings, &inc, so, &cpl->tcpopt, e->idx,
	    &synqe->params);

	/*
	 * If all goes well t4_syncache_respond will get called during
	 * syncache_add.  Note that syncache_add releases the pcb lock.
	 */
	t4opt_to_tcpopt(&cpl->tcpopt, &to);
	toe_syncache_add(&inc, &to, &th, inp, tod, synqe, iptos);

	if (atomic_load_int(&synqe->ok_to_respond) > 0) {
		uint64_t opt0;
		uint32_t opt2;

		opt0 = calc_options0(vi, &synqe->params);
		opt2 = calc_options2(vi, &synqe->params);

		insert_tid(sc, tid, synqe, ntids);
		synqe->tid = tid;
		synqe->syn = m;
		m = NULL;
		mtx_lock(&td->toep_list_lock);
		TAILQ_INSERT_TAIL(&td->synqe_list, synqe, link);
		mtx_unlock(&td->toep_list_lock);

		if (send_synack(sc, synqe, opt0, opt2, tid) != 0) {
			remove_tid(sc, tid, ntids);
			m = synqe->syn;
			synqe->syn = NULL;
			mtx_lock(&td->toep_list_lock);
			TAILQ_REMOVE(&td->synqe_list, synqe, link);
			mtx_unlock(&td->toep_list_lock);
			NET_EPOCH_EXIT(et);
			REJECT_PASS_ACCEPT_REQ(true);
		}
		CTR6(KTR_CXGBE,
		    "%s: stid %u, tid %u, synqe %p, opt0 %#016lx, opt2 %#08x",
		    __func__, stid, tid, synqe, be64toh(opt0), be32toh(opt2));
	} else {
		NET_EPOCH_EXIT(et);
		REJECT_PASS_ACCEPT_REQ(false);
	}

	NET_EPOCH_EXIT(et);
	CURVNET_RESTORE();
	return (0);
reject:
	CURVNET_RESTORE();
	CTR4(KTR_CXGBE, "%s: stid %u, tid %u, REJECT (%d)", __func__, stid,
	    tid, reject_reason);

	if (e)
		t4_l2t_release(e);
	release_tid(sc, tid, lctx->ctrlq);
	if (synqe) {
		inp = synqe->lctx->inp;
		INP_WLOCK(inp);
		inp = release_synqe(sc, synqe);
		if (inp)
			INP_WUNLOCK(inp);
	}

	if (m) {
		/*
		 * The connection request hit a TOE listener but is being
		 * passed on to the kernel sw stack instead of getting
		 * offloaded.
		 */
		m_adj(m, sizeof(*cpl));
		m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID |
		    CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
		m->m_pkthdr.csum_data = 0xffff;
		if_input(hw_ifp, m);
	}

	return (reject_reason);
}


static void
synqe_to_protohdrs(struct adapter *sc, struct synq_entry *synqe,
    const struct cpl_pass_establish *cpl, struct in_conninfo *inc,
    struct tcphdr *th, struct tcpopt *to)
{
	uint16_t tcp_opt = be16toh(cpl->tcp_opt);
	uint8_t iptos;

	/* start off with the original SYN */
	pass_accept_req_to_protohdrs(sc, synqe->syn, inc, th, &iptos);

	/* modify parts to make it look like the ACK to our SYN|ACK */
	tcp_set_flags(th, TH_ACK);
	th->th_ack = synqe->iss + 1;
	th->th_seq = be32toh(cpl->rcv_isn);
	bzero(to, sizeof(*to));
	if (G_TCPOPT_TSTAMP(tcp_opt)) {
		to->to_flags |= TOF_TS;
		to->to_tsecr = synqe->ts;
	}
}

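/*
 * The chip has received the final ACK of the 3-way handshake.  Allocate a
 * toepcb, expand the syncache entry into a connected socket, and hand the
 * established connection over to the host stack.
 */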
static int
do_pass_establish(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	struct vi_info *vi;
	if_t ifp;
	const struct cpl_pass_establish *cpl = (const void *)(rss + 1);
#if defined(KTR) || defined(INVARIANTS)
	unsigned int stid = G_PASS_OPEN_TID(be32toh(cpl->tos_stid));
#endif
	unsigned int tid = GET_TID(cpl);
	struct synq_entry *synqe = lookup_tid(sc, tid);
	struct listen_ctx *lctx = synqe->lctx;
	struct inpcb *inp = lctx->inp, *new_inp;
	struct socket *so;
	struct tcphdr th;
	struct tcpopt to;
	struct in_conninfo inc;
	struct toepcb *toep;
	struct epoch_tracker et;
	int rstreason;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	KASSERT(opcode == CPL_PASS_ESTABLISH,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
	KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));
	KASSERT(synqe->flags & TPF_SYNQE,
	    ("%s: tid %u (ctx %p) not a synqe", __func__, tid, synqe));

	CURVNET_SET(lctx->vnet);
	NET_EPOCH_ENTER(et);	/* for syncache_expand */
	INP_WLOCK(inp);

	CTR6(KTR_CXGBE,
	    "%s: stid %u, tid %u, synqe %p (0x%x), inp_flags 0x%x",
	    __func__, stid, tid, synqe, synqe->flags, inp->inp_flags);

	ifp = synqe->syn->m_pkthdr.rcvif;
	vi = if_getsoftc(ifp);
	KASSERT(vi->adapter == sc,
	    ("%s: vi %p, sc %p mismatch", __func__, vi, sc));

	if (__predict_false(inp->inp_flags & INP_DROPPED)) {
reset:
		send_abort_rpl_synqe(TOEDEV(ifp), synqe, CPL_ABORT_SEND_RST);
		INP_WUNLOCK(inp);
		NET_EPOCH_EXIT(et);
		CURVNET_RESTORE();
		return (0);
	}

	KASSERT(synqe->params.rxq_idx ==
	    iq_to_ofld_rxq(iq) - &sc->sge.ofld_rxq[0],
	    ("%s: CPL arrived on unexpected rxq. %d %d", __func__,
	    synqe->params.rxq_idx,
	    (int)(iq_to_ofld_rxq(iq) - &sc->sge.ofld_rxq[0])));

	toep = alloc_toepcb(vi, M_NOWAIT);
	if (toep == NULL)
		goto reset;
	toep->tid = tid;
	toep->l2te = &sc->l2t->l2tab[synqe->params.l2t_idx];
	toep->vnet = lctx->vnet;
	bcopy(&synqe->params, &toep->params, sizeof(toep->params));
	init_toepcb(vi, toep);

	MPASS(be32toh(cpl->snd_isn) - 1 == synqe->iss);
	MPASS(be32toh(cpl->rcv_isn) - 1 == synqe->irs);
	synqe->tcp_opt = cpl->tcp_opt;
	synqe->toep = toep;

	/* Come up with something that syncache_expand should be ok with. */
	synqe_to_protohdrs(sc, synqe, cpl, &inc, &th, &to);
	if (inc.inc_flags & INC_ISIPV6) {
		if (lctx->ce == NULL) {
			toep->ce = t4_get_clip_entry(sc, &inc.inc6_laddr,
			    true);
			if (toep->ce == NULL) {
				free_toepcb(toep);
				goto reset;	/* RST without a CLIP entry? */
			}
		} else {
			t4_hold_clip_entry(sc, lctx->ce);
			toep->ce = lctx->ce;
		}
	}
	so = inp->inp_socket;
	KASSERT(so != NULL, ("%s: socket is NULL", __func__));

	rstreason = toe_syncache_expand(&inc, &to, &th, &so);
	if (rstreason < 0) {
		free_toepcb(toep);
		send_abort_rpl_synqe(TOEDEV(ifp), synqe, CPL_ABORT_NO_RST);
		INP_WUNLOCK(inp);
		NET_EPOCH_EXIT(et);
		CURVNET_RESTORE();
		return (0);
	} else if (rstreason == 0 || so == NULL) {
		free_toepcb(toep);
		goto reset;
	}

	/* New connection inpcb is already locked by syncache_expand(). */
	new_inp = sotoinpcb(so);
	INP_WLOCK_ASSERT(new_inp);
	MPASS(so->so_vnet == lctx->vnet);

	/*
	 * This is for expansion from syncookies.
	 *
	 * XXX: we've held the tcbinfo lock throughout so there's no risk of
	 * anyone accept'ing a connection before we've installed our hooks,
	 * but this somewhat defeats the purpose of having a
	 * tod_offload_socket :-(
	 */
	if (__predict_false(!(synqe->flags & TPF_SYNQE_EXPANDED))) {
		tcp_timer_activate(intotcpcb(new_inp), TT_KEEP, 0);
		t4_offload_socket(TOEDEV(ifp), synqe, so);
	}

	INP_WUNLOCK(new_inp);

	/* Done with the synqe */
	inp = release_synqe(sc, synqe);
	if (inp != NULL)
		INP_WUNLOCK(inp);
	NET_EPOCH_EXIT(et);
	CURVNET_RESTORE();

	return (0);
}

void
t4_init_listen_cpl_handlers(void)
{

	t4_register_cpl_handler(CPL_PASS_OPEN_RPL, do_pass_open_rpl);
	t4_register_cpl_handler(CPL_CLOSE_LISTSRV_RPL, do_close_server_rpl);
	t4_register_cpl_handler(CPL_PASS_ACCEPT_REQ, do_pass_accept_req);
	t4_register_cpl_handler(CPL_PASS_ESTABLISH, do_pass_establish);
}

void
t4_uninit_listen_cpl_handlers(void)
{

	t4_register_cpl_handler(CPL_PASS_OPEN_RPL, NULL);
	t4_register_cpl_handler(CPL_CLOSE_LISTSRV_RPL, NULL);
	t4_register_cpl_handler(CPL_PASS_ACCEPT_REQ, NULL);
	t4_register_cpl_handler(CPL_PASS_ESTABLISH, NULL);
}
#endif /* TCP_OFFLOAD */