/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2012 Chelsio Communications, Inc.
 * All rights reserved.
 * Written by: Navdeep Parhar <np@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/types.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/module.h>
#include <sys/protosw.h>
#include <sys/refcount.h>
#include <sys/domain.h>
#include <sys/fnv_hash.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_types.h>
#include <net/if_vlan_var.h>
#include <net/route.h>
#include <net/route/nhop.h>
#include <netinet/in.h>
#include <netinet/in_fib.h>
#include <netinet/in_pcb.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet6/in6_fib.h>
#include <netinet6/scope6_var.h>
#include <netinet/tcp_timer.h>
#define TCPSTATES
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_var.h>
#include <netinet/toecore.h>
#include <netinet/cc/cc.h>

#include "common/common.h"
#include "common/t4_msg.h"
#include "common/t4_regs.h"
#include "t4_clip.h"
#include "tom/t4_tom_l2t.h"
#include "tom/t4_tom.h"

/* stid services */
static int alloc_stid(struct adapter *, bool, void *);
static struct listen_ctx *lookup_stid(struct adapter *, int);
static void free_stid(struct adapter *, int, bool);

/* lctx services */
static struct listen_ctx *alloc_lctx(struct adapter *, struct inpcb *,
    struct vi_info *);
static int free_lctx(struct adapter *, struct listen_ctx *);
static void hold_lctx(struct listen_ctx *);
static void listen_hash_add(struct adapter *, struct listen_ctx *);
static struct listen_ctx *listen_hash_find(struct adapter *, struct inpcb *);
static struct listen_ctx *listen_hash_del(struct adapter *, struct inpcb *);
static struct inpcb *release_lctx(struct adapter *, struct listen_ctx *);

static void send_abort_rpl_synqe(struct toedev *, struct synq_entry *, int);

static int create_server6(struct adapter *, struct listen_ctx *);
static int create_server(struct adapter *, struct listen_ctx *);

int
alloc_stid_tab(struct adapter *sc)
{
	struct tid_info *t = &sc->tids;

	MPASS(t->nstids > 0);
	MPASS(t->stid_tab == NULL);

	t->stid_tab = malloc(t->nstids * sizeof(*t->stid_tab), M_CXGBE,
	    M_ZERO | M_NOWAIT);
	if (t->stid_tab == NULL)
		return (ENOMEM);
	t->stid_bitmap = bit_alloc(t->nstids, M_CXGBE, M_NOWAIT);
	if (t->stid_bitmap == NULL) {
		free(t->stid_tab, M_CXGBE);
		t->stid_tab = NULL;
		return (ENOMEM);
	}
	mtx_init(&t->stid_lock, "stid lock", NULL, MTX_DEF);
	t->stids_in_use = 0;

	return (0);
}

void
free_stid_tab(struct adapter *sc)
{
	struct tid_info *t = &sc->tids;

	KASSERT(t->stids_in_use == 0,
	    ("%s: %d tids still in use.", __func__, t->stids_in_use));

	if (mtx_initialized(&t->stid_lock))
		mtx_destroy(&t->stid_lock);
	free(t->stid_tab, M_CXGBE);
	t->stid_tab = NULL;
	free(t->stid_bitmap, M_CXGBE);
	t->stid_bitmap = NULL;
}

void
stop_stid_tab(struct adapter *sc)
{
	struct tid_info *t = &sc->tids;
	struct tom_data *td = sc->tom_softc;
	struct listen_ctx *lctx;
	struct synq_entry *synqe;
	int i, ntids;

	mtx_lock(&t->stid_lock);
	t->stid_tab_stopped = true;
	mtx_unlock(&t->stid_lock);

	mtx_lock(&td->lctx_hash_lock);
	for (i = 0; i <= td->listen_mask; i++) {
		LIST_FOREACH(lctx, &td->listen_hash[i], link)
			lctx->flags &= ~(LCTX_RPL_PENDING | LCTX_SETUP_IN_HW);
	}
	mtx_unlock(&td->lctx_hash_lock);

	mtx_lock(&td->toep_list_lock);
	TAILQ_FOREACH(synqe, &td->synqe_list, link) {
		MPASS(sc->incarnation == synqe->incarnation);
		MPASS(synqe->tid >= 0);
		MPASS(synqe == lookup_tid(sc, synqe->tid));
		/* Remove tid from the lookup table immediately. */
		CTR(KTR_CXGBE, "%s: tid %d@%d STRANDED, removed from table",
		    __func__, synqe->tid, synqe->incarnation);
		ntids = synqe->lctx->inp->inp_vflag & INP_IPV6 ? 2 : 1;
		remove_tid(sc, synqe->tid, ntids);
#if 0
		/* synqe->tid is stale now but left alone for debug. */
		synqe->tid = -1;
#endif
	}
	MPASS(TAILQ_EMPTY(&td->stranded_synqe));
	TAILQ_CONCAT(&td->stranded_synqe, &td->synqe_list, link);
	MPASS(TAILQ_EMPTY(&td->synqe_list));
	mtx_unlock(&td->toep_list_lock);
}

void
restart_stid_tab(struct adapter *sc)
{
	struct tid_info *t = &sc->tids;
	struct tom_data *td = sc->tom_softc;
	struct listen_ctx *lctx;
	int i;

	mtx_lock(&td->lctx_hash_lock);
	for (i = 0; i <= td->listen_mask; i++) {
		LIST_FOREACH(lctx, &td->listen_hash[i], link) {
			MPASS((lctx->flags & (LCTX_RPL_PENDING | LCTX_SETUP_IN_HW)) == 0);
			lctx->flags |= LCTX_RPL_PENDING;
			if (lctx->inp->inp_vflag & INP_IPV6)
				create_server6(sc, lctx);
			else
				create_server(sc, lctx);
		}
	}
	mtx_unlock(&td->lctx_hash_lock);

	mtx_lock(&t->stid_lock);
	t->stid_tab_stopped = false;
	mtx_unlock(&t->stid_lock);
}

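/*
 * Allocate a server tid for a listener.  Returns the stid (with the
 * adapter's stid_base added in) or -1 if nothing suitable is available.
 * An IPv6 listener consumes an aligned pair of stids, an IPv4 listener
 * just one.
 */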
static int
alloc_stid(struct adapter *sc, bool isipv6, void *ctx)
{
	struct tid_info *t = &sc->tids;
	const u_int n = isipv6 ? 2 : 1;
	int stid, pair_stid;
	u_int i;
	ssize_t val;

	mtx_lock(&t->stid_lock);
	MPASS(t->stids_in_use <= t->nstids);
	if (n > t->nstids - t->stids_in_use || t->stid_tab_stopped) {
		mtx_unlock(&t->stid_lock);
		return (-1);
	}

	stid = -1;
	if (isipv6) {
		/*
		 * An IPv6 server needs 2 naturally aligned stids (1 stid = 4
		 * cells) in the TCAM.  We know that the start of the stid
		 * region is properly aligned already (the chip requires each
		 * region to be 128-cell aligned).
		 */
		for (i = 0; i + 1 < t->nstids; i = roundup2(val + 1, 2)) {
			bit_ffc_area_at(t->stid_bitmap, i, t->nstids, 2, &val);
			if (val == -1)
				break;
			if ((val & 1) == 0) {
				stid = val;
				break;
			}
		}
	} else {
		/*
		 * An IPv4 server needs one stid without any alignment
		 * requirements.  But we try extra hard to find an available
		 * stid adjacent to a used stid so that free "stid-pairs" are
		 * left intact for IPv6.
		 */
		bit_ffc_at(t->stid_bitmap, 0, t->nstids, &val);
		while (val != -1) {
			if (stid == -1) {
				/*
				 * First usable stid.  Look no further if it's
				 * an ideal fit.
				 */
				stid = val;
				if (val & 1 || bit_test(t->stid_bitmap, val + 1))
					break;
			} else {
				/*
				 * We have an unused stid already but are now
				 * looking for in-use stids because we'd prefer
				 * to grab an unused stid adjacent to one that's
				 * in use.
				 *
				 * Odd stids pair with the previous stid and
				 * even ones pair with the next stid.
				 */
				pair_stid = val & 1 ? val - 1 : val + 1;
				if (bit_test(t->stid_bitmap, pair_stid) == 0) {
					stid = pair_stid;
					break;
				}
			}
			val = roundup2(val + 1, 2);
			if (val >= t->nstids)
				break;
			bit_ffs_at(t->stid_bitmap, val, t->nstids, &val);
		}
	}

	if (stid >= 0) {
		MPASS(stid + n - 1 < t->nstids);
		MPASS(bit_ntest(t->stid_bitmap, stid, stid + n - 1, 0));
		bit_nset(t->stid_bitmap, stid, stid + n - 1);
		t->stids_in_use += n;
		t->stid_tab[stid] = ctx;
#ifdef INVARIANTS
		if (n == 2) {
			MPASS((stid & 1) == 0);
			t->stid_tab[stid + 1] = NULL;
		}
#endif
		stid += t->stid_base;
	}
	mtx_unlock(&t->stid_lock);
	return (stid);
}

static struct listen_ctx *
lookup_stid(struct adapter *sc, int stid)
{
	struct tid_info *t = &sc->tids;

	return (t->stid_tab[stid - t->stid_base]);
}

static void
free_stid(struct adapter *sc, int stid, bool isipv6)
{
	struct tid_info *t = &sc->tids;
	const u_int n = isipv6 ? 2 : 1;

	mtx_lock(&t->stid_lock);
	MPASS(stid >= t->stid_base);
	stid -= t->stid_base;
	MPASS(stid + n - 1 < t->nstids);
	MPASS(t->stids_in_use <= t->nstids);
	MPASS(t->stids_in_use >= n);
	MPASS(t->stid_tab[stid] != NULL);
#ifdef INVARIANTS
	if (n == 2) {
		MPASS((stid & 1) == 0);
		MPASS(t->stid_tab[stid + 1] == NULL);
	}
#endif
	MPASS(bit_ntest(t->stid_bitmap, stid, stid + n - 1, 1));
	bit_nclear(t->stid_bitmap, stid, stid + n - 1);
	t->stid_tab[stid] = NULL;
	t->stids_in_use -= n;
	mtx_unlock(&t->stid_lock);
}

static struct listen_ctx *
alloc_lctx(struct adapter *sc, struct inpcb *inp, struct vi_info *vi)
{
	struct listen_ctx *lctx;

	INP_WLOCK_ASSERT(inp);

	lctx = malloc(sizeof(struct listen_ctx), M_CXGBE, M_NOWAIT | M_ZERO);
	if (lctx == NULL)
		return (NULL);

	lctx->isipv6 = inp->inp_vflag & INP_IPV6;
	lctx->stid = alloc_stid(sc, lctx->isipv6, lctx);
	if (lctx->stid < 0) {
		free(lctx, M_CXGBE);
		return (NULL);
	}

	if (lctx->isipv6 &&
	    !IN6_ARE_ADDR_EQUAL(&in6addr_any, &inp->in6p_laddr)) {
		lctx->ce = t4_get_clip_entry(sc, &inp->in6p_laddr, true);
		if (lctx->ce == NULL) {
			/* Don't leak the stid on failure. */
			free_stid(sc, lctx->stid, lctx->isipv6);
			free(lctx, M_CXGBE);
			return (NULL);
		}
	}

	lctx->ctrlq = &sc->sge.ctrlq[vi->pi->port_id];
	lctx->ofld_rxq = &sc->sge.ofld_rxq[vi->first_ofld_rxq];
	refcount_init(&lctx->refcount, 1);

	lctx->inp = inp;
	lctx->vnet = inp->inp_socket->so_vnet;
	in_pcbref(inp);

	return (lctx);
}

/* Don't call this directly, use release_lctx instead. */
static int
free_lctx(struct adapter *sc, struct listen_ctx *lctx)
{
	struct inpcb *inp = lctx->inp;

	INP_WLOCK_ASSERT(inp);
	KASSERT(lctx->refcount == 0,
	    ("%s: refcount %d", __func__, lctx->refcount));
	KASSERT(lctx->stid >= 0, ("%s: bad stid %d.", __func__, lctx->stid));

	CTR4(KTR_CXGBE, "%s: stid %u, lctx %p, inp %p",
	    __func__, lctx->stid, lctx, lctx->inp);

	if (lctx->ce)
		t4_release_clip_entry(sc, lctx->ce);
	free_stid(sc, lctx->stid, lctx->isipv6);
	free(lctx, M_CXGBE);

	return (in_pcbrele_wlocked(inp));
}

static void
hold_lctx(struct listen_ctx *lctx)
{

	refcount_acquire(&lctx->refcount);
}

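/*
 * The listen hash is keyed on the inpcb pointer itself: FNV-1 over the
 * pointer's bits, masked down to the table size.
 */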
static inline uint32_t
listen_hashfn(void *key, u_long mask)
{

	return (fnv_32_buf(&key, sizeof(key), FNV1_32_INIT) & mask);
}

/*
 * Add a listen_ctx entry to the listen hash table.
 */
static void
listen_hash_add(struct adapter *sc, struct listen_ctx *lctx)
{
	struct tom_data *td = sc->tom_softc;
	int bucket = listen_hashfn(lctx->inp, td->listen_mask);

	mtx_lock(&td->lctx_hash_lock);
	LIST_INSERT_HEAD(&td->listen_hash[bucket], lctx, link);
	td->lctx_count++;
	mtx_unlock(&td->lctx_hash_lock);
}

/*
 * Look for the listening socket's context entry in the hash and return it.
 */
static struct listen_ctx *
listen_hash_find(struct adapter *sc, struct inpcb *inp)
{
	struct tom_data *td = sc->tom_softc;
	int bucket = listen_hashfn(inp, td->listen_mask);
	struct listen_ctx *lctx;

	mtx_lock(&td->lctx_hash_lock);
	LIST_FOREACH(lctx, &td->listen_hash[bucket], link) {
		if (lctx->inp == inp)
			break;
	}
	mtx_unlock(&td->lctx_hash_lock);

	return (lctx);
}

/*
 * Removes the listen_ctx structure for inp from the hash and returns it.
 */
static struct listen_ctx *
listen_hash_del(struct adapter *sc, struct inpcb *inp)
{
	struct tom_data *td = sc->tom_softc;
	int bucket = listen_hashfn(inp, td->listen_mask);
	struct listen_ctx *lctx, *l;

	mtx_lock(&td->lctx_hash_lock);
	LIST_FOREACH_SAFE(lctx, &td->listen_hash[bucket], link, l) {
		if (lctx->inp == inp) {
			LIST_REMOVE(lctx, link);
			td->lctx_count--;
			break;
		}
	}
	mtx_unlock(&td->lctx_hash_lock);

	return (lctx);
}

/*
 * Releases a hold on the lctx.  Must be called with the listening socket's inp
 * locked.  The inp may be freed by this function and it returns NULL to
 * indicate this.
 */
static struct inpcb *
release_lctx(struct adapter *sc, struct listen_ctx *lctx)
{
	struct inpcb *inp = lctx->inp;
	int inp_freed = 0;

	INP_WLOCK_ASSERT(inp);
	if (refcount_release(&lctx->refcount))
		inp_freed = free_lctx(sc, lctx);

	return (inp_freed ? NULL : inp);
}

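/*
 * Send a flowc work request to set up the synqe's tid with the firmware.
 * This must be sent on the tid before any other work request (note that
 * send_abort_rpl_synqe makes sure one has gone out before the abort).
 */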
static void
send_flowc_wr_synqe(struct adapter *sc, struct synq_entry *synqe)
{
	struct mbuf *m = synqe->syn;
	if_t ifp = m->m_pkthdr.rcvif;
	struct vi_info *vi = if_getsoftc(ifp);
	struct port_info *pi = vi->pi;
	struct wrqe *wr;
	struct fw_flowc_wr *flowc;
	struct sge_ofld_txq *ofld_txq;
	struct sge_ofld_rxq *ofld_rxq;
	const int nparams = 6;
	const int flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval);
	const u_int pfvf = sc->pf << S_FW_VIID_PFN;

	INP_WLOCK_ASSERT(synqe->lctx->inp);
	MPASS((synqe->flags & TPF_FLOWC_WR_SENT) == 0);

	ofld_txq = &sc->sge.ofld_txq[synqe->params.txq_idx];
	ofld_rxq = &sc->sge.ofld_rxq[synqe->params.rxq_idx];

	wr = alloc_wrqe(roundup2(flowclen, 16), &ofld_txq->wrq);
	if (wr == NULL) {
		/* XXX */
		panic("%s: allocation failure.", __func__);
	}
	flowc = wrtod(wr);
	memset(flowc, 0, wr->wr_len);
	flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) |
	    V_FW_FLOWC_WR_NPARAMS(nparams));
	flowc->flowid_len16 = htonl(V_FW_WR_LEN16(howmany(flowclen, 16)) |
	    V_FW_WR_FLOWID(synqe->tid));
	flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN;
	flowc->mnemval[0].val = htobe32(pfvf);
	/* Firmware expects hw port and will translate to channel itself. */
	flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH;
	flowc->mnemval[1].val = htobe32(pi->hw_port);
	flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT;
	flowc->mnemval[2].val = htobe32(pi->hw_port);
	flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID;
	flowc->mnemval[3].val = htobe32(ofld_rxq->iq.abs_id);
	flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDBUF;
	flowc->mnemval[4].val = htobe32(512);
	flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_MSS;
	flowc->mnemval[5].val = htobe32(512);

	synqe->flags |= TPF_FLOWC_WR_SENT;
	t4_wrq_tx(sc, wr);
}

static void
send_abort_rpl_synqe(struct toedev *tod, struct synq_entry *synqe,
    int rst_status)
{
	struct adapter *sc = tod->tod_softc;
	struct wrqe *wr;
	struct cpl_abort_req *req;

	INP_WLOCK_ASSERT(synqe->lctx->inp);

	CTR5(KTR_CXGBE, "%s: synqe %p (0x%x), tid %d%s",
	    __func__, synqe, synqe->flags, synqe->tid,
	    synqe->flags & TPF_ABORT_SHUTDOWN ?
	    " (abort already in progress)" : "");
	if (synqe->flags & TPF_ABORT_SHUTDOWN)
		return;	/* abort already in progress */
	synqe->flags |= TPF_ABORT_SHUTDOWN;

	if (!(synqe->flags & TPF_FLOWC_WR_SENT))
		send_flowc_wr_synqe(sc, synqe);

	wr = alloc_wrqe(sizeof(*req),
	    &sc->sge.ofld_txq[synqe->params.txq_idx].wrq);
	if (wr == NULL) {
		/* XXX */
		panic("%s: allocation failure.", __func__);
	}
	req = wrtod(wr);
	INIT_TP_WR_MIT_CPL(req, CPL_ABORT_REQ, synqe->tid);
	req->rsvd0 = 0;	/* don't have a snd_nxt */
	req->rsvd1 = 1;	/* no data sent yet */
	req->cmd = rst_status;

	t4_l2t_send(sc, wr, &sc->l2t->l2tab[synqe->params.l2t_idx]);
}

static int
create_server(struct adapter *sc, struct listen_ctx *lctx)
{
	struct wrqe *wr;
	struct cpl_pass_open_req *req;
	struct inpcb *inp = lctx->inp;

	wr = alloc_wrqe(sizeof(*req), lctx->ctrlq);
	if (wr == NULL) {
		log(LOG_ERR, "%s: allocation failure", __func__);
		return (ENOMEM);
	}
	req = wrtod(wr);

	INIT_TP_WR(req, 0);
	OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, lctx->stid));
	req->local_port = inp->inp_lport;
	req->peer_port = 0;
	req->local_ip = inp->inp_laddr.s_addr;
	req->peer_ip = 0;
	req->opt0 = htobe64(V_TX_CHAN(lctx->ctrlq->eq.tx_chan));
	req->opt1 = htobe64(V_CONN_POLICY(CPL_CONN_POLICY_ASK) |
	    F_SYN_RSS_ENABLE | V_SYN_RSS_QUEUE(lctx->ofld_rxq->iq.abs_id));

	t4_wrq_tx(sc, wr);
	return (0);
}

static int
create_server6(struct adapter *sc, struct listen_ctx *lctx)
{
	struct wrqe *wr;
	struct cpl_pass_open_req6 *req;
	struct inpcb *inp = lctx->inp;

	wr = alloc_wrqe(sizeof(*req), lctx->ctrlq);
	if (wr == NULL) {
		log(LOG_ERR, "%s: allocation failure", __func__);
		return (ENOMEM);
	}
	req = wrtod(wr);

	INIT_TP_WR(req, 0);
	OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_PASS_OPEN_REQ6, lctx->stid));
	req->local_port = inp->inp_lport;
	req->peer_port = 0;
	req->local_ip_hi = *(uint64_t *)&inp->in6p_laddr.s6_addr[0];
	req->local_ip_lo = *(uint64_t *)&inp->in6p_laddr.s6_addr[8];
	req->peer_ip_hi = 0;
	req->peer_ip_lo = 0;
	req->opt0 = htobe64(V_TX_CHAN(lctx->ctrlq->eq.tx_chan));
	req->opt1 = htobe64(V_CONN_POLICY(CPL_CONN_POLICY_ASK) |
	    F_SYN_RSS_ENABLE | V_SYN_RSS_QUEUE(lctx->ofld_rxq->iq.abs_id));

	t4_wrq_tx(sc, wr);
	return (0);
}

static int
destroy_server(struct adapter *sc, struct listen_ctx *lctx)
{
	struct wrqe *wr;
	struct cpl_close_listsvr_req *req;

	wr = alloc_wrqe(sizeof(*req), lctx->ctrlq);
	if (wr == NULL) {
		/* XXX */
		panic("%s: allocation failure.", __func__);
	}
	req = wrtod(wr);

	INIT_TP_WR(req, 0);
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ,
	    lctx->stid));
	req->reply_ctrl = htobe16(lctx->ofld_rxq->iq.abs_id);
	req->rsvd = htobe16(0);

	t4_wrq_tx(sc, wr);
	return (0);
}

/*
 * Start a listening server by sending a passive open request to HW.
 *
 * We can't take the adapter lock here, so accesses to sc->flags,
 * sc->offload_map, and if_capenable are all race prone.
 */
int
t4_listen_start(struct toedev *tod, struct tcpcb *tp)
{
	struct adapter *sc = tod->tod_softc;
	struct vi_info *vi;
	struct port_info *pi;
	struct inpcb *inp = tptoinpcb(tp);
	struct listen_ctx *lctx;
	int i, rc, v;
	struct offload_settings settings;

	INP_WLOCK_ASSERT(inp);

	rw_rlock(&sc->policy_lock);
	settings = *lookup_offload_policy(sc, OPEN_TYPE_LISTEN, NULL,
	    EVL_MAKETAG(0xfff, 0, 0), inp);
	rw_runlock(&sc->policy_lock);
	if (!settings.offload)
		return (0);

	/* Don't start a hardware listener for any loopback address. */
	if (inp->inp_vflag & INP_IPV6 && IN6_IS_ADDR_LOOPBACK(&inp->in6p_laddr))
		return (0);
	if (!(inp->inp_vflag & INP_IPV6) &&
	    IN_LOOPBACK(ntohl(inp->inp_laddr.s_addr)))
		return (0);
	if (sc->flags & KERN_TLS_ON)
		return (0);
#if 0
	ADAPTER_LOCK(sc);
	if (IS_BUSY(sc)) {
		log(LOG_ERR, "%s: listen request ignored, %s is busy",
		    __func__, device_get_nameunit(sc->dev));
		goto done;
	}

	KASSERT(uld_active(sc, ULD_TOM),
	    ("%s: TOM not initialized", __func__));
#endif

	/*
	 * Find an initialized VI with IFCAP_TOE (4 or 6).  We'll use the first
	 * such VI's queues to send the passive open and receive the reply to
	 * it.
	 *
	 * XXX: need a way to mark a port in use by offload.  if_cxgbe should
	 * then reject any attempt to bring down such a port (and maybe reject
	 * attempts to disable IFCAP_TOE on that port too?).
	 */
	for_each_port(sc, i) {
		pi = sc->port[i];
		for_each_vi(pi, v, vi) {
			if (vi->flags & VI_INIT_DONE &&
			    if_getcapenable(vi->ifp) & IFCAP_TOE)
				goto found;
		}
	}
	goto done;	/* no port that's UP with IFCAP_TOE enabled */
found:

	if (listen_hash_find(sc, inp) != NULL)
		goto done;	/* already setup */

	lctx = alloc_lctx(sc, inp, vi);
	if (lctx == NULL) {
		log(LOG_ERR,
		    "%s: listen request ignored, %s couldn't allocate lctx\n",
		    __func__, device_get_nameunit(sc->dev));
		goto done;
	}
	listen_hash_add(sc, lctx);

	CTR6(KTR_CXGBE, "%s: stid %u (%s), lctx %p, inp %p vflag 0x%x",
	    __func__, lctx->stid, tcpstates[tp->t_state], lctx, inp,
	    inp->inp_vflag);

	if (inp->inp_vflag & INP_IPV6)
		rc = create_server6(sc, lctx);
	else
		rc = create_server(sc, lctx);
	if (rc != 0) {
		log(LOG_ERR, "%s: %s failed to create hw listener: %d.\n",
		    __func__, device_get_nameunit(sc->dev), rc);
		(void) listen_hash_del(sc, inp);
		inp = release_lctx(sc, lctx);
		/* can't be freed, host stack has a reference */
		KASSERT(inp != NULL, ("%s: inp freed", __func__));
		goto done;
	}
	lctx->flags |= LCTX_RPL_PENDING;
done:
#if 0
	ADAPTER_UNLOCK(sc);
#endif
	return (0);
}

int
t4_listen_stop(struct toedev *tod, struct tcpcb *tp)
{
	struct listen_ctx *lctx;
	struct adapter *sc = tod->tod_softc;
	struct inpcb *inp = tptoinpcb(tp);

	INP_WLOCK_ASSERT(inp);

	lctx = listen_hash_del(sc, inp);
	if (lctx == NULL)
		return (ENOENT);	/* no hardware listener for this inp */

	CTR4(KTR_CXGBE, "%s: stid %u, lctx %p, flags %x", __func__, lctx->stid,
	    lctx, lctx->flags);

	/*
	 * If the reply to the PASS_OPEN is still pending we'll wait for it to
	 * arrive and clean up when it does.
	 */
	if (lctx->flags & LCTX_RPL_PENDING) {
		return (EINPROGRESS);
	}

	if (lctx->flags & LCTX_SETUP_IN_HW)
		destroy_server(sc, lctx);
	else
		inp = release_lctx(sc, lctx);
	return (0);
}

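/*
 * Allocate a synq entry for an embryonic connection.  The entry starts out
 * with one reference and a hold on its listening context.
 */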
static inline struct synq_entry *
alloc_synqe(struct adapter *sc, struct listen_ctx *lctx, int flags)
{
	struct synq_entry *synqe;

	INP_RLOCK_ASSERT(lctx->inp);
	MPASS(flags == M_WAITOK || flags == M_NOWAIT);

	synqe = malloc(sizeof(*synqe), M_CXGBE, flags);
	if (__predict_true(synqe != NULL)) {
		synqe->flags = TPF_SYNQE;
		synqe->incarnation = sc->incarnation;
		refcount_init(&synqe->refcnt, 1);
		synqe->lctx = lctx;
		hold_lctx(lctx);	/* Every synqe has a ref on its lctx. */
		synqe->syn = NULL;
	}

	return (synqe);
}

static inline void
hold_synqe(struct synq_entry *synqe)
{

	refcount_acquire(&synqe->refcnt);
}

static inline struct inpcb *
release_synqe(struct adapter *sc, struct synq_entry *synqe)
{
	struct inpcb *inp;

	MPASS(synqe->flags & TPF_SYNQE);
	MPASS(synqe->lctx != NULL);

	inp = synqe->lctx->inp;
	MPASS(inp != NULL);
	INP_WLOCK_ASSERT(inp);

	if (refcount_release(&synqe->refcnt)) {
		inp = release_lctx(sc, synqe->lctx);
		m_freem(synqe->syn);
		free(synqe, M_CXGBE);
	}

	return (inp);
}

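/*
 * Syncache callbacks: the syncache takes a reference on the synqe when an
 * entry is added and drops it when the entry is removed or expires.
 */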
void
t4_syncache_added(struct toedev *tod __unused, void *arg)
{
	struct synq_entry *synqe = arg;

	hold_synqe(synqe);
}

void
t4_syncache_removed(struct toedev *tod, void *arg)
{
	struct adapter *sc = tod->tod_softc;
	struct synq_entry *synqe = arg;
	struct inpcb *inp = synqe->lctx->inp;

	/*
	 * XXX: this is a LOR but harmless when running from the softclock.
	 */
	INP_WLOCK(inp);
	inp = release_synqe(sc, synqe);
	if (inp != NULL)
		INP_WUNLOCK(inp);
}

int
t4_syncache_respond(struct toedev *tod, void *arg, struct mbuf *m)
{
	struct synq_entry *synqe = arg;

	if (atomic_fetchadd_int(&synqe->ok_to_respond, 1) == 0) {
		struct tcpopt to;
		struct ip *ip = mtod(m, struct ip *);
		struct tcphdr *th;

		if (ip->ip_v == IPVERSION)
			th = (void *)(ip + 1);
		else
			th = (void *)((struct ip6_hdr *)ip + 1);
		bzero(&to, sizeof(to));
		tcp_dooptions(&to, (void *)(th + 1),
		    (th->th_off << 2) - sizeof(*th), TO_SYN);

		/* save these for later */
		synqe->iss = be32toh(th->th_seq);
		synqe->irs = be32toh(th->th_ack) - 1;
		synqe->ts = to.to_tsval;
	}

	m_freem(m);	/* don't need this any more */
	return (0);
}

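/*
 * Process the firmware's reply (CPL_PASS_OPEN_RPL) to the passive open
 * request sent by create_server or create_server6.
 */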
static int
do_pass_open_rpl(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_pass_open_rpl *cpl = (const void *)(rss + 1);
	int stid = GET_TID(cpl);
	unsigned int status = cpl->status;
	struct listen_ctx *lctx = lookup_stid(sc, stid);
	struct inpcb *inp = lctx->inp;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	KASSERT(opcode == CPL_PASS_OPEN_RPL,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
	KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));

	INP_WLOCK(inp);

	CTR4(KTR_CXGBE, "%s: stid %d, status %u, flags 0x%x",
	    __func__, stid, status, lctx->flags);

	lctx->flags &= ~LCTX_RPL_PENDING;
	if (status == CPL_ERR_NONE)
		lctx->flags |= LCTX_SETUP_IN_HW;
	else
		log(LOG_ERR, "listener (stid %u) failed: %d\n", stid, status);

#ifdef INVARIANTS
	/*
	 * If the inp has been dropped (listening socket closed) then
	 * listen_stop must have run and taken the inp out of the hash.
	 */
	if (inp->inp_flags & INP_DROPPED) {
		KASSERT(listen_hash_del(sc, inp) == NULL,
		    ("%s: inp %p still in listen hash", __func__, inp));
	}
#endif

	if (inp->inp_flags & INP_DROPPED && status != CPL_ERR_NONE) {
		if (release_lctx(sc, lctx) != NULL)
			INP_WUNLOCK(inp);
		return (status);
	}

	/*
	 * Listening socket stopped listening earlier and now the chip tells us
	 * it has started the hardware listener.  Stop it; the lctx will be
	 * released in do_close_server_rpl.
	 */
	if (inp->inp_flags & INP_DROPPED) {
		destroy_server(sc, lctx);
		INP_WUNLOCK(inp);
		return (status);
	}

	/*
	 * Failed to start hardware listener.  Take inp out of the hash and
	 * release our reference on it.  An error message has been logged
	 * already.
	 */
	if (status != CPL_ERR_NONE) {
		listen_hash_del(sc, inp);
		if (release_lctx(sc, lctx) != NULL)
			INP_WUNLOCK(inp);
		return (status);
	}

	/* hardware listener open for business */

	INP_WUNLOCK(inp);
	return (status);
}

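/*
 * Process the reply to the close request sent by destroy_server.  The lctx
 * (and possibly the inp) is released here on success.
 */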
static int
do_close_server_rpl(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_close_listsvr_rpl *cpl = (const void *)(rss + 1);
	int stid = GET_TID(cpl);
	unsigned int status = cpl->status;
	struct listen_ctx *lctx = lookup_stid(sc, stid);
	struct inpcb *inp = lctx->inp;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	KASSERT(opcode == CPL_CLOSE_LISTSRV_RPL,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
	KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));

	CTR3(KTR_CXGBE, "%s: stid %u, status %u", __func__, stid, status);

	if (status != CPL_ERR_NONE) {
		log(LOG_ERR, "%s: failed (%u) to close listener for stid %u\n",
		    __func__, status, stid);
		return (status);
	}

	INP_WLOCK(inp);
	inp = release_lctx(sc, lctx);
	if (inp != NULL)
		INP_WUNLOCK(inp);

	return (status);
}

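/*
 * Final cleanup for a synqe: remove its tid from the lookup table and the
 * synq list, return the tid to the firmware, and drop the l2t and synqe
 * references.  The listening socket's inp lock is held on entry and is
 * released here.
 */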
static void
done_with_synqe(struct adapter *sc, struct synq_entry *synqe)
{
	struct tom_data *td = sc->tom_softc;
	struct listen_ctx *lctx = synqe->lctx;
	struct inpcb *inp = lctx->inp;
	struct l2t_entry *e = &sc->l2t->l2tab[synqe->params.l2t_idx];
	int ntids;

	INP_WLOCK_ASSERT(inp);

	if (synqe->tid != -1) {
		ntids = inp->inp_vflag & INP_IPV6 ? 2 : 1;
		remove_tid(sc, synqe->tid, ntids);
		mtx_lock(&td->toep_list_lock);
		TAILQ_REMOVE(&td->synqe_list, synqe, link);
		mtx_unlock(&td->toep_list_lock);
		release_tid(sc, synqe->tid, lctx->ctrlq);
	}
	t4_l2t_release(e);
	inp = release_synqe(sc, synqe);
	if (inp)
		INP_WUNLOCK(inp);
}

void
synack_failure_cleanup(struct adapter *sc, struct synq_entry *synqe)
{
	INP_WLOCK(synqe->lctx->inp);
	done_with_synqe(sc, synqe);
}

int
do_abort_req_synqe(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_abort_req_rss *cpl = (const void *)(rss + 1);
	unsigned int tid = GET_TID(cpl);
	struct synq_entry *synqe = lookup_tid(sc, tid);
	struct listen_ctx *lctx = synqe->lctx;
	struct inpcb *inp = lctx->inp;
	struct sge_ofld_txq *ofld_txq;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	KASSERT(opcode == CPL_ABORT_REQ_RSS,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
	KASSERT(synqe->tid == tid, ("%s: toep tid mismatch", __func__));

	CTR6(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x), lctx %p, status %d",
	    __func__, tid, synqe, synqe->flags, synqe->lctx, cpl->status);

	if (negative_advice(cpl->status))
		return (0);	/* Ignore negative advice */

	INP_WLOCK(inp);

	ofld_txq = &sc->sge.ofld_txq[synqe->params.txq_idx];

	if (!(synqe->flags & TPF_FLOWC_WR_SENT))
		send_flowc_wr_synqe(sc, synqe);

	/*
	 * If we initiated an abort earlier then its reply is responsible for
	 * cleaning up resources.  Otherwise we tear everything down right
	 * here and now.  We owe the T4 a CPL_ABORT_RPL no matter what.
	 */
	if (synqe->flags & TPF_ABORT_SHUTDOWN) {
		INP_WUNLOCK(inp);
		goto done;
	}

	done_with_synqe(sc, synqe);
	/* inp lock released by done_with_synqe */
done:
	send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST);
	return (0);
}

int
do_abort_rpl_synqe(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_abort_rpl_rss *cpl = (const void *)(rss + 1);
	unsigned int tid = GET_TID(cpl);
	struct synq_entry *synqe = lookup_tid(sc, tid);
	struct listen_ctx *lctx = synqe->lctx;
	struct inpcb *inp = lctx->inp;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	KASSERT(opcode == CPL_ABORT_RPL_RSS,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
	KASSERT(synqe->tid == tid, ("%s: toep tid mismatch", __func__));

	CTR6(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x), lctx %p, status %d",
	    __func__, tid, synqe, synqe->flags, synqe->lctx, cpl->status);

	INP_WLOCK(inp);
	KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
	    ("%s: wasn't expecting abort reply for synqe %p (0x%x)",
	    __func__, synqe, synqe->flags));

	done_with_synqe(sc, synqe);
	/* inp lock released by done_with_synqe */

	return (0);
}

void
t4_offload_socket(struct toedev *tod, void *arg, struct socket *so)
{
	struct adapter *sc = tod->tod_softc;
	struct tom_data *td = sc->tom_softc;
	struct synq_entry *synqe = arg;
	struct inpcb *inp = sotoinpcb(so);
	struct toepcb *toep = synqe->toep;

	NET_EPOCH_ASSERT();	/* prevents bad race with accept() */
	INP_WLOCK_ASSERT(inp);
	KASSERT(synqe->flags & TPF_SYNQE,
	    ("%s: %p not a synq_entry?", __func__, arg));
	MPASS(toep->tid == synqe->tid);

	offload_socket(so, toep);
	make_established(toep, synqe->iss, synqe->irs, synqe->tcp_opt);
	toep->flags |= TPF_CPL_PENDING;
	update_tid(sc, synqe->tid, toep);
	synqe->flags |= TPF_SYNQE_EXPANDED;
	mtx_lock(&td->toep_list_lock);
	/* Remove synqe from its list and add the TOE PCB to the active list. */
	TAILQ_REMOVE(&td->synqe_list, synqe, link);
	TAILQ_INSERT_TAIL(&td->toep_list, toep, link);
	toep->flags |= TPF_IN_TOEP_LIST;
	mtx_unlock(&td->toep_list_lock);
	inp->inp_flowtype = (inp->inp_vflag & INP_IPV6) ?
	    M_HASHTYPE_RSS_TCP_IPV6 : M_HASHTYPE_RSS_TCP_IPV4;
	inp->inp_flowid = synqe->rss_hash;
}

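/*
 * Convert the hardware's report of the peer's TCP options to the kernel's
 * struct tcpopt.
 */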
static void
t4opt_to_tcpopt(const struct tcp_options *t4opt, struct tcpopt *to)
{
	bzero(to, sizeof(*to));

	if (t4opt->mss) {
		to->to_flags |= TOF_MSS;
		to->to_mss = be16toh(t4opt->mss);
	}

	if (t4opt->wsf > 0 && t4opt->wsf < 15) {
		to->to_flags |= TOF_SCALE;
		to->to_wscale = t4opt->wsf;
	}

	if (t4opt->tstamp)
		to->to_flags |= TOF_TS;

	if (t4opt->sack)
		to->to_flags |= TOF_SACKPERM;
}

static bool
encapsulated_syn(struct adapter *sc, const struct cpl_pass_accept_req *cpl)
{
	u_int hlen = be32toh(cpl->hdr_len);

	if (chip_id(sc) >= CHELSIO_T6)
		return (G_T6_ETH_HDR_LEN(hlen) > sizeof(struct ether_vlan_header));
	else
		return (G_ETH_HDR_LEN(hlen) > sizeof(struct ether_vlan_header));
}

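/*
 * Reconstruct the conninfo, TCP header, and IP TOS byte from the SYN
 * embedded in a CPL_PASS_ACCEPT_REQ.
 */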
static void
pass_accept_req_to_protohdrs(struct adapter *sc, const struct mbuf *m,
    struct in_conninfo *inc, struct tcphdr *th, uint8_t *iptos)
{
	const struct cpl_pass_accept_req *cpl = mtod(m, const void *);
	const struct ether_header *eh;
	unsigned int hlen = be32toh(cpl->hdr_len);
	uintptr_t l3hdr;
	const struct tcphdr *tcp;

	eh = (const void *)(cpl + 1);
	if (chip_id(sc) >= CHELSIO_T6) {
		l3hdr = ((uintptr_t)eh + G_T6_ETH_HDR_LEN(hlen));
		tcp = (const void *)(l3hdr + G_T6_IP_HDR_LEN(hlen));
	} else {
		l3hdr = ((uintptr_t)eh + G_ETH_HDR_LEN(hlen));
		tcp = (const void *)(l3hdr + G_IP_HDR_LEN(hlen));
	}

	/* extract TOS (DiffServ + ECN) byte for AccECN */
	if (iptos) {
		if (((struct ip *)l3hdr)->ip_v == IPVERSION) {
			const struct ip *ip = (const void *)l3hdr;
			*iptos = ip->ip_tos;
		}
#ifdef INET6
		else if (((struct ip *)l3hdr)->ip_v == (IPV6_VERSION >> 4)) {
			const struct ip6_hdr *ip6 = (const void *)l3hdr;
			*iptos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
		}
#endif /* INET6 */
	}

	if (inc) {
		bzero(inc, sizeof(*inc));
		inc->inc_fport = tcp->th_sport;
		inc->inc_lport = tcp->th_dport;
		if (((struct ip *)l3hdr)->ip_v == IPVERSION) {
			const struct ip *ip = (const void *)l3hdr;

			inc->inc_faddr = ip->ip_src;
			inc->inc_laddr = ip->ip_dst;
		} else {
			const struct ip6_hdr *ip6 = (const void *)l3hdr;

			inc->inc_flags |= INC_ISIPV6;
			inc->inc6_faddr = ip6->ip6_src;
			inc->inc6_laddr = ip6->ip6_dst;
		}
	}

	if (th) {
		bcopy(tcp, th, sizeof(*th));
		tcp_fields_to_host(th);		/* just like tcp_input */
	}
}

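/*
 * Resolve the L2 next hop for the peer and return an l2t entry for it, or
 * NULL if there's no usable route out of the given ifnet.
 */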
static struct l2t_entry *
get_l2te_for_nexthop(struct port_info *pi, if_t ifp,
    struct in_conninfo *inc)
{
	struct l2t_entry *e;
	struct sockaddr_in6 sin6;
	struct sockaddr *dst = (void *)&sin6;
	struct nhop_object *nh;

	if (inc->inc_flags & INC_ISIPV6) {
		bzero(dst, sizeof(struct sockaddr_in6));
		dst->sa_len = sizeof(struct sockaddr_in6);
		dst->sa_family = AF_INET6;

		if (IN6_IS_ADDR_LINKLOCAL(&inc->inc6_laddr)) {
			/* no need for route lookup */
			e = t4_l2t_get(pi, ifp, dst);
			return (e);
		}

		nh = fib6_lookup(RT_DEFAULT_FIB, &inc->inc6_faddr, 0, NHR_NONE, 0);
		if (nh == NULL)
			return (NULL);
		if (nh->nh_ifp != ifp)
			return (NULL);
		if (nh->nh_flags & NHF_GATEWAY)
			((struct sockaddr_in6 *)dst)->sin6_addr = nh->gw6_sa.sin6_addr;
		else
			((struct sockaddr_in6 *)dst)->sin6_addr = inc->inc6_faddr;
	} else {
		dst->sa_len = sizeof(struct sockaddr_in);
		dst->sa_family = AF_INET;

		nh = fib4_lookup(RT_DEFAULT_FIB, inc->inc_faddr, 0, NHR_NONE, 0);
		if (nh == NULL)
			return (NULL);
		if (nh->nh_ifp != ifp)
			return (NULL);
		if (nh->nh_flags & NHF_GATEWAY) {
			if (nh->gw_sa.sa_family == AF_INET)
				((struct sockaddr_in *)dst)->sin_addr =
				    nh->gw4_sa.sin_addr;
			else
				*((struct sockaddr_in6 *)dst) = nh->gw6_sa;
		} else
			((struct sockaddr_in *)dst)->sin_addr = inc->inc_faddr;
	}

	e = t4_l2t_get(pi, ifp, dst);
	return (e);
}

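/*
 * Have the hardware send a SYN|ACK (CPL_PASS_ACCEPT_RPL) for the embryonic
 * connection, using the options computed by the caller.
 */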
static int
send_synack(struct adapter *sc, struct synq_entry *synqe, uint64_t opt0,
    uint32_t opt2, int tid)
{
	struct wrqe *wr;
	struct cpl_pass_accept_rpl *rpl;
	struct l2t_entry *e = &sc->l2t->l2tab[synqe->params.l2t_idx];

	wr = alloc_wrqe(is_t4(sc) ? sizeof(struct cpl_pass_accept_rpl) :
	    sizeof(struct cpl_t5_pass_accept_rpl), &sc->sge.ctrlq[0]);
	if (wr == NULL)
		return (ENOMEM);
	rpl = wrtod(wr);

	if (is_t4(sc))
		INIT_TP_WR_MIT_CPL(rpl, CPL_PASS_ACCEPT_RPL, tid);
	else {
		struct cpl_t5_pass_accept_rpl *rpl5 = (void *)rpl;

		INIT_TP_WR_MIT_CPL(rpl5, CPL_PASS_ACCEPT_RPL, tid);
		rpl5->iss = htobe32(synqe->iss);
	}
	rpl->opt0 = opt0;
	rpl->opt2 = opt2;

	return (t4_l2t_send(sc, wr, e));
}

#define REJECT_PASS_ACCEPT_REQ(tunnel)	do { \
	if (!tunnel) { \
		m_freem(m); \
		m = NULL; \
	} \
	reject_reason = __LINE__; \
	goto reject; \
} while (0)

/*
 * The context associated with a tid entry via insert_tid could be a synq_entry
 * or a toepcb.  The only way CPL handlers can tell is via a bit in these flags.
 */
CTASSERT(offsetof(struct toepcb, flags) == offsetof(struct synq_entry, flags));

/*
 * Incoming SYN on a listening socket.
 *
 * XXX: Every use of ifp in this routine has a bad race with up/down, toe/-toe,
 * etc.
 */
static int
do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	struct tom_data *td = sc->tom_softc;
	struct toedev *tod;
	const struct cpl_pass_accept_req *cpl = mtod(m, const void *);
	unsigned int stid = G_PASS_OPEN_TID(be32toh(cpl->tos_stid));
	unsigned int tid = GET_TID(cpl);
	struct listen_ctx *lctx = lookup_stid(sc, stid);
	struct inpcb *inp;
	struct socket *so;
	struct in_conninfo inc;
	struct tcphdr th;
	struct tcpopt to;
	struct port_info *pi;
	struct vi_info *vi;
	if_t hw_ifp, ifp;
	struct l2t_entry *e = NULL;
	struct synq_entry *synqe = NULL;
	int reject_reason, v, ntids;
	uint16_t vid, l2info;
	struct epoch_tracker et;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif
	struct offload_settings settings;
	uint8_t iptos;

	KASSERT(opcode == CPL_PASS_ACCEPT_REQ,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));

	CTR4(KTR_CXGBE, "%s: stid %u, tid %u, lctx %p", __func__, stid, tid,
	    lctx);

	/*
	 * Figure out the port the SYN arrived on.  We'll look for an exact VI
	 * match in a bit but in case we don't find any we'll use the main VI as
	 * the incoming ifnet.
	 */
	l2info = be16toh(cpl->l2info);
	pi = sc->port[G_SYN_INTF(l2info)];
	hw_ifp = pi->vi[0].ifp;
	m->m_pkthdr.rcvif = hw_ifp;

	CURVNET_SET(lctx->vnet);	/* before any potential REJECT */

	/*
	 * If VXLAN/NVGRE parsing is enabled then SYNs in the inner traffic will
	 * also hit the listener.  We don't want to offload those.
	 */
	if (encapsulated_syn(sc, cpl)) {
		REJECT_PASS_ACCEPT_REQ(true);
	}

	/*
	 * Use the MAC index to lookup the associated VI.  If this SYN didn't
	 * match a perfect MAC filter, punt.
	 */
	if (!(l2info & F_SYN_XACT_MATCH)) {
		REJECT_PASS_ACCEPT_REQ(true);
	}
	for_each_vi(pi, v, vi) {
		if (vi->xact_addr_filt == G_SYN_MAC_IDX(l2info))
			goto found;
	}
	REJECT_PASS_ACCEPT_REQ(true);
found:
	hw_ifp = vi->ifp;	/* the cxgbe ifnet */
	m->m_pkthdr.rcvif = hw_ifp;
	tod = TOEDEV(hw_ifp);

	/*
	 * Don't offload if the peer requested a TCP option that's not known to
	 * the silicon.  Send the SYN to the kernel instead.
	 */
	if (__predict_false(cpl->tcpopt.unknown))
		REJECT_PASS_ACCEPT_REQ(true);

	/*
	 * Figure out if there is a pseudo interface (vlan, lagg, etc.)
	 * involved.  Don't offload if the SYN had a VLAN tag and the vid
	 * doesn't match anything on this interface.
	 *
	 * XXX: lagg support, lagg + vlan support.
	 */
	vid = EVL_VLANOFTAG(be16toh(cpl->vlan));
	if (vid != 0xfff && vid != 0) {
		ifp = VLAN_DEVAT(hw_ifp, vid);
		if (ifp == NULL)
			REJECT_PASS_ACCEPT_REQ(true);
	} else
		ifp = hw_ifp;

	/*
	 * Don't offload if the ifnet that the SYN came in on is not in the same
	 * vnet as the listening socket.
	 */
	if (lctx->vnet != if_getvnet(ifp))
		REJECT_PASS_ACCEPT_REQ(true);

	pass_accept_req_to_protohdrs(sc, m, &inc, &th, &iptos);
	if (inc.inc_flags & INC_ISIPV6) {

		/* Don't offload if the ifcap isn't enabled */
		if ((if_getcapenable(ifp) & IFCAP_TOE6) == 0)
			REJECT_PASS_ACCEPT_REQ(true);

		/*
		 * SYN must be directed to an IP6 address on this ifnet.  This
		 * is more restrictive than in6_localip.
		 */
		NET_EPOCH_ENTER(et);
		if (!in6_ifhasaddr(ifp, &inc.inc6_laddr)) {
			NET_EPOCH_EXIT(et);
			REJECT_PASS_ACCEPT_REQ(true);
		}

		ntids = 2;
	} else {

		/* Don't offload if the ifcap isn't enabled */
		if ((if_getcapenable(ifp) & IFCAP_TOE4) == 0)
			REJECT_PASS_ACCEPT_REQ(true);

		/*
		 * SYN must be directed to an IP address on this ifnet.  This
		 * is more restrictive than in_localip.
		 */
		NET_EPOCH_ENTER(et);
		if (!in_ifhasaddr(ifp, inc.inc_laddr)) {
			NET_EPOCH_EXIT(et);
			REJECT_PASS_ACCEPT_REQ(true);
		}

		ntids = 1;
	}

	e = get_l2te_for_nexthop(pi, ifp, &inc);
	if (e == NULL) {
		NET_EPOCH_EXIT(et);
		REJECT_PASS_ACCEPT_REQ(true);
	}

	/* Don't offload if the 4-tuple is already in use */
	if (toe_4tuple_check(&inc, &th, ifp) != 0) {
		NET_EPOCH_EXIT(et);
		REJECT_PASS_ACCEPT_REQ(false);
	}

	inp = lctx->inp;		/* listening socket, not owned by TOE */
	INP_RLOCK(inp);

	/* Don't offload if the listening socket has closed */
	if (__predict_false(inp->inp_flags & INP_DROPPED)) {
		INP_RUNLOCK(inp);
		NET_EPOCH_EXIT(et);
		REJECT_PASS_ACCEPT_REQ(false);
	}
	so = inp->inp_socket;
	rw_rlock(&sc->policy_lock);
	settings = *lookup_offload_policy(sc, OPEN_TYPE_PASSIVE, m,
	    EVL_MAKETAG(0xfff, 0, 0), inp);
	rw_runlock(&sc->policy_lock);
	if (!settings.offload) {
		INP_RUNLOCK(inp);
		NET_EPOCH_EXIT(et);
		REJECT_PASS_ACCEPT_REQ(true);	/* Rejected by COP. */
	}

	synqe = alloc_synqe(sc, lctx, M_NOWAIT);
	if (synqe == NULL) {
		INP_RUNLOCK(inp);
		NET_EPOCH_EXIT(et);
		REJECT_PASS_ACCEPT_REQ(true);
	}
	MPASS(rss->hash_type == RSS_HASH_TCP);
	synqe->rss_hash = be32toh(rss->hash_val);
	atomic_store_int(&synqe->ok_to_respond, 0);

	init_conn_params(vi, &settings, &inc, so, &cpl->tcpopt, e->idx,
	    &synqe->params);
	if (sc->params.tid_qid_sel_mask != 0)
		update_tid_qid_sel(vi, &synqe->params, tid);

	/*
	 * If all goes well t4_syncache_respond will get called during
	 * syncache_add.  Note that syncache_add releases the pcb lock.
	 */
	t4opt_to_tcpopt(&cpl->tcpopt, &to);
	toe_syncache_add(&inc, &to, &th, inp, tod, synqe, iptos);

	if (atomic_load_int(&synqe->ok_to_respond) > 0) {
		uint64_t opt0;
		uint32_t opt2;

		opt0 = calc_options0(vi, &synqe->params);
		opt2 = calc_options2(vi, &synqe->params);

		insert_tid(sc, tid, synqe, ntids);
		synqe->tid = tid;
		synqe->syn = m;
		m = NULL;
		mtx_lock(&td->toep_list_lock);
		TAILQ_INSERT_TAIL(&td->synqe_list, synqe, link);
		mtx_unlock(&td->toep_list_lock);

		if (send_synack(sc, synqe, opt0, opt2, tid) != 0) {
			remove_tid(sc, tid, ntids);
			m = synqe->syn;
			synqe->syn = NULL;
			mtx_lock(&td->toep_list_lock);
			TAILQ_REMOVE(&td->synqe_list, synqe, link);
			mtx_unlock(&td->toep_list_lock);
			NET_EPOCH_EXIT(et);
			REJECT_PASS_ACCEPT_REQ(true);
		}
		CTR6(KTR_CXGBE,
		    "%s: stid %u, tid %u, synqe %p, opt0 %#016lx, opt2 %#08x",
		    __func__, stid, tid, synqe, be64toh(opt0), be32toh(opt2));
	} else {
		NET_EPOCH_EXIT(et);
		REJECT_PASS_ACCEPT_REQ(false);
	}

	NET_EPOCH_EXIT(et);
	CURVNET_RESTORE();
	return (0);
reject:
	CURVNET_RESTORE();
	CTR4(KTR_CXGBE, "%s: stid %u, tid %u, REJECT (%d)", __func__, stid, tid,
	    reject_reason);

	if (e)
		t4_l2t_release(e);
	release_tid(sc, tid, lctx->ctrlq);
	if (synqe) {
		inp = synqe->lctx->inp;
		INP_WLOCK(inp);
		inp = release_synqe(sc, synqe);
		if (inp)
			INP_WUNLOCK(inp);
	}

	if (m) {
		/*
		 * The connection request hit a TOE listener but is being passed
		 * on to the kernel sw stack instead of getting offloaded.
		 */
		m_adj(m, sizeof(*cpl));
		m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID |
		    CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
		m->m_pkthdr.csum_data = 0xffff;
		if_input(hw_ifp, m);
	}

	return (reject_reason);
}

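/*
 * Synthesize protocol headers that look like the ACK of our SYN|ACK, using
 * the saved SYN and the CPL_PASS_ESTABLISH.
 */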
static void
synqe_to_protohdrs(struct adapter *sc, struct synq_entry *synqe,
    const struct cpl_pass_establish *cpl, struct in_conninfo *inc,
    struct tcphdr *th, struct tcpopt *to)
{
	uint16_t tcp_opt = be16toh(cpl->tcp_opt);
	uint8_t iptos;

	/* start off with the original SYN */
	pass_accept_req_to_protohdrs(sc, synqe->syn, inc, th, &iptos);

	/* modify parts to make it look like the ACK to our SYN|ACK */
	tcp_set_flags(th, TH_ACK);
	th->th_ack = synqe->iss + 1;
	th->th_seq = be32toh(cpl->rcv_isn);
	bzero(to, sizeof(*to));
	if (G_TCPOPT_TSTAMP(tcp_opt)) {
		to->to_flags |= TOF_TS;
		to->to_tsecr = synqe->ts;
	}
}

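/*
 * The firmware has completed the 3-way handshake on our behalf.  Expand the
 * syncache entry into a full connection and hand the new socket to the
 * kernel's accept path.
 */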
static int
do_pass_establish(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	struct vi_info *vi;
	if_t ifp;
	const struct cpl_pass_establish *cpl = (const void *)(rss + 1);
#if defined(KTR) || defined(INVARIANTS)
	unsigned int stid = G_PASS_OPEN_TID(be32toh(cpl->tos_stid));
#endif
	unsigned int tid = GET_TID(cpl);
	struct synq_entry *synqe = lookup_tid(sc, tid);
	struct listen_ctx *lctx = synqe->lctx;
	struct inpcb *inp = lctx->inp, *new_inp;
	struct socket *so;
	struct tcphdr th;
	struct tcpopt to;
	struct in_conninfo inc;
	struct toepcb *toep;
	struct epoch_tracker et;
	int rstreason;
#ifdef INVARIANTS
	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
#endif

	KASSERT(opcode == CPL_PASS_ESTABLISH,
	    ("%s: unexpected opcode 0x%x", __func__, opcode));
	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
	KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));
	KASSERT(synqe->flags & TPF_SYNQE,
	    ("%s: tid %u (ctx %p) not a synqe", __func__, tid, synqe));

	CURVNET_SET(lctx->vnet);
	NET_EPOCH_ENTER(et);	/* for syncache_expand */
	INP_WLOCK(inp);

	CTR6(KTR_CXGBE,
	    "%s: stid %u, tid %u, synqe %p (0x%x), inp_flags 0x%x",
	    __func__, stid, tid, synqe, synqe->flags, inp->inp_flags);

	ifp = synqe->syn->m_pkthdr.rcvif;
	vi = if_getsoftc(ifp);
	KASSERT(vi->adapter == sc,
	    ("%s: vi %p, sc %p mismatch", __func__, vi, sc));

	if (__predict_false(inp->inp_flags & INP_DROPPED)) {
reset:
		send_abort_rpl_synqe(TOEDEV(ifp), synqe, CPL_ABORT_SEND_RST);
		INP_WUNLOCK(inp);
		NET_EPOCH_EXIT(et);
		CURVNET_RESTORE();
		return (0);
	}

	KASSERT(synqe->params.rxq_idx == iq_to_ofld_rxq(iq) - &sc->sge.ofld_rxq[0],
	    ("%s: CPL arrived on unexpected rxq.  %d %d", __func__,
	    synqe->params.rxq_idx,
	    (int)(iq_to_ofld_rxq(iq) - &sc->sge.ofld_rxq[0])));

	toep = alloc_toepcb(vi, M_NOWAIT);
	if (toep == NULL)
		goto reset;
	toep->tid = tid;
	toep->l2te = &sc->l2t->l2tab[synqe->params.l2t_idx];
	toep->vnet = lctx->vnet;
	bcopy(&synqe->params, &toep->params, sizeof(toep->params));
	init_toepcb(vi, toep);

	MPASS(be32toh(cpl->snd_isn) - 1 == synqe->iss);
	MPASS(be32toh(cpl->rcv_isn) - 1 == synqe->irs);
	synqe->tcp_opt = cpl->tcp_opt;
	synqe->toep = toep;

	/* Come up with something that syncache_expand should be ok with. */
	synqe_to_protohdrs(sc, synqe, cpl, &inc, &th, &to);
	if (inc.inc_flags & INC_ISIPV6) {
		if (lctx->ce == NULL) {
			toep->ce = t4_get_clip_entry(sc, &inc.inc6_laddr, true);
			if (toep->ce == NULL) {
				free_toepcb(toep);
				goto reset;	/* RST without a CLIP entry? */
			}
		} else {
			t4_hold_clip_entry(sc, lctx->ce);
			toep->ce = lctx->ce;
		}
	}
	so = inp->inp_socket;
	KASSERT(so != NULL, ("%s: socket is NULL", __func__));

	rstreason = toe_syncache_expand(&inc, &to, &th, &so);
	if (rstreason < 0) {
		free_toepcb(toep);
		send_abort_rpl_synqe(TOEDEV(ifp), synqe, CPL_ABORT_NO_RST);
		INP_WUNLOCK(inp);
		NET_EPOCH_EXIT(et);
		CURVNET_RESTORE();
		return (0);
	} else if (rstreason == 0 || so == NULL) {
		free_toepcb(toep);
		goto reset;
	}

	/* New connection inpcb is already locked by syncache_expand(). */
	new_inp = sotoinpcb(so);
	INP_WLOCK_ASSERT(new_inp);
	MPASS(so->so_vnet == lctx->vnet);

	/*
	 * This is for expansion from syncookies.
	 *
	 * XXX: we've held the tcbinfo lock throughout so there's no risk of
	 * anyone accept'ing a connection before we've installed our hooks, but
	 * this somewhat defeats the purpose of having a tod_offload_socket :-(
	 */
	if (__predict_false(!(synqe->flags & TPF_SYNQE_EXPANDED))) {
		tcp_timer_activate(intotcpcb(new_inp), TT_KEEP, 0);
		t4_offload_socket(TOEDEV(ifp), synqe, so);
	}

	INP_WUNLOCK(new_inp);

	/* Done with the synqe */
	inp = release_synqe(sc, synqe);
	if (inp != NULL)
		INP_WUNLOCK(inp);
	NET_EPOCH_EXIT(et);
	CURVNET_RESTORE();

	return (0);
}

void
t4_init_listen_cpl_handlers(void)
{

	t4_register_cpl_handler(CPL_PASS_OPEN_RPL, do_pass_open_rpl);
	t4_register_cpl_handler(CPL_CLOSE_LISTSRV_RPL, do_close_server_rpl);
	t4_register_cpl_handler(CPL_PASS_ACCEPT_REQ, do_pass_accept_req);
	t4_register_cpl_handler(CPL_PASS_ESTABLISH, do_pass_establish);
}

void
t4_uninit_listen_cpl_handlers(void)
{

	t4_register_cpl_handler(CPL_PASS_OPEN_RPL, NULL);
	t4_register_cpl_handler(CPL_CLOSE_LISTSRV_RPL, NULL);
	t4_register_cpl_handler(CPL_PASS_ACCEPT_REQ, NULL);
	t4_register_cpl_handler(CPL_PASS_ESTABLISH, NULL);
}
#endif