xref: /freebsd/sys/dev/cxgbe/tom/t4_listen.c (revision 45d5b9f0324a13df06712b7a9df5f2fbe8475764)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2012 Chelsio Communications, Inc.
5  * All rights reserved.
6  * Written by: Navdeep Parhar <np@FreeBSD.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 #include "opt_inet.h"
32 #include "opt_inet6.h"
33 
34 #ifdef TCP_OFFLOAD
35 #include <sys/param.h>
36 #include <sys/types.h>
37 #include <sys/kernel.h>
38 #include <sys/ktr.h>
39 #include <sys/module.h>
40 #include <sys/protosw.h>
41 #include <sys/refcount.h>
42 #include <sys/domain.h>
43 #include <sys/fnv_hash.h>
44 #include <sys/socket.h>
45 #include <sys/socketvar.h>
46 #include <sys/sysctl.h>
47 #include <net/ethernet.h>
48 #include <net/if.h>
49 #include <net/if_types.h>
50 #include <net/if_vlan_var.h>
51 #include <net/route.h>
52 #include <net/route/nhop.h>
53 #include <netinet/in.h>
54 #include <netinet/in_fib.h>
55 #include <netinet/in_pcb.h>
56 #include <netinet/ip.h>
57 #include <netinet/ip6.h>
58 #include <netinet6/in6_fib.h>
59 #include <netinet6/scope6_var.h>
60 #include <netinet/tcp_timer.h>
61 #define TCPSTATES
62 #include <netinet/tcp_fsm.h>
63 #include <netinet/tcp_var.h>
64 #include <netinet/toecore.h>
65 #include <netinet/cc/cc.h>
66 
67 #include "common/common.h"
68 #include "common/t4_msg.h"
69 #include "common/t4_regs.h"
70 #include "t4_clip.h"
71 #include "tom/t4_tom_l2t.h"
72 #include "tom/t4_tom.h"
73 
74 /* stid services */
75 static int alloc_stid(struct adapter *, bool, void *);
76 static struct listen_ctx *lookup_stid(struct adapter *, int);
77 static void free_stid(struct adapter *, int, bool);
78 
79 /* lctx services */
80 static struct listen_ctx *alloc_lctx(struct adapter *, struct inpcb *,
81     struct vi_info *);
82 static int free_lctx(struct adapter *, struct listen_ctx *);
83 static void hold_lctx(struct listen_ctx *);
84 static void listen_hash_add(struct adapter *, struct listen_ctx *);
85 static struct listen_ctx *listen_hash_find(struct adapter *, struct inpcb *);
86 static struct listen_ctx *listen_hash_del(struct adapter *, struct inpcb *);
87 static struct inpcb *release_lctx(struct adapter *, struct listen_ctx *);
88 
89 static void send_abort_rpl_synqe(struct toedev *, struct synq_entry *, int);
90 
91 static int create_server6(struct adapter *, struct listen_ctx *);
92 static int create_server(struct adapter *, struct listen_ctx *);
93 
94 int
95 alloc_stid_tab(struct adapter *sc)
96 {
97 	struct tid_info *t = &sc->tids;
98 
99 	MPASS(t->nstids > 0);
100 	MPASS(t->stid_tab == NULL);
101 
102 	t->stid_tab = malloc(t->nstids * sizeof(*t->stid_tab), M_CXGBE,
103 	    M_ZERO | M_NOWAIT);
104 	if (t->stid_tab == NULL)
105 		return (ENOMEM);
106 	t->stid_bitmap = bit_alloc(t->nstids, M_CXGBE, M_NOWAIT);
107 	if (t->stid_bitmap == NULL) {
108 		free(t->stid_tab, M_CXGBE);
109 		t->stid_tab = NULL;
110 		return (ENOMEM);
111 	}
112 	mtx_init(&t->stid_lock, "stid lock", NULL, MTX_DEF);
113 	t->stids_in_use = 0;
114 
115 	return (0);
116 }
117 
118 void
119 free_stid_tab(struct adapter *sc)
120 {
121 	struct tid_info *t = &sc->tids;
122 
123 	KASSERT(t->stids_in_use == 0,
124 	    ("%s: %d tids still in use.", __func__, t->stids_in_use));
125 
126 	if (mtx_initialized(&t->stid_lock))
127 		mtx_destroy(&t->stid_lock);
128 	free(t->stid_tab, M_CXGBE);
129 	t->stid_tab = NULL;
130 	free(t->stid_bitmap, M_CXGBE);
131 	t->stid_bitmap = NULL;
132 }
133 
134 void
135 stop_stid_tab(struct adapter *sc)
136 {
137 	struct tid_info *t = &sc->tids;
138 	struct tom_data *td = sc->tom_softc;
139 	struct listen_ctx *lctx;
140 	struct synq_entry *synqe;
141 	int i, ntids;
142 
143 	mtx_lock(&t->stid_lock);
144 	t->stid_tab_stopped = true;
145 	mtx_unlock(&t->stid_lock);
146 
147 	mtx_lock(&td->lctx_hash_lock);
148 	for (i = 0; i <= td->listen_mask; i++) {
149 		LIST_FOREACH(lctx, &td->listen_hash[i], link)
150 			lctx->flags &= ~(LCTX_RPL_PENDING | LCTX_SETUP_IN_HW);
151 	}
152 	mtx_unlock(&td->lctx_hash_lock);
153 
154 	mtx_lock(&td->toep_list_lock);
155 	TAILQ_FOREACH(synqe, &td->synqe_list, link) {
156 		MPASS(sc->incarnation == synqe->incarnation);
157 		MPASS(synqe->tid >= 0);
158 		MPASS(synqe == lookup_tid(sc, synqe->tid));
159 		/* Remove tid from the lookup table immediately. */
160 		CTR(KTR_CXGBE, "%s: tid %d@%d STRANDED, removed from table",
161 		    __func__, synqe->tid, synqe->incarnation);
162 		ntids = synqe->lctx->inp->inp_vflag & INP_IPV6 ? 2 : 1;
163 		remove_tid(sc, synqe->tid, ntids);
164 #if 0
165 		/* synqe->tid is stale now but left alone for debug. */
166 		synqe->tid = -1;
167 #endif
168 	}
169 	MPASS(TAILQ_EMPTY(&td->stranded_synqe));
170 	TAILQ_CONCAT(&td->stranded_synqe, &td->synqe_list, link);
171 	MPASS(TAILQ_EMPTY(&td->synqe_list));
172 	mtx_unlock(&td->toep_list_lock);
173 }
174 
175 void
176 restart_stid_tab(struct adapter *sc)
177 {
178 	struct tid_info *t = &sc->tids;
179 	struct tom_data *td = sc->tom_softc;
180 	struct listen_ctx *lctx;
181 	int i;
182 
183 	mtx_lock(&td->lctx_hash_lock);
184 	for (i = 0; i <= td->listen_mask; i++) {
185 		LIST_FOREACH(lctx, &td->listen_hash[i], link) {
186 			MPASS((lctx->flags & (LCTX_RPL_PENDING | LCTX_SETUP_IN_HW)) == 0);
187 			lctx->flags |= LCTX_RPL_PENDING;
188 			if (lctx->inp->inp_vflag & INP_IPV6)
189 				create_server6(sc, lctx);
190 			else
191 				create_server(sc, lctx);
192 		}
193 	}
194 	mtx_unlock(&td->lctx_hash_lock);
195 
196 	mtx_lock(&t->stid_lock);
197 	t->stid_tab_stopped = false;
198 	mtx_unlock(&t->stid_lock);
199 
200 }
201 
202 static int
203 alloc_stid(struct adapter *sc, bool isipv6, void *ctx)
204 {
205 	struct tid_info *t = &sc->tids;
206 	const u_int n = isipv6 ? 2 : 1;
207 	int stid, pair_stid;
208 	u_int i;
209 	ssize_t val;
210 
211 	mtx_lock(&t->stid_lock);
212 	MPASS(t->stids_in_use <= t->nstids);
213 	if (n > t->nstids - t->stids_in_use || t->stid_tab_stopped) {
214 		mtx_unlock(&t->stid_lock);
215 		return (-1);
216 	}
217 
218 	stid = -1;
219 	if (isipv6) {
220 		/*
221 		 * An IPv6 server needs 2 naturally aligned stids (1 stid = 4
222 		 * cells) in the TCAM.  We know that the start of the stid
223 		 * region is properly aligned already (the chip requires each
224 		 * region to be 128-cell aligned).
225 		 */
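		/*
		 * Worked example (editorial illustration, not from the
		 * original source): with nstids = 8 and stids 0 and 3 in
		 * use, the first 2-wide clear area starts at stid 1.  That
		 * is odd (not naturally aligned), so the scan resumes at
		 * roundup2(2, 2) = 2, where the next 2-wide clear area
		 * starts at stid 4.  4 is even, so the pair {4, 5} is used.
		 */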
226 		for (i = 0; i + 1 < t->nstids; i = roundup2(val + 1, 2)) {
227 			bit_ffc_area_at(t->stid_bitmap, i, t->nstids, 2, &val);
228 			if (val == -1)
229 				break;
230 			if ((val & 1) == 0) {
231 				stid = val;
232 				break;
233 			}
234 		}
235 	} else {
236 		/*
237 		 * An IPv4 server needs one stid without any alignment
238 		 * requirements.  But we try extra hard to find an available
239 		 * stid adjacent to a used stid so that free "stid-pairs" are
240 		 * left intact for IPv6.
241 		 */
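		/*
		 * Worked example (editorial illustration, not from the
		 * original source): with nstids = 8 and only stid 2 in use,
		 * the first free stid is 0, but its partner (stid 1) is
		 * also free, so it is not an ideal fit.  The search then
		 * walks the in-use stids: stid 2's partner (stid 3) is
		 * free, so stid 3 is chosen, leaving the pairs {0,1},
		 * {4,5}, and {6,7} intact for IPv6.
		 */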
242 		bit_ffc_at(t->stid_bitmap, 0, t->nstids, &val);
243 		while (val != -1) {
244 			if (stid == -1) {
245 				/*
246 				 * First usable stid.  Look no further if it's
247 				 * an ideal fit.
248 				 */
249 				stid = val;
250 				if (val & 1 || bit_test(t->stid_bitmap, val + 1))
251 					break;
252 			} else {
253 				/*
254 				 * We have an unused stid already but are now
255 				 * looking for in-use stids because we'd prefer
256 				 * to grab an unused stid adjacent to one that's
257 				 * in use.
258 				 *
259 				 * Odd stids pair with the previous stid and
260 				 * even ones pair with the next stid.
261 				 */
262 				pair_stid = val & 1 ? val - 1 : val + 1;
263 				if (bit_test(t->stid_bitmap, pair_stid) == 0) {
264 					stid = pair_stid;
265 					break;
266 				}
267 			}
268 			val = roundup2(val + 1, 2);
269 			if (val >= t->nstids)
270 				break;
271 			bit_ffs_at(t->stid_bitmap, val, t->nstids, &val);
272 		}
273 	}
274 
275 	if (stid >= 0) {
276 		MPASS(stid + n - 1 < t->nstids);
277 		MPASS(bit_ntest(t->stid_bitmap, stid, stid + n - 1, 0));
278 		bit_nset(t->stid_bitmap, stid, stid + n - 1);
279 		t->stids_in_use += n;
280 		t->stid_tab[stid] = ctx;
281 #ifdef INVARIANTS
282 		if (n == 2) {
283 			MPASS((stid & 1) == 0);
284 			t->stid_tab[stid + 1] = NULL;
285 		}
286 #endif
287 		stid += t->stid_base;
288 	}
289 	mtx_unlock(&t->stid_lock);
290 	return (stid);
291 }
292 
293 static struct listen_ctx *
294 lookup_stid(struct adapter *sc, int stid)
295 {
296 	struct tid_info *t = &sc->tids;
297 
298 	return (t->stid_tab[stid - t->stid_base]);
299 }
300 
301 static void
302 free_stid(struct adapter *sc, int stid, bool isipv6)
303 {
304 	struct tid_info *t = &sc->tids;
305 	const u_int n = isipv6 ? 2 : 1;
306 
307 	mtx_lock(&t->stid_lock);
308 	MPASS(stid >= t->stid_base);
309 	stid -= t->stid_base;
310 	MPASS(stid + n - 1 < t->nstids);
311 	MPASS(t->stids_in_use <= t->nstids);
312 	MPASS(t->stids_in_use >= n);
313 	MPASS(t->stid_tab[stid] != NULL);
314 #ifdef INVARIANTS
315 	if (n == 2) {
316 		MPASS((stid & 1) == 0);
317 		MPASS(t->stid_tab[stid + 1] == NULL);
318 	}
319 #endif
320 	MPASS(bit_ntest(t->stid_bitmap, stid, stid + n - 1, 1));
321 	bit_nclear(t->stid_bitmap, stid, stid + n - 1);
322 	t->stid_tab[stid] = NULL;
323 	t->stids_in_use -= n;
324 	mtx_unlock(&t->stid_lock);
325 }
326 
327 static struct listen_ctx *
328 alloc_lctx(struct adapter *sc, struct inpcb *inp, struct vi_info *vi)
329 {
330 	struct listen_ctx *lctx;
331 
332 	INP_WLOCK_ASSERT(inp);
333 
334 	lctx = malloc(sizeof(struct listen_ctx), M_CXGBE, M_NOWAIT | M_ZERO);
335 	if (lctx == NULL)
336 		return (NULL);
337 
338 	lctx->isipv6 = inp->inp_vflag & INP_IPV6;
339 	lctx->stid = alloc_stid(sc, lctx->isipv6, lctx);
340 	if (lctx->stid < 0) {
341 		free(lctx, M_CXGBE);
342 		return (NULL);
343 	}
344 
345 	if (lctx->isipv6 &&
346 	    !IN6_ARE_ADDR_EQUAL(&in6addr_any, &inp->in6p_laddr)) {
347 		lctx->ce = t4_get_clip_entry(sc, &inp->in6p_laddr, true);
348 		if (lctx->ce == NULL) {
349 			free(lctx, M_CXGBE);
350 			return (NULL);
351 		}
352 	}
353 
354 	lctx->ctrlq = &sc->sge.ctrlq[vi->pi->port_id];
355 	lctx->ofld_rxq = &sc->sge.ofld_rxq[vi->first_ofld_rxq];
356 	refcount_init(&lctx->refcount, 1);
357 
358 	lctx->inp = inp;
359 	lctx->vnet = inp->inp_socket->so_vnet;
360 	in_pcbref(inp);
361 
362 	return (lctx);
363 }
364 
365 /* Don't call this directly; use release_lctx instead. */
366 static int
367 free_lctx(struct adapter *sc, struct listen_ctx *lctx)
368 {
369 	struct inpcb *inp = lctx->inp;
370 
371 	INP_WLOCK_ASSERT(inp);
372 	KASSERT(lctx->refcount == 0,
373 	    ("%s: refcount %d", __func__, lctx->refcount));
374 	KASSERT(lctx->stid >= 0, ("%s: bad stid %d.", __func__, lctx->stid));
375 
376 	CTR4(KTR_CXGBE, "%s: stid %u, lctx %p, inp %p",
377 	    __func__, lctx->stid, lctx, lctx->inp);
378 
379 	if (lctx->ce)
380 		t4_release_clip_entry(sc, lctx->ce);
381 	free_stid(sc, lctx->stid, lctx->isipv6);
382 	free(lctx, M_CXGBE);
383 
384 	return (in_pcbrele_wlocked(inp));
385 }
386 
387 static void
388 hold_lctx(struct listen_ctx *lctx)
389 {
390 
391 	refcount_acquire(&lctx->refcount);
392 }
393 
394 static inline uint32_t
395 listen_hashfn(void *key, u_long mask)
396 {
397 
398 	return (fnv_32_buf(&key, sizeof(key), FNV1_32_INIT) & mask);
399 }
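/*
 * Editorial note: the key is hashed by its pointer value -- fnv_32_buf
 * runs over the bytes of the pointer variable itself (&key, sizeof(key)),
 * not over what it points to, which is why the lookup and delete
 * routines below compare lctx->inp against the inp pointer directly.
 */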
400 
401 /*
402  * Add a listen_ctx entry to the listen hash table.
403  */
404 static void
405 listen_hash_add(struct adapter *sc, struct listen_ctx *lctx)
406 {
407 	struct tom_data *td = sc->tom_softc;
408 	int bucket = listen_hashfn(lctx->inp, td->listen_mask);
409 
410 	mtx_lock(&td->lctx_hash_lock);
411 	LIST_INSERT_HEAD(&td->listen_hash[bucket], lctx, link);
412 	td->lctx_count++;
413 	mtx_unlock(&td->lctx_hash_lock);
414 }
415 
416 /*
417  * Look for the listening socket's context entry in the hash and return it.
418  */
419 static struct listen_ctx *
420 listen_hash_find(struct adapter *sc, struct inpcb *inp)
421 {
422 	struct tom_data *td = sc->tom_softc;
423 	int bucket = listen_hashfn(inp, td->listen_mask);
424 	struct listen_ctx *lctx;
425 
426 	mtx_lock(&td->lctx_hash_lock);
427 	LIST_FOREACH(lctx, &td->listen_hash[bucket], link) {
428 		if (lctx->inp == inp)
429 			break;
430 	}
431 	mtx_unlock(&td->lctx_hash_lock);
432 
433 	return (lctx);
434 }
435 
436 /*
437  * Removes the listen_ctx structure for inp from the hash and returns it.
438  */
439 static struct listen_ctx *
440 listen_hash_del(struct adapter *sc, struct inpcb *inp)
441 {
442 	struct tom_data *td = sc->tom_softc;
443 	int bucket = listen_hashfn(inp, td->listen_mask);
444 	struct listen_ctx *lctx, *l;
445 
446 	mtx_lock(&td->lctx_hash_lock);
447 	LIST_FOREACH_SAFE(lctx, &td->listen_hash[bucket], link, l) {
448 		if (lctx->inp == inp) {
449 			LIST_REMOVE(lctx, link);
450 			td->lctx_count--;
451 			break;
452 		}
453 	}
454 	mtx_unlock(&td->lctx_hash_lock);
455 
456 	return (lctx);
457 }
458 
459 /*
460  * Releases a hold on the lctx.  Must be called with the listening socket's inp
461  * locked.  The inp may be freed by this function, in which case it
462  * returns NULL.
463  */
464 static struct inpcb *
465 release_lctx(struct adapter *sc, struct listen_ctx *lctx)
466 {
467 	struct inpcb *inp = lctx->inp;
468 	int inp_freed = 0;
469 
470 	INP_WLOCK_ASSERT(inp);
471 	if (refcount_release(&lctx->refcount))
472 		inp_freed = free_lctx(sc, lctx);
473 
474 	return (inp_freed ? NULL : inp);
475 }
476 
477 static void
478 send_flowc_wr_synqe(struct adapter *sc, struct synq_entry *synqe)
479 {
480 	struct mbuf *m = synqe->syn;
481 	if_t ifp = m->m_pkthdr.rcvif;
482 	struct vi_info *vi = if_getsoftc(ifp);
483 	struct port_info *pi = vi->pi;
484 	struct wrqe *wr;
485 	struct fw_flowc_wr *flowc;
486 	struct sge_ofld_txq *ofld_txq;
487 	struct sge_ofld_rxq *ofld_rxq;
488 	const int nparams = 6;
489 	const int flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval);
490 	const u_int pfvf = sc->pf << S_FW_VIID_PFN;
491 
492 	INP_WLOCK_ASSERT(synqe->lctx->inp);
493 	MPASS((synqe->flags & TPF_FLOWC_WR_SENT) == 0);
494 
495 	ofld_txq = &sc->sge.ofld_txq[synqe->params.txq_idx];
496 	ofld_rxq = &sc->sge.ofld_rxq[synqe->params.rxq_idx];
497 
498 	wr = alloc_wrqe(roundup2(flowclen, 16), &ofld_txq->wrq);
499 	if (wr == NULL) {
500 		/* XXX */
501 		panic("%s: allocation failure.", __func__);
502 	}
503 	flowc = wrtod(wr);
504 	memset(flowc, 0, wr->wr_len);
505 	flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) |
506 	    V_FW_FLOWC_WR_NPARAMS(nparams));
507 	flowc->flowid_len16 = htonl(V_FW_WR_LEN16(howmany(flowclen, 16)) |
508 	    V_FW_WR_FLOWID(synqe->tid));
509 	flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN;
510 	flowc->mnemval[0].val = htobe32(pfvf);
511 	flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH;
512 	flowc->mnemval[1].val = htobe32(pi->tx_chan);
513 	flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT;
514 	flowc->mnemval[2].val = htobe32(pi->tx_chan);
515 	flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID;
516 	flowc->mnemval[3].val = htobe32(ofld_rxq->iq.abs_id);
517 	flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDBUF;
518 	flowc->mnemval[4].val = htobe32(512);
519 	flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_MSS;
520 	flowc->mnemval[5].val = htobe32(512);
521 
522 	synqe->flags |= TPF_FLOWC_WR_SENT;
523 	t4_wrq_tx(sc, wr);
524 }
525 
526 static void
527 send_abort_rpl_synqe(struct toedev *tod, struct synq_entry *synqe,
528     int rst_status)
529 {
530 	struct adapter *sc = tod->tod_softc;
531 	struct wrqe *wr;
532 	struct cpl_abort_req *req;
533 
534 	INP_WLOCK_ASSERT(synqe->lctx->inp);
535 
536 	CTR5(KTR_CXGBE, "%s: synqe %p (0x%x), tid %d%s",
537 	    __func__, synqe, synqe->flags, synqe->tid,
538 	    synqe->flags & TPF_ABORT_SHUTDOWN ?
539 	    " (abort already in progress)" : "");
540 	if (synqe->flags & TPF_ABORT_SHUTDOWN)
541 		return;	/* abort already in progress */
542 	synqe->flags |= TPF_ABORT_SHUTDOWN;
543 
544 	if (!(synqe->flags & TPF_FLOWC_WR_SENT))
545 		send_flowc_wr_synqe(sc, synqe);
546 
547 	wr = alloc_wrqe(sizeof(*req),
548 	    &sc->sge.ofld_txq[synqe->params.txq_idx].wrq);
549 	if (wr == NULL) {
550 		/* XXX */
551 		panic("%s: allocation failure.", __func__);
552 	}
553 	req = wrtod(wr);
554 	INIT_TP_WR_MIT_CPL(req, CPL_ABORT_REQ, synqe->tid);
555 	req->rsvd0 = 0;	/* don't have a snd_nxt */
556 	req->rsvd1 = 1;	/* no data sent yet */
557 	req->cmd = rst_status;
558 
559 	t4_l2t_send(sc, wr, &sc->l2t->l2tab[synqe->params.l2t_idx]);
560 }
561 
562 static int
563 create_server(struct adapter *sc, struct listen_ctx *lctx)
564 {
565 	struct wrqe *wr;
566 	struct cpl_pass_open_req *req;
567 	struct inpcb *inp = lctx->inp;
568 
569 	wr = alloc_wrqe(sizeof(*req), lctx->ctrlq);
570 	if (wr == NULL) {
571 		log(LOG_ERR, "%s: allocation failure", __func__);
572 		return (ENOMEM);
573 	}
574 	req = wrtod(wr);
575 
576 	INIT_TP_WR(req, 0);
577 	OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, lctx->stid));
578 	req->local_port = inp->inp_lport;
579 	req->peer_port = 0;
580 	req->local_ip = inp->inp_laddr.s_addr;
581 	req->peer_ip = 0;
582 	req->opt0 = htobe64(V_TX_CHAN(lctx->ctrlq->eq.tx_chan));
583 	req->opt1 = htobe64(V_CONN_POLICY(CPL_CONN_POLICY_ASK) |
584 	    F_SYN_RSS_ENABLE | V_SYN_RSS_QUEUE(lctx->ofld_rxq->iq.abs_id));
585 
586 	t4_wrq_tx(sc, wr);
587 	return (0);
588 }
589 
590 static int
591 create_server6(struct adapter *sc, struct listen_ctx *lctx)
592 {
593 	struct wrqe *wr;
594 	struct cpl_pass_open_req6 *req;
595 	struct inpcb *inp = lctx->inp;
596 
597 	wr = alloc_wrqe(sizeof(*req), lctx->ctrlq);
598 	if (wr == NULL) {
599 		log(LOG_ERR, "%s: allocation failure", __func__);
600 		return (ENOMEM);
601 	}
602 	req = wrtod(wr);
603 
604 	INIT_TP_WR(req, 0);
605 	OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_PASS_OPEN_REQ6, lctx->stid));
606 	req->local_port = inp->inp_lport;
607 	req->peer_port = 0;
608 	req->local_ip_hi = *(uint64_t *)&inp->in6p_laddr.s6_addr[0];
609 	req->local_ip_lo = *(uint64_t *)&inp->in6p_laddr.s6_addr[8];
610 	req->peer_ip_hi = 0;
611 	req->peer_ip_lo = 0;
612 	req->opt0 = htobe64(V_TX_CHAN(lctx->ctrlq->eq.tx_chan));
613 	req->opt1 = htobe64(V_CONN_POLICY(CPL_CONN_POLICY_ASK) |
614 	    F_SYN_RSS_ENABLE | V_SYN_RSS_QUEUE(lctx->ofld_rxq->iq.abs_id));
615 
616 	t4_wrq_tx(sc, wr);
617 	return (0);
618 }
619 
620 static int
621 destroy_server(struct adapter *sc, struct listen_ctx *lctx)
622 {
623 	struct wrqe *wr;
624 	struct cpl_close_listsvr_req *req;
625 
626 	wr = alloc_wrqe(sizeof(*req), lctx->ctrlq);
627 	if (wr == NULL) {
628 		/* XXX */
629 		panic("%s: allocation failure.", __func__);
630 	}
631 	req = wrtod(wr);
632 
633 	INIT_TP_WR(req, 0);
634 	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ,
635 	    lctx->stid));
636 	req->reply_ctrl = htobe16(lctx->ofld_rxq->iq.abs_id);
637 	req->rsvd = htobe16(0);
638 
639 	t4_wrq_tx(sc, wr);
640 	return (0);
641 }
642 
643 /*
644  * Start a listening server by sending a passive open request to HW.
645  *
646  * The adapter lock can't be taken here, so accesses to sc->flags,
647  * sc->offload_map, and if_capenable are all race prone.
648  */
649 int
650 t4_listen_start(struct toedev *tod, struct tcpcb *tp)
651 {
652 	struct adapter *sc = tod->tod_softc;
653 	struct vi_info *vi;
654 	struct port_info *pi;
655 	struct inpcb *inp = tptoinpcb(tp);
656 	struct listen_ctx *lctx;
657 	int i, rc, v;
658 	struct offload_settings settings;
659 
660 	INP_WLOCK_ASSERT(inp);
661 
662 	rw_rlock(&sc->policy_lock);
663 	settings = *lookup_offload_policy(sc, OPEN_TYPE_LISTEN, NULL,
664 	    EVL_MAKETAG(0xfff, 0, 0), inp);
665 	rw_runlock(&sc->policy_lock);
666 	if (!settings.offload)
667 		return (0);
668 
669 	/* Don't start a hardware listener for any loopback address. */
670 	if (inp->inp_vflag & INP_IPV6 && IN6_IS_ADDR_LOOPBACK(&inp->in6p_laddr))
671 		return (0);
672 	if (!(inp->inp_vflag & INP_IPV6) &&
673 	    IN_LOOPBACK(ntohl(inp->inp_laddr.s_addr)))
674 		return (0);
675 	if (sc->flags & KERN_TLS_ON)
676 		return (0);
677 #if 0
678 	ADAPTER_LOCK(sc);
679 	if (IS_BUSY(sc)) {
680 		log(LOG_ERR, "%s: listen request ignored, %s is busy",
681 		    __func__, device_get_nameunit(sc->dev));
682 		goto done;
683 	}
684 
685 	KASSERT(uld_active(sc, ULD_TOM),
686 	    ("%s: TOM not initialized", __func__));
687 #endif
688 
689 	/*
690 	 * Find an initialized VI with IFCAP_TOE (4 or 6).  We'll use the first
691 	 * such VI's queues to send the passive open and receive the reply to
692 	 * it.
693 	 *
694 	 * XXX: need a way to mark a port in use by offload.  if_cxgbe should
695 	 * then reject any attempt to bring down such a port (and maybe reject
696 	 * attempts to disable IFCAP_TOE on that port too?).
697 	 */
698 	for_each_port(sc, i) {
699 		pi = sc->port[i];
700 		for_each_vi(pi, v, vi) {
701 			if (vi->flags & VI_INIT_DONE &&
702 			    if_getcapenable(vi->ifp) & IFCAP_TOE)
703 				goto found;
704 		}
705 	}
706 	goto done;	/* no port that's UP with IFCAP_TOE enabled */
707 found:
708 
709 	if (listen_hash_find(sc, inp) != NULL)
710 		goto done;	/* already setup */
711 
712 	lctx = alloc_lctx(sc, inp, vi);
713 	if (lctx == NULL) {
714 		log(LOG_ERR,
715 		    "%s: listen request ignored, %s couldn't allocate lctx\n",
716 		    __func__, device_get_nameunit(sc->dev));
717 		goto done;
718 	}
719 	listen_hash_add(sc, lctx);
720 
721 	CTR6(KTR_CXGBE, "%s: stid %u (%s), lctx %p, inp %p vflag 0x%x",
722 	    __func__, lctx->stid, tcpstates[tp->t_state], lctx, inp,
723 	    inp->inp_vflag);
724 
725 	if (inp->inp_vflag & INP_IPV6)
726 		rc = create_server6(sc, lctx);
727 	else
728 		rc = create_server(sc, lctx);
729 	if (rc != 0) {
730 		log(LOG_ERR, "%s: %s failed to create hw listener: %d.\n",
731 		    __func__, device_get_nameunit(sc->dev), rc);
732 		(void) listen_hash_del(sc, inp);
733 		inp = release_lctx(sc, lctx);
734 		/* can't be freed, host stack has a reference */
735 		KASSERT(inp != NULL, ("%s: inp freed", __func__));
736 		goto done;
737 	}
738 	lctx->flags |= LCTX_RPL_PENDING;
739 done:
740 #if 0
741 	ADAPTER_UNLOCK(sc);
742 #endif
743 	return (0);
744 }
745 
746 int
747 t4_listen_stop(struct toedev *tod, struct tcpcb *tp)
748 {
749 	struct listen_ctx *lctx;
750 	struct adapter *sc = tod->tod_softc;
751 	struct inpcb *inp = tptoinpcb(tp);
752 
753 	INP_WLOCK_ASSERT(inp);
754 
755 	lctx = listen_hash_del(sc, inp);
756 	if (lctx == NULL)
757 		return (ENOENT);	/* no hardware listener for this inp */
758 
759 	CTR4(KTR_CXGBE, "%s: stid %u, lctx %p, flags %x", __func__, lctx->stid,
760 	    lctx, lctx->flags);
761 
762 	/*
763 	 * If the reply to the PASS_OPEN is still pending we'll wait for it to
764 	 * arrive and clean up when it does.
765 	 */
766 	if (lctx->flags & LCTX_RPL_PENDING) {
767 		return (EINPROGRESS);
768 	}
769 
770 	if (lctx->flags & LCTX_SETUP_IN_HW)
771 		destroy_server(sc, lctx);
772 	else
773 		inp = release_lctx(sc, lctx);
774 	return (0);
775 }
776 
777 static inline struct synq_entry *
778 alloc_synqe(struct adapter *sc, struct listen_ctx *lctx, int flags)
779 {
780 	struct synq_entry *synqe;
781 
782 	INP_RLOCK_ASSERT(lctx->inp);
783 	MPASS(flags == M_WAITOK || flags == M_NOWAIT);
784 
785 	synqe = malloc(sizeof(*synqe), M_CXGBE, flags);
786 	if (__predict_true(synqe != NULL)) {
787 		synqe->flags = TPF_SYNQE;
788 		synqe->incarnation = sc->incarnation;
789 		refcount_init(&synqe->refcnt, 1);
790 		synqe->lctx = lctx;
791 		hold_lctx(lctx);	/* Every synqe has a ref on its lctx. */
792 		synqe->syn = NULL;
793 	}
794 
795 	return (synqe);
796 }
797 
798 static inline void
799 hold_synqe(struct synq_entry *synqe)
800 {
801 
802 	refcount_acquire(&synqe->refcnt);
803 }
804 
805 static inline struct inpcb *
806 release_synqe(struct adapter *sc, struct synq_entry *synqe)
807 {
808 	struct inpcb *inp;
809 
810 	MPASS(synqe->flags & TPF_SYNQE);
811 	MPASS(synqe->lctx != NULL);
812 
813 	inp = synqe->lctx->inp;
814 	MPASS(inp != NULL);
815 	INP_WLOCK_ASSERT(inp);
816 
817 	if (refcount_release(&synqe->refcnt)) {
818 		inp = release_lctx(sc, synqe->lctx);
819 		m_freem(synqe->syn);
820 		free(synqe, M_CXGBE);
821 	}
822 
823 	return (inp);
824 }
825 
826 void
827 t4_syncache_added(struct toedev *tod __unused, void *arg)
828 {
829 	struct synq_entry *synqe = arg;
830 
831 	hold_synqe(synqe);
832 }
833 
834 void
835 t4_syncache_removed(struct toedev *tod, void *arg)
836 {
837 	struct adapter *sc = tod->tod_softc;
838 	struct synq_entry *synqe = arg;
839 	struct inpcb *inp = synqe->lctx->inp;
840 
841 	/*
842 	 * XXX: this is a LOR but harmless when running from the softclock.
843 	 */
844 	INP_WLOCK(inp);
845 	inp = release_synqe(sc, synqe);
846 	if (inp != NULL)
847 		INP_WUNLOCK(inp);
848 }
849 
850 int
851 t4_syncache_respond(struct toedev *tod, void *arg, struct mbuf *m)
852 {
853 	struct synq_entry *synqe = arg;
854 
855 	if (atomic_fetchadd_int(&synqe->ok_to_respond, 1) == 0) {
856 		struct tcpopt to;
857 		struct ip *ip = mtod(m, struct ip *);
858 		struct tcphdr *th;
859 
860 		if (ip->ip_v == IPVERSION)
861 			th = (void *)(ip + 1);
862 		else
863 			th = (void *)((struct ip6_hdr *)ip + 1);
864 		bzero(&to, sizeof(to));
865 		tcp_dooptions(&to, (void *)(th + 1),
866 		    (th->th_off << 2) - sizeof(*th), TO_SYN);
867 
868 		/* Save the kernel-chosen ISS, the peer's IRS and timestamp for later. */
869 		synqe->iss = be32toh(th->th_seq);
870 		synqe->irs = be32toh(th->th_ack) - 1;
871 		synqe->ts = to.to_tsval;
872 	}
873 
874 	m_freem(m);	/* don't need this any more */
875 	return (0);
876 }
877 
878 static int
879 do_pass_open_rpl(struct sge_iq *iq, const struct rss_header *rss,
880     struct mbuf *m)
881 {
882 	struct adapter *sc = iq->adapter;
883 	const struct cpl_pass_open_rpl *cpl = (const void *)(rss + 1);
884 	int stid = GET_TID(cpl);
885 	unsigned int status = cpl->status;
886 	struct listen_ctx *lctx = lookup_stid(sc, stid);
887 	struct inpcb *inp = lctx->inp;
888 #ifdef INVARIANTS
889 	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
890 #endif
891 
892 	KASSERT(opcode == CPL_PASS_OPEN_RPL,
893 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
894 	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
895 	KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));
896 
897 	INP_WLOCK(inp);
898 
899 	CTR4(KTR_CXGBE, "%s: stid %d, status %u, flags 0x%x",
900 	    __func__, stid, status, lctx->flags);
901 
902 	lctx->flags &= ~LCTX_RPL_PENDING;
903 	if (status == CPL_ERR_NONE)
904 		lctx->flags |= LCTX_SETUP_IN_HW;
905 	else
906 		log(LOG_ERR, "listener (stid %u) failed: %d\n", stid, status);
907 
908 #ifdef INVARIANTS
909 	/*
910 	 * If the inp has been dropped (listening socket closed) then
911 	 * listen_stop must have run and taken the inp out of the hash.
912 	 */
913 	if (inp->inp_flags & INP_DROPPED) {
914 		KASSERT(listen_hash_del(sc, inp) == NULL,
915 		    ("%s: inp %p still in listen hash", __func__, inp));
916 	}
917 #endif
918 
919 	if (inp->inp_flags & INP_DROPPED && status != CPL_ERR_NONE) {
920 		if (release_lctx(sc, lctx) != NULL)
921 			INP_WUNLOCK(inp);
922 		return (status);
923 	}
924 
925 	/*
926 	 * Listening socket stopped listening earlier and now the chip tells us
927 	 * it has started the hardware listener.  Stop it; the lctx will be
928 	 * released in do_close_server_rpl.
929 	 */
930 	if (inp->inp_flags & INP_DROPPED) {
931 		destroy_server(sc, lctx);
932 		INP_WUNLOCK(inp);
933 		return (status);
934 	}
935 
936 	/*
937 	 * Failed to start hardware listener.  Take inp out of the hash and
938 	 * release our reference on it.  An error message has been logged
939 	 * already.
940 	 */
941 	if (status != CPL_ERR_NONE) {
942 		listen_hash_del(sc, inp);
943 		if (release_lctx(sc, lctx) != NULL)
944 			INP_WUNLOCK(inp);
945 		return (status);
946 	}
947 
948 	/* hardware listener open for business */
949 
950 	INP_WUNLOCK(inp);
951 	return (status);
952 }
953 
954 static int
955 do_close_server_rpl(struct sge_iq *iq, const struct rss_header *rss,
956     struct mbuf *m)
957 {
958 	struct adapter *sc = iq->adapter;
959 	const struct cpl_close_listsvr_rpl *cpl = (const void *)(rss + 1);
960 	int stid = GET_TID(cpl);
961 	unsigned int status = cpl->status;
962 	struct listen_ctx *lctx = lookup_stid(sc, stid);
963 	struct inpcb *inp = lctx->inp;
964 #ifdef INVARIANTS
965 	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
966 #endif
967 
968 	KASSERT(opcode == CPL_CLOSE_LISTSRV_RPL,
969 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
970 	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
971 	KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));
972 
973 	CTR3(KTR_CXGBE, "%s: stid %u, status %u", __func__, stid, status);
974 
975 	if (status != CPL_ERR_NONE) {
976 		log(LOG_ERR, "%s: failed (%u) to close listener for stid %u\n",
977 		    __func__, status, stid);
978 		return (status);
979 	}
980 
981 	INP_WLOCK(inp);
982 	inp = release_lctx(sc, lctx);
983 	if (inp != NULL)
984 		INP_WUNLOCK(inp);
985 
986 	return (status);
987 }
988 
989 static void
990 done_with_synqe(struct adapter *sc, struct synq_entry *synqe)
991 {
992 	struct tom_data *td = sc->tom_softc;
993 	struct listen_ctx *lctx = synqe->lctx;
994 	struct inpcb *inp = lctx->inp;
995 	struct l2t_entry *e = &sc->l2t->l2tab[synqe->params.l2t_idx];
996 	int ntids;
997 
998 	INP_WLOCK_ASSERT(inp);
999 
1000 	if (synqe->tid != -1) {
1001 		ntids = inp->inp_vflag & INP_IPV6 ? 2 : 1;
1002 		remove_tid(sc, synqe->tid, ntids);
1003 		mtx_lock(&td->toep_list_lock);
1004 		TAILQ_REMOVE(&td->synqe_list, synqe, link);
1005 		mtx_unlock(&td->toep_list_lock);
1006 		release_tid(sc, synqe->tid, lctx->ctrlq);
1007 	}
1008 	t4_l2t_release(e);
1009 	inp = release_synqe(sc, synqe);
1010 	if (inp)
1011 		INP_WUNLOCK(inp);
1012 }
1013 
1014 void
1015 synack_failure_cleanup(struct adapter *sc, struct synq_entry *synqe)
1016 {
1017 	INP_WLOCK(synqe->lctx->inp);
1018 	done_with_synqe(sc, synqe);
1019 }
1020 
1021 int
1022 do_abort_req_synqe(struct sge_iq *iq, const struct rss_header *rss,
1023     struct mbuf *m)
1024 {
1025 	struct adapter *sc = iq->adapter;
1026 	const struct cpl_abort_req_rss *cpl = (const void *)(rss + 1);
1027 	unsigned int tid = GET_TID(cpl);
1028 	struct synq_entry *synqe = lookup_tid(sc, tid);
1029 	struct listen_ctx *lctx = synqe->lctx;
1030 	struct inpcb *inp = lctx->inp;
1031 	struct sge_ofld_txq *ofld_txq;
1032 #ifdef INVARIANTS
1033 	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
1034 #endif
1035 
1036 	KASSERT(opcode == CPL_ABORT_REQ_RSS,
1037 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
1038 	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
1039 	KASSERT(synqe->tid == tid, ("%s: toep tid mismatch", __func__));
1040 
1041 	CTR6(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x), lctx %p, status %d",
1042 	    __func__, tid, synqe, synqe->flags, synqe->lctx, cpl->status);
1043 
1044 	if (negative_advice(cpl->status))
1045 		return (0);	/* Ignore negative advice */
1046 
1047 	INP_WLOCK(inp);
1048 
1049 	ofld_txq = &sc->sge.ofld_txq[synqe->params.txq_idx];
1050 
1051 	if (!(synqe->flags & TPF_FLOWC_WR_SENT))
1052 		send_flowc_wr_synqe(sc, synqe);
1053 
1054 	/*
1055 	 * If we'd initiated an abort earlier the reply to it is responsible for
1056 	 * cleaning up resources.  Otherwise we tear everything down right here
1057 	 * right now.  We owe the T4 a CPL_ABORT_RPL no matter what.
1058 	 */
1059 	if (synqe->flags & TPF_ABORT_SHUTDOWN) {
1060 		INP_WUNLOCK(inp);
1061 		goto done;
1062 	}
1063 
1064 	done_with_synqe(sc, synqe);
1065 	/* inp lock released by done_with_synqe */
1066 done:
1067 	send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST);
1068 	return (0);
1069 }
1070 
1071 int
1072 do_abort_rpl_synqe(struct sge_iq *iq, const struct rss_header *rss,
1073     struct mbuf *m)
1074 {
1075 	struct adapter *sc = iq->adapter;
1076 	const struct cpl_abort_rpl_rss *cpl = (const void *)(rss + 1);
1077 	unsigned int tid = GET_TID(cpl);
1078 	struct synq_entry *synqe = lookup_tid(sc, tid);
1079 	struct listen_ctx *lctx = synqe->lctx;
1080 	struct inpcb *inp = lctx->inp;
1081 #ifdef INVARIANTS
1082 	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
1083 #endif
1084 
1085 	KASSERT(opcode == CPL_ABORT_RPL_RSS,
1086 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
1087 	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
1088 	KASSERT(synqe->tid == tid, ("%s: toep tid mismatch", __func__));
1089 
1090 	CTR6(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x), lctx %p, status %d",
1091 	    __func__, tid, synqe, synqe->flags, synqe->lctx, cpl->status);
1092 
1093 	INP_WLOCK(inp);
1094 	KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
1095 	    ("%s: wasn't expecting abort reply for synqe %p (0x%x)",
1096 	    __func__, synqe, synqe->flags));
1097 
1098 	done_with_synqe(sc, synqe);
1099 	/* inp lock released by done_with_synqe */
1100 
1101 	return (0);
1102 }
1103 
1104 void
1105 t4_offload_socket(struct toedev *tod, void *arg, struct socket *so)
1106 {
1107 	struct adapter *sc = tod->tod_softc;
1108 	struct tom_data *td = sc->tom_softc;
1109 	struct synq_entry *synqe = arg;
1110 	struct inpcb *inp = sotoinpcb(so);
1111 	struct toepcb *toep = synqe->toep;
1112 
1113 	NET_EPOCH_ASSERT();	/* prevents bad race with accept() */
1114 	INP_WLOCK_ASSERT(inp);
1115 	KASSERT(synqe->flags & TPF_SYNQE,
1116 	    ("%s: %p not a synq_entry?", __func__, arg));
1117 	MPASS(toep->tid == synqe->tid);
1118 
1119 	offload_socket(so, toep);
1120 	make_established(toep, synqe->iss, synqe->irs, synqe->tcp_opt);
1121 	toep->flags |= TPF_CPL_PENDING;
1122 	update_tid(sc, synqe->tid, toep);
1123 	synqe->flags |= TPF_SYNQE_EXPANDED;
1124 	mtx_lock(&td->toep_list_lock);
1125 	/* Remove synqe from its list and add the TOE PCB to the active list. */
1126 	TAILQ_REMOVE(&td->synqe_list, synqe, link);
1127 	TAILQ_INSERT_TAIL(&td->toep_list, toep, link);
1128 	toep->flags |= TPF_IN_TOEP_LIST;
1129 	mtx_unlock(&td->toep_list_lock);
1130 	inp->inp_flowtype = (inp->inp_vflag & INP_IPV6) ?
1131 	    M_HASHTYPE_RSS_TCP_IPV6 : M_HASHTYPE_RSS_TCP_IPV4;
1132 	inp->inp_flowid = synqe->rss_hash;
1133 }
1134 
1135 static void
1136 t4opt_to_tcpopt(const struct tcp_options *t4opt, struct tcpopt *to)
1137 {
1138 	bzero(to, sizeof(*to));
1139 
1140 	if (t4opt->mss) {
1141 		to->to_flags |= TOF_MSS;
1142 		to->to_mss = be16toh(t4opt->mss);
1143 	}
1144 
1145 	if (t4opt->wsf > 0 && t4opt->wsf < 15) {
1146 		to->to_flags |= TOF_SCALE;
1147 		to->to_wscale = t4opt->wsf;
1148 	}
1149 
1150 	if (t4opt->tstamp)
1151 		to->to_flags |= TOF_TS;
1152 
1153 	if (t4opt->sack)
1154 		to->to_flags |= TOF_SACKPERM;
1155 }
1156 
1157 static bool
1158 encapsulated_syn(struct adapter *sc, const struct cpl_pass_accept_req *cpl)
1159 {
1160 	u_int hlen = be32toh(cpl->hdr_len);
1161 
1162 	if (chip_id(sc) >= CHELSIO_T6)
1163 		return (G_T6_ETH_HDR_LEN(hlen) > sizeof(struct ether_vlan_header));
1164 	else
1165 		return (G_ETH_HDR_LEN(hlen) > sizeof(struct ether_vlan_header));
1166 }
1167 
1168 static void
1169 pass_accept_req_to_protohdrs(struct adapter *sc, const struct mbuf *m,
1170     struct in_conninfo *inc, struct tcphdr *th, uint8_t *iptos)
1171 {
1172 	const struct cpl_pass_accept_req *cpl = mtod(m, const void *);
1173 	const struct ether_header *eh;
1174 	unsigned int hlen = be32toh(cpl->hdr_len);
1175 	uintptr_t l3hdr;
1176 	const struct tcphdr *tcp;
1177 
1178 	eh = (const void *)(cpl + 1);
1179 	if (chip_id(sc) >= CHELSIO_T6) {
1180 		l3hdr = ((uintptr_t)eh + G_T6_ETH_HDR_LEN(hlen));
1181 		tcp = (const void *)(l3hdr + G_T6_IP_HDR_LEN(hlen));
1182 	} else {
1183 		l3hdr = ((uintptr_t)eh + G_ETH_HDR_LEN(hlen));
1184 		tcp = (const void *)(l3hdr + G_IP_HDR_LEN(hlen));
1185 	}
1186 
1187 	/* extract TOS (DiffServ + ECN) byte for AccECN */
1188 	if (iptos) {
1189 		if (((struct ip *)l3hdr)->ip_v == IPVERSION) {
1190 			const struct ip *ip = (const void *)l3hdr;
1191 			*iptos = ip->ip_tos;
1192 		}
1193 #ifdef INET6
1194 		else
1195 		if (((struct ip *)l3hdr)->ip_v == (IPV6_VERSION >> 4)) {
1196 			const struct ip6_hdr *ip6 = (const void *)l3hdr;
1197 			*iptos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
1198 		}
1199 #endif /* INET6 */
1200 	}
1201 
1202 	if (inc) {
1203 		bzero(inc, sizeof(*inc));
1204 		inc->inc_fport = tcp->th_sport;
1205 		inc->inc_lport = tcp->th_dport;
1206 		if (((struct ip *)l3hdr)->ip_v == IPVERSION) {
1207 			const struct ip *ip = (const void *)l3hdr;
1208 
1209 			inc->inc_faddr = ip->ip_src;
1210 			inc->inc_laddr = ip->ip_dst;
1211 		} else {
1212 			const struct ip6_hdr *ip6 = (const void *)l3hdr;
1213 
1214 			inc->inc_flags |= INC_ISIPV6;
1215 			inc->inc6_faddr = ip6->ip6_src;
1216 			inc->inc6_laddr = ip6->ip6_dst;
1217 		}
1218 	}
1219 
1220 	if (th) {
1221 		bcopy(tcp, th, sizeof(*th));
1222 		tcp_fields_to_host(th);		/* just like tcp_input */
1223 	}
1224 }
1225 
1226 static struct l2t_entry *
1227 get_l2te_for_nexthop(struct port_info *pi, if_t ifp,
1228     struct in_conninfo *inc)
1229 {
1230 	struct l2t_entry *e;
1231 	struct sockaddr_in6 sin6;
1232 	struct sockaddr *dst = (void *)&sin6;
1233 	struct nhop_object *nh;
1234 
1235 	if (inc->inc_flags & INC_ISIPV6) {
1236 		bzero(dst, sizeof(struct sockaddr_in6));
1237 		dst->sa_len = sizeof(struct sockaddr_in6);
1238 		dst->sa_family = AF_INET6;
1239 
1240 		if (IN6_IS_ADDR_LINKLOCAL(&inc->inc6_laddr)) {
1241 			/* no need for route lookup */
1242 			e = t4_l2t_get(pi, ifp, dst);
1243 			return (e);
1244 		}
1245 
1246 		nh = fib6_lookup(RT_DEFAULT_FIB, &inc->inc6_faddr, 0, NHR_NONE, 0);
1247 		if (nh == NULL)
1248 			return (NULL);
1249 		if (nh->nh_ifp != ifp)
1250 			return (NULL);
1251 		if (nh->nh_flags & NHF_GATEWAY)
1252 			((struct sockaddr_in6 *)dst)->sin6_addr = nh->gw6_sa.sin6_addr;
1253 		else
1254 			((struct sockaddr_in6 *)dst)->sin6_addr = inc->inc6_faddr;
1255 	} else {
1256 		dst->sa_len = sizeof(struct sockaddr_in);
1257 		dst->sa_family = AF_INET;
1258 
1259 		nh = fib4_lookup(RT_DEFAULT_FIB, inc->inc_faddr, 0, NHR_NONE, 0);
1260 		if (nh == NULL)
1261 			return (NULL);
1262 		if (nh->nh_ifp != ifp)
1263 			return (NULL);
1264 		if (nh->nh_flags & NHF_GATEWAY)
1265 			if (nh->gw_sa.sa_family == AF_INET)
1266 				((struct sockaddr_in *)dst)->sin_addr = nh->gw4_sa.sin_addr;
1267 			else
1268 				*((struct sockaddr_in6 *)dst) = nh->gw6_sa;
1269 		else
1270 			((struct sockaddr_in *)dst)->sin_addr = inc->inc_faddr;
1271 	}
1272 
1273 	e = t4_l2t_get(pi, ifp, dst);
1274 	return (e);
1275 }
1276 
1277 static int
1278 send_synack(struct adapter *sc, struct synq_entry *synqe, uint64_t opt0,
1279     uint32_t opt2, int tid)
1280 {
1281 	struct wrqe *wr;
1282 	struct cpl_pass_accept_rpl *rpl;
1283 	struct l2t_entry *e = &sc->l2t->l2tab[synqe->params.l2t_idx];
1284 
1285 	wr = alloc_wrqe(is_t4(sc) ? sizeof(struct cpl_pass_accept_rpl) :
1286 	    sizeof(struct cpl_t5_pass_accept_rpl), &sc->sge.ctrlq[0]);
1287 	if (wr == NULL)
1288 		return (ENOMEM);
1289 	rpl = wrtod(wr);
1290 
1291 	if (is_t4(sc))
1292 		INIT_TP_WR_MIT_CPL(rpl, CPL_PASS_ACCEPT_RPL, tid);
1293 	else {
1294 		struct cpl_t5_pass_accept_rpl *rpl5 = (void *)rpl;
1295 
1296 		INIT_TP_WR_MIT_CPL(rpl5, CPL_PASS_ACCEPT_RPL, tid);
1297 		rpl5->iss = htobe32(synqe->iss);
1298 	}
1299 	rpl->opt0 = opt0;
1300 	rpl->opt2 = opt2;
1301 
1302 	return (t4_l2t_send(sc, wr, e));
1303 }
1304 
1305 #define REJECT_PASS_ACCEPT_REQ(tunnel)	do { \
1306 	if (!tunnel) { \
1307 		m_freem(m); \
1308 		m = NULL; \
1309 	} \
1310 	reject_reason = __LINE__; \
1311 	goto reject; \
1312 } while (0)
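/*
 * Editorial note: the "tunnel" argument decides the fate of the SYN at
 * the reject label in do_pass_accept_req.  With tunnel true the mbuf is
 * preserved and later injected into the host stack with if_input() (the
 * connection is handled in software instead of being offloaded); with
 * tunnel false the mbuf is freed and the SYN is dropped outright.
 */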
1313 
1314 /*
1315  * The context associated with a tid entry via insert_tid could be a synq_entry
1316  * or a toepcb.  The only way CPL handlers can tell is via a bit in these flags.
1317  */
1318 CTASSERT(offsetof(struct toepcb, flags) == offsetof(struct synq_entry, flags));
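/*
 * A minimal sketch (editorial; handle_synqe/handle_toep are hypothetical
 * stand-ins for a handler's real work) of how a CPL handler can dispatch
 * on the context type.  This works only because both structures keep
 * their flags word at the same offset, as the CTASSERT above verifies:
 *
 *	struct synq_entry *synqe = lookup_tid(sc, tid);
 *
 *	if (synqe->flags & TPF_SYNQE)
 *		handle_synqe(sc, synqe);
 *	else
 *		handle_toep(sc, (struct toepcb *)synqe);
 */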
1319 
1320 /*
1321  * Incoming SYN on a listening socket.
1322  *
1323  * XXX: Every use of ifp in this routine has a bad race with up/down, toe/-toe,
1324  * etc.
1325  */
1326 static int
1327 do_pass_accept_req(struct sge_iq *iq, const struct rss_header *rss,
1328     struct mbuf *m)
1329 {
1330 	struct adapter *sc = iq->adapter;
1331 	struct tom_data *td = sc->tom_softc;
1332 	struct toedev *tod;
1333 	const struct cpl_pass_accept_req *cpl = mtod(m, const void *);
1334 	unsigned int stid = G_PASS_OPEN_TID(be32toh(cpl->tos_stid));
1335 	unsigned int tid = GET_TID(cpl);
1336 	struct listen_ctx *lctx = lookup_stid(sc, stid);
1337 	struct inpcb *inp;
1338 	struct socket *so;
1339 	struct in_conninfo inc;
1340 	struct tcphdr th;
1341 	struct tcpopt to;
1342 	struct port_info *pi;
1343 	struct vi_info *vi;
1344 	if_t hw_ifp, ifp;
1345 	struct l2t_entry *e = NULL;
1346 	struct synq_entry *synqe = NULL;
1347 	int reject_reason, v, ntids;
1348 	uint16_t vid, l2info;
1349 	struct epoch_tracker et;
1350 #ifdef INVARIANTS
1351 	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
1352 #endif
1353 	struct offload_settings settings;
1354 	uint8_t iptos;
1355 
1356 	KASSERT(opcode == CPL_PASS_ACCEPT_REQ,
1357 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
1358 	KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));
1359 
1360 	CTR4(KTR_CXGBE, "%s: stid %u, tid %u, lctx %p", __func__, stid, tid,
1361 	    lctx);
1362 
1363 	/*
1364 	 * Figure out the port the SYN arrived on.  We'll look for an exact VI
1365 	 * match in a bit but in case we don't find any we'll use the main VI as
1366 	 * the incoming ifnet.
1367 	 */
1368 	l2info = be16toh(cpl->l2info);
1369 	pi = sc->port[G_SYN_INTF(l2info)];
1370 	hw_ifp = pi->vi[0].ifp;
1371 	m->m_pkthdr.rcvif = hw_ifp;
1372 
1373 	CURVNET_SET(lctx->vnet);	/* before any potential REJECT */
1374 
1375 	/*
1376 	 * If VXLAN/NVGRE parsing is enabled then SYNs in the inner traffic will
1377 	 * also hit the listener.  We don't want to offload those.
1378 	 */
1379 	if (encapsulated_syn(sc, cpl)) {
1380 		REJECT_PASS_ACCEPT_REQ(true);
1381 	}
1382 
1383 	/*
1384 	 * Use the MAC index to lookup the associated VI.  If this SYN didn't
1385 	 * match a perfect MAC filter, punt.
1386 	 */
1387 	if (!(l2info & F_SYN_XACT_MATCH)) {
1388 		REJECT_PASS_ACCEPT_REQ(true);
1389 	}
1390 	for_each_vi(pi, v, vi) {
1391 		if (vi->xact_addr_filt == G_SYN_MAC_IDX(l2info))
1392 			goto found;
1393 	}
1394 	REJECT_PASS_ACCEPT_REQ(true);
1395 found:
1396 	hw_ifp = vi->ifp;	/* the cxgbe ifnet */
1397 	m->m_pkthdr.rcvif = hw_ifp;
1398 	tod = TOEDEV(hw_ifp);
1399 
1400 	/*
1401 	 * Don't offload if the peer requested a TCP option that's not known to
1402 	 * the silicon.  Send the SYN to the kernel instead.
1403 	 */
1404 	if (__predict_false(cpl->tcpopt.unknown))
1405 		REJECT_PASS_ACCEPT_REQ(true);
1406 
1407 	/*
1408 	 * Figure out if there is a pseudo interface (vlan, lagg, etc.)
1409 	 * involved.  Don't offload if the SYN had a VLAN tag and the vid
1410 	 * doesn't match anything on this interface.
1411 	 *
1412 	 * XXX: lagg support, lagg + vlan support.
1413 	 */
1414 	vid = EVL_VLANOFTAG(be16toh(cpl->vlan));
1415 	if (vid != 0xfff && vid != 0) {
1416 		ifp = VLAN_DEVAT(hw_ifp, vid);
1417 		if (ifp == NULL)
1418 			REJECT_PASS_ACCEPT_REQ(true);
1419 	} else
1420 		ifp = hw_ifp;
1421 
1422 	/*
1423 	 * Don't offload if the ifnet that the SYN came in on is not in the same
1424 	 * vnet as the listening socket.
1425 	 */
1426 	if (lctx->vnet != if_getvnet(ifp))
1427 		REJECT_PASS_ACCEPT_REQ(true);
1428 
1429 	pass_accept_req_to_protohdrs(sc, m, &inc, &th, &iptos);
1430 	if (inc.inc_flags & INC_ISIPV6) {
1431 
1432 		/* Don't offload if the ifcap isn't enabled */
1433 		if ((if_getcapenable(ifp) & IFCAP_TOE6) == 0)
1434 			REJECT_PASS_ACCEPT_REQ(true);
1435 
1436 		/*
1437 		 * SYN must be directed to an IP6 address on this ifnet.  This
1438 		 * is more restrictive than in6_localip.
1439 		 */
1440 		NET_EPOCH_ENTER(et);
1441 		if (!in6_ifhasaddr(ifp, &inc.inc6_laddr)) {
1442 			NET_EPOCH_EXIT(et);
1443 			REJECT_PASS_ACCEPT_REQ(true);
1444 		}
1445 
1446 		ntids = 2;
1447 	} else {
1448 
1449 		/* Don't offload if the ifcap isn't enabled */
1450 		if ((if_getcapenable(ifp) & IFCAP_TOE4) == 0)
1451 			REJECT_PASS_ACCEPT_REQ(true);
1452 
1453 		/*
1454 		 * SYN must be directed to an IP address on this ifnet.  This
1455 		 * is more restrictive than in_localip.
1456 		 */
1457 		NET_EPOCH_ENTER(et);
1458 		if (!in_ifhasaddr(ifp, inc.inc_laddr)) {
1459 			NET_EPOCH_EXIT(et);
1460 			REJECT_PASS_ACCEPT_REQ(true);
1461 		}
1462 
1463 		ntids = 1;
1464 	}
1465 
1466 	e = get_l2te_for_nexthop(pi, ifp, &inc);
1467 	if (e == NULL) {
1468 		NET_EPOCH_EXIT(et);
1469 		REJECT_PASS_ACCEPT_REQ(true);
1470 	}
1471 
1472 	/* Don't offload if the 4-tuple is already in use */
1473 	if (toe_4tuple_check(&inc, &th, ifp) != 0) {
1474 		NET_EPOCH_EXIT(et);
1475 		REJECT_PASS_ACCEPT_REQ(false);
1476 	}
1477 
1478 	inp = lctx->inp;		/* listening socket, not owned by TOE */
1479 	INP_RLOCK(inp);
1480 
1481 	/* Don't offload if the listening socket has closed */
1482 	if (__predict_false(inp->inp_flags & INP_DROPPED)) {
1483 		INP_RUNLOCK(inp);
1484 		NET_EPOCH_EXIT(et);
1485 		REJECT_PASS_ACCEPT_REQ(false);
1486 	}
1487 	so = inp->inp_socket;
1488 	rw_rlock(&sc->policy_lock);
1489 	settings = *lookup_offload_policy(sc, OPEN_TYPE_PASSIVE, m,
1490 	    EVL_MAKETAG(0xfff, 0, 0), inp);
1491 	rw_runlock(&sc->policy_lock);
1492 	if (!settings.offload) {
1493 		INP_RUNLOCK(inp);
1494 		NET_EPOCH_EXIT(et);
1495 		REJECT_PASS_ACCEPT_REQ(true);	/* Rejected by COP. */
1496 	}
1497 
1498 	synqe = alloc_synqe(sc, lctx, M_NOWAIT);
1499 	if (synqe == NULL) {
1500 		INP_RUNLOCK(inp);
1501 		NET_EPOCH_EXIT(et);
1502 		REJECT_PASS_ACCEPT_REQ(true);
1503 	}
1504 	MPASS(rss->hash_type == RSS_HASH_TCP);
1505 	synqe->rss_hash = be32toh(rss->hash_val);
1506 	atomic_store_int(&synqe->ok_to_respond, 0);
1507 
1508 	init_conn_params(vi, &settings, &inc, so, &cpl->tcpopt, e->idx,
1509 	    &synqe->params);
1510 
1511 	/*
1512 	 * If all goes well t4_syncache_respond will get called during
1513 	 * syncache_add.  Note that syncache_add releases the pcb lock.
1514 	 */
1515 	t4opt_to_tcpopt(&cpl->tcpopt, &to);
1516 	toe_syncache_add(&inc, &to, &th, inp, tod, synqe, iptos);
1517 
1518 	if (atomic_load_int(&synqe->ok_to_respond) > 0) {
1519 		uint64_t opt0;
1520 		uint32_t opt2;
1521 
1522 		opt0 = calc_options0(vi, &synqe->params);
1523 		opt2 = calc_options2(vi, &synqe->params);
1524 
1525 		insert_tid(sc, tid, synqe, ntids);
1526 		synqe->tid = tid;
1527 		synqe->syn = m;
1528 		m = NULL;
1529 		mtx_lock(&td->toep_list_lock);
1530 		TAILQ_INSERT_TAIL(&td->synqe_list, synqe, link);
1531 		mtx_unlock(&td->toep_list_lock);
1532 
1533 		if (send_synack(sc, synqe, opt0, opt2, tid) != 0) {
1534 			remove_tid(sc, tid, ntids);
1535 			m = synqe->syn;
1536 			synqe->syn = NULL;
1537 			mtx_lock(&td->toep_list_lock);
1538 			TAILQ_REMOVE(&td->synqe_list, synqe, link);
1539 			mtx_unlock(&td->toep_list_lock);
1540 			NET_EPOCH_EXIT(et);
1541 			REJECT_PASS_ACCEPT_REQ(true);
1542 		}
1543 		CTR6(KTR_CXGBE,
1544 		    "%s: stid %u, tid %u, synqe %p, opt0 %#016lx, opt2 %#08x",
1545 		    __func__, stid, tid, synqe, be64toh(opt0), be32toh(opt2));
1546 	} else {
1547 		NET_EPOCH_EXIT(et);
1548 		REJECT_PASS_ACCEPT_REQ(false);
1549 	}
1550 
1551 	NET_EPOCH_EXIT(et);
1552 	CURVNET_RESTORE();
1553 	return (0);
1554 reject:
1555 	CURVNET_RESTORE();
1556 	CTR4(KTR_CXGBE, "%s: stid %u, tid %u, REJECT (%d)", __func__, stid, tid,
1557 	    reject_reason);
1558 
1559 	if (e)
1560 		t4_l2t_release(e);
1561 	release_tid(sc, tid, lctx->ctrlq);
1562 	if (synqe) {
1563 		inp = synqe->lctx->inp;
1564 		INP_WLOCK(inp);
1565 		inp = release_synqe(sc, synqe);
1566 		if (inp)
1567 			INP_WUNLOCK(inp);
1568 	}
1569 
1570 	if (m) {
1571 		/*
1572 		 * The connection request hit a TOE listener but is being passed
1573 		 * on to the kernel sw stack instead of getting offloaded.
1574 		 */
1575 		m_adj(m, sizeof(*cpl));
1576 		m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID |
1577 		    CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
1578 		m->m_pkthdr.csum_data = 0xffff;
1579 		if_input(hw_ifp, m);
1580 	}
1581 
1582 	return (reject_reason);
1583 }
1584 
1585 static void
1586 synqe_to_protohdrs(struct adapter *sc, struct synq_entry *synqe,
1587     const struct cpl_pass_establish *cpl, struct in_conninfo *inc,
1588     struct tcphdr *th, struct tcpopt *to)
1589 {
1590 	uint16_t tcp_opt = be16toh(cpl->tcp_opt);
1591 	uint8_t iptos;
1592 
1593 	/* start off with the original SYN */
1594 	pass_accept_req_to_protohdrs(sc, synqe->syn, inc, th, &iptos);
1595 
1596 	/* modify parts to make it look like the ACK to our SYN|ACK */
1597 	tcp_set_flags(th, TH_ACK);
1598 	th->th_ack = synqe->iss + 1;
1599 	th->th_seq = be32toh(cpl->rcv_isn);
1600 	bzero(to, sizeof(*to));
1601 	if (G_TCPOPT_TSTAMP(tcp_opt)) {
1602 		to->to_flags |= TOF_TS;
1603 		to->to_tsecr = synqe->ts;
1604 	}
1605 }
1606 
1607 static int
1608 do_pass_establish(struct sge_iq *iq, const struct rss_header *rss,
1609     struct mbuf *m)
1610 {
1611 	struct adapter *sc = iq->adapter;
1612 	struct vi_info *vi;
1613 	if_t ifp;
1614 	const struct cpl_pass_establish *cpl = (const void *)(rss + 1);
1615 #if defined(KTR) || defined(INVARIANTS)
1616 	unsigned int stid = G_PASS_OPEN_TID(be32toh(cpl->tos_stid));
1617 #endif
1618 	unsigned int tid = GET_TID(cpl);
1619 	struct synq_entry *synqe = lookup_tid(sc, tid);
1620 	struct listen_ctx *lctx = synqe->lctx;
1621 	struct inpcb *inp = lctx->inp, *new_inp;
1622 	struct socket *so;
1623 	struct tcphdr th;
1624 	struct tcpopt to;
1625 	struct in_conninfo inc;
1626 	struct toepcb *toep;
1627 	struct epoch_tracker et;
1628 	int rstreason;
1629 #ifdef INVARIANTS
1630 	unsigned int opcode = G_CPL_OPCODE(be32toh(OPCODE_TID(cpl)));
1631 #endif
1632 
1633 	KASSERT(opcode == CPL_PASS_ESTABLISH,
1634 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
1635 	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
1636 	KASSERT(lctx->stid == stid, ("%s: lctx stid mismatch", __func__));
1637 	KASSERT(synqe->flags & TPF_SYNQE,
1638 	    ("%s: tid %u (ctx %p) not a synqe", __func__, tid, synqe));
1639 
1640 	CURVNET_SET(lctx->vnet);
1641 	NET_EPOCH_ENTER(et);	/* for syncache_expand */
1642 	INP_WLOCK(inp);
1643 
1644 	CTR6(KTR_CXGBE,
1645 	    "%s: stid %u, tid %u, synqe %p (0x%x), inp_flags 0x%x",
1646 	    __func__, stid, tid, synqe, synqe->flags, inp->inp_flags);
1647 
1648 	ifp = synqe->syn->m_pkthdr.rcvif;
1649 	vi = if_getsoftc(ifp);
1650 	KASSERT(vi->adapter == sc,
1651 	    ("%s: vi %p, sc %p mismatch", __func__, vi, sc));
1652 
1653 	if (__predict_false(inp->inp_flags & INP_DROPPED)) {
1654 reset:
1655 		send_abort_rpl_synqe(TOEDEV(ifp), synqe, CPL_ABORT_SEND_RST);
1656 		INP_WUNLOCK(inp);
1657 		NET_EPOCH_EXIT(et);
1658 		CURVNET_RESTORE();
1659 		return (0);
1660 	}
1661 
1662 	KASSERT(synqe->params.rxq_idx == iq_to_ofld_rxq(iq) - &sc->sge.ofld_rxq[0],
1663 	    ("%s: CPL arrived on unexpected rxq.  %d %d", __func__,
1664 	    synqe->params.rxq_idx,
1665 	    (int)(iq_to_ofld_rxq(iq) - &sc->sge.ofld_rxq[0])));
1666 
1667 	toep = alloc_toepcb(vi, M_NOWAIT);
1668 	if (toep == NULL)
1669 		goto reset;
1670 	toep->tid = tid;
1671 	toep->l2te = &sc->l2t->l2tab[synqe->params.l2t_idx];
1672 	toep->vnet = lctx->vnet;
1673 	bcopy(&synqe->params, &toep->params, sizeof(toep->params));
1674 	init_toepcb(vi, toep);
1675 
1676 	MPASS(be32toh(cpl->snd_isn) - 1 == synqe->iss);
1677 	MPASS(be32toh(cpl->rcv_isn) - 1 == synqe->irs);
1678 	synqe->tcp_opt = cpl->tcp_opt;
1679 	synqe->toep = toep;
1680 
1681 	/* Come up with something that syncache_expand should be ok with. */
1682 	synqe_to_protohdrs(sc, synqe, cpl, &inc, &th, &to);
1683 	if (inc.inc_flags & INC_ISIPV6) {
1684 		if (lctx->ce == NULL) {
1685 			toep->ce = t4_get_clip_entry(sc, &inc.inc6_laddr, true);
1686 			if (toep->ce == NULL) {
1687 				free_toepcb(toep);
1688 				goto reset;	/* RST without a CLIP entry? */
1689 			}
1690 		} else {
1691 			t4_hold_clip_entry(sc, lctx->ce);
1692 			toep->ce = lctx->ce;
1693 		}
1694 	}
1695 	so = inp->inp_socket;
1696 	KASSERT(so != NULL, ("%s: socket is NULL", __func__));
1697 
1698 	rstreason = toe_syncache_expand(&inc, &to, &th, &so);
1699 	if (rstreason < 0) {
1700 		free_toepcb(toep);
1701 		send_abort_rpl_synqe(TOEDEV(ifp), synqe, CPL_ABORT_NO_RST);
1702 		INP_WUNLOCK(inp);
1703 		NET_EPOCH_EXIT(et);
1704 		CURVNET_RESTORE();
1705 		return (0);
1706 	} else if (rstreason == 0 || so == NULL) {
1707 		free_toepcb(toep);
1708 		goto reset;
1709 	}
1710 
1711 	/* New connection inpcb is already locked by syncache_expand(). */
1712 	new_inp = sotoinpcb(so);
1713 	INP_WLOCK_ASSERT(new_inp);
1714 	MPASS(so->so_vnet == lctx->vnet);
1715 
1716 	/*
1717 	 * This is for expansion from syncookies.
1718 	 *
1719 	 * XXX: we've held the tcbinfo lock throughout so there's no risk of
1720 	 * anyone accept'ing a connection before we've installed our hooks, but
1721 	 * this somewhat defeats the purpose of having a tod_offload_socket :-(
1722 	 */
1723 	if (__predict_false(!(synqe->flags & TPF_SYNQE_EXPANDED))) {
1724 		tcp_timer_activate(intotcpcb(new_inp), TT_KEEP, 0);
1725 		t4_offload_socket(TOEDEV(ifp), synqe, so);
1726 	}
1727 
1728 	INP_WUNLOCK(new_inp);
1729 
1730 	/* Done with the synqe */
1731 	inp = release_synqe(sc, synqe);
1732 	if (inp != NULL)
1733 		INP_WUNLOCK(inp);
1734 	NET_EPOCH_EXIT(et);
1735 	CURVNET_RESTORE();
1736 
1737 	return (0);
1738 }
1739 
1740 void
1741 t4_init_listen_cpl_handlers(void)
1742 {
1743 
1744 	t4_register_cpl_handler(CPL_PASS_OPEN_RPL, do_pass_open_rpl);
1745 	t4_register_cpl_handler(CPL_CLOSE_LISTSRV_RPL, do_close_server_rpl);
1746 	t4_register_cpl_handler(CPL_PASS_ACCEPT_REQ, do_pass_accept_req);
1747 	t4_register_cpl_handler(CPL_PASS_ESTABLISH, do_pass_establish);
1748 }
1749 
1750 void
1751 t4_uninit_listen_cpl_handlers(void)
1752 {
1753 
1754 	t4_register_cpl_handler(CPL_PASS_OPEN_RPL, NULL);
1755 	t4_register_cpl_handler(CPL_CLOSE_LISTSRV_RPL, NULL);
1756 	t4_register_cpl_handler(CPL_PASS_ACCEPT_REQ, NULL);
1757 	t4_register_cpl_handler(CPL_PASS_ESTABLISH, NULL);
1758 }
1759 #endif
1760