xref: /freebsd/sys/dev/cxgbe/tom/t4_tls.c (revision e2fae07e093bf4953ad7359b2aa9ca62b0d76adf)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2017-2018 Chelsio Communications, Inc.
5  * All rights reserved.
6  * Written by: John Baldwin <jhb@FreeBSD.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include "opt_inet.h"
31 #include "opt_kern_tls.h"
32 
33 #include <sys/cdefs.h>
34 #ifdef KERN_TLS
35 #include <sys/param.h>
36 #include <sys/ktr.h>
37 #include <sys/ktls.h>
38 #include <sys/sglist.h>
39 #include <sys/socket.h>
40 #include <sys/socketvar.h>
41 #include <sys/systm.h>
42 #include <netinet/in.h>
43 #include <netinet/in_pcb.h>
44 #include <netinet/tcp_var.h>
45 #include <netinet/toecore.h>
46 #include <opencrypto/cryptodev.h>
47 #include <opencrypto/xform.h>
48 
49 #ifdef TCP_OFFLOAD
50 #include "common/common.h"
51 #include "common/t4_tcb.h"
52 #include "crypto/t4_crypto.h"
53 #include "tom/t4_tom_l2t.h"
54 #include "tom/t4_tom.h"
55 
56 /*
57  * The TCP sequence number of a CPL_TLS_DATA mbuf is saved here while
58  * the mbuf is in the ulp_pdu_reclaimq.
59  */
60 #define	tls_tcp_seq	PH_loc.thirtytwo[0]
61 
62 static void
63 t4_set_tls_tcb_field(struct toepcb *toep, uint16_t word, uint64_t mask,
64     uint64_t val, int reply, int cookie)
65 {
66 	struct adapter *sc = td_adapter(toep->td);
67 	struct mbuf *m;
68 
69 	m = alloc_raw_wr_mbuf(sizeof(struct cpl_set_tcb_field));
70 	if (m == NULL) {
71 		/* XXX */
72 		panic("%s: out of memory", __func__);
73 	}
74 
75 	write_set_tcb_field(sc, mtod(m, void *), toep, word, mask, val, reply,
76 	    cookie);
77 
78 	t4_raw_wr_tx(sc, toep, m);
79 }
80 
81 /* TLS and DTLS common routines */
82 bool
83 can_tls_offload(struct adapter *sc)
84 {
85 
86 	return (sc->tt.tls && sc->cryptocaps & FW_CAPS_CONFIG_TLSKEYS);
87 }
88 
89 int
90 tls_tx_key(struct toepcb *toep)
91 {
92 	struct tls_ofld_info *tls_ofld = &toep->tls;
93 
94 	return (tls_ofld->tx_key_addr >= 0);
95 }
96 
97 /* Set TF_RX_QUIESCE to pause receive. */
98 static void
99 t4_set_rx_quiesce(struct toepcb *toep)
100 {
101 
102 	t4_set_tls_tcb_field(toep, W_TCB_T_FLAGS, V_TF_RX_QUIESCE(1),
103 	    V_TF_RX_QUIESCE(1), 1, CPL_COOKIE_TOM);
104 }
105 
106 /* Clear TF_RX_QUIESCE to re-enable receive. */
107 static void
108 t4_clear_rx_quiesce(struct toepcb *toep)
109 {
110 
111 	t4_set_tls_tcb_field(toep, W_TCB_T_FLAGS, V_TF_RX_QUIESCE(1), 0, 0, 0);
112 }
113 
114 /* TLS/DTLS content type for CPL SFO */
115 static inline unsigned char
116 tls_content_type(unsigned char content_type)
117 {
118 	switch (content_type) {
119 	case CONTENT_TYPE_CCS:
120 		return CPL_TX_TLS_SFO_TYPE_CCS;
121 	case CONTENT_TYPE_ALERT:
122 		return CPL_TX_TLS_SFO_TYPE_ALERT;
123 	case CONTENT_TYPE_HANDSHAKE:
124 		return CPL_TX_TLS_SFO_TYPE_HANDSHAKE;
125 	case CONTENT_TYPE_APP_DATA:
126 		return CPL_TX_TLS_SFO_TYPE_DATA;
127 	default:
128 		return CPL_TX_TLS_SFO_TYPE_CUSTOM;
129 	}
130 }
131 
132 /* TLS Key memory management */
133 static void
134 clear_tls_keyid(struct toepcb *toep)
135 {
136 	struct tls_ofld_info *tls_ofld = &toep->tls;
137 	struct adapter *sc = td_adapter(toep->td);
138 
139 	if (tls_ofld->rx_key_addr >= 0) {
140 		t4_free_tls_keyid(sc, tls_ofld->rx_key_addr);
141 		tls_ofld->rx_key_addr = -1;
142 	}
143 	if (tls_ofld->tx_key_addr >= 0) {
144 		t4_free_tls_keyid(sc, tls_ofld->tx_key_addr);
145 		tls_ofld->tx_key_addr = -1;
146 	}
147 }
148 
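/*
 * Choose the maximum payload per TLS work request: the largest
 * multiple of a 1448-byte TCP segment that fits in the TP transmit
 * page budget of min(3 * 4096, TP_TX_PG_SZ) (e.g. 12288 / 1448 = 8
 * segments, or 11584 bytes).  Sessions with records larger than 8KB
 * use the fixed FC_TP_PLEN_MAX limit instead.
 */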
149 static int
150 get_tp_plen_max(struct ktls_session *tls)
151 {
152 	int plen = ((min(3*4096, TP_TX_PG_SZ))/1448) * 1448;
153 
154 	return (tls->params.max_frame_len <= 8192 ? plen : FC_TP_PLEN_MAX);
155 }
156 
157 /* Send request to save the key in on-card memory. */
158 static int
159 tls_program_key_id(struct toepcb *toep, struct ktls_session *tls,
160     int direction)
161 {
162 	struct tls_ofld_info *tls_ofld = &toep->tls;
163 	struct adapter *sc = td_adapter(toep->td);
164 	int keyid;
165 	struct mbuf *m;
166 	struct tls_key_req *kwr;
167 	struct tls_keyctx *kctx;
168 
169 #ifdef INVARIANTS
170 	int kwrlen, kctxlen, len;
171 
172 	kwrlen = sizeof(*kwr);
173 	kctxlen = roundup2(sizeof(*kctx), 32);
174 	len = roundup2(kwrlen + kctxlen, 16);
175 	MPASS(TLS_KEY_WR_SZ == len);
176 #endif
177 	if (toep->txsd_avail == 0)
178 		return (EAGAIN);
179 
180 	if ((keyid = t4_alloc_tls_keyid(sc)) < 0) {
181 		return (ENOSPC);
182 	}
183 
184 	m = alloc_raw_wr_mbuf(TLS_KEY_WR_SZ);
185 	if (m == NULL) {
186 		t4_free_tls_keyid(sc, keyid);
187 		return (ENOMEM);
188 	}
189 	kwr = mtod(m, struct tls_key_req *);
190 	memset(kwr, 0, TLS_KEY_WR_SZ);
191 
192 	t4_write_tlskey_wr(tls, direction, toep->tid, F_FW_WR_COMPL, keyid,
193 	    kwr);
194 	kctx = (struct tls_keyctx *)(kwr + 1);
195 	if (direction == KTLS_TX)
196 		tls_ofld->tx_key_addr = keyid;
197 	else
198 		tls_ofld->rx_key_addr = keyid;
199 	t4_tls_key_ctx(tls, direction, kctx);
200 
201 	t4_raw_wr_tx(sc, toep, m);
202 
203 	return (0);
204 }
205 
206 int
207 tls_alloc_ktls(struct toepcb *toep, struct ktls_session *tls, int direction)
208 {
209 	struct adapter *sc = td_adapter(toep->td);
210 	int error, iv_size, mac_first;
211 
212 	if (!can_tls_offload(sc))
213 		return (EINVAL);
214 
215 	if (direction == KTLS_RX) {
216 		if (ulp_mode(toep) != ULP_MODE_NONE)
217 			return (EINVAL);
218 		if ((toep->flags & TPF_TLS_STARTING) != 0)
219 			return (EINVAL);
220 	} else {
221 		switch (ulp_mode(toep)) {
222 		case ULP_MODE_NONE:
223 		case ULP_MODE_TLS:
224 		case ULP_MODE_TCPDDP:
225 			break;
226 		default:
227 			return (EINVAL);
228 		}
229 	}
230 
231 	/* TLS 1.1 through TLS 1.3 are currently supported. */
232 	if (tls->params.tls_vmajor != TLS_MAJOR_VER_ONE ||
233 	    tls->params.tls_vminor < TLS_MINOR_VER_ONE ||
234 	    tls->params.tls_vminor > TLS_MINOR_VER_THREE) {
235 		return (EPROTONOSUPPORT);
236 	}
237 
238 	/* TLS 1.3 is only supported on T7+. */
239 	if (tls->params.tls_vminor == TLS_MINOR_VER_THREE) {
240 		if (is_t6(sc)) {
241 			return (EPROTONOSUPPORT);
242 		}
243 	}
244 
245 	/* Sanity check values in *tls. */
246 	switch (tls->params.cipher_algorithm) {
247 	case CRYPTO_AES_CBC:
248 		/* XXX: Explicitly ignore any provided IV. */
249 		switch (tls->params.cipher_key_len) {
250 		case 128 / 8:
251 		case 192 / 8:
252 		case 256 / 8:
253 			break;
254 		default:
255 			return (EINVAL);
256 		}
257 		switch (tls->params.auth_algorithm) {
258 		case CRYPTO_SHA1_HMAC:
259 		case CRYPTO_SHA2_256_HMAC:
260 		case CRYPTO_SHA2_384_HMAC:
261 			break;
262 		default:
263 			return (EPROTONOSUPPORT);
264 		}
265 		iv_size = AES_BLOCK_LEN;
266 		mac_first = 1;
267 		break;
268 	case CRYPTO_AES_NIST_GCM_16:
269 		switch (tls->params.cipher_key_len) {
270 		case 128 / 8:
271 		case 192 / 8:
272 		case 256 / 8:
273 			break;
274 		default:
275 			return (EINVAL);
276 		}
277 
278 		/*
279 		 * The IV size for TLS 1.2 is the explicit IV in the
280 		 * record header.  For TLS 1.3 it is the size of the
281 		 * sequence number.
282 		 */
283 		iv_size = 8;
284 		mac_first = 0;
285 		break;
286 	default:
287 		return (EPROTONOSUPPORT);
288 	}
289 
290 	/* Bail if we already have a key. */
291 	if (direction == KTLS_TX) {
292 		if (toep->tls.tx_key_addr != -1)
293 			return (EOPNOTSUPP);
294 	} else {
295 		if (toep->tls.rx_key_addr != -1)
296 			return (EOPNOTSUPP);
297 	}
298 
299 	error = tls_program_key_id(toep, tls, direction);
300 	if (error)
301 		return (error);
302 
303 	toep->tls.tls13 = tls->params.tls_vminor == TLS_MINOR_VER_THREE;
304 	if (direction == KTLS_TX) {
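		/*
		 * Cache the SCMD control words; write_tlstx_cpl()
		 * copies them into the CPL_TX_TLS_SFO of each TLS TX
		 * work request for this connection.
		 */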
305 		toep->tls.scmd0.seqno_numivs =
306 			(V_SCMD_SEQ_NO_CTRL(3) |
307 			 V_SCMD_PROTO_VERSION(t4_tls_proto_ver(tls)) |
308 			 V_SCMD_ENC_DEC_CTRL(SCMD_ENCDECCTRL_ENCRYPT) |
309 			 V_SCMD_CIPH_AUTH_SEQ_CTRL((mac_first == 0)) |
310 			 V_SCMD_CIPH_MODE(t4_tls_cipher_mode(tls)) |
311 			 V_SCMD_AUTH_MODE(t4_tls_auth_mode(tls)) |
312 			 V_SCMD_HMAC_CTRL(t4_tls_hmac_ctrl(tls)) |
313 			 V_SCMD_IV_SIZE(iv_size / 2));
314 
315 		toep->tls.scmd0.ivgen_hdrlen =
316 			(V_SCMD_IV_GEN_CTRL(1) |
317 			 V_SCMD_KEY_CTX_INLINE(0) |
318 			 V_SCMD_TLS_FRAG_ENABLE(1));
319 
320 		toep->tls.iv_len = iv_size;
321 		toep->tls.frag_size = tls->params.max_frame_len;
322 		toep->tls.fcplenmax = get_tp_plen_max(tls);
323 		toep->tls.expn_per_ulp = tls->params.tls_hlen +
324 		    tls->params.tls_tlen;
325 		toep->tls.pdus_per_ulp = 1;
326 		toep->tls.adjusted_plen = toep->tls.expn_per_ulp +
327 		    tls->params.max_frame_len;
328 		toep->tls.tx_key_info_size = t4_tls_key_info_size(tls);
329 	} else {
330 		toep->flags |= TPF_TLS_STARTING | TPF_TLS_RX_QUIESCING;
331 		toep->tls.rx_version = tls->params.tls_vmajor << 8 |
332 		    tls->params.tls_vminor;
333 
334 		CTR2(KTR_CXGBE, "%s: tid %d setting RX_QUIESCE", __func__,
335 		    toep->tid);
336 		t4_set_rx_quiesce(toep);
337 	}
338 
339 	return (0);
340 }
341 
342 void
343 tls_init_toep(struct toepcb *toep)
344 {
345 	struct tls_ofld_info *tls_ofld = &toep->tls;
346 
347 	tls_ofld->rx_key_addr = -1;
348 	tls_ofld->tx_key_addr = -1;
349 }
350 
351 void
352 tls_uninit_toep(struct toepcb *toep)
353 {
354 
355 	clear_tls_keyid(toep);
356 }
357 
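/*
 * Tx credits are in 16-byte units.  MAX_OFLD_TX_CREDITS is the most a
 * single work request can consume, and MIN_OFLD_TLSTX_CREDITS is the
 * smallest useful TLS TX work request (fixed headers, key-context
 * memrd, and an explicit IV); transmission is suspended when fewer
 * credits than that remain.
 */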
358 #define MAX_OFLD_TX_CREDITS (SGE_MAX_WR_LEN / 16)
359 #define	MIN_OFLD_TLSTX_CREDITS(toep)					\
360 	(howmany(sizeof(struct fw_tlstx_data_wr) +			\
361 	    sizeof(struct cpl_tx_tls_sfo) + sizeof(struct ulptx_idata) + \
362 	    sizeof(struct ulptx_sc_memrd) +				\
363 	    AES_BLOCK_LEN + 1, 16))
364 
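/*
 * Construct the FW_TLSTX_DATA_WR header for a request carrying 'plen'
 * payload bytes plus 'expn' bytes of record expansion (TLS header and
 * trailer), with the key context read from on-card (DDR) memory.
 */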
365 static void
366 write_tlstx_wr(struct fw_tlstx_data_wr *txwr, struct toepcb *toep,
367     unsigned int plen, unsigned int expn, uint8_t credits, int shove,
368     int num_ivs)
369 {
370 	struct tls_ofld_info *tls_ofld = &toep->tls;
371 	unsigned int len = plen + expn;
372 
373 	txwr->op_to_immdlen = htobe32(V_WR_OP(FW_TLSTX_DATA_WR) |
374 	    V_FW_TLSTX_DATA_WR_COMPL(1) |
375 	    V_FW_TLSTX_DATA_WR_IMMDLEN(0));
376 	txwr->flowid_len16 = htobe32(V_FW_TLSTX_DATA_WR_FLOWID(toep->tid) |
377 	    V_FW_TLSTX_DATA_WR_LEN16(credits));
378 	txwr->plen = htobe32(len);
379 	txwr->lsodisable_to_flags = htobe32(V_TX_ULP_MODE(ULP_MODE_TLS) |
380 	    V_TX_URG(0) | /* F_T6_TX_FORCE | */ V_TX_SHOVE(shove));
381 	txwr->ctxloc_to_exp = htobe32(V_FW_TLSTX_DATA_WR_NUMIVS(num_ivs) |
382 	    V_FW_TLSTX_DATA_WR_EXP(expn) |
383 	    V_FW_TLSTX_DATA_WR_CTXLOC(TLS_SFO_WR_CONTEXTLOC_DDR) |
384 	    V_FW_TLSTX_DATA_WR_IVDSGL(0) |
385 	    V_FW_TLSTX_DATA_WR_KEYSIZE(tls_ofld->tx_key_info_size >> 4));
386 	txwr->mfs = htobe16(tls_ofld->frag_size);
387 	txwr->adjustedplen_pkd = htobe16(
388 	    V_FW_TLSTX_DATA_WR_ADJUSTEDPLEN(tls_ofld->adjusted_plen));
389 	txwr->expinplenmax_pkd = htobe16(
390 	    V_FW_TLSTX_DATA_WR_EXPINPLENMAX(tls_ofld->expn_per_ulp));
391 	txwr->pdusinplenmax_pkd =
392 	    V_FW_TLSTX_DATA_WR_PDUSINPLENMAX(tls_ofld->pdus_per_ulp);
393 }
394 
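/*
 * Construct the CPL_TX_TLS_SFO for a single record: content type,
 * segment length, and the cached SCMD words.  scmd1 carries the
 * 64-bit TLS record sequence number.
 */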
395 static void
396 write_tlstx_cpl(struct cpl_tx_tls_sfo *cpl, struct toepcb *toep,
397     struct tls_hdr *tls_hdr, unsigned int plen, uint8_t rec_type,
398     uint64_t seqno)
399 {
400 	struct tls_ofld_info *tls_ofld = &toep->tls;
401 	int data_type, seglen;
402 
403 	seglen = plen;
404 	data_type = tls_content_type(rec_type);
405 	cpl->op_to_seg_len = htobe32(V_CPL_TX_TLS_SFO_OPCODE(CPL_TX_TLS_SFO) |
406 	    V_CPL_TX_TLS_SFO_DATA_TYPE(data_type) |
407 	    V_CPL_TX_TLS_SFO_CPL_LEN(2) | V_CPL_TX_TLS_SFO_SEG_LEN(seglen));
408 	cpl->pld_len = htobe32(plen);
409 	if (data_type == CPL_TX_TLS_SFO_TYPE_CUSTOM)
410 		cpl->type_protover = htobe32(V_CPL_TX_TLS_SFO_TYPE(rec_type));
411 	cpl->seqno_numivs = htobe32(tls_ofld->scmd0.seqno_numivs |
412 	    V_SCMD_NUM_IVS(1));
413 	cpl->ivgen_hdrlen = htobe32(tls_ofld->scmd0.ivgen_hdrlen);
414 	cpl->scmd1 = htobe64(seqno);
415 }
416 
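/*
 * Count the physically-contiguous page runs in an unmapped (M_EXTPG)
 * mbuf; each run becomes one DSGL entry.  For example, pages at
 * 0x10000, 0x11000, and 0x40000 form two runs.
 */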
417 static int
418 count_ext_pgs_segs(struct mbuf *m)
419 {
420 	vm_paddr_t nextpa;
421 	u_int i, nsegs;
422 
423 	MPASS(m->m_epg_npgs > 0);
424 	nsegs = 1;
425 	nextpa = m->m_epg_pa[0] + PAGE_SIZE;
426 	for (i = 1; i < m->m_epg_npgs; i++) {
427 		if (nextpa != m->m_epg_pa[i])
428 			nsegs++;
429 		nextpa = m->m_epg_pa[i] + PAGE_SIZE;
430 	}
431 	return (nsegs);
432 }
433 
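/*
 * Write a ULP_TX_SC_DSGL describing the payload pages of an M_EXTPG
 * mbuf, coalescing physically-contiguous pages into single entries.
 * The first entry is carried in the ulptx_sgl header (addr0/len0);
 * the rest are packed two per ulptx_sge_pair.
 */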
434 static void
435 write_ktlstx_sgl(void *dst, struct mbuf *m, int nsegs)
436 {
437 	struct ulptx_sgl *usgl = dst;
438 	vm_paddr_t pa;
439 	uint32_t len;
440 	int i, j;
441 
442 	KASSERT(nsegs > 0, ("%s: nsegs 0", __func__));
443 
444 	usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) |
445 	    V_ULPTX_NSGE(nsegs));
446 
447 	/* Figure out the first S/G length. */
448 	pa = m->m_epg_pa[0] + m->m_epg_1st_off;
449 	usgl->addr0 = htobe64(pa);
450 	len = m_epg_pagelen(m, 0, m->m_epg_1st_off);
451 	pa += len;
452 	for (i = 1; i < m->m_epg_npgs; i++) {
453 		if (m->m_epg_pa[i] != pa)
454 			break;
455 		len += m_epg_pagelen(m, i, 0);
456 		pa += m_epg_pagelen(m, i, 0);
457 	}
458 	usgl->len0 = htobe32(len);
459 #ifdef INVARIANTS
460 	nsegs--;
461 #endif
462 
463 	j = -1;
464 	for (; i < m->m_epg_npgs; i++) {
465 		if (j == -1 || m->m_epg_pa[i] != pa) {
466 			if (j >= 0)
467 				usgl->sge[j / 2].len[j & 1] = htobe32(len);
468 			j++;
469 #ifdef INVARIANTS
470 			nsegs--;
471 #endif
472 			pa = m->m_epg_pa[i];
473 			usgl->sge[j / 2].addr[j & 1] = htobe64(pa);
474 			len = m_epg_pagelen(m, i, 0);
475 			pa += len;
476 		} else {
477 			len += m_epg_pagelen(m, i, 0);
478 			pa += m_epg_pagelen(m, i, 0);
479 		}
480 	}
481 	if (j >= 0) {
482 		usgl->sge[j / 2].len[j & 1] = htobe32(len);
483 
484 		if ((j & 1) == 0)
485 			usgl->sge[j / 2].len[1] = htobe32(0);
486 	}
487 	KASSERT(nsegs == 0, ("%s: nsegs %d, m %p", __func__, nsegs, m));
488 }
489 
490 /*
491  * Similar to t4_push_frames() but handles sockets that contain TLS
492  * record mbufs.
493  */
494 void
495 t4_push_ktls(struct adapter *sc, struct toepcb *toep, int drop)
496 {
497 	struct tls_hdr *thdr;
498 	struct fw_tlstx_data_wr *txwr;
499 	struct cpl_tx_tls_sfo *cpl;
500 	struct ulptx_idata *idata;
501 	struct ulptx_sc_memrd *memrd;
502 	struct wrqe *wr;
503 	struct mbuf *m;
504 	u_int nsegs, credits, wr_len;
505 	u_int expn_size;
506 	struct inpcb *inp = toep->inp;
507 	struct tcpcb *tp = intotcpcb(inp);
508 	struct socket *so = inp->inp_socket;
509 	struct sockbuf *sb = &so->so_snd;
510 	struct mbufq *pduq = &toep->ulp_pduq;
511 	int tls_size, tx_credits, shove, sowwakeup;
512 	struct ofld_tx_sdesc *txsd;
513 	char *buf;
514 	bool tls13;
515 
516 	INP_WLOCK_ASSERT(inp);
517 	KASSERT(toep->flags & TPF_FLOWC_WR_SENT,
518 	    ("%s: flowc_wr not sent for tid %u.", __func__, toep->tid));
519 
520 	KASSERT(ulp_mode(toep) == ULP_MODE_NONE ||
521 	    ulp_mode(toep) == ULP_MODE_TCPDDP || ulp_mode(toep) == ULP_MODE_TLS,
522 	    ("%s: ulp_mode %u for toep %p", __func__, ulp_mode(toep), toep));
523 	KASSERT(tls_tx_key(toep),
524 	    ("%s: TX key not set for toep %p", __func__, toep));
525 
526 #ifdef VERBOSE_TRACES
527 	CTR5(KTR_CXGBE, "%s: tid %d toep flags %#x tp flags %#x drop %d",
528 	    __func__, toep->tid, toep->flags, tp->t_flags, drop);
529 #endif
530 	if (__predict_false(toep->flags & TPF_ABORT_SHUTDOWN))
531 		return;
532 
533 #ifdef RATELIMIT
534 	if (__predict_false(inp->inp_flags2 & INP_RATE_LIMIT_CHANGED) &&
535 	    (update_tx_rate_limit(sc, toep, so->so_max_pacing_rate) == 0)) {
536 		inp->inp_flags2 &= ~INP_RATE_LIMIT_CHANGED;
537 	}
538 #endif
539 
540 	/*
541 	 * This function doesn't resume by itself.  Someone else must clear the
542 	 * flag and call this function.
543 	 */
544 	if (__predict_false(toep->flags & TPF_TX_SUSPENDED)) {
545 		KASSERT(drop == 0,
546 		    ("%s: drop (%d) != 0 but tx is suspended", __func__, drop));
547 		return;
548 	}
549 
550 	tls13 = toep->tls.tls13;
551 	txsd = &toep->txsd[toep->txsd_pidx];
552 	for (;;) {
553 		tx_credits = min(toep->tx_credits, MAX_OFLD_TX_CREDITS);
554 
555 		if (__predict_false((m = mbufq_first(pduq)) != NULL)) {
556 			if (!t4_push_raw_wr(sc, toep, m)) {
557 				toep->flags |= TPF_TX_SUSPENDED;
558 				return;
559 			}
560 
561 			(void)mbufq_dequeue(pduq);
562 
563 			txsd = &toep->txsd[toep->txsd_pidx];
564 			continue;
565 		}
566 
567 		SOCKBUF_LOCK(sb);
568 		sowwakeup = drop;
569 		if (drop) {
570 			sbdrop_locked(sb, drop);
571 			drop = 0;
572 		}
573 
574 		m = sb->sb_sndptr != NULL ? sb->sb_sndptr->m_next : sb->sb_mb;
575 
576 		/*
577 		 * Send a FIN if requested, but only if there's no
578 		 * more data to send.
579 		 */
580 		if (m == NULL && toep->flags & TPF_SEND_FIN) {
581 			if (sowwakeup)
582 				sowwakeup_locked(so);
583 			else
584 				SOCKBUF_UNLOCK(sb);
585 			SOCKBUF_UNLOCK_ASSERT(sb);
586 			t4_close_conn(sc, toep);
587 			return;
588 		}
589 
590 		/*
591 		 * If there is no ready data to send, wait until more
592 		 * data arrives.
593 		 */
594 		if (m == NULL || (m->m_flags & M_NOTREADY) != 0) {
595 			if (sowwakeup)
596 				sowwakeup_locked(so);
597 			else
598 				SOCKBUF_UNLOCK(sb);
599 			SOCKBUF_UNLOCK_ASSERT(sb);
600 #ifdef VERBOSE_TRACES
601 			CTR2(KTR_CXGBE, "%s: tid %d no ready data to send",
602 			    __func__, toep->tid);
603 #endif
604 			return;
605 		}
606 
607 		KASSERT(m->m_flags & M_EXTPG, ("%s: mbuf %p is not M_EXTPG",
608 		    __func__, m));
609 		KASSERT(m->m_epg_tls != NULL,
610 		    ("%s: mbuf %p doesn't have TLS session", __func__, m));
611 
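		/*
		 * Each TLS work request is laid out as: FW_TLSTX_DATA_WR,
		 * CPL_TX_TLS_SFO, a ULP_TX_SC_NOOP, a ULP_TX_SC_MEMRD
		 * pointing at the key context, the explicit IV (TLS 1.2
		 * only), and a DSGL covering the payload.
		 */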
612 		/* Calculate WR length. */
613 		wr_len = sizeof(struct fw_tlstx_data_wr) +
614 		    sizeof(struct cpl_tx_tls_sfo) +
615 		    sizeof(struct ulptx_idata) + sizeof(struct ulptx_sc_memrd);
616 
617 		if (!tls13) {
618 			/* Explicit IVs for AES-CBC and AES-GCM are <= 16 bytes. */
619 			MPASS(toep->tls.iv_len <= AES_BLOCK_LEN);
620 			wr_len += AES_BLOCK_LEN;
621 		}
622 
623 		/* Account for SGL in work request length. */
624 		nsegs = count_ext_pgs_segs(m);
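		/*
		 * The first segment lives in the ulptx_sgl header
		 * (addr0/len0); the remaining nsegs - 1 are packed two
		 * per ulptx_sge_pair, three 8-byte words per pair, with
		 * an odd final segment consuming two words.
		 */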
625 		wr_len += sizeof(struct ulptx_sgl) +
626 		    ((3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1)) * 8;
627 
628 		/* Not enough credits for this work request. */
629 		if (howmany(wr_len, 16) > tx_credits) {
630 			if (sowwakeup)
631 				sowwakeup_locked(so);
632 			else
633 				SOCKBUF_UNLOCK(sb);
634 			SOCKBUF_UNLOCK_ASSERT(sb);
635 #ifdef VERBOSE_TRACES
636 			CTR5(KTR_CXGBE,
637 	    "%s: tid %d mbuf %p requires %d credits, but only %d available",
638 			    __func__, toep->tid, m, howmany(wr_len, 16),
639 			    tx_credits);
640 #endif
641 			toep->flags |= TPF_TX_SUSPENDED;
642 			return;
643 		}
644 
645 		/* Shove if there is no additional data pending. */
646 		shove = ((m->m_next == NULL ||
647 		    (m->m_next->m_flags & M_NOTREADY) != 0)) &&
648 		    (tp->t_flags & TF_MORETOCOME) == 0;
649 
650 		if (sb->sb_flags & SB_AUTOSIZE &&
651 		    V_tcp_do_autosndbuf &&
652 		    sb->sb_hiwat < V_tcp_autosndbuf_max &&
653 		    sbused(sb) >= sb->sb_hiwat * 7 / 8) {
654 			int newsize = min(sb->sb_hiwat + V_tcp_autosndbuf_inc,
655 			    V_tcp_autosndbuf_max);
656 
657 			if (!sbreserve_locked(so, SO_SND, newsize, NULL))
658 				sb->sb_flags &= ~SB_AUTOSIZE;
659 			else
660 				sowwakeup = 1;	/* room available */
661 		}
662 		if (sowwakeup)
663 			sowwakeup_locked(so);
664 		else
665 			SOCKBUF_UNLOCK(sb);
666 		SOCKBUF_UNLOCK_ASSERT(sb);
667 
668 		if (__predict_false(toep->flags & TPF_FIN_SENT))
669 			panic("%s: excess tx.", __func__);
670 
671 		wr = alloc_wrqe(roundup2(wr_len, 16), &toep->ofld_txq->wrq);
672 		if (wr == NULL) {
673 			/* XXX: how will we recover from this? */
674 			toep->flags |= TPF_TX_SUSPENDED;
675 			return;
676 		}
677 
678 		thdr = (struct tls_hdr *)&m->m_epg_hdr;
679 #ifdef VERBOSE_TRACES
680 		CTR5(KTR_CXGBE, "%s: tid %d TLS record %ju type %d len %#x",
681 		    __func__, toep->tid, m->m_epg_seqno, thdr->type,
682 		    m->m_len);
683 #endif
684 		txwr = wrtod(wr);
685 		cpl = (struct cpl_tx_tls_sfo *)(txwr + 1);
686 		memset(txwr, 0, roundup2(wr_len, 16));
687 		credits = howmany(wr_len, 16);
688 		expn_size = m->m_epg_hdrlen +
689 		    m->m_epg_trllen;
690 		tls_size = m->m_len - expn_size;
691 		write_tlstx_wr(txwr, toep, tls_size, expn_size, credits, shove,
692 		    tls13 ? 0 : 1);
693 		write_tlstx_cpl(cpl, toep, thdr, tls_size,
694 		    tls13 ? m->m_epg_record_type : thdr->type, m->m_epg_seqno);
695 
696 		idata = (struct ulptx_idata *)(cpl + 1);
697 		idata->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_NOOP));
698 		idata->len = htobe32(0);
699 		memrd = (struct ulptx_sc_memrd *)(idata + 1);
700 		memrd->cmd_to_len = htobe32(V_ULPTX_CMD(ULP_TX_SC_MEMRD) |
701 		    V_ULP_TX_SC_MORE(1) |
702 		    V_ULPTX_LEN16(toep->tls.tx_key_info_size >> 4));
703 		memrd->addr = htobe32(toep->tls.tx_key_addr >> 5);
704 
705 		buf = (char *)(memrd + 1);
706 		if (!tls13) {
707 			/* Copy IV. */
708 			memcpy(buf, thdr + 1, toep->tls.iv_len);
709 			buf += AES_BLOCK_LEN;
710 		}
711 
712 		write_ktlstx_sgl(buf, m, nsegs);
713 
714 		KASSERT(toep->tx_credits >= credits,
715 			("%s: not enough credits", __func__));
716 
717 		toep->tx_credits -= credits;
718 
719 		tp->snd_nxt += m->m_len;
720 		tp->snd_max += m->m_len;
721 
722 		SOCKBUF_LOCK(sb);
723 		sb->sb_sndptr = m;
724 		SOCKBUF_UNLOCK(sb);
725 
726 		toep->flags |= TPF_TX_DATA_SENT;
727 		if (toep->tx_credits < MIN_OFLD_TLSTX_CREDITS(toep))
728 			toep->flags |= TPF_TX_SUSPENDED;
729 
730 		KASSERT(toep->txsd_avail > 0, ("%s: no txsd", __func__));
731 		KASSERT(m->m_len <= MAX_OFLD_TX_SDESC_PLEN,
732 		    ("%s: plen %u too large", __func__, m->m_len));
733 		txsd->plen = m->m_len;
734 		txsd->tx_credits = credits;
735 		txsd++;
736 		if (__predict_false(++toep->txsd_pidx == toep->txsd_total)) {
737 			toep->txsd_pidx = 0;
738 			txsd = &toep->txsd[0];
739 		}
740 		toep->txsd_avail--;
741 
742 		counter_u64_add(toep->ofld_txq->tx_toe_tls_records, 1);
743 		counter_u64_add(toep->ofld_txq->tx_toe_tls_octets, m->m_len);
744 
745 		t4_l2t_send(sc, wr, toep->l2te);
746 	}
747 }
748 
749 /*
750  * For TLS data we place mbufs received via CPL_TLS_DATA into
751  * an mbufq in the TLS offload state.  When CPL_RX_TLS_CMP is
752  * received, the completed PDUs are placed into the socket receive
753  * buffer.
754  *
755  * The TLS code reuses the ulp_pdu_reclaimq to hold the pending mbufs.
756  */
757 static int
758 do_tls_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
759 {
760 	struct adapter *sc = iq->adapter;
761 	const struct cpl_tls_data *cpl = mtod(m, const void *);
762 	unsigned int tid = GET_TID(cpl);
763 	struct toepcb *toep = lookup_tid(sc, tid);
764 	struct inpcb *inp = toep->inp;
765 	struct tcpcb *tp;
766 	int len;
767 
768 	/* XXX: Should this match do_rx_data instead? */
769 	KASSERT(!(toep->flags & TPF_SYNQE),
770 	    ("%s: toep %p claims to be a synq entry", __func__, toep));
771 
772 	KASSERT(toep->tid == tid, ("%s: toep tid/atid mismatch", __func__));
773 
774 	/* strip off CPL header */
775 	m_adj(m, sizeof(*cpl));
776 	len = m->m_pkthdr.len;
777 
778 	toep->ofld_rxq->rx_toe_tls_octets += len;
779 
780 	KASSERT(len == G_CPL_TLS_DATA_LENGTH(be32toh(cpl->length_pkd)),
781 	    ("%s: payload length mismatch", __func__));
782 
783 	INP_WLOCK(inp);
784 	if (inp->inp_flags & INP_DROPPED) {
785 		CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x",
786 		    __func__, tid, len, inp->inp_flags);
787 		INP_WUNLOCK(inp);
788 		m_freem(m);
789 		return (0);
790 	}
791 
792 	/* Save TCP sequence number. */
793 	m->m_pkthdr.tls_tcp_seq = be32toh(cpl->seq);
794 
795 	if (mbufq_enqueue(&toep->ulp_pdu_reclaimq, m)) {
796 #ifdef INVARIANTS
797 		panic("Failed to queue TLS data packet");
798 #else
799 		printf("%s: Failed to queue TLS data packet\n", __func__);
800 		INP_WUNLOCK(inp);
801 		m_freem(m);
802 		return (0);
803 #endif
804 	}
805 
806 	tp = intotcpcb(inp);
807 	tp->t_rcvtime = ticks;
808 
809 #ifdef VERBOSE_TRACES
810 	CTR4(KTR_CXGBE, "%s: tid %u len %d seq %u", __func__, tid, len,
811 	    be32toh(cpl->seq));
812 #endif
813 
814 	INP_WUNLOCK(inp);
815 	return (0);
816 }
817 
818 static int
819 do_rx_tls_cmp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
820 {
821 	struct adapter *sc = iq->adapter;
822 	const struct cpl_rx_tls_cmp *cpl = mtod(m, const void *);
823 	struct tlsrx_hdr_pkt *tls_hdr_pkt;
824 	unsigned int tid = GET_TID(cpl);
825 	struct toepcb *toep = lookup_tid(sc, tid);
826 	struct inpcb *inp = toep->inp;
827 	struct tcpcb *tp;
828 	struct socket *so;
829 	struct sockbuf *sb;
830 	struct mbuf *tls_data;
831 	struct tls_get_record *tgr;
832 	struct mbuf *control, *n;
833 	int pdu_length, resid, trailer_len;
834 #if defined(KTR) || defined(INVARIANTS)
835 	int len;
836 #endif
837 
838 	KASSERT(toep->tid == tid, ("%s: toep tid/atid mismatch", __func__));
839 	KASSERT(!(toep->flags & TPF_SYNQE),
840 	    ("%s: toep %p claims to be a synq entry", __func__, toep));
841 
842 	/* strip off CPL header */
843 	m_adj(m, sizeof(*cpl));
844 #if defined(KTR) || defined(INVARIANTS)
845 	len = m->m_pkthdr.len;
846 #endif
847 
848 	toep->ofld_rxq->rx_toe_tls_records++;
849 
850 	KASSERT(len == G_CPL_RX_TLS_CMP_LENGTH(be32toh(cpl->pdulength_length)),
851 	    ("%s: payload length mismatch", __func__));
852 
853 	INP_WLOCK(inp);
854 	if (inp->inp_flags & INP_DROPPED) {
855 		CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x",
856 		    __func__, tid, len, inp->inp_flags);
857 		INP_WUNLOCK(inp);
858 		m_freem(m);
859 		return (0);
860 	}
861 
862 	pdu_length = G_CPL_RX_TLS_CMP_PDULENGTH(be32toh(cpl->pdulength_length));
863 
864 	so = inp_inpcbtosocket(inp);
865 	tp = intotcpcb(inp);
866 
867 #ifdef VERBOSE_TRACES
868 	CTR6(KTR_CXGBE, "%s: tid %u PDU len %d len %d seq %u, rcv_nxt %u",
869 	    __func__, tid, pdu_length, len, be32toh(cpl->seq), tp->rcv_nxt);
870 #endif
871 
872 	tp->rcv_nxt += pdu_length;
873 	KASSERT(tp->rcv_wnd >= pdu_length,
874 	    ("%s: negative window size", __func__));
875 	tp->rcv_wnd -= pdu_length;
876 
877 	/* XXX: Not sure what to do about urgent data. */
878 
879 	/*
880 	 * The payload of this CPL is the TLS header followed by
881 	 * additional fields.  For TLS 1.3 the type field holds the
882 	 * inner record type and the length field has been updated to
883 	 * strip the inner record type, padding, and MAC.
884 	 */
885 	KASSERT(m->m_len >= sizeof(*tls_hdr_pkt),
886 	    ("%s: payload too small", __func__));
887 	tls_hdr_pkt = mtod(m, void *);
888 
889 	tls_data = mbufq_dequeue(&toep->ulp_pdu_reclaimq);
890 	if (tls_data != NULL) {
891 		KASSERT(be32toh(cpl->seq) == tls_data->m_pkthdr.tls_tcp_seq,
892 		    ("%s: sequence mismatch", __func__));
893 	}
894 
895 	/*
896 	 * Report decryption errors as EBADMSG.
897 	 *
898 	 * XXX: To support rekeying for TLS 1.3 this will eventually
899 	 * have to be updated to recrypt the data with the old key and
900 	 * then decrypt with the new key.  Punt for now as KTLS
901 	 * doesn't yet support rekeying.
902 	 */
903 	if ((tls_hdr_pkt->res_to_mac_error & M_TLSRX_HDR_PKT_ERROR) != 0) {
904 		CTR4(KTR_CXGBE, "%s: tid %u TLS error %#x ddp_vld %#x",
905 		    __func__, toep->tid, tls_hdr_pkt->res_to_mac_error,
906 		    be32toh(cpl->ddp_valid));
907 		m_freem(m);
908 		m_freem(tls_data);
909 
910 		CURVNET_SET(toep->vnet);
911 		so->so_error = EBADMSG;
912 		sorwakeup(so);
913 
914 		INP_WUNLOCK(inp);
915 		CURVNET_RESTORE();
916 
917 		return (0);
918 	}
919 
920 	/* For TLS 1.3 trim the header and trailer. */
921 	if (toep->tls.tls13) {
922 		KASSERT(tls_data != NULL, ("%s: TLS 1.3 record without data",
923 		    __func__));
924 		MPASS(tls_data->m_pkthdr.len == pdu_length);
925 		m_adj(tls_data, sizeof(struct tls_record_layer));
926 		if (tls_data->m_pkthdr.len > be16toh(tls_hdr_pkt->length))
927 			tls_data->m_pkthdr.len = be16toh(tls_hdr_pkt->length);
928 		resid = tls_data->m_pkthdr.len;
929 		if (resid == 0) {
930 			m_freem(tls_data);
931 			tls_data = NULL;
932 		} else {
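			/*
			 * Trim the chain to 'resid' bytes: truncate
			 * the mbuf where the new end falls and free
			 * everything after it.
			 */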
933 			for (n = tls_data;; n = n->m_next) {
934 				if (n->m_len < resid) {
935 					resid -= n->m_len;
936 					continue;
937 				}
938 
939 				n->m_len = resid;
940 				m_freem(n->m_next);
941 				n->m_next = NULL;
942 				break;
943 			}
944 		}
945 	}
946 
947 	/* Handle data received after the socket is closed. */
948 	sb = &so->so_rcv;
949 	SOCKBUF_LOCK(sb);
950 	if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) {
951 		struct epoch_tracker et;
952 
953 		CTR3(KTR_CXGBE, "%s: tid %u, excess rx (%d bytes)",
954 		    __func__, tid, pdu_length);
955 		m_freem(m);
956 		m_freem(tls_data);
957 		SOCKBUF_UNLOCK(sb);
958 		INP_WUNLOCK(inp);
959 
960 		CURVNET_SET(toep->vnet);
961 		NET_EPOCH_ENTER(et);
962 		INP_WLOCK(inp);
963 		tp = tcp_drop(tp, ECONNRESET);
964 		if (tp != NULL)
965 			INP_WUNLOCK(inp);
966 		NET_EPOCH_EXIT(et);
967 		CURVNET_RESTORE();
968 
969 		return (0);
970 	}
971 
972 	/*
973 	 * If there is any data in the 'sb_mtls' chain of the socket
974 	 * or we aren't able to allocate the control mbuf, append the
975 	 * record as a CSUM_TLS_DECRYPTED packet to 'sb_mtls' rather
976 	 * than as a decrypted record to 'sb_m'.
977 	 */
978 	if (sb->sb_mtls != NULL)
979 		control = NULL;
980 	else
981 		control = sbcreatecontrol(NULL, sizeof(*tgr), TLS_GET_RECORD,
982 		    IPPROTO_TCP, M_NOWAIT);
983 
984 	if (control != NULL) {
985 		tgr = (struct tls_get_record *)
986 		    CMSG_DATA(mtod(control, struct cmsghdr *));
987 		memset(tgr, 0, sizeof(*tgr));
988 		tgr->tls_type = tls_hdr_pkt->type;
989 		tgr->tls_vmajor = be16toh(tls_hdr_pkt->version) >> 8;
990 		tgr->tls_vminor = be16toh(tls_hdr_pkt->version) & 0xff;
991 		if (tls_data != NULL) {
992 			m_last(tls_data)->m_flags |= M_EOR;
993 			tgr->tls_length = htobe16(tls_data->m_pkthdr.len);
994 		} else
995 			tgr->tls_length = 0;
996 
997 		m_freem(m);
998 		m = tls_data;
999 	} else {
1000 		M_ASSERTPKTHDR(m);
1001 
1002 		/* It's ok that any explicit IV is missing. */
1003 		m->m_len = sb->sb_tls_info->params.tls_hlen;
1004 		m->m_pkthdr.csum_flags |= CSUM_TLS_DECRYPTED;
1005 		m->m_pkthdr.len = m->m_len;
1006 		if (tls_data != NULL) {
1007 			m->m_pkthdr.len += tls_data->m_pkthdr.len;
1008 			m_demote_pkthdr(tls_data);
1009 			m->m_next = tls_data;
1010 		}
1011 
1012 		/*
1013 		 * Grow the chain by the trailer, but without
1014 		 * contents.  The trailer will be thrown away by
1015 		 * ktls_decrypt.  Note that ktls_decrypt assumes the
1016 		 * trailer is tls_tlen bytes long, so append that many
1017 		 * bytes not the actual trailer size computed from
1018 		 * pdu_length.
1019 		 */
1020 		trailer_len = sb->sb_tls_info->params.tls_tlen;
1021 		if (tls_data != NULL) {
1022 			m_last(tls_data)->m_len += trailer_len;
1023 			tls_data = NULL;
1024 		} else
1025 			m->m_len += trailer_len;
1026 		m->m_pkthdr.len += trailer_len;
1027 		tls_hdr_pkt->length = htobe16(m->m_pkthdr.len -
1028 		    sizeof(struct tls_record_layer));
1029 	}
1030 
1031 	/* receive buffer autosize */
1032 	MPASS(toep->vnet == so->so_vnet);
1033 	CURVNET_SET(toep->vnet);
1034 	if (sb->sb_flags & SB_AUTOSIZE &&
1035 	    V_tcp_do_autorcvbuf &&
1036 	    sb->sb_hiwat < V_tcp_autorcvbuf_max &&
1037 	    m->m_pkthdr.len > (sbspace(sb) / 8 * 7)) {
1038 		unsigned int hiwat = sb->sb_hiwat;
1039 		unsigned int newsize = min(hiwat + sc->tt.autorcvbuf_inc,
1040 		    V_tcp_autorcvbuf_max);
1041 
1042 		if (!sbreserve_locked(so, SO_RCV, newsize, NULL))
1043 			sb->sb_flags &= ~SB_AUTOSIZE;
1044 	}
1045 
1046 	if (control != NULL)
1047 		sbappendcontrol_locked(sb, m, control, 0);
1048 	else
1049 		sbappendstream_locked(sb, m, 0);
1050 	t4_rcvd_locked(&toep->td->tod, tp);
1051 
1052 	sorwakeup_locked(so);
1053 	SOCKBUF_UNLOCK_ASSERT(sb);
1054 
1055 	INP_WUNLOCK(inp);
1056 	CURVNET_RESTORE();
1057 	return (0);
1058 }
1059 
1060 void
1061 do_rx_data_tls(const struct cpl_rx_data *cpl, struct toepcb *toep,
1062     struct mbuf *m)
1063 {
1064 	struct inpcb *inp = toep->inp;
1065 	struct tls_ofld_info *tls_ofld = &toep->tls;
1066 	struct tls_hdr *hdr;
1067 	struct tcpcb *tp;
1068 	struct socket *so;
1069 	struct sockbuf *sb;
1070 	int len;
1071 
1072 	len = m->m_pkthdr.len;
1073 
1074 	INP_WLOCK_ASSERT(inp);
1075 
1076 	so = inp_inpcbtosocket(inp);
1077 	tp = intotcpcb(inp);
1078 	sb = &so->so_rcv;
1079 	SOCKBUF_LOCK(sb);
1080 	CURVNET_SET(toep->vnet);
1081 
1082 	tp->rcv_nxt += len;
1083 	KASSERT(tp->rcv_wnd >= len, ("%s: negative window size", __func__));
1084 	tp->rcv_wnd -= len;
1085 
1086 	/* Do we have a full TLS header? */
1087 	if (len < sizeof(*hdr)) {
1088 		CTR3(KTR_CXGBE, "%s: tid %u len %d: too short for a TLS header",
1089 		    __func__, toep->tid, len);
1090 		so->so_error = EMSGSIZE;
1091 		goto out;
1092 	}
1093 	hdr = mtod(m, struct tls_hdr *);
1094 
1095 	/* Is the header valid? */
1096 	if (be16toh(hdr->version) != tls_ofld->rx_version) {
1097 		CTR3(KTR_CXGBE, "%s: tid %u invalid version %04x",
1098 		    __func__, toep->tid, be16toh(hdr->version));
1099 		so->so_error = EINVAL;
1100 		goto out;
1101 	}
1102 	if (be16toh(hdr->length) < sizeof(*hdr)) {
1103 		CTR3(KTR_CXGBE, "%s: tid %u invalid length %u",
1104 		    __func__, toep->tid, be16toh(hdr->length));
1105 		so->so_error = EBADMSG;
1106 		goto out;
1107 	}
1108 
1109 	/* Did we get a truncated record? */
1110 	if (len < be16toh(hdr->length)) {
1111 		CTR4(KTR_CXGBE, "%s: tid %u truncated TLS record (%d vs %u)",
1112 		    __func__, toep->tid, len, be16toh(hdr->length));
1113 
1114 		so->so_error = EMSGSIZE;
1115 		goto out;
1116 	}
1117 
1118 	/* Is the header type unknown? */
1119 	switch (hdr->type) {
1120 	case CONTENT_TYPE_CCS:
1121 	case CONTENT_TYPE_ALERT:
1122 	case CONTENT_TYPE_APP_DATA:
1123 	case CONTENT_TYPE_HANDSHAKE:
1124 		break;
1125 	default:
1126 		CTR3(KTR_CXGBE, "%s: tid %u invalid TLS record type %u",
1127 		    __func__, toep->tid, hdr->type);
1128 		so->so_error = EBADMSG;
1129 		goto out;
1130 	}
1131 
1132 	/*
1133 	 * Just punt.  Although this could fall back to software
1134 	 * decryption, this case should never really happen.
1135 	 */
1136 	CTR4(KTR_CXGBE, "%s: tid %u dropping TLS record type %u, length %u",
1137 	    __func__, toep->tid, hdr->type, be16toh(hdr->length));
1138 	so->so_error = EBADMSG;
1139 
1140 out:
1141 	sorwakeup_locked(so);
1142 	SOCKBUF_UNLOCK_ASSERT(sb);
1143 
1144 	INP_WUNLOCK(inp);
1145 	CURVNET_RESTORE();
1146 
1147 	m_freem(m);
1148 }
1149 
1150 /*
1151  * Send a work request setting one or more TCB fields to partially or
1152  * fully enable ULP_MODE_TLS.
1153  *
1154  * - If resid == 0, the socket buffer ends at a record boundary
1155  *   (either empty or contains one or more complete records).  Switch
1156  *   to ULP_MODE_TLS (if not already) and enable TLS decryption.
1157  *
1158  * - If resid != 0, the socket buffer contains a partial record.  In
1159  *   this case, switch to ULP_MODE_TLS partially and configure the TCB
1160  *   to pass along the remaining resid bytes undecrypted.  Once they
1161  *   arrive, this is called again with resid == 0 and enables TLS
1162  *   decryption.
1163  */
1164 static void
1165 tls_update_tcb(struct adapter *sc, struct toepcb *toep, uint64_t seqno,
1166     size_t resid)
1167 {
1168 	struct mbuf *m;
1169 	struct work_request_hdr *wrh;
1170 	struct ulp_txpkt *ulpmc;
1171 	int fields, key_offset, len;
1172 
1173 	/*
1174 	 * If we are already in ULP_MODE_TLS, then we should now be at
1175 	 * a record boundary and ready to finish enabling TLS RX.
1176 	 */
1177 	KASSERT(resid == 0 || ulp_mode(toep) == ULP_MODE_NONE,
1178 	    ("%s: tid %d needs %zu more bytes but already ULP_MODE_TLS",
1179 	    __func__, toep->tid, resid));
1180 
1181 	fields = 0;
1182 	if (ulp_mode(toep) == ULP_MODE_NONE) {
1183 		/* 2 writes for the overlay region */
1184 		fields += 2;
1185 	}
1186 
1187 	if (resid == 0) {
1188 		/* W_TCB_TLS_SEQ */
1189 		fields++;
1190 
1191 		/* W_TCB_ULP_RAW */
1192 		fields++;
1193 	} else {
1194 		/* W_TCB_PDU_LEN */
1195 		fields++;
1196 
1197 		/* W_TCB_ULP_RAW */
1198 		fields++;
1199 	}
1200 
1201 	if (ulp_mode(toep) == ULP_MODE_NONE) {
1202 		/* W_TCB_ULP_TYPE */
1203 		fields++;
1204 	}
1205 
1206 	/* W_TCB_T_FLAGS */
1207 	fields++;
1208 
1209 	len = sizeof(*wrh) + fields * roundup2(LEN__SET_TCB_FIELD_ULP, 16);
1210 	KASSERT(len <= SGE_MAX_WR_LEN,
1211 	    ("%s: WR with %d TCB field updates too large", __func__, fields));
1212 
1213 	m = alloc_raw_wr_mbuf(len);
1214 	if (m == NULL) {
1215 		/* XXX */
1216 		panic("%s: out of memory", __func__);
1217 	}
1218 
1219 	wrh = mtod(m, struct work_request_hdr *);
1220 	INIT_ULPTX_WRH(wrh, len, 1, toep->tid);	/* atomic */
1221 	ulpmc = (struct ulp_txpkt *)(wrh + 1);
1222 
1223 	if (ulp_mode(toep) == ULP_MODE_NONE) {
1224 		/*
1225 		 * Clear the TLS overlay region: 1023:832.
1226 		 *
1227 		 * Words 26/27 are always set to zero.  Words 28/29
1228 		 * contain seqno and are set when enabling TLS
1229 		 * decryption.  Word 30 is zero and Word 31 contains
1230 		 * the keyid.
1231 		 */
1232 		ulpmc = mk_set_tcb_field_ulp(sc, ulpmc, toep->tid, 26,
1233 		    0xffffffffffffffff, 0);
1234 
1235 		/*
1236 		 * RX key tags are an index into the key portion of MA
1237 		 * memory stored as an offset from the base address in
1238 		 * units of 64 bytes.
1239 		 */
1240 		key_offset = toep->tls.rx_key_addr - sc->vres.key.start;
1241 		ulpmc = mk_set_tcb_field_ulp(sc, ulpmc, toep->tid, 30,
1242 		    0xffffffffffffffff,
1243 		    (uint64_t)V_TCB_RX_TLS_KEY_TAG(key_offset / 64) << 32);
1244 	}
1245 
1246 	if (resid == 0) {
1247 		/*
1248 		 * The socket buffer is empty or only contains
1249 		 * complete TLS records: Set the sequence number and
1250 		 * enable TLS decryption.
1251 		 */
1252 		CTR3(KTR_CXGBE, "%s: tid %d enable TLS seqno %lu", __func__,
1253 		    toep->tid, seqno);
1254 		ulpmc = mk_set_tcb_field_ulp(sc, ulpmc, toep->tid,
1255 		    W_TCB_RX_TLS_SEQ, V_TCB_RX_TLS_SEQ(M_TCB_RX_TLS_SEQ),
1256 		    V_TCB_RX_TLS_SEQ(seqno));
1257 		ulpmc = mk_set_tcb_field_ulp(sc, ulpmc, toep->tid,
1258 		    W_TCB_ULP_RAW, V_TCB_ULP_RAW(M_TCB_ULP_RAW),
1259 		    V_TCB_ULP_RAW((V_TF_TLS_KEY_SIZE(3) | V_TF_TLS_CONTROL(1) |
1260 		    V_TF_TLS_ACTIVE(1) | V_TF_TLS_ENABLE(1))));
1261 
1262 		toep->flags &= ~TPF_TLS_STARTING;
1263 		toep->flags |= TPF_TLS_RECEIVE;
1264 	} else {
1265 		/*
1266 		 * The socket buffer ends with a partial record with a
1267 		 * full header and needs at least 6 bytes.
1268 		 *
1269 		 * Set PDU length.  This treats the 'resid' bytes as a
1270 		 * TLS PDU: the first 5 bytes serve as a fake header
1271 		 * and the remainder is programmed as the PDU length.
1272 		 */
1273 		ulpmc = mk_set_tcb_field_ulp(sc, ulpmc, toep->tid,
1274 		    W_TCB_PDU_LEN, V_TCB_PDU_LEN(M_TCB_PDU_LEN),
1275 		    V_TCB_PDU_LEN(resid - sizeof(struct tls_hdr)));
1276 		CTR3(KTR_CXGBE, "%s: tid %d setting PDU_LEN to %zu",
1277 		    __func__, toep->tid, resid - sizeof(struct tls_hdr));
1278 
1279 		/* Clear all bits in ULP_RAW except for ENABLE. */
1280 		ulpmc = mk_set_tcb_field_ulp(sc, ulpmc, toep->tid,
1281 		    W_TCB_ULP_RAW, V_TCB_ULP_RAW(M_TCB_ULP_RAW),
1282 		    V_TCB_ULP_RAW(V_TF_TLS_ENABLE(1)));
1283 
1284 		/* Wait for 'resid' bytes to be delivered as CPL_RX_DATA. */
1285 		toep->tls.rx_resid = resid;
1286 	}
1287 
1288 	if (ulp_mode(toep) == ULP_MODE_NONE) {
1289 		/* Set the ULP mode to ULP_MODE_TLS. */
1290 		toep->params.ulp_mode = ULP_MODE_TLS;
1291 		ulpmc = mk_set_tcb_field_ulp(sc, ulpmc, toep->tid,
1292 		    W_TCB_ULP_TYPE, V_TCB_ULP_TYPE(M_TCB_ULP_TYPE),
1293 		    V_TCB_ULP_TYPE(ULP_MODE_TLS));
1294 	}
1295 
1296 	/* Clear TF_RX_QUIESCE. */
1297 	ulpmc = mk_set_tcb_field_ulp(sc, ulpmc, toep->tid, W_TCB_T_FLAGS,
1298 	    V_TF_RX_QUIESCE(1), 0);
1299 
1300 	t4_raw_wr_tx(sc, toep, m);
1301 }
1302 
1303 /*
1304  * Examine the pending data in the socket buffer and either enable TLS
1305  * RX or request more encrypted data.
1306  */
1307 static void
1308 tls_check_rx_sockbuf(struct adapter *sc, struct toepcb *toep,
1309     struct sockbuf *sb)
1310 {
1311 	uint64_t seqno;
1312 	size_t resid;
1313 	bool have_header;
1314 
1315 	SOCKBUF_LOCK_ASSERT(sb);
1316 	MPASS(toep->tls.rx_resid == 0);
1317 
1318 	have_header = ktls_pending_rx_info(sb, &seqno, &resid);
1319 	CTR5(KTR_CXGBE, "%s: tid %d have_header %d seqno %lu resid %zu",
1320 	    __func__, toep->tid, have_header, seqno, resid);
1321 
1322 	/*
1323 	 * If we have a partial header, or fewer bytes than a TLS header
1324 	 * remain to complete a partial record, or this is a T6 (which
1325 	 * only enables TLS RX at a record boundary), re-enable receive
	 * and pause again once more data arrives.
1326 	 */
1327 	if (!have_header || (resid != 0 && (resid < sizeof(struct tls_hdr) ||
1328 	    is_t6(sc)))) {
1329 		CTR(KTR_CXGBE, "%s: tid %d waiting for more data", __func__,
1330 		    toep->tid);
1331 		toep->flags &= ~TPF_TLS_RX_QUIESCED;
1332 		t4_clear_rx_quiesce(toep);
1333 		return;
1334 	}
1335 
1336 	tls_update_tcb(sc, toep, seqno, resid);
1337 }
1338 
1339 void
1340 tls_received_starting_data(struct adapter *sc, struct toepcb *toep,
1341     struct sockbuf *sb, int len)
1342 {
1343 	MPASS(toep->flags & TPF_TLS_STARTING);
1344 
1345 	/* Data was received before quiescing took effect. */
1346 	if ((toep->flags & TPF_TLS_RX_QUIESCING) != 0)
1347 		return;
1348 
1349 	/*
1350 	 * A previous call to tls_check_rx_sockbuf needed more data.
1351 	 * Now that more data has arrived, quiesce receive again and
1352 	 * check the state once the quiesce has completed.
1353 	 */
1354 	if ((toep->flags & TPF_TLS_RX_QUIESCED) == 0) {
1355 		CTR(KTR_CXGBE, "%s: tid %d quiescing", __func__, toep->tid);
1356 		toep->flags |= TPF_TLS_RX_QUIESCING;
1357 		t4_set_rx_quiesce(toep);
1358 		return;
1359 	}
1360 
1361 	KASSERT(len <= toep->tls.rx_resid,
1362 	    ("%s: received excess bytes %d (waiting for %zu)", __func__, len,
1363 	    toep->tls.rx_resid));
1364 	toep->tls.rx_resid -= len;
1365 	if (toep->tls.rx_resid != 0)
1366 		return;
1367 
1368 	tls_check_rx_sockbuf(sc, toep, sb);
1369 }
1370 
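/*
 * Handle the CPL_SET_TCB_RPL for the TF_RX_QUIESCE request sent while
 * starting TLS RX.  Receive is now quiesced, so examine the socket
 * buffer to decide whether TLS RX can be enabled or more data must
 * arrive first.
 */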
1371 static int
1372 do_tls_tcb_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
1373 {
1374 	struct adapter *sc = iq->adapter;
1375 	const struct cpl_set_tcb_rpl *cpl = (const void *)(rss + 1);
1376 	unsigned int tid = GET_TID(cpl);
1377 	struct toepcb *toep;
1378 	struct inpcb *inp;
1379 	struct socket *so;
1380 	struct sockbuf *sb;
1381 
1382 	if (cpl->status != CPL_ERR_NONE)
1383 		panic("XXX: tcp_rpl failed: %d", cpl->status);
1384 
1385 	toep = lookup_tid(sc, tid);
1386 	inp = toep->inp;
1387 	switch (cpl->cookie) {
1388 	case V_WORD(W_TCB_T_FLAGS) | V_COOKIE(CPL_COOKIE_TOM):
1389 		INP_WLOCK(inp);
1390 		if ((toep->flags & TPF_TLS_STARTING) == 0)
1391 			panic("%s: connection is not starting TLS RX\n",
1392 			    __func__);
1393 		MPASS((toep->flags & TPF_TLS_RX_QUIESCING) != 0);
1394 
1395 		toep->flags &= ~TPF_TLS_RX_QUIESCING;
1396 		toep->flags |= TPF_TLS_RX_QUIESCED;
1397 
1398 		so = inp->inp_socket;
1399 		sb = &so->so_rcv;
1400 		SOCKBUF_LOCK(sb);
1401 		tls_check_rx_sockbuf(sc, toep, sb);
1402 		SOCKBUF_UNLOCK(sb);
1403 		INP_WUNLOCK(inp);
1404 		break;
1405 	default:
1406 		panic("XXX: unknown tcb_rpl offset %#x, cookie %#x",
1407 		    G_WORD(cpl->cookie), G_COOKIE(cpl->cookie));
1408 	}
1409 
1410 	return (0);
1411 }
1412 
1413 void
1414 t4_tls_mod_load(void)
1415 {
1416 
1417 	t4_register_cpl_handler(CPL_TLS_DATA, do_tls_data);
1418 	t4_register_cpl_handler(CPL_RX_TLS_CMP, do_rx_tls_cmp);
1419 	t4_register_shared_cpl_handler(CPL_SET_TCB_RPL, do_tls_tcb_rpl,
1420 	    CPL_COOKIE_TOM);
1421 }
1422 
1423 void
1424 t4_tls_mod_unload(void)
1425 {
1426 
1427 	t4_register_cpl_handler(CPL_TLS_DATA, NULL);
1428 	t4_register_cpl_handler(CPL_RX_TLS_CMP, NULL);
1429 	t4_register_shared_cpl_handler(CPL_SET_TCB_RPL, NULL, CPL_COOKIE_TOM);
1430 }
1431 #endif	/* TCP_OFFLOAD */
1432 #endif	/* KERN_TLS */
1433