xref: /freebsd/sys/dev/cxgbe/crypto/t7_kern_tls.c (revision 24e4dcf4ba5e9dedcf89efd358ea3e1fe5867020)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2025 Chelsio Communications
5  * Written by: John Baldwin <jhb@FreeBSD.org>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include "opt_inet.h"
30 #include "opt_inet6.h"
31 #include "opt_kern_tls.h"
32 
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35 
36 #include <sys/param.h>
37 #include <sys/ktr.h>
38 #include <sys/ktls.h>
39 #include <sys/sglist.h>
40 #include <sys/socket.h>
41 #include <sys/socketvar.h>
42 #include <sys/sockbuf.h>
43 #include <netinet/in.h>
44 #include <netinet/in_pcb.h>
45 #include <netinet/ip.h>
46 #include <netinet/ip6.h>
47 #include <netinet/tcp_var.h>
48 #include <opencrypto/cryptodev.h>
49 #include <opencrypto/xform.h>
50 #include <vm/vm.h>
51 #include <vm/pmap.h>
52 
53 #include "common/common.h"
54 #include "common/t4_regs.h"
55 #include "common/t4_regs_values.h"
56 #include "common/t4_tcb.h"
57 #include "t4_l2t.h"
58 #include "t4_clip.h"
59 #include "t4_mp_ring.h"
60 #include "crypto/t4_crypto.h"
61 
62 #if defined(INET) || defined(INET6)
63 
64 #define TLS_HEADER_LENGTH		5
65 
66 struct tls_scmd {
67 	__be32 seqno_numivs;
68 	__be32 ivgen_hdrlen;
69 };
70 
71 struct tlspcb {
72 	struct m_snd_tag com;
73 	struct vi_info *vi;	/* virtual interface */
74 	struct adapter *sc;
75 	struct sge_txq *txq;
76 
77 	int tx_key_addr;
78 	bool inline_key;
79 	bool tls13;
80 	unsigned char enc_mode;
81 
82 	struct tls_scmd scmd0;
83 	struct tls_scmd scmd0_partial;
84 	struct tls_scmd scmd0_short;
85 
86 	unsigned int tx_key_info_size;
87 
88 	uint16_t prev_mss;
89 
90 	/* Fields used for GCM records using GHASH state. */
91 	uint16_t ghash_offset;
92 	uint64_t ghash_tls_seqno;
93 	char ghash[AES_GMAC_HASH_LEN];
94 	bool ghash_valid;
95 	bool ghash_pending;
96 	bool ghash_lcb;
97 	bool queue_mbufs;
98 	uint8_t rx_chid;
99 	uint16_t rx_qid;
100 	struct mbufq pending_mbufs;
101 
102 	/*
103 	 * Only used outside of setup and teardown when using inline
104 	 * keys or for partial GCM mode.
105 	 */
106 	struct tls_keyctx keyctx;
107 };
108 
109 static void t7_tls_tag_free(struct m_snd_tag *mst);
110 static int ktls_setup_keys(struct tlspcb *tlsp,
111     const struct ktls_session *tls, struct sge_txq *txq);
112 
113 static void *zero_buffer;
114 static vm_paddr_t zero_buffer_pa;
115 
116 static const struct if_snd_tag_sw t7_tls_tag_sw = {
117 	.snd_tag_free = t7_tls_tag_free,
118 	.type = IF_SND_TAG_TYPE_TLS
119 };
120 
121 static inline struct tlspcb *
122 mst_to_tls(struct m_snd_tag *t)
123 {
124 	return (__containerof(t, struct tlspcb, com));
125 }
126 
127 static struct tlspcb *
128 alloc_tlspcb(struct ifnet *ifp, struct vi_info *vi, int flags)
129 {
130 	struct port_info *pi = vi->pi;
131 	struct adapter *sc = pi->adapter;
132 	struct tlspcb *tlsp;
133 
134 	tlsp = malloc(sizeof(*tlsp), M_CXGBE, M_ZERO | flags);
135 	if (tlsp == NULL)
136 		return (NULL);
137 
138 	m_snd_tag_init(&tlsp->com, ifp, &t7_tls_tag_sw);
139 	tlsp->vi = vi;
140 	tlsp->sc = sc;
141 	tlsp->tx_key_addr = -1;
142 	tlsp->ghash_offset = -1;
143 	tlsp->rx_chid = pi->rx_chan;
144 	tlsp->rx_qid = sc->sge.rxq[pi->vi->first_rxq].iq.abs_id;
145 	mbufq_init(&tlsp->pending_mbufs, INT_MAX);
146 
147 	return (tlsp);
148 }
149 
150 int
151 t7_tls_tag_alloc(struct ifnet *ifp, union if_snd_tag_alloc_params *params,
152     struct m_snd_tag **pt)
153 {
154 	const struct ktls_session *tls;
155 	struct tlspcb *tlsp;
156 	struct adapter *sc;
157 	struct vi_info *vi;
158 	struct inpcb *inp;
159 	struct sge_txq *txq;
160 	int error, iv_size, keyid, mac_first;
161 
162 	tls = params->tls.tls;
163 
164 	/* TLS 1.1 through TLS 1.3 are currently supported. */
165 	if (tls->params.tls_vmajor != TLS_MAJOR_VER_ONE ||
166 	    tls->params.tls_vminor < TLS_MINOR_VER_ONE ||
167 	    tls->params.tls_vminor > TLS_MINOR_VER_THREE)
168 		return (EPROTONOSUPPORT);
169 
170 	/* Sanity check values in *tls. */
171 	switch (tls->params.cipher_algorithm) {
172 	case CRYPTO_AES_CBC:
173 		/* XXX: Explicitly ignore any provided IV. */
174 		switch (tls->params.cipher_key_len) {
175 		case 128 / 8:
176 		case 192 / 8:
177 		case 256 / 8:
178 			break;
179 		default:
180 			return (EINVAL);
181 		}
182 		switch (tls->params.auth_algorithm) {
183 		case CRYPTO_SHA1_HMAC:
184 		case CRYPTO_SHA2_256_HMAC:
185 		case CRYPTO_SHA2_384_HMAC:
186 			break;
187 		default:
188 			return (EPROTONOSUPPORT);
189 		}
190 		iv_size = AES_BLOCK_LEN;
191 		mac_first = 1;
192 		break;
193 	case CRYPTO_AES_NIST_GCM_16:
194 		switch (tls->params.cipher_key_len) {
195 		case 128 / 8:
196 		case 192 / 8:
197 		case 256 / 8:
198 			break;
199 		default:
200 			return (EINVAL);
201 		}
202 
203 		/*
204 		 * The IV size for TLS 1.2 is the explicit IV in the
205 		 * record header.  For TLS 1.3 it is the size of the
206 		 * sequence number.
207 		 */
208 		iv_size = 8;
209 		mac_first = 0;
210 		break;
211 	default:
212 		return (EPROTONOSUPPORT);
213 	}
214 
215 	vi = if_getsoftc(ifp);
216 	sc = vi->adapter;
217 
218 	tlsp = alloc_tlspcb(ifp, vi, M_WAITOK);
219 
220 	/*
221 	 * Pointers with the low bit set in the pointer can't
222 	 * be stored as the cookie in the CPL_FW6_PLD reply.
223 	 */
224 	if (((uintptr_t)tlsp & CPL_FW6_COOKIE_MASK) != 0) {
225 		error = EINVAL;
226 		goto failed;
227 	}
228 
229 	tlsp->tls13 = tls->params.tls_vminor == TLS_MINOR_VER_THREE;
230 
231 	if (sc->tlst.inline_keys)
232 		keyid = -1;
233 	else
234 		keyid = t4_alloc_tls_keyid(sc);
235 	if (keyid < 0) {
236 		CTR(KTR_CXGBE, "%s: %p using immediate key ctx", __func__,
237 		    tlsp);
238 		tlsp->inline_key = true;
239 	} else {
240 		tlsp->tx_key_addr = keyid;
241 		CTR(KTR_CXGBE, "%s: %p allocated TX key addr %#x", __func__,
242 		    tlsp, tlsp->tx_key_addr);
243 	}
244 
245 	inp = params->tls.inp;
246 	INP_RLOCK(inp);
247 	if (inp->inp_flags & INP_DROPPED) {
248 		INP_RUNLOCK(inp);
249 		error = ECONNRESET;
250 		goto failed;
251 	}
252 
253 	txq = &sc->sge.txq[vi->first_txq];
254 	if (inp->inp_flowtype != M_HASHTYPE_NONE)
255 		txq += ((inp->inp_flowid % (vi->ntxq - vi->rsrv_noflowq)) +
256 		    vi->rsrv_noflowq);
257 	tlsp->txq = txq;
258 	INP_RUNLOCK(inp);
259 
260 	error = ktls_setup_keys(tlsp, tls, txq);
261 	if (error)
262 		goto failed;
263 
264 	tlsp->enc_mode = t4_tls_cipher_mode(tls);
265 	tlsp->tx_key_info_size = t4_tls_key_info_size(tls);
266 
267 	/* The SCMD fields used when encrypting a full TLS record. */
268 	if (tlsp->tls13)
269 		tlsp->scmd0.seqno_numivs = V_SCMD_SEQ_NO_CTRL(0);
270 	else
271 		tlsp->scmd0.seqno_numivs = V_SCMD_SEQ_NO_CTRL(3);
272 	tlsp->scmd0.seqno_numivs |=
273 	    V_SCMD_PROTO_VERSION(t4_tls_proto_ver(tls)) |
274 	    V_SCMD_ENC_DEC_CTRL(SCMD_ENCDECCTRL_ENCRYPT) |
275 	    V_SCMD_CIPH_AUTH_SEQ_CTRL((mac_first == 0)) |
276 	    V_SCMD_CIPH_MODE(tlsp->enc_mode) |
277 	    V_SCMD_AUTH_MODE(t4_tls_auth_mode(tls)) |
278 	    V_SCMD_HMAC_CTRL(t4_tls_hmac_ctrl(tls)) |
279 	    V_SCMD_IV_SIZE(iv_size / 2) | V_SCMD_NUM_IVS(1);
280 	tlsp->scmd0.seqno_numivs = htobe32(tlsp->scmd0.seqno_numivs);
281 
282 	tlsp->scmd0.ivgen_hdrlen = V_SCMD_IV_GEN_CTRL(0) |
283 	    V_SCMD_TLS_FRAG_ENABLE(0);
284 	if (tlsp->inline_key)
285 		tlsp->scmd0.ivgen_hdrlen |= V_SCMD_KEY_CTX_INLINE(1);
286 
287 	/*
288 	 * The SCMD fields used when encrypting a short TLS record
289 	 * (no trailer and possibly a truncated payload).
290 	 */
291 	tlsp->scmd0_short.seqno_numivs = V_SCMD_SEQ_NO_CTRL(0) |
292 	    V_SCMD_PROTO_VERSION(SCMD_PROTO_VERSION_GENERIC) |
293 	    V_SCMD_ENC_DEC_CTRL(SCMD_ENCDECCTRL_ENCRYPT) |
294 	    V_SCMD_CIPH_AUTH_SEQ_CTRL((mac_first == 0)) |
295 	    V_SCMD_AUTH_MODE(SCMD_AUTH_MODE_NOP) |
296 	    V_SCMD_HMAC_CTRL(SCMD_HMAC_CTRL_NOP) |
297 	    V_SCMD_IV_SIZE(AES_BLOCK_LEN / 2) | V_SCMD_NUM_IVS(0);
298 	if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_GCM)
299 		tlsp->scmd0_short.seqno_numivs |=
300 		    V_SCMD_CIPH_MODE(SCMD_CIPH_MODE_AES_CTR);
301 	else
302 		tlsp->scmd0_short.seqno_numivs |=
303 		    V_SCMD_CIPH_MODE(tlsp->enc_mode);
304 	tlsp->scmd0_short.seqno_numivs =
305 	    htobe32(tlsp->scmd0_short.seqno_numivs);
306 
307 	tlsp->scmd0_short.ivgen_hdrlen = V_SCMD_IV_GEN_CTRL(0) |
308 	    V_SCMD_TLS_FRAG_ENABLE(0) | V_SCMD_AADIVDROP(1);
309 	if (tlsp->inline_key)
310 		tlsp->scmd0_short.ivgen_hdrlen |= V_SCMD_KEY_CTX_INLINE(1);
311 
312 	/*
313 	 * The SCMD fields used when encrypting a short TLS record
314 	 * using a partial GHASH.
315 	 */
316 	tlsp->scmd0_partial.seqno_numivs = V_SCMD_SEQ_NO_CTRL(0) |
317 	    V_SCMD_PROTO_VERSION(SCMD_PROTO_VERSION_GENERIC) |
318 	    V_SCMD_ENC_DEC_CTRL(SCMD_ENCDECCTRL_ENCRYPT) |
319 	    V_SCMD_CIPH_AUTH_SEQ_CTRL((mac_first == 0)) |
320 	    V_SCMD_CIPH_MODE(tlsp->enc_mode) |
321 	    V_SCMD_AUTH_MODE(t4_tls_auth_mode(tls)) |
322 	    V_SCMD_HMAC_CTRL(t4_tls_hmac_ctrl(tls)) |
323 	    V_SCMD_IV_SIZE(AES_BLOCK_LEN / 2) | V_SCMD_NUM_IVS(1);
324 	tlsp->scmd0_partial.seqno_numivs =
325 	    htobe32(tlsp->scmd0_partial.seqno_numivs);
326 
327 	tlsp->scmd0_partial.ivgen_hdrlen = V_SCMD_IV_GEN_CTRL(0) |
328 	    V_SCMD_TLS_FRAG_ENABLE(0) | V_SCMD_AADIVDROP(1) |
329 	    V_SCMD_KEY_CTX_INLINE(1);
330 
331 	TXQ_LOCK(txq);
332 	if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_GCM)
333 		txq->kern_tls_gcm++;
334 	else
335 		txq->kern_tls_cbc++;
336 	TXQ_UNLOCK(txq);
337 	*pt = &tlsp->com;
338 	return (0);
339 
340 failed:
341 	m_snd_tag_rele(&tlsp->com);
342 	return (error);
343 }
344 
345 static int
346 ktls_setup_keys(struct tlspcb *tlsp, const struct ktls_session *tls,
347     struct sge_txq *txq)
348 {
349 	struct tls_key_req *kwr;
350 	struct tls_keyctx *kctx;
351 	void *items[1];
352 	struct mbuf *m;
353 	int error;
354 
355 	/*
356 	 * Store the salt and keys in the key context.  For
357 	 * connections with an inline key, this key context is passed
358 	 * as immediate data in each work request.  For connections
359 	 * storing the key in DDR, a work request is used to store a
360 	 * copy of the key context in DDR.
361 	 */
362 	t4_tls_key_ctx(tls, KTLS_TX, &tlsp->keyctx);
363 	if (tlsp->inline_key)
364 		return (0);
365 
366 	/* Populate key work request. */
367         m = alloc_wr_mbuf(TLS_KEY_WR_SZ, M_NOWAIT);
368 	if (m == NULL) {
369 		CTR(KTR_CXGBE, "%s: %p failed to alloc WR mbuf", __func__,
370 		    tlsp);
371 		return (ENOMEM);
372 	}
373 	m->m_pkthdr.snd_tag = m_snd_tag_ref(&tlsp->com);
374 	m->m_pkthdr.csum_flags |= CSUM_SND_TAG;
375 	kwr = mtod(m, void *);
376 	memset(kwr, 0, TLS_KEY_WR_SZ);
377 
378 	t4_write_tlskey_wr(tls, KTLS_TX, 0, 0, tlsp->tx_key_addr, kwr);
379 	kctx = (struct tls_keyctx *)(kwr + 1);
380 	memcpy(kctx, &tlsp->keyctx, sizeof(*kctx));
381 
382 	/*
383 	 * Place the key work request in the transmit queue.  It
384 	 * should be sent to the NIC before any TLS packets using this
385 	 * session.
386 	 */
387 	items[0] = m;
388 	error = mp_ring_enqueue(txq->r, items, 1, 1);
389 	if (error)
390 		m_free(m);
391 	else
392 		CTR(KTR_CXGBE, "%s: %p sent key WR", __func__, tlsp);
393 	return (error);
394 }
395 
396 static u_int
397 ktls_base_wr_size(struct tlspcb *tlsp, bool inline_key)
398 {
399 	u_int wr_len;
400 
401 	wr_len = sizeof(struct fw_ulptx_wr);	// 16
402 	wr_len += sizeof(struct ulp_txpkt);	// 8
403 	wr_len += sizeof(struct ulptx_idata);	// 8
404 	wr_len += sizeof(struct cpl_tx_sec_pdu);// 32
405 	if (inline_key)
406 		wr_len += tlsp->tx_key_info_size;
407 	else {
408 		wr_len += sizeof(struct ulptx_sc_memrd);// 8
409 		wr_len += sizeof(struct ulptx_idata);	// 8
410 	}
411 	/* SplitMode CPL_RX_PHYS_DSGL here if needed. */
412 	/* CPL_TX_*_LSO here if needed. */
413 	wr_len += sizeof(struct cpl_tx_pkt_core);// 16
414 	return (wr_len);
415 }
416 
417 static u_int
418 ktls_sgl_size(u_int nsegs)
419 {
420 	u_int wr_len;
421 
422 	/* First segment is part of ulptx_sgl. */
423 	nsegs--;
424 
425 	wr_len = sizeof(struct ulptx_sgl);
426 	wr_len += 8 * ((3 * nsegs) / 2 + (nsegs & 1));
427 	return (wr_len);
428 }
429 
430 /*
431  * A request that doesn't need to generate the TLS trailer is a short
432  * record.  For these requests, part of the TLS record payload is
433  * encrypted without invoking the MAC.
434  *
435  * Returns true if this record should be sent as a short record.  In
436  * either case, the remaining outputs describe the how much of the
437  * TLS record to send as input to the crypto block and the amount of
438  * crypto output to trim via SplitMode:
439  *
440  * *header_len - Number of bytes of TLS header to pass as immediate
441  *               data
442  *
443  * *offset - Start offset of TLS record payload to pass as DSGL data
444  *
445  * *plen - Length of TLS record payload to pass as DSGL data
446  *
447  * *leading_waste - amount of non-packet-header bytes to drop at the
448  *                  start of the crypto output
449  *
450  * *trailing_waste - amount of crypto output to drop from the end
451  */
452 static bool
453 ktls_is_short_record(struct tlspcb *tlsp, struct mbuf *m_tls, u_int tlen,
454     u_int rlen, u_int *header_len, u_int *offset, u_int *plen,
455     u_int *leading_waste, u_int *trailing_waste, bool send_partial_ghash,
456     bool request_ghash)
457 {
458 	u_int new_tlen, trailer_len;
459 
460 	MPASS(tlen > m_tls->m_epg_hdrlen);
461 
462 	/*
463 	 * For TLS 1.3 treat the inner record type stored as the first
464 	 * byte of the trailer as part of the payload rather than part
465 	 * of the trailer.
466 	 */
467 	trailer_len = m_tls->m_epg_trllen;
468 	if (tlsp->tls13)
469 		trailer_len--;
470 
471 	/*
472 	 * Default to sending the full record as input to the crypto
473 	 * engine and relying on SplitMode to drop any waste.
474 	 */
475 	*header_len = m_tls->m_epg_hdrlen;
476 	*offset = 0;
477 	*plen = rlen - (m_tls->m_epg_hdrlen + trailer_len);
478 	*leading_waste = mtod(m_tls, vm_offset_t);
479 	*trailing_waste = rlen - tlen;
480 	if (!tlsp->sc->tlst.short_records)
481 		return (false);
482 
483 	if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_CBC) {
484 		/*
485 		 * For AES-CBC we have to send input from the start of
486 		 * the TLS record payload that is a multiple of the
487 		 * block size.  new_tlen rounds up tlen to the end of
488 		 * the containing AES block.  If this last block
489 		 * overlaps with the trailer, send the full record to
490 		 * generate the MAC.
491 		 */
492 		new_tlen = TLS_HEADER_LENGTH +
493 		    roundup2(tlen - TLS_HEADER_LENGTH, AES_BLOCK_LEN);
494 		if (rlen - new_tlen < trailer_len)
495 			return (false);
496 
497 		*trailing_waste = new_tlen - tlen;
498 		*plen = new_tlen - m_tls->m_epg_hdrlen;
499 	} else {
500 		if (rlen - tlen < trailer_len ||
501 		    (rlen - tlen == trailer_len && request_ghash)) {
502 			/*
503 			 * For AES-GCM we have to send the full record
504 			 * if the end overlaps with the trailer and a
505 			 * partial GHASH isn't being sent.
506 			 */
507 			if (!send_partial_ghash)
508 				return (false);
509 
510 			/*
511 			 * Will need to treat any excess trailer bytes as
512 			 * trailing waste.  *trailing_waste is already
513 			 * correct.
514 			 */
515 		} else {
516 			/*
517 			 * We can use AES-CTR or AES-GCM in partial GHASH
518 			 * mode to encrypt a partial PDU.
519 			 *
520 			 * The last block can be partially encrypted
521 			 * without any trailing waste.
522 			 */
523 			*trailing_waste = 0;
524 			*plen = tlen - m_tls->m_epg_hdrlen;
525 		}
526 
527 		/*
528 		 * If this request starts at the first byte of the
529 		 * payload (so the previous request sent the full TLS
530 		 * header as a tunnel packet) and a partial GHASH is
531 		 * being requested, the full TLS header must be sent
532 		 * as input for the GHASH.
533 		 */
534 		if (mtod(m_tls, vm_offset_t) == m_tls->m_epg_hdrlen &&
535 		    request_ghash)
536 			return (true);
537 
538 		/*
539 		 * In addition, we can minimize leading waste by
540 		 * starting encryption at the start of the closest AES
541 		 * block.
542 		 */
543 		if (mtod(m_tls, vm_offset_t) >= m_tls->m_epg_hdrlen) {
544 			*header_len = 0;
545 			*offset = mtod(m_tls, vm_offset_t) -
546 			    m_tls->m_epg_hdrlen;
547 			if (*offset >= *plen)
548 				*offset = *plen;
549 			else
550 				*offset = rounddown2(*offset, AES_BLOCK_LEN);
551 
552 			/*
553 			 * If the request is just bytes from the trailer,
554 			 * trim the offset to the end of the payload.
555 			 */
556 			*offset = min(*offset, *plen);
557 			*plen -= *offset;
558 			*leading_waste -= (m_tls->m_epg_hdrlen + *offset);
559 		}
560 	}
561 	return (true);
562 }
563 
564 /* Size of the AES-GCM TLS AAD for a given connection. */
565 static int
566 ktls_gcm_aad_len(struct tlspcb *tlsp)
567 {
568 	return (tlsp->tls13 ? sizeof(struct tls_aead_data_13) :
569 	    sizeof(struct tls_aead_data));
570 }
571 
572 static int
573 ktls_wr_len(struct tlspcb *tlsp, struct mbuf *m, struct mbuf *m_tls,
574     int *nsegsp)
575 {
576 	const struct tls_record_layer *hdr;
577 	u_int header_len, imm_len, offset, plen, rlen, tlen, wr_len;
578 	u_int leading_waste, trailing_waste;
579 	bool inline_key, last_ghash_frag, request_ghash, send_partial_ghash;
580 	bool short_record;
581 
582 	M_ASSERTEXTPG(m_tls);
583 
584 	/*
585 	 * The relative offset of the last byte to send from the TLS
586 	 * record.
587 	 */
588 	tlen = mtod(m_tls, vm_offset_t) + m_tls->m_len;
589 	if (tlen <= m_tls->m_epg_hdrlen) {
590 		/*
591 		 * For requests that only want to send the TLS header,
592 		 * send a tunnelled packet as immediate data.
593 		 */
594 		wr_len = sizeof(struct fw_eth_tx_pkt_wr) +
595 		    sizeof(struct cpl_tx_pkt_core) +
596 		    roundup2(m->m_len + m_tls->m_len, 16);
597 		if (wr_len > SGE_MAX_WR_LEN) {
598 			CTR(KTR_CXGBE,
599 		    "%s: %p TLS header-only packet too long (len %d)",
600 			    __func__, tlsp, m->m_len + m_tls->m_len);
601 		}
602 
603 		/* This should always be the last TLS record in a chain. */
604 		MPASS(m_tls->m_next == NULL);
605 		*nsegsp = 0;
606 		return (wr_len);
607 	}
608 
609 	hdr = (void *)m_tls->m_epg_hdr;
610 	rlen = TLS_HEADER_LENGTH + ntohs(hdr->tls_length);
611 
612 	/*
613 	 * See if this request might make use of GHASH state.  This
614 	 * errs on the side of over-budgeting the WR size.
615 	 */
616 	last_ghash_frag = false;
617 	request_ghash = false;
618 	send_partial_ghash = false;
619 	if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_GCM &&
620 	    tlsp->sc->tlst.partial_ghash && tlsp->sc->tlst.short_records) {
621 		u_int trailer_len;
622 
623 		trailer_len = m_tls->m_epg_trllen;
624 		if (tlsp->tls13)
625 			trailer_len--;
626 		KASSERT(trailer_len == AES_GMAC_HASH_LEN,
627 		    ("invalid trailer length for AES-GCM"));
628 
629 		/* Is this the start of a TLS record? */
630 		if (mtod(m_tls, vm_offset_t) <= m_tls->m_epg_hdrlen) {
631 			/*
632 			 * Might use partial GHASH if this doesn't
633 			 * send the full record.
634 			 */
635 			if (tlen < rlen) {
636 				if (tlen < (rlen - trailer_len))
637 					send_partial_ghash = true;
638 				request_ghash = true;
639 			}
640 		} else {
641 			send_partial_ghash = true;
642 			if (tlen < rlen)
643 				request_ghash = true;
644 			if (tlen >= (rlen - trailer_len))
645 				last_ghash_frag = true;
646 		}
647 	}
648 
649 	/*
650 	 * Assume not sending partial GHASH for this call to get the
651 	 * larger size.
652 	 */
653 	short_record = ktls_is_short_record(tlsp, m_tls, tlen, rlen,
654 	    &header_len, &offset, &plen, &leading_waste, &trailing_waste,
655 	    false, request_ghash);
656 
657 	inline_key = send_partial_ghash || tlsp->inline_key;
658 
659 	/* Calculate the size of the work request. */
660 	wr_len = ktls_base_wr_size(tlsp, inline_key);
661 
662 	if (send_partial_ghash)
663 		wr_len += AES_GMAC_HASH_LEN;
664 
665 	if (leading_waste != 0 || trailing_waste != 0) {
666 		/*
667 		 * Partial records might require a SplitMode
668 		 * CPL_RX_PHYS_DSGL.
669 		 */
670 		wr_len += sizeof(struct cpl_t7_rx_phys_dsgl);
671 	}
672 
673 	/* Budget for an LSO header even if we don't use it. */
674 	wr_len += sizeof(struct cpl_tx_pkt_lso_core);
675 
676 	/*
677 	 * Headers (including the TLS header) are always sent as
678 	 * immediate data.  Short records include a raw AES IV as
679 	 * immediate data.  TLS 1.3 non-short records include a
680 	 * placeholder for the sequence number as immediate data.
681 	 * Short records using a partial hash may also need to send
682 	 * TLS AAD.  If a partial hash might be sent, assume a short
683 	 * record to get the larger size.
684 	 */
685 	imm_len = m->m_len + header_len;
686 	if (short_record || send_partial_ghash) {
687 		imm_len += AES_BLOCK_LEN;
688 		if (send_partial_ghash && header_len != 0)
689 			imm_len += ktls_gcm_aad_len(tlsp);
690 	} else if (tlsp->tls13)
691 		imm_len += sizeof(uint64_t);
692 	wr_len += roundup2(imm_len, 16);
693 
694 	/*
695 	 * TLS record payload via DSGL.  For partial GCM mode we
696 	 * might need an extra SG entry for a placeholder.
697 	 */
698 	*nsegsp = sglist_count_mbuf_epg(m_tls, m_tls->m_epg_hdrlen + offset,
699 	    plen);
700 	wr_len += ktls_sgl_size(*nsegsp + (last_ghash_frag ? 1 : 0));
701 
702 	if (request_ghash) {
703 		/* AES-GCM records might return a partial hash. */
704 		wr_len += sizeof(struct ulp_txpkt);
705 		wr_len += sizeof(struct ulptx_idata);
706 		wr_len += sizeof(struct cpl_tx_tls_ack);
707 		wr_len += sizeof(struct rss_header) +
708 		    sizeof(struct cpl_fw6_pld);
709 		wr_len += AES_GMAC_HASH_LEN;
710 	}
711 
712 	wr_len = roundup2(wr_len, 16);
713 	return (wr_len);
714 }
715 
716 /* Queue the next pending packet. */
717 static void
718 ktls_queue_next_packet(struct tlspcb *tlsp, bool enqueue_only)
719 {
720 #ifdef KTR
721 	struct ether_header *eh;
722 	struct tcphdr *tcp;
723 	tcp_seq tcp_seqno;
724 #endif
725 	struct mbuf *m;
726 	void *items[1];
727 	int rc;
728 
729 	TXQ_LOCK_ASSERT_OWNED(tlsp->txq);
730 	KASSERT(tlsp->queue_mbufs, ("%s: mbufs not being queued for %p",
731 	    __func__, tlsp));
732 	for (;;) {
733 		m = mbufq_dequeue(&tlsp->pending_mbufs);
734 		if (m == NULL) {
735 			tlsp->queue_mbufs = false;
736 			return;
737 		}
738 
739 #ifdef KTR
740 		eh = mtod(m, struct ether_header *);
741 		tcp = (struct tcphdr *)((char *)eh + m->m_pkthdr.l2hlen +
742 		    m->m_pkthdr.l3hlen);
743 		tcp_seqno = ntohl(tcp->th_seq);
744 #ifdef VERBOSE_TRACES
745 		CTR(KTR_CXGBE, "%s: pkt len %d TCP seq %u", __func__,
746 		    m->m_pkthdr.len, tcp_seqno);
747 #endif
748 #endif
749 
750 		items[0] = m;
751 		if (enqueue_only)
752 			rc = mp_ring_enqueue_only(tlsp->txq->r, items, 1);
753 		else {
754 			TXQ_UNLOCK(tlsp->txq);
755 			rc = mp_ring_enqueue(tlsp->txq->r, items, 1, 256);
756 			TXQ_LOCK(tlsp->txq);
757 		}
758 		if (__predict_true(rc == 0))
759 			return;
760 
761 		CTR(KTR_CXGBE, "%s: pkt len %d TCP seq %u dropped", __func__,
762 		    m->m_pkthdr.len, tcp_seqno);
763 		m_freem(m);
764 	}
765 }
766 
767 int
768 t7_ktls_parse_pkt(struct mbuf *m)
769 {
770 	struct tlspcb *tlsp;
771 	struct ether_header *eh;
772 	struct ip *ip;
773 	struct ip6_hdr *ip6;
774 	struct tcphdr *tcp;
775 	struct mbuf *m_tls;
776 	void *items[1];
777 	int error, nsegs;
778 	u_int wr_len, tot_len;
779 	uint16_t eh_type;
780 
781 	/*
782 	 * Locate headers in initial mbuf.
783 	 *
784 	 * XXX: This assumes all of the headers are in the initial mbuf.
785 	 * Could perhaps use m_advance() like parse_pkt() if that turns
786 	 * out to not be true.
787 	 */
788 	M_ASSERTPKTHDR(m);
789 	MPASS(m->m_pkthdr.snd_tag != NULL);
790 	tlsp = mst_to_tls(m->m_pkthdr.snd_tag);
791 
792 	if (m->m_len <= sizeof(*eh) + sizeof(*ip)) {
793 		CTR(KTR_CXGBE, "%s: %p header mbuf too short", __func__, tlsp);
794 		return (EINVAL);
795 	}
796 	eh = mtod(m, struct ether_header *);
797 	eh_type = ntohs(eh->ether_type);
798 	if (eh_type == ETHERTYPE_VLAN) {
799 		struct ether_vlan_header *evh = (void *)eh;
800 
801 		eh_type = ntohs(evh->evl_proto);
802 		m->m_pkthdr.l2hlen = sizeof(*evh);
803 	} else
804 		m->m_pkthdr.l2hlen = sizeof(*eh);
805 
806 	switch (eh_type) {
807 	case ETHERTYPE_IP:
808 		ip = (struct ip *)(eh + 1);
809 		if (ip->ip_p != IPPROTO_TCP) {
810 			CTR(KTR_CXGBE, "%s: %p mbuf not IPPROTO_TCP", __func__,
811 			    tlsp);
812 			return (EINVAL);
813 		}
814 		m->m_pkthdr.l3hlen = ip->ip_hl * 4;
815 		break;
816 	case ETHERTYPE_IPV6:
817 		ip6 = (struct ip6_hdr *)(eh + 1);
818 		if (ip6->ip6_nxt != IPPROTO_TCP) {
819 			CTR(KTR_CXGBE, "%s: %p, mbuf not IPPROTO_TCP (%u)",
820 			    __func__, tlsp, ip6->ip6_nxt);
821 			return (EINVAL);
822 		}
823 		m->m_pkthdr.l3hlen = sizeof(struct ip6_hdr);
824 		break;
825 	default:
826 		CTR(KTR_CXGBE, "%s: %p mbuf not ETHERTYPE_IP{,V6}", __func__,
827 		    tlsp);
828 		return (EINVAL);
829 	}
830 	if (m->m_len < m->m_pkthdr.l2hlen + m->m_pkthdr.l3hlen +
831 	    sizeof(*tcp)) {
832 		CTR(KTR_CXGBE, "%s: %p header mbuf too short (2)", __func__,
833 		    tlsp);
834 		return (EINVAL);
835 	}
836 	tcp = (struct tcphdr *)((char *)(eh + 1) + m->m_pkthdr.l3hlen);
837 	m->m_pkthdr.l4hlen = tcp->th_off * 4;
838 
839 	/* Bail if there is TCP payload before the TLS record. */
840 	if (m->m_len != m->m_pkthdr.l2hlen + m->m_pkthdr.l3hlen +
841 	    m->m_pkthdr.l4hlen) {
842 		CTR(KTR_CXGBE,
843 		    "%s: %p header mbuf bad length (%d + %d + %d != %d)",
844 		    __func__, tlsp, m->m_pkthdr.l2hlen, m->m_pkthdr.l3hlen,
845 		    m->m_pkthdr.l4hlen, m->m_len);
846 		return (EINVAL);
847 	}
848 
849 	/* Assume all headers are in 'm' for now. */
850 	MPASS(m->m_next != NULL);
851 	MPASS(m->m_next->m_flags & M_EXTPG);
852 
853 	tot_len = 0;
854 
855 	/*
856 	 * Each of the remaining mbufs in the chain should reference a
857 	 * TLS record.
858 	 */
859 	for (m_tls = m->m_next; m_tls != NULL; m_tls = m_tls->m_next) {
860 		MPASS(m_tls->m_flags & M_EXTPG);
861 
862 		wr_len = ktls_wr_len(tlsp, m, m_tls, &nsegs);
863 #ifdef VERBOSE_TRACES
864 		CTR(KTR_CXGBE, "%s: %p wr_len %d nsegs %d", __func__, tlsp,
865 		    wr_len, nsegs);
866 #endif
867 		if (wr_len > SGE_MAX_WR_LEN || nsegs > TX_SGL_SEGS)
868 			return (EFBIG);
869 		tot_len += roundup2(wr_len, EQ_ESIZE);
870 
871 		/*
872 		 * Store 'nsegs' for the first TLS record in the
873 		 * header mbuf's metadata.
874 		 */
875 		if (m_tls == m->m_next)
876 			set_mbuf_nsegs(m, nsegs);
877 	}
878 
879 	MPASS(tot_len != 0);
880 	set_mbuf_len16(m, tot_len / 16);
881 
882 	if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_GCM) {
883 		/* Defer packets beyond what has been sent so far. */
884 		TXQ_LOCK(tlsp->txq);
885 		if (tlsp->queue_mbufs) {
886 			error = mbufq_enqueue(&tlsp->pending_mbufs, m);
887 			if (error == 0) {
888 #ifdef VERBOSE_TRACES
889 				CTR(KTR_CXGBE,
890 				    "%s: %p len16 %d nsegs %d TCP seq %u deferred",
891 				    __func__, tlsp, mbuf_len16(m),
892 				    mbuf_nsegs(m), ntohl(tcp->th_seq));
893 #endif
894 			}
895 			TXQ_UNLOCK(tlsp->txq);
896 			return (error);
897 		}
898 		tlsp->queue_mbufs = true;
899 		TXQ_UNLOCK(tlsp->txq);
900 	}
901 
902 #ifdef VERBOSE_TRACES
903 	CTR(KTR_CXGBE, "%s: %p len16 %d nsegs %d", __func__, tlsp,
904 	    mbuf_len16(m), mbuf_nsegs(m));
905 #endif
906 	items[0] = m;
907 	error = mp_ring_enqueue(tlsp->txq->r, items, 1, 256);
908 	if (__predict_false(error != 0)) {
909 		if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_GCM) {
910 			TXQ_LOCK(tlsp->txq);
911 			ktls_queue_next_packet(tlsp, false);
912 			TXQ_UNLOCK(tlsp->txq);
913 		}
914 	}
915 	return (error);
916 }
917 
918 static inline bool
919 needs_vlan_insertion(struct mbuf *m)
920 {
921 
922 	M_ASSERTPKTHDR(m);
923 
924 	return (m->m_flags & M_VLANTAG);
925 }
926 
927 static inline uint64_t
928 pkt_ctrl1(struct sge_txq *txq, struct mbuf *m, uint16_t eh_type)
929 {
930 	uint64_t ctrl1;
931 
932 	/* Checksums are always offloaded */
933 	if (eh_type == ETHERTYPE_IP) {
934 		ctrl1 = V_TXPKT_CSUM_TYPE(TX_CSUM_TCPIP) |
935 		    V_T6_TXPKT_ETHHDR_LEN(m->m_pkthdr.l2hlen - ETHER_HDR_LEN) |
936 		    V_TXPKT_IPHDR_LEN(m->m_pkthdr.l3hlen);
937 	} else {
938 		MPASS(m->m_pkthdr.l3hlen == sizeof(struct ip6_hdr));
939 		ctrl1 = V_TXPKT_CSUM_TYPE(TX_CSUM_TCPIP6) |
940 		    V_T6_TXPKT_ETHHDR_LEN(m->m_pkthdr.l2hlen - ETHER_HDR_LEN) |
941 		    V_TXPKT_IPHDR_LEN(m->m_pkthdr.l3hlen);
942 	}
943 	txq->txcsum++;
944 
945 	/* VLAN tag insertion */
946 	if (needs_vlan_insertion(m)) {
947 		ctrl1 |= F_TXPKT_VLAN_VLD |
948 		    V_TXPKT_VLAN(m->m_pkthdr.ether_vtag);
949 		txq->vlan_insertion++;
950 	}
951 
952 	return (ctrl1);
953 }
954 
955 static inline void *
956 write_lso_cpl(void *cpl, struct mbuf *m0, uint16_t mss, uint16_t eh_type,
957     int total_len)
958 {
959 	struct cpl_tx_pkt_lso_core *lso;
960 	uint32_t ctrl;
961 
962 	KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 &&
963 	    m0->m_pkthdr.l4hlen > 0,
964 	    ("%s: mbuf %p needs TSO but missing header lengths",
965 		__func__, m0));
966 
967 	ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) |
968 	    F_LSO_FIRST_SLICE | F_LSO_LAST_SLICE |
969 	    V_LSO_ETHHDR_LEN((m0->m_pkthdr.l2hlen - ETHER_HDR_LEN) >> 2) |
970 	    V_LSO_IPHDR_LEN(m0->m_pkthdr.l3hlen >> 2) |
971 	    V_LSO_TCPHDR_LEN(m0->m_pkthdr.l4hlen >> 2);
972 	if (eh_type == ETHERTYPE_IPV6)
973 		ctrl |= F_LSO_IPV6;
974 
975 	lso = cpl;
976 	lso->lso_ctrl = htobe32(ctrl);
977 	lso->ipid_ofst = htobe16(0);
978 	lso->mss = htobe16(mss);
979 	lso->seqno_offset = htobe32(0);
980 	lso->len = htobe32(total_len);
981 
982 	return (lso + 1);
983 }
984 
985 static inline void *
986 write_tx_tls_ack(void *dst, u_int rx_chid, u_int hash_len, bool ghash_lcb)
987 {
988 	struct cpl_tx_tls_ack *cpl;
989 	uint32_t flags;
990 
991 	flags = ghash_lcb ? F_CPL_TX_TLS_ACK_LCB : F_CPL_TX_TLS_ACK_PHASH;
992 	cpl = dst;
993 	cpl->op_to_Rsvd2 = htobe32(V_CPL_TX_TLS_ACK_OPCODE(CPL_TX_TLS_ACK) |
994 	    V_T7_CPL_TX_TLS_ACK_RXCHID(rx_chid) | F_CPL_TX_TLS_ACK_ULPTXLPBK |
995 	    flags);
996 
997 	/* 32 == AckEncCpl, 16 == LCB */
998 	cpl->PldLen = htobe32(V_CPL_TX_TLS_ACK_PLDLEN(32 + 16 + hash_len));
999 	cpl->Rsvd3 = 0;
1000 
1001 	return (cpl + 1);
1002 }
1003 
1004 static inline void *
1005 write_fw6_pld(void *dst, u_int rx_chid, u_int rx_qid, u_int hash_len,
1006     uint64_t cookie)
1007 {
1008 	struct rss_header *rss;
1009 	struct cpl_fw6_pld *cpl;
1010 
1011 	rss = dst;
1012 	memset(rss, 0, sizeof(*rss));
1013 	rss->opcode = CPL_FW6_PLD;
1014 	rss->qid = htobe16(rx_qid);
1015 	rss->channel = rx_chid;
1016 
1017 	cpl = (void *)(rss + 1);
1018 	memset(cpl, 0, sizeof(*cpl));
1019 	cpl->opcode = CPL_FW6_PLD;
1020 	cpl->len = htobe16(hash_len);
1021 	cpl->data[1] = htobe64(cookie);
1022 
1023 	return (cpl + 1);
1024 }
1025 
1026 static inline void *
1027 write_split_mode_rx_phys(void *dst, struct mbuf *m, struct mbuf *m_tls,
1028     u_int crypto_hdr_len, u_int leading_waste, u_int trailing_waste)
1029 {
1030 	struct cpl_t7_rx_phys_dsgl *cpl;
1031 	uint16_t *len;
1032 	uint8_t numsge;
1033 
1034 	/* Forward first (3) and third (1) segments. */
1035 	numsge = 0xa;
1036 
1037 	cpl = dst;
1038 	cpl->ot.opcode = CPL_RX_PHYS_DSGL;
1039 	cpl->PhysAddrFields_lo_to_NumSGE =
1040 	    htobe32(F_CPL_T7_RX_PHYS_DSGL_SPLITMODE |
1041 	    V_CPL_T7_RX_PHYS_DSGL_NUMSGE(numsge));
1042 
1043 	len = (uint16_t *)(cpl->RSSCopy);
1044 
1045 	/*
1046 	 * First segment always contains packet headers as well as
1047 	 * transmit-related CPLs.
1048 	 */
1049 	len[0] = htobe16(crypto_hdr_len);
1050 
1051 	/*
1052 	 * Second segment is "gap" of data to drop at the front of the
1053 	 * TLS record.
1054 	 */
1055 	len[1] = htobe16(leading_waste);
1056 
1057 	/* Third segment is how much of the TLS record to send. */
1058 	len[2] = htobe16(m_tls->m_len);
1059 
1060 	/* Fourth segment is how much data to drop at the end. */
1061 	len[3] = htobe16(trailing_waste);
1062 
1063 #ifdef VERBOSE_TRACES
1064 	CTR(KTR_CXGBE, "%s: forward %u skip %u forward %u skip %u",
1065 	    __func__, be16toh(len[0]), be16toh(len[1]), be16toh(len[2]),
1066 	    be16toh(len[3]));
1067 #endif
1068 	return (cpl + 1);
1069 }
1070 
1071 /*
1072  * If the SGL ends on an address that is not 16 byte aligned, this function will
1073  * add a 0 filled flit at the end.
1074  */
1075 static void *
1076 write_gl_to_buf(struct sglist *gl, caddr_t to)
1077 {
1078 	struct sglist_seg *seg;
1079 	__be64 *flitp;
1080 	struct ulptx_sgl *usgl;
1081 	int i, nflits, nsegs;
1082 
1083 	KASSERT(((uintptr_t)to & 0xf) == 0,
1084 	    ("%s: SGL must start at a 16 byte boundary: %p", __func__, to));
1085 
1086 	nsegs = gl->sg_nseg;
1087 	MPASS(nsegs > 0);
1088 
1089 	nflits = (3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1) + 2;
1090 	flitp = (__be64 *)to;
1091 	seg = &gl->sg_segs[0];
1092 	usgl = (void *)flitp;
1093 
1094 	usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) |
1095 	    V_ULPTX_NSGE(nsegs));
1096 	usgl->len0 = htobe32(seg->ss_len);
1097 	usgl->addr0 = htobe64(seg->ss_paddr);
1098 	seg++;
1099 
1100 	for (i = 0; i < nsegs - 1; i++, seg++) {
1101 		usgl->sge[i / 2].len[i & 1] = htobe32(seg->ss_len);
1102 		usgl->sge[i / 2].addr[i & 1] = htobe64(seg->ss_paddr);
1103 	}
1104 	if (i & 1)
1105 		usgl->sge[i / 2].len[1] = htobe32(0);
1106 	flitp += nflits;
1107 
1108 	if (nflits & 1) {
1109 		MPASS(((uintptr_t)flitp) & 0xf);
1110 		*flitp++ = 0;
1111 	}
1112 
1113 	MPASS((((uintptr_t)flitp) & 0xf) == 0);
1114 	return (flitp);
1115 }
1116 
1117 static inline void
1118 copy_to_txd(struct sge_eq *eq, const char *from, caddr_t *to, int len)
1119 {
1120 
1121 	MPASS((uintptr_t)(*to) >= (uintptr_t)&eq->desc[0]);
1122 	MPASS((uintptr_t)(*to) < (uintptr_t)&eq->desc[eq->sidx]);
1123 
1124 	if (__predict_true((uintptr_t)(*to) + len <=
1125 	    (uintptr_t)&eq->desc[eq->sidx])) {
1126 		bcopy(from, *to, len);
1127 		(*to) += len;
1128 		if ((uintptr_t)(*to) == (uintptr_t)&eq->desc[eq->sidx])
1129 			(*to) = (caddr_t)eq->desc;
1130 	} else {
1131 		int portion = (uintptr_t)&eq->desc[eq->sidx] - (uintptr_t)(*to);
1132 
1133 		bcopy(from, *to, portion);
1134 		from += portion;
1135 		portion = len - portion;	/* remaining */
1136 		bcopy(from, (void *)eq->desc, portion);
1137 		(*to) = (caddr_t)eq->desc + portion;
1138 	}
1139 }
1140 
1141 static int
1142 ktls_write_tunnel_packet(struct sge_txq *txq, void *dst, struct mbuf *m,
1143     const void *src, u_int len, u_int available, tcp_seq tcp_seqno, u_int pidx,
1144     uint16_t eh_type, bool last_wr)
1145 {
1146 	struct tx_sdesc *txsd;
1147 	struct fw_eth_tx_pkt_wr *wr;
1148 	struct cpl_tx_pkt_core *cpl;
1149 	uint32_t ctrl;
1150 	int len16, ndesc, pktlen;
1151 	struct ether_header *eh;
1152 	struct ip *ip, newip;
1153 	struct ip6_hdr *ip6, newip6;
1154 	struct tcphdr *tcp, newtcp;
1155 	caddr_t out;
1156 
1157 	TXQ_LOCK_ASSERT_OWNED(txq);
1158 	M_ASSERTPKTHDR(m);
1159 
1160 	wr = dst;
1161 	pktlen = m->m_len + len;
1162 	ctrl = sizeof(struct cpl_tx_pkt_core) + pktlen;
1163 	len16 = howmany(sizeof(struct fw_eth_tx_pkt_wr) + ctrl, 16);
1164 	ndesc = tx_len16_to_desc(len16);
1165 	MPASS(ndesc <= available);
1166 
1167 	/* Firmware work request header */
1168 	/* TODO: Handle VF work request. */
1169 	wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) |
1170 	    V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl));
1171 
1172 	ctrl = V_FW_WR_LEN16(len16);
1173 	wr->equiq_to_len16 = htobe32(ctrl);
1174 	wr->r3 = 0;
1175 
1176 	cpl = (void *)(wr + 1);
1177 
1178 	/* CPL header */
1179 	cpl->ctrl0 = txq->cpl_ctrl0;
1180 	cpl->pack = 0;
1181 	cpl->len = htobe16(pktlen);
1182 
1183 	out = (void *)(cpl + 1);
1184 
1185 	/* Copy over Ethernet header. */
1186 	eh = mtod(m, struct ether_header *);
1187 	copy_to_txd(&txq->eq, (caddr_t)eh, &out, m->m_pkthdr.l2hlen);
1188 
1189 	/* Fixup length in IP header and copy out. */
1190 	if (eh_type == ETHERTYPE_IP) {
1191 		ip = (void *)((char *)eh + m->m_pkthdr.l2hlen);
1192 		newip = *ip;
1193 		newip.ip_len = htons(pktlen - m->m_pkthdr.l2hlen);
1194 		copy_to_txd(&txq->eq, (caddr_t)&newip, &out, sizeof(newip));
1195 		if (m->m_pkthdr.l3hlen > sizeof(*ip))
1196 			copy_to_txd(&txq->eq, (caddr_t)(ip + 1), &out,
1197 			    m->m_pkthdr.l3hlen - sizeof(*ip));
1198 	} else {
1199 		ip6 = (void *)((char *)eh + m->m_pkthdr.l2hlen);
1200 		newip6 = *ip6;
1201 		newip6.ip6_plen = htons(pktlen - m->m_pkthdr.l2hlen -
1202 		    sizeof(*ip6));
1203 		copy_to_txd(&txq->eq, (caddr_t)&newip6, &out, sizeof(newip6));
1204 		MPASS(m->m_pkthdr.l3hlen == sizeof(*ip6));
1205 	}
1206 	cpl->ctrl1 = htobe64(pkt_ctrl1(txq, m, eh_type));
1207 
1208 	/* Set sequence number in TCP header. */
1209 	tcp = (void *)((char *)eh + m->m_pkthdr.l2hlen + m->m_pkthdr.l3hlen);
1210 	newtcp = *tcp;
1211 	newtcp.th_seq = htonl(tcp_seqno);
1212 	copy_to_txd(&txq->eq, (caddr_t)&newtcp, &out, sizeof(newtcp));
1213 
1214 	/* Copy rest of TCP header. */
1215 	copy_to_txd(&txq->eq, (caddr_t)(tcp + 1), &out, m->m_len -
1216 	    (m->m_pkthdr.l2hlen + m->m_pkthdr.l3hlen + sizeof(*tcp)));
1217 
1218 	/* Copy the payload data. */
1219 	copy_to_txd(&txq->eq, src, &out, len);
1220 	txq->imm_wrs++;
1221 
1222 	txq->txpkt_wrs++;
1223 
1224 	txsd = &txq->sdesc[pidx];
1225 	if (last_wr)
1226 		txsd->m = m;
1227 	else
1228 		txsd->m = NULL;
1229 	txsd->desc_used = ndesc;
1230 
1231 	return (ndesc);
1232 }
1233 
1234 static int
1235 ktls_write_tls_wr(struct tlspcb *tlsp, struct sge_txq *txq,
1236     void *dst, struct mbuf *m, struct tcphdr *tcp, struct mbuf *m_tls,
1237     u_int available, tcp_seq tcp_seqno, u_int pidx, uint16_t eh_type,
1238     uint16_t mss)
1239 {
1240 	struct sge_eq *eq = &txq->eq;
1241 	struct tx_sdesc *txsd;
1242 	struct fw_ulptx_wr *wr;
1243 	struct ulp_txpkt *txpkt;
1244 	struct ulptx_sc_memrd *memrd;
1245 	struct ulptx_idata *idata;
1246 	struct cpl_tx_sec_pdu *sec_pdu;
1247 	struct cpl_tx_pkt_core *tx_pkt;
1248 	const struct tls_record_layer *hdr;
1249 	struct ip *ip;
1250 	struct ip6_hdr *ip6;
1251 	struct tcphdr *newtcp;
1252 	char *iv, *out;
1253 	u_int aad_start, aad_stop;
1254 	u_int auth_start, auth_stop, auth_insert;
1255 	u_int cipher_start, cipher_stop, iv_offset;
1256 	u_int header_len, offset, plen, rlen, tlen;
1257 	u_int imm_len, ndesc, nsegs, txpkt_lens[2], wr_len;
1258 	u_int cpl_len, crypto_hdr_len, post_key_context_len;
1259 	u_int leading_waste, trailing_waste;
1260 	u_short ip_len;
1261 	bool inline_key, ghash_lcb, last_ghash_frag, last_wr, need_lso;
1262 	bool request_ghash, send_partial_ghash, short_record, split_mode;
1263 	bool using_scratch;
1264 
1265 	MPASS(tlsp->txq == txq);
1266 	M_ASSERTEXTPG(m_tls);
1267 
1268 	/* Final work request for this mbuf chain? */
1269 	last_wr = (m_tls->m_next == NULL);
1270 
1271 	/*
1272 	 * The relative offset of the last byte to send from the TLS
1273 	 * record.
1274 	 */
1275 	tlen = mtod(m_tls, vm_offset_t) + m_tls->m_len;
1276 	if (tlen <= m_tls->m_epg_hdrlen) {
1277 		/*
1278 		 * For requests that only want to send the TLS header,
1279 		 * send a tunnelled packet as immediate data.
1280 		 */
1281 #ifdef VERBOSE_TRACES
1282 		CTR(KTR_CXGBE, "%s: %p header-only TLS record %u", __func__,
1283 		    tlsp, (u_int)m_tls->m_epg_seqno);
1284 #endif
1285 		/* This should always be the last TLS record in a chain. */
1286 		MPASS(last_wr);
1287 
1288 		txq->kern_tls_header++;
1289 
1290 		return (ktls_write_tunnel_packet(txq, dst, m,
1291 		    (char *)m_tls->m_epg_hdr + mtod(m_tls, vm_offset_t),
1292 		    m_tls->m_len, available, tcp_seqno, pidx, eh_type,
1293 		    last_wr));
1294 	}
1295 
1296 	/* Locate the TLS header. */
1297 	hdr = (void *)m_tls->m_epg_hdr;
1298 	rlen = TLS_HEADER_LENGTH + ntohs(hdr->tls_length);
1299 
1300 #ifdef VERBOSE_TRACES
1301 	CTR(KTR_CXGBE, "%s: offset %lu len %u TCP seq %u TLS record %u",
1302 	    __func__, mtod(m_tls, vm_offset_t), m_tls->m_len, tcp_seqno,
1303 	    (u_int)m_tls->m_epg_seqno);
1304 #endif
1305 
1306 	/* Should this request make use of GHASH state? */
1307 	ghash_lcb = false;
1308 	last_ghash_frag = false;
1309 	request_ghash = false;
1310 	send_partial_ghash = false;
1311 	if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_GCM &&
1312 	    tlsp->sc->tlst.partial_ghash && tlsp->sc->tlst.short_records) {
1313 		u_int trailer_len;
1314 
1315 		trailer_len = m_tls->m_epg_trllen;
1316 		if (tlsp->tls13)
1317 			trailer_len--;
1318 		KASSERT(trailer_len == AES_GMAC_HASH_LEN,
1319 		    ("invalid trailer length for AES-GCM"));
1320 
1321 		/* Is this the start of a TLS record? */
1322 		if (mtod(m_tls, vm_offset_t) <= m_tls->m_epg_hdrlen) {
1323 			/*
1324 			 * If this is the very first TLS record or
1325 			 * if this is a newer TLS record, request a partial
1326 			 * hash, but not if we are going to send the whole
1327 			 * thing.
1328 			 */
1329 			if ((tlsp->ghash_tls_seqno == 0 ||
1330 			    tlsp->ghash_tls_seqno < m_tls->m_epg_seqno) &&
1331 			    tlen < rlen) {
1332 				/*
1333 				 * If we are only missing part or all
1334 				 * of the trailer, send a normal full
1335 				 * record but request the hash.
1336 				 * Otherwise, use partial GHASH mode.
1337 				 */
1338 				if (tlen >= (rlen - trailer_len))
1339 					ghash_lcb = true;
1340 				else
1341 					send_partial_ghash = true;
1342 				request_ghash = true;
1343 				tlsp->ghash_tls_seqno = m_tls->m_epg_seqno;
1344 			}
1345 		} else if (tlsp->ghash_tls_seqno == m_tls->m_epg_seqno &&
1346 		    tlsp->ghash_valid) {
1347 			/*
1348 			 * Compute the offset of the first AES block as
1349 			 * is done in ktls_is_short_record.
1350 			 */
1351 			if (rlen - tlen < trailer_len)
1352 				plen = rlen - (m_tls->m_epg_hdrlen +
1353 				    trailer_len);
1354 			else
1355 				plen = tlen - m_tls->m_epg_hdrlen;
1356 			offset = mtod(m_tls, vm_offset_t) - m_tls->m_epg_hdrlen;
1357 			if (offset >= plen)
1358 				offset = plen;
1359 			else
1360 				offset = rounddown2(offset, AES_BLOCK_LEN);
1361 			if (tlsp->ghash_offset == offset) {
1362 				if (offset == plen) {
1363 					/*
1364 					 * Send a partial trailer as a
1365 					 * tunnelled packet as
1366 					 * immediate data.
1367 					 */
1368 #ifdef VERBOSE_TRACES
1369 					CTR(KTR_CXGBE,
1370 					    "%s: %p trailer-only TLS record %u",
1371 					    __func__, tlsp,
1372 					    (u_int)m_tls->m_epg_seqno);
1373 #endif
1374 
1375 					txq->kern_tls_trailer++;
1376 
1377 					offset = mtod(m_tls, vm_offset_t) -
1378 					    (m_tls->m_epg_hdrlen + plen);
1379 					KASSERT(offset <= AES_GMAC_HASH_LEN,
1380 					    ("offset outside of trailer"));
1381 					return (ktls_write_tunnel_packet(txq,
1382 					    dst, m, tlsp->ghash + offset,
1383 					    m_tls->m_len, available, tcp_seqno,
1384 					    pidx, eh_type, last_wr));
1385 				}
1386 
1387 				/*
1388 				 * If this request sends the end of
1389 				 * the payload, it is the last
1390 				 * fragment.
1391 				 */
1392 				if (tlen >= (rlen - trailer_len)) {
1393 					last_ghash_frag = true;
1394 					ghash_lcb = true;
1395 				}
1396 
1397 				/*
1398 				 * Only use partial GCM mode (rather
1399 				 * than an AES-CTR short record) if
1400 				 * there is input auth data to pass to
1401 				 * the GHASH.  That is true so long as
1402 				 * there is at least one full block of
1403 				 * payload data, or if the remaining
1404 				 * payload data is the final partial
1405 				 * block.
1406 				 */
1407 				if (plen - offset >= GMAC_BLOCK_LEN ||
1408 				    last_ghash_frag) {
1409 					send_partial_ghash = true;
1410 
1411 					/*
1412 					 * If not sending the complete
1413 					 * end of the record, this is
1414 					 * a middle request so needs
1415 					 * to request an updated
1416 					 * partial hash.
1417 					 */
1418 					if (tlen < rlen)
1419 						request_ghash = true;
1420 				}
1421 			}
1422 		}
1423 	}
1424 
1425 	short_record = ktls_is_short_record(tlsp, m_tls, tlen, rlen,
1426 	    &header_len, &offset, &plen, &leading_waste, &trailing_waste,
1427 	    send_partial_ghash, request_ghash);
1428 
1429 	if (short_record) {
1430 #ifdef VERBOSE_TRACES
1431 		CTR(KTR_CXGBE,
1432 		    "%s: %p short TLS record %u hdr %u offs %u plen %u",
1433 		    __func__, tlsp, (u_int)m_tls->m_epg_seqno, header_len,
1434 		    offset, plen);
1435 		if (send_partial_ghash) {
1436 			if (header_len != 0)
1437 				CTR(KTR_CXGBE, "%s: %p sending initial GHASH",
1438 				    __func__, tlsp);
1439 			else
1440 				CTR(KTR_CXGBE, "%s: %p sending partial GHASH for offset %u%s",
1441 				    __func__, tlsp, tlsp->ghash_offset,
1442 				    last_ghash_frag ? ", last_frag" : "");
1443 		}
1444 #endif
1445 		KASSERT(send_partial_ghash || !request_ghash,
1446 		    ("requesting but not sending partial hash for short record"));
1447 	} else {
1448 		KASSERT(!send_partial_ghash,
1449 		    ("sending partial hash with full record"));
1450 	}
1451 
1452 	if (tlen < rlen && m_tls->m_next == NULL &&
1453 	    (tcp->th_flags & TH_FIN) != 0) {
1454 		txq->kern_tls_fin_short++;
1455 #ifdef INVARIANTS
1456 		panic("%s: FIN on short TLS record", __func__);
1457 #endif
1458 	}
1459 
1460 	/*
1461 	 * Use cached value for first record in chain if not using
1462 	 * partial GCM mode. ktls_parse_pkt() calculates nsegs based
1463 	 * on send_partial_ghash being false.
1464 	 */
1465 	if (m->m_next == m_tls && !send_partial_ghash)
1466 		nsegs = mbuf_nsegs(m);
1467 	else
1468 		nsegs = sglist_count_mbuf_epg(m_tls,
1469 		    m_tls->m_epg_hdrlen + offset, plen);
1470 
1471 	/* Determine if we need an LSO header. */
1472 	need_lso = (m_tls->m_len > mss);
1473 
1474 	/* Calculate the size of the TLS work request. */
1475 	inline_key = send_partial_ghash || tlsp->inline_key;
1476 	wr_len = ktls_base_wr_size(tlsp, inline_key);
1477 
1478 	if (send_partial_ghash) {
1479 		/* Inline key context includes partial hash in OPAD. */
1480 		wr_len += AES_GMAC_HASH_LEN;
1481 	}
1482 
1483 	/*
1484 	 * SplitMode is required if there is any thing we need to trim
1485 	 * from the crypto output, either at the front or end of the
1486 	 * record.  Note that short records might not need trimming.
1487 	 */
1488 	split_mode = leading_waste != 0 || trailing_waste != 0;
1489 	if (split_mode) {
1490 		/*
1491 		 * Partial records require a SplitMode
1492 		 * CPL_RX_PHYS_DSGL.
1493 		 */
1494 		wr_len += sizeof(struct cpl_t7_rx_phys_dsgl);
1495 	}
1496 
1497 	if (need_lso)
1498 		wr_len += sizeof(struct cpl_tx_pkt_lso_core);
1499 
1500 	imm_len = m->m_len + header_len;
1501 	if (short_record) {
1502 		imm_len += AES_BLOCK_LEN;
1503 		if (send_partial_ghash && header_len != 0)
1504 			imm_len += ktls_gcm_aad_len(tlsp);
1505 	} else if (tlsp->tls13)
1506 		imm_len += sizeof(uint64_t);
1507 	wr_len += roundup2(imm_len, 16);
1508 	wr_len += ktls_sgl_size(nsegs + (last_ghash_frag ? 1 : 0));
1509 	wr_len = roundup2(wr_len, 16);
1510 	txpkt_lens[0] = wr_len - sizeof(*wr);
1511 
1512 	if (request_ghash) {
1513 		/*
1514 		 * Requesting the hash entails a second ULP_TX_PKT
1515 		 * containing CPL_TX_TLS_ACK, CPL_FW6_PLD, and space
1516 		 * for the hash.
1517 		 */
1518 		txpkt_lens[1] = sizeof(struct ulp_txpkt);
1519 		txpkt_lens[1] += sizeof(struct ulptx_idata);
1520 		txpkt_lens[1] += sizeof(struct cpl_tx_tls_ack);
1521 		txpkt_lens[1] += sizeof(struct rss_header) +
1522 		    sizeof(struct cpl_fw6_pld);
1523 		txpkt_lens[1] += AES_GMAC_HASH_LEN;
1524 		wr_len += txpkt_lens[1];
1525 	} else
1526 		txpkt_lens[1] = 0;
1527 
1528 	ndesc = howmany(wr_len, EQ_ESIZE);
1529 	MPASS(ndesc <= available);
1530 
1531 	/*
1532 	 * Use the per-txq scratch pad if near the end of the ring to
1533 	 * simplify handling of wrap-around.
1534 	 */
1535 	using_scratch = (eq->sidx - pidx < ndesc);
1536 	if (using_scratch)
1537 		wr = (void *)txq->ss;
1538 	else
1539 		wr = dst;
1540 
1541 	/* FW_ULPTX_WR */
1542 	wr->op_to_compl = htobe32(V_FW_WR_OP(FW_ULPTX_WR));
1543 	wr->flowid_len16 = htobe32(F_FW_ULPTX_WR_DATA |
1544 	    V_FW_WR_LEN16(wr_len / 16));
1545 	wr->cookie = 0;
1546 
1547 	/* ULP_TXPKT */
1548 	txpkt = (void *)(wr + 1);
1549 	txpkt->cmd_dest = htobe32(V_ULPTX_CMD(ULP_TX_PKT) |
1550 	    V_ULP_TXPKT_DATAMODIFY(0) |
1551 	    V_T7_ULP_TXPKT_CHANNELID(tlsp->vi->pi->port_id) |
1552 	    V_ULP_TXPKT_DEST(0) |
1553 	    V_ULP_TXPKT_CMDMORE(request_ghash ? 1 : 0) |
1554 	    V_ULP_TXPKT_FID(txq->eq.cntxt_id) | V_ULP_TXPKT_RO(1));
1555 	txpkt->len = htobe32(howmany(txpkt_lens[0], 16));
1556 
1557 	/* ULPTX_IDATA sub-command */
1558 	idata = (void *)(txpkt + 1);
1559 	idata->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM) |
1560 	    V_ULP_TX_SC_MORE(1));
1561 	idata->len = sizeof(struct cpl_tx_sec_pdu);
1562 
1563 	/*
1564 	 * After the key context comes CPL_RX_PHYS_DSGL, CPL_TX_*, and
1565 	 * immediate data containing headers.  When using an inline
1566 	 * key, these are counted as part of this ULPTX_IDATA.  When
1567 	 * reading the key from memory, these are part of a separate
1568 	 * ULPTX_IDATA.
1569 	 */
1570 	cpl_len = sizeof(struct cpl_tx_pkt_core);
1571 	if (need_lso)
1572 		cpl_len += sizeof(struct cpl_tx_pkt_lso_core);
1573 	if (split_mode)
1574 		cpl_len += sizeof(struct cpl_t7_rx_phys_dsgl);
1575 	post_key_context_len = cpl_len + imm_len;
1576 
1577 	if (inline_key) {
1578 		idata->len += tlsp->tx_key_info_size + post_key_context_len;
1579 		if (send_partial_ghash) {
1580 			/* Partial GHASH in key context. */
1581 			idata->len += AES_GMAC_HASH_LEN;
1582 		}
1583 	}
1584 	idata->len = htobe32(idata->len);
1585 
1586 	/* CPL_TX_SEC_PDU */
1587 	sec_pdu = (void *)(idata + 1);
1588 
1589 	/*
1590 	 * Packet headers are passed through unchanged by the crypto
1591 	 * engine by marking them as header data in SCMD0.
1592 	 */
1593 	crypto_hdr_len = m->m_len;
1594 
1595 	if (send_partial_ghash) {
1596 		/*
1597 		 * For short records using a partial hash, the TLS
1598 		 * header is counted as header data in SCMD0.  TLS AAD
1599 		 * is next (if AAD is present) followed by the AES-CTR
1600 		 * IV.  Last is the cipher region for the payload.
1601 		 */
1602 		if (header_len != 0) {
1603 			aad_start = 1;
1604 			aad_stop = ktls_gcm_aad_len(tlsp);
1605 		} else {
1606 			aad_start = 0;
1607 			aad_stop = 0;
1608 		}
1609 		iv_offset = aad_stop + 1;
1610 		cipher_start = iv_offset + AES_BLOCK_LEN;
1611 		cipher_stop = 0;
1612 		if (last_ghash_frag) {
1613 			auth_start = cipher_start;
1614 			auth_stop = AES_GMAC_HASH_LEN;
1615 			auth_insert = auth_stop;
1616 		} else if (plen < GMAC_BLOCK_LEN) {
1617 			/*
1618 			 * A request that sends part of the first AES
1619 			 * block will only have AAD.
1620 			 */
1621 			KASSERT(header_len != 0,
1622 			    ("%s: partial GHASH with no auth", __func__));
1623 			auth_start = 0;
1624 			auth_stop = 0;
1625 			auth_insert = 0;
1626 		} else {
1627 			auth_start = cipher_start;
1628 			auth_stop = plen % GMAC_BLOCK_LEN;
1629 			auth_insert = 0;
1630 		}
1631 
1632 		sec_pdu->pldlen = htobe32(aad_stop + AES_BLOCK_LEN + plen +
1633 		    (last_ghash_frag ? AES_GMAC_HASH_LEN : 0));
1634 
1635 		/*
1636 		 * For short records, the TLS header is treated as
1637 		 * header data.
1638 		 */
1639 		crypto_hdr_len += header_len;
1640 
1641 		/* These two flits are actually a CPL_TLS_TX_SCMD_FMT. */
1642 		sec_pdu->seqno_numivs = tlsp->scmd0_partial.seqno_numivs;
1643 		sec_pdu->ivgen_hdrlen = tlsp->scmd0_partial.ivgen_hdrlen;
1644 		if (last_ghash_frag)
1645 			sec_pdu->ivgen_hdrlen |= V_SCMD_LAST_FRAG(1);
1646 		else
1647 			sec_pdu->ivgen_hdrlen |= V_SCMD_MORE_FRAGS(1);
1648 		sec_pdu->ivgen_hdrlen = htobe32(sec_pdu->ivgen_hdrlen |
1649 		    V_SCMD_HDR_LEN(crypto_hdr_len));
1650 
1651 		txq->kern_tls_partial_ghash++;
1652 	} else if (short_record) {
1653 		/*
1654 		 * For short records without a partial hash, the TLS
1655 		 * header is counted as header data in SCMD0 and the
1656 		 * IV is next, followed by a cipher region for the
1657 		 * payload.
1658 		 */
1659 		aad_start = 0;
1660 		aad_stop = 0;
1661 		iv_offset = 1;
1662 		auth_start = 0;
1663 		auth_stop = 0;
1664 		auth_insert = 0;
1665 		cipher_start = AES_BLOCK_LEN + 1;
1666 		cipher_stop = 0;
1667 
1668 		sec_pdu->pldlen = htobe32(AES_BLOCK_LEN + plen);
1669 
1670 		/*
1671 		 * For short records, the TLS header is treated as
1672 		 * header data.
1673 		 */
1674 		crypto_hdr_len += header_len;
1675 
1676 		/* These two flits are actually a CPL_TLS_TX_SCMD_FMT. */
1677 		sec_pdu->seqno_numivs = tlsp->scmd0_short.seqno_numivs;
1678 		sec_pdu->ivgen_hdrlen = htobe32(
1679 		    tlsp->scmd0_short.ivgen_hdrlen |
1680 		    V_SCMD_HDR_LEN(crypto_hdr_len));
1681 
1682 		txq->kern_tls_short++;
1683 	} else {
1684 		/*
1685 		 * AAD is TLS header.  IV is after AAD for TLS < 1.3.
1686 		 * For TLS 1.3, a placeholder for the TLS sequence
1687 		 * number is provided as an IV before the AAD.  The
1688 		 * cipher region starts after the AAD and IV.  See
1689 		 * comments in ccr_authenc() and ccr_gmac() in
1690 		 * t4_crypto.c regarding cipher and auth start/stop
1691 		 * values.
1692 		 */
1693 		if (tlsp->tls13) {
1694 			iv_offset = 1;
1695 			aad_start = 1 + sizeof(uint64_t);
1696 			aad_stop = sizeof(uint64_t) + TLS_HEADER_LENGTH;
1697 			cipher_start = aad_stop + 1;
1698 		} else {
1699 			aad_start = 1;
1700 			aad_stop = TLS_HEADER_LENGTH;
1701 			iv_offset = TLS_HEADER_LENGTH + 1;
1702 			cipher_start = m_tls->m_epg_hdrlen + 1;
1703 		}
1704 		if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_GCM) {
1705 			cipher_stop = 0;
1706 			auth_start = cipher_start;
1707 			auth_stop = 0;
1708 			auth_insert = 0;
1709 		} else {
1710 			cipher_stop = 0;
1711 			auth_start = cipher_start;
1712 			auth_stop = 0;
1713 			auth_insert = 0;
1714 		}
1715 
1716 		sec_pdu->pldlen = htobe32((tlsp->tls13 ? sizeof(uint64_t) : 0) +
1717 		    m_tls->m_epg_hdrlen + plen);
1718 
1719 		/* These two flits are actually a CPL_TLS_TX_SCMD_FMT. */
1720 		sec_pdu->seqno_numivs = tlsp->scmd0.seqno_numivs;
1721 		sec_pdu->ivgen_hdrlen = htobe32(tlsp->scmd0.ivgen_hdrlen |
1722 		    V_SCMD_HDR_LEN(crypto_hdr_len));
1723 
1724 		if (split_mode)
1725 			txq->kern_tls_partial++;
1726 		else
1727 			txq->kern_tls_full++;
1728 	}
1729 	sec_pdu->op_ivinsrtofst = htobe32(
1730 	    V_CPL_TX_SEC_PDU_OPCODE(CPL_TX_SEC_PDU) |
1731 	    V_CPL_TX_SEC_PDU_CPLLEN(cpl_len / 8) |
1732 	    V_CPL_TX_SEC_PDU_PLACEHOLDER(send_partial_ghash ? 1 : 0) |
1733 	    V_CPL_TX_SEC_PDU_IVINSRTOFST(iv_offset));
1734 	sec_pdu->aadstart_cipherstop_hi = htobe32(
1735 	    V_CPL_TX_SEC_PDU_AADSTART(aad_start) |
1736 	    V_CPL_TX_SEC_PDU_AADSTOP(aad_stop) |
1737 	    V_CPL_TX_SEC_PDU_CIPHERSTART(cipher_start) |
1738 	    V_CPL_TX_SEC_PDU_CIPHERSTOP_HI(cipher_stop >> 4));
1739 	sec_pdu->cipherstop_lo_authinsert = htobe32(
1740 	    V_CPL_TX_SEC_PDU_CIPHERSTOP_LO(cipher_stop & 0xf) |
1741 	    V_CPL_TX_SEC_PDU_AUTHSTART(auth_start) |
1742 	    V_CPL_TX_SEC_PDU_AUTHSTOP(auth_stop) |
1743 	    V_CPL_TX_SEC_PDU_AUTHINSERT(auth_insert));
1744 
1745 	if (send_partial_ghash && last_ghash_frag) {
1746 		uint64_t aad_len, cipher_len;
1747 
1748 		aad_len = ktls_gcm_aad_len(tlsp);
1749 		cipher_len = rlen - (m_tls->m_epg_hdrlen + AES_GMAC_HASH_LEN);
1750 		sec_pdu->scmd1 = htobe64(aad_len << 44 | cipher_len);
1751 	} else
1752 		sec_pdu->scmd1 = htobe64(m_tls->m_epg_seqno);
1753 
1754 	/* Key context */
1755 	out = (void *)(sec_pdu + 1);
1756 	if (inline_key) {
1757 		memcpy(out, &tlsp->keyctx, tlsp->tx_key_info_size);
1758 		if (send_partial_ghash) {
1759 			struct tls_keyctx *keyctx = (void *)out;
1760 
1761 			keyctx->u.txhdr.ctxlen++;
1762 			keyctx->u.txhdr.dualck_to_txvalid &= ~htobe16(
1763 			    V_KEY_CONTEXT_MK_SIZE(M_KEY_CONTEXT_MK_SIZE));
1764 			keyctx->u.txhdr.dualck_to_txvalid |= htobe16(
1765 			    F_KEY_CONTEXT_OPAD_PRESENT |
1766 			    V_KEY_CONTEXT_MK_SIZE(0));
1767 		}
1768 		out += tlsp->tx_key_info_size;
1769 		if (send_partial_ghash) {
1770 			if (header_len != 0)
1771 				memset(out, 0, AES_GMAC_HASH_LEN);
1772 			else
1773 				memcpy(out, tlsp->ghash, AES_GMAC_HASH_LEN);
1774 			out += AES_GMAC_HASH_LEN;
1775 		}
1776 	} else {
1777 		/* ULPTX_SC_MEMRD to read key context. */
1778 		memrd = (void *)out;
1779 		memrd->cmd_to_len = htobe32(V_ULPTX_CMD(ULP_TX_SC_MEMRD) |
1780 		    V_ULP_TX_SC_MORE(1) |
1781 		    V_ULPTX_LEN16(tlsp->tx_key_info_size >> 4));
1782 		memrd->addr = htobe32(tlsp->tx_key_addr >> 5);
1783 
1784 		/* ULPTX_IDATA for CPL_TX_* and headers. */
1785 		idata = (void *)(memrd + 1);
1786 		idata->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM) |
1787 		    V_ULP_TX_SC_MORE(1));
1788 		idata->len = htobe32(post_key_context_len);
1789 
1790 		out = (void *)(idata + 1);
1791 	}
1792 
1793 	/* CPL_RX_PHYS_DSGL */
1794 	if (split_mode) {
1795 		crypto_hdr_len = sizeof(struct cpl_tx_pkt_core);
1796 		if (need_lso)
1797 			crypto_hdr_len += sizeof(struct cpl_tx_pkt_lso_core);
1798 		crypto_hdr_len += m->m_len;
1799 		out = write_split_mode_rx_phys(out, m, m_tls, crypto_hdr_len,
1800 		    leading_waste, trailing_waste);
1801 	}
1802 
1803 	/* CPL_TX_PKT_LSO */
1804 	if (need_lso) {
1805 		out = write_lso_cpl(out, m, mss, eh_type, m->m_len +
1806 		    m_tls->m_len);
1807 		txq->tso_wrs++;
1808 	}
1809 
1810 	/* CPL_TX_PKT_XT */
1811 	tx_pkt = (void *)out;
1812 	tx_pkt->ctrl0 = txq->cpl_ctrl0;
1813 	tx_pkt->ctrl1 = htobe64(pkt_ctrl1(txq, m, eh_type));
1814 	tx_pkt->pack = 0;
1815 	tx_pkt->len = htobe16(m->m_len + m_tls->m_len);
1816 
1817 	/* Copy the packet headers. */
1818 	out = (void *)(tx_pkt + 1);
1819 	memcpy(out, mtod(m, char *), m->m_len);
1820 
1821 	/* Modify the packet length in the IP header. */
1822 	ip_len = m->m_len + m_tls->m_len - m->m_pkthdr.l2hlen;
1823 	if (eh_type == ETHERTYPE_IP) {
1824 		ip = (void *)(out + m->m_pkthdr.l2hlen);
1825 		be16enc(&ip->ip_len, ip_len);
1826 	} else {
1827 		ip6 = (void *)(out + m->m_pkthdr.l2hlen);
1828 		be16enc(&ip6->ip6_plen, ip_len - sizeof(*ip6));
1829 	}
1830 
1831 	/* Modify sequence number and flags in TCP header. */
1832 	newtcp = (void *)(out + m->m_pkthdr.l2hlen + m->m_pkthdr.l3hlen);
1833 	be32enc(&newtcp->th_seq, tcp_seqno);
1834 	if (!last_wr)
1835 		newtcp->th_flags = tcp->th_flags & ~(TH_PUSH | TH_FIN);
1836 	out += m->m_len;
1837 
1838 	/*
1839 	 * Insert placeholder for sequence number as IV for TLS 1.3
1840 	 * non-short records.
1841 	 */
1842 	if (tlsp->tls13 && !short_record) {
1843 		memset(out, 0, sizeof(uint64_t));
1844 		out += sizeof(uint64_t);
1845 	}
1846 
1847 	/* Populate the TLS header */
1848 	memcpy(out, m_tls->m_epg_hdr, header_len);
1849 	out += header_len;
1850 
1851 	/* TLS AAD for short records using a partial hash. */
1852 	if (send_partial_ghash && header_len != 0) {
1853 		if (tlsp->tls13) {
1854 			struct tls_aead_data_13 ad;
1855 
1856 			ad.type = hdr->tls_type;
1857 			ad.tls_vmajor = hdr->tls_vmajor;
1858 			ad.tls_vminor = hdr->tls_vminor;
1859 			ad.tls_length = hdr->tls_length;
1860 			memcpy(out, &ad, sizeof(ad));
1861 			out += sizeof(ad);
1862 		} else {
1863 			struct tls_aead_data ad;
1864 			uint16_t cipher_len;
1865 
1866 			cipher_len = rlen -
1867 			    (m_tls->m_epg_hdrlen + AES_GMAC_HASH_LEN);
1868 			ad.seq = htobe64(m_tls->m_epg_seqno);
1869 			ad.type = hdr->tls_type;
1870 			ad.tls_vmajor = hdr->tls_vmajor;
1871 			ad.tls_vminor = hdr->tls_vminor;
1872 			ad.tls_length = htons(cipher_len);
1873 			memcpy(out, &ad, sizeof(ad));
1874 			out += sizeof(ad);
1875 		}
1876 	}
1877 
1878 	/* AES IV for a short record. */
1879 	if (short_record) {
1880 		iv = out;
1881 		if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_GCM) {
1882 			memcpy(iv, tlsp->keyctx.u.txhdr.txsalt, SALT_SIZE);
1883 			if (tlsp->tls13) {
1884 				uint64_t value;
1885 
1886 				value = be64dec(tlsp->keyctx.u.txhdr.txsalt +
1887 				    4);
1888 				value ^= m_tls->m_epg_seqno;
1889 				be64enc(iv + 4, value);
1890 			} else
1891 				memcpy(iv + 4, hdr + 1, 8);
1892 			if (send_partial_ghash)
1893 				be32enc(iv + 12, 1 + offset / AES_BLOCK_LEN);
1894 			else
1895 				be32enc(iv + 12, 2 + offset / AES_BLOCK_LEN);
1896 		} else
1897 			memcpy(iv, hdr + 1, AES_BLOCK_LEN);
1898 		out += AES_BLOCK_LEN;
1899 	}
1900 
1901 	if (imm_len % 16 != 0) {
1902 		if (imm_len % 8 != 0) {
1903 			/* Zero pad to an 8-byte boundary. */
1904 			memset(out, 0, 8 - (imm_len % 8));
1905 			out += 8 - (imm_len % 8);
1906 		}
1907 
1908 		/*
1909 		 * Insert a ULP_TX_SC_NOOP if needed so the SGL is
1910 		 * 16-byte aligned.
1911 		 */
1912 		if (imm_len % 16 <= 8) {
1913 			idata = (void *)out;
1914 			idata->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_NOOP) |
1915 			    V_ULP_TX_SC_MORE(1));
1916 			idata->len = htobe32(0);
1917 			out = (void *)(idata + 1);
1918 		}
1919 	}
1920 
1921 	/* SGL for record payload */
1922 	sglist_reset(txq->gl);
1923 	if (sglist_append_mbuf_epg(txq->gl, m_tls, m_tls->m_epg_hdrlen + offset,
1924 	    plen) != 0) {
1925 #ifdef INVARIANTS
1926 		panic("%s: failed to append sglist", __func__);
1927 #endif
1928 	}
1929 	if (last_ghash_frag) {
1930 		if (sglist_append_phys(txq->gl, zero_buffer_pa,
1931 		    AES_GMAC_HASH_LEN) != 0) {
1932 #ifdef INVARIANTS
1933 			panic("%s: failed to append sglist (2)", __func__);
1934 #endif
1935 		}
1936 	}
1937 	out = write_gl_to_buf(txq->gl, out);
1938 
1939 	if (request_ghash) {
1940 		/* ULP_TXPKT */
1941 		txpkt = (void *)out;
1942 		txpkt->cmd_dest = htobe32(V_ULPTX_CMD(ULP_TX_PKT) |
1943 		    V_ULP_TXPKT_DATAMODIFY(0) |
1944 		    V_T7_ULP_TXPKT_CHANNELID(tlsp->vi->pi->port_id) |
1945 		    V_ULP_TXPKT_DEST(0) |
1946 		    V_ULP_TXPKT_FID(txq->eq.cntxt_id) | V_ULP_TXPKT_RO(1));
1947 		txpkt->len = htobe32(howmany(txpkt_lens[1], 16));
1948 
1949 		/* ULPTX_IDATA sub-command */
1950 		idata = (void *)(txpkt + 1);
1951 		idata->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM) |
1952 		    V_ULP_TX_SC_MORE(0));
1953 		idata->len = sizeof(struct cpl_tx_tls_ack);
1954 		idata->len += sizeof(struct rss_header) +
1955 		    sizeof(struct cpl_fw6_pld);
1956 		idata->len += AES_GMAC_HASH_LEN;
1957 		idata->len = htobe32(idata->len);
1958 		out = (void *)(idata + 1);
1959 
1960 		/* CPL_TX_TLS_ACK */
1961 		out = write_tx_tls_ack(out, tlsp->rx_chid, AES_GMAC_HASH_LEN,
1962 		    ghash_lcb);
1963 
1964 		/* CPL_FW6_PLD */
1965 		out = write_fw6_pld(out, tlsp->rx_chid, tlsp->rx_qid,
1966 		    AES_GMAC_HASH_LEN, (uintptr_t)tlsp | CPL_FW6_COOKIE_KTLS);
1967 
1968 		/* Space for partial hash. */
1969 		memset(out, 0, AES_GMAC_HASH_LEN);
1970 		out += AES_GMAC_HASH_LEN;
1971 
1972 		tlsp->ghash_pending = true;
1973 		tlsp->ghash_valid = false;
1974 		tlsp->ghash_lcb = ghash_lcb;
1975 		if (last_ghash_frag)
1976 			tlsp->ghash_offset = offset + plen;
1977 		else
1978 			tlsp->ghash_offset = rounddown2(offset + plen,
1979 			    GMAC_BLOCK_LEN);
1980 #ifdef VERBOSE_TRACES
1981 		CTR(KTR_CXGBE, "%s: %p requesting GHASH for offset %u",
1982 		    __func__, tlsp, tlsp->ghash_offset);
1983 #endif
1984 		m_snd_tag_ref(&tlsp->com);
1985 
1986 		txq->kern_tls_ghash_requested++;
1987 	}
1988 
1989 	if (using_scratch) {
1990 		out = dst;
1991 		copy_to_txd(eq, txq->ss, &out, wr_len);
1992 	}
1993 
1994 	txq->kern_tls_records++;
1995 	txq->kern_tls_octets += m_tls->m_len;
1996 	if (split_mode) {
1997 		txq->kern_tls_splitmode++;
1998 		txq->kern_tls_waste += leading_waste + trailing_waste;
1999 	}
2000 	if (need_lso)
2001 		txq->kern_tls_lso++;
2002 
2003 	txsd = &txq->sdesc[pidx];
2004 	if (last_wr)
2005 		txsd->m = m;
2006 	else
2007 		txsd->m = NULL;
2008 	txsd->desc_used = ndesc;
2009 
2010 	return (ndesc);
2011 }
2012 
2013 int
2014 t7_ktls_write_wr(struct sge_txq *txq, void *dst, struct mbuf *m,
2015     u_int available)
2016 {
2017 	struct sge_eq *eq = &txq->eq;
2018 	struct tlspcb *tlsp;
2019 	struct tcphdr *tcp;
2020 	struct mbuf *m_tls;
2021 	struct ether_header *eh;
2022 	tcp_seq tcp_seqno;
2023 	u_int ndesc, pidx, totdesc;
2024 	uint16_t eh_type, mss;
2025 
2026 	TXQ_LOCK_ASSERT_OWNED(txq);
2027 	M_ASSERTPKTHDR(m);
2028 	MPASS(m->m_pkthdr.snd_tag != NULL);
2029 	tlsp = mst_to_tls(m->m_pkthdr.snd_tag);
2030 
2031 	totdesc = 0;
2032 	eh = mtod(m, struct ether_header *);
2033 	eh_type = ntohs(eh->ether_type);
2034 	if (eh_type == ETHERTYPE_VLAN) {
2035 		struct ether_vlan_header *evh = (void *)eh;
2036 
2037 		eh_type = ntohs(evh->evl_proto);
2038 	}
2039 
2040 	tcp = (struct tcphdr *)((char *)eh + m->m_pkthdr.l2hlen +
2041 	    m->m_pkthdr.l3hlen);
2042 	pidx = eq->pidx;
2043 
2044 	/* Determine MSS. */
2045 	if (m->m_pkthdr.csum_flags & CSUM_TSO) {
2046 		mss = m->m_pkthdr.tso_segsz;
2047 		tlsp->prev_mss = mss;
2048 	} else if (tlsp->prev_mss != 0)
2049 		mss = tlsp->prev_mss;
2050 	else
2051 		mss = if_getmtu(tlsp->vi->ifp) -
2052 		    (m->m_pkthdr.l3hlen + m->m_pkthdr.l4hlen);
2053 
2054 	/* Fetch the starting TCP sequence number for this chain. */
2055 	tcp_seqno = ntohl(tcp->th_seq);
2056 #ifdef VERBOSE_TRACES
2057 	CTR(KTR_CXGBE, "%s: pkt len %d TCP seq %u", __func__, m->m_pkthdr.len,
2058 	    tcp_seqno);
2059 #endif
2060 	KASSERT(!tlsp->ghash_pending, ("%s: GHASH pending for send", __func__));
2061 
2062 	/*
2063 	 * Iterate over each TLS record constructing a work request
2064 	 * for that record.
2065 	 */
2066 	for (m_tls = m->m_next; m_tls != NULL; m_tls = m_tls->m_next) {
2067 		MPASS(m_tls->m_flags & M_EXTPG);
2068 
2069 		ndesc = ktls_write_tls_wr(tlsp, txq, dst, m, tcp, m_tls,
2070 		    available - totdesc, tcp_seqno, pidx, eh_type, mss);
2071 		totdesc += ndesc;
2072 		IDXINCR(pidx, ndesc, eq->sidx);
2073 		dst = &eq->desc[pidx];
2074 
2075 		tcp_seqno += m_tls->m_len;
2076 	}
2077 
2078 	/*
2079 	 * Queue another packet if this was a GCM request that didn't
2080 	 * request a GHASH response.
2081 	 */
2082 	if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_GCM && !tlsp->ghash_pending)
2083 		ktls_queue_next_packet(tlsp, true);
2084 
2085 	MPASS(totdesc <= available);
2086 	return (totdesc);
2087 }
2088 
2089 static void
2090 t7_tls_tag_free(struct m_snd_tag *mst)
2091 {
2092 	struct adapter *sc;
2093 	struct tlspcb *tlsp;
2094 
2095 	tlsp = mst_to_tls(mst);
2096 	sc = tlsp->sc;
2097 
2098 	CTR2(KTR_CXGBE, "%s: %p", __func__, tlsp);
2099 
2100 	if (tlsp->tx_key_addr >= 0)
2101 		t4_free_tls_keyid(sc, tlsp->tx_key_addr);
2102 
2103 	KASSERT(mbufq_len(&tlsp->pending_mbufs) == 0,
2104 	    ("%s: pending mbufs", __func__));
2105 
2106 	zfree(tlsp, M_CXGBE);
2107 }
2108 
2109 static int
2110 ktls_fw6_pld(struct sge_iq *iq, const struct rss_header *rss,
2111     struct mbuf *m)
2112 {
2113 	const struct cpl_fw6_pld *cpl;
2114 	struct tlspcb *tlsp;
2115 	const void *ghash;
2116 
2117 	if (m != NULL)
2118 		cpl = mtod(m, const void *);
2119 	else
2120 		cpl = (const void *)(rss + 1);
2121 
2122 	tlsp = (struct tlspcb *)(uintptr_t)CPL_FW6_PLD_COOKIE(cpl);
2123 	KASSERT(cpl->data[0] == 0, ("%s: error status returned", __func__));
2124 
2125 	TXQ_LOCK(tlsp->txq);
2126 #ifdef VERBOSE_TRACES
2127 	CTR(KTR_CXGBE, "%s: %p received GHASH for offset %u%s", __func__, tlsp,
2128 	    tlsp->ghash_offset, tlsp->ghash_lcb ? " in LCB" : "");
2129 #endif
2130 	if (tlsp->ghash_lcb)
2131 		ghash = &cpl->data[2];
2132 	else
2133 		ghash = cpl + 1;
2134 	memcpy(tlsp->ghash, ghash, AES_GMAC_HASH_LEN);
2135 	tlsp->ghash_valid = true;
2136 	tlsp->ghash_pending = false;
2137 	tlsp->txq->kern_tls_ghash_received++;
2138 
2139 	ktls_queue_next_packet(tlsp, false);
2140 	TXQ_UNLOCK(tlsp->txq);
2141 
2142 	m_snd_tag_rele(&tlsp->com);
2143 	m_freem(m);
2144 	return (0);
2145 }
2146 
2147 void
2148 t7_ktls_modload(void)
2149 {
2150 	zero_buffer = malloc_aligned(AES_GMAC_HASH_LEN, AES_GMAC_HASH_LEN,
2151 	    M_CXGBE, M_ZERO | M_WAITOK);
2152 	zero_buffer_pa = vtophys(zero_buffer);
2153 	t4_register_shared_cpl_handler(CPL_FW6_PLD, ktls_fw6_pld,
2154 	    CPL_FW6_COOKIE_KTLS);
2155 }
2156 
2157 void
2158 t7_ktls_modunload(void)
2159 {
2160 	free(zero_buffer, M_CXGBE);
2161 	t4_register_shared_cpl_handler(CPL_FW6_PLD, NULL, CPL_FW6_COOKIE_KTLS);
2162 }
2163 
2164 #else
2165 
2166 int
2167 t7_tls_tag_alloc(struct ifnet *ifp, union if_snd_tag_alloc_params *params,
2168     struct m_snd_tag **pt)
2169 {
2170 	return (ENXIO);
2171 }
2172 
2173 int
2174 t7_ktls_parse_pkt(struct mbuf *m)
2175 {
2176 	return (EINVAL);
2177 }
2178 
2179 int
2180 t7_ktls_write_wr(struct sge_txq *txq, void *dst, struct mbuf *m,
2181     u_int available)
2182 {
2183 	panic("can't happen");
2184 }
2185 
2186 void
2187 t7_ktls_modload(void)
2188 {
2189 }
2190 
2191 void
2192 t7_ktls_modunload(void)
2193 {
2194 }
2195 
2196 #endif
2197