xref: /freebsd/sys/dev/cxgbe/crypto/t7_kern_tls.c (revision a6ae6090bb3dc14eda750aa53650fccf4c0bf818)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2025 Chelsio Communications
5  * Written by: John Baldwin <jhb@FreeBSD.org>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include "opt_inet.h"
30 #include "opt_inet6.h"
31 #include "opt_kern_tls.h"
32 
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35 
36 #include <sys/param.h>
37 #include <sys/ktr.h>
38 #include <sys/ktls.h>
39 #include <sys/sglist.h>
40 #include <sys/socket.h>
41 #include <sys/socketvar.h>
42 #include <sys/sockbuf.h>
43 #include <netinet/in.h>
44 #include <netinet/in_pcb.h>
45 #include <netinet/ip.h>
46 #include <netinet/ip6.h>
47 #include <netinet/tcp_var.h>
48 #include <opencrypto/cryptodev.h>
49 #include <opencrypto/xform.h>
50 #include <vm/vm.h>
51 #include <vm/pmap.h>
52 
53 #include "common/common.h"
54 #include "common/t4_regs.h"
55 #include "common/t4_regs_values.h"
56 #include "common/t4_tcb.h"
57 #include "t4_l2t.h"
58 #include "t4_clip.h"
59 #include "t4_mp_ring.h"
60 #include "crypto/t4_crypto.h"
61 
62 #if defined(INET) || defined(INET6)
63 
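/* Length of the TLS record header (type, version, length). */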
64 #define TLS_HEADER_LENGTH		5
65 
66 struct tls_scmd {
67 	__be32 seqno_numivs;
68 	__be32 ivgen_hdrlen;
69 };
70 
71 struct tlspcb {
72 	struct m_snd_tag com;
73 	struct vi_info *vi;	/* virtual interface */
74 	struct adapter *sc;
75 	struct sge_txq *txq;
76 
77 	int tx_key_addr;
78 	bool inline_key;
79 	bool tls13;
80 	unsigned char enc_mode;
81 
82 	struct tls_scmd scmd0;
83 	struct tls_scmd scmd0_partial;
84 	struct tls_scmd scmd0_short;
85 
86 	unsigned int tx_key_info_size;
87 
88 	uint16_t prev_mss;
89 
90 	/* Fields used for GCM records using GHASH state. */
91 	uint16_t ghash_offset;
92 	uint64_t ghash_tls_seqno;
93 	char ghash[AES_GMAC_HASH_LEN];
94 	bool ghash_valid;
95 	bool ghash_pending;
96 	bool ghash_lcb;
97 	bool queue_mbufs;
98 	uint8_t rx_chid;
99 	uint16_t rx_qid;
100 	struct mbufq pending_mbufs;
101 
102 	/*
103 	 * Only used outside of setup and teardown when using inline
104 	 * keys or for partial GCM mode.
105 	 */
106 	struct tls_keyctx keyctx;
107 };
108 
109 static void t7_tls_tag_free(struct m_snd_tag *mst);
110 static int ktls_setup_keys(struct tlspcb *tlsp,
111     const struct ktls_session *tls, struct sge_txq *txq);
112 
113 static void *zero_buffer;
114 static vm_paddr_t zero_buffer_pa;
115 
116 static const struct if_snd_tag_sw t7_tls_tag_sw = {
117 	.snd_tag_free = t7_tls_tag_free,
118 	.type = IF_SND_TAG_TYPE_TLS
119 };
120 
121 static inline struct tlspcb *
122 mst_to_tls(struct m_snd_tag *t)
123 {
124 	return (__containerof(t, struct tlspcb, com));
125 }
126 
127 static struct tlspcb *
128 alloc_tlspcb(struct ifnet *ifp, struct vi_info *vi, int flags)
129 {
130 	struct port_info *pi = vi->pi;
131 	struct adapter *sc = pi->adapter;
132 	struct tlspcb *tlsp;
133 
134 	tlsp = malloc(sizeof(*tlsp), M_CXGBE, M_ZERO | flags);
135 	if (tlsp == NULL)
136 		return (NULL);
137 
138 	m_snd_tag_init(&tlsp->com, ifp, &t7_tls_tag_sw);
139 	tlsp->vi = vi;
140 	tlsp->sc = sc;
141 	tlsp->tx_key_addr = -1;
142 	tlsp->ghash_offset = -1;
143 	tlsp->rx_chid = pi->rx_chan;
144 	tlsp->rx_qid = -1;
145 	tlsp->txq = NULL;
146 	mbufq_init(&tlsp->pending_mbufs, INT_MAX);
147 
148 	return (tlsp);
149 }
150 
151 int
152 t7_tls_tag_alloc(struct ifnet *ifp, union if_snd_tag_alloc_params *params,
153     struct m_snd_tag **pt)
154 {
155 	const struct ktls_session *tls;
156 	struct tlspcb *tlsp;
157 	struct adapter *sc;
158 	struct vi_info *vi;
159 	struct inpcb *inp;
160 	struct sge_txq *txq;
161 	int error, iv_size, keyid, mac_first, qidx;
162 	uint32_t flowid;
163 
164 	tls = params->tls.tls;
165 
166 	/* TLS 1.1 through TLS 1.3 are currently supported. */
167 	if (tls->params.tls_vmajor != TLS_MAJOR_VER_ONE ||
168 	    tls->params.tls_vminor < TLS_MINOR_VER_ONE ||
169 	    tls->params.tls_vminor > TLS_MINOR_VER_THREE)
170 		return (EPROTONOSUPPORT);
171 
172 	/* Sanity check values in *tls. */
173 	switch (tls->params.cipher_algorithm) {
174 	case CRYPTO_AES_CBC:
175 		/* XXX: Explicitly ignore any provided IV. */
176 		switch (tls->params.cipher_key_len) {
177 		case 128 / 8:
178 		case 192 / 8:
179 		case 256 / 8:
180 			break;
181 		default:
182 			return (EINVAL);
183 		}
184 		switch (tls->params.auth_algorithm) {
185 		case CRYPTO_SHA1_HMAC:
186 		case CRYPTO_SHA2_256_HMAC:
187 		case CRYPTO_SHA2_384_HMAC:
188 			break;
189 		default:
190 			return (EPROTONOSUPPORT);
191 		}
192 		iv_size = AES_BLOCK_LEN;
193 		mac_first = 1;
194 		break;
195 	case CRYPTO_AES_NIST_GCM_16:
196 		switch (tls->params.cipher_key_len) {
197 		case 128 / 8:
198 		case 192 / 8:
199 		case 256 / 8:
200 			break;
201 		default:
202 			return (EINVAL);
203 		}
204 
205 		/*
206 		 * The IV size for TLS 1.2 is the explicit IV in the
207 		 * record header.  For TLS 1.3 it is the size of the
208 		 * sequence number.
209 		 */
210 		iv_size = 8;
211 		mac_first = 0;
212 		break;
213 	default:
214 		return (EPROTONOSUPPORT);
215 	}
216 
217 	vi = if_getsoftc(ifp);
218 	sc = vi->adapter;
219 
220 	tlsp = alloc_tlspcb(ifp, vi, M_WAITOK);
221 
222 	/*
223 	 * Pointers with any of the low cookie bits set can't be
224 	 * stored as the cookie in the CPL_FW6_PLD reply.
225 	 */
226 	if (((uintptr_t)tlsp & CPL_FW6_COOKIE_MASK) != 0) {
227 		error = EINVAL;
228 		goto failed;
229 	}
230 
231 	tlsp->tls13 = tls->params.tls_vminor == TLS_MINOR_VER_THREE;
232 
233 	if (sc->tlst.inline_keys)
234 		keyid = -1;
235 	else
236 		keyid = t4_alloc_tls_keyid(sc);
237 	if (keyid < 0) {
238 		CTR(KTR_CXGBE, "%s: %p using immediate key ctx", __func__,
239 		    tlsp);
240 		tlsp->inline_key = true;
241 	} else {
242 		tlsp->tx_key_addr = keyid;
243 		CTR(KTR_CXGBE, "%s: %p allocated TX key addr %#x", __func__,
244 		    tlsp, tlsp->tx_key_addr);
245 	}
246 
247 	inp = params->tls.inp;
248 	INP_RLOCK(inp);
249 	if (inp->inp_flags & INP_DROPPED) {
250 		INP_RUNLOCK(inp);
251 		error = ECONNRESET;
252 		goto failed;
253 	}
254 
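	/*
	 * Added note: the RX queue selected below is where CPL_FW6_PLD
	 * replies carrying partial GHASH state are steered, and the TX
	 * queue is where every work request for this session (including
	 * the key WR) is enqueued.  Both are derived from the
	 * connection's flowid, falling back to a random value if the
	 * inpcb has no flow hash.
	 */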
255 	if (inp->inp_flowtype != M_HASHTYPE_NONE)
256 		flowid = inp->inp_flowid;
257 	else
258 		flowid = arc4random();
259 	qidx = flowid % vi->nrxq + vi->first_rxq;
260 	tlsp->rx_qid = sc->sge.rxq[qidx].iq.abs_id;
261 	qidx = (flowid % (vi->ntxq - vi->rsrv_noflowq)) + vi->rsrv_noflowq +
262 	    vi->first_txq;
263 	tlsp->txq = txq = &sc->sge.txq[qidx];
264 	INP_RUNLOCK(inp);
265 
266 	error = ktls_setup_keys(tlsp, tls, txq);
267 	if (error)
268 		goto failed;
269 
270 	tlsp->enc_mode = t4_tls_cipher_mode(tls);
271 	tlsp->tx_key_info_size = t4_tls_key_info_size(tls);
272 
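	/*
	 * Added summary: three SCMD templates are precomputed below.
	 * scmd0 is used when encrypting a complete TLS record,
	 * scmd0_short when part of a record is sent without generating
	 * the trailer (AES-GCM sessions drop to AES-CTR for this), and
	 * scmd0_partial when a GCM record is split across requests and
	 * carried forward via saved GHASH state.
	 */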
273 	/* The SCMD fields used when encrypting a full TLS record. */
274 	if (tlsp->tls13)
275 		tlsp->scmd0.seqno_numivs = V_SCMD_SEQ_NO_CTRL(0);
276 	else
277 		tlsp->scmd0.seqno_numivs = V_SCMD_SEQ_NO_CTRL(3);
278 	tlsp->scmd0.seqno_numivs |=
279 	    V_SCMD_PROTO_VERSION(t4_tls_proto_ver(tls)) |
280 	    V_SCMD_ENC_DEC_CTRL(SCMD_ENCDECCTRL_ENCRYPT) |
281 	    V_SCMD_CIPH_AUTH_SEQ_CTRL((mac_first == 0)) |
282 	    V_SCMD_CIPH_MODE(tlsp->enc_mode) |
283 	    V_SCMD_AUTH_MODE(t4_tls_auth_mode(tls)) |
284 	    V_SCMD_HMAC_CTRL(t4_tls_hmac_ctrl(tls)) |
285 	    V_SCMD_IV_SIZE(iv_size / 2) | V_SCMD_NUM_IVS(1);
286 	tlsp->scmd0.seqno_numivs = htobe32(tlsp->scmd0.seqno_numivs);
287 
288 	tlsp->scmd0.ivgen_hdrlen = V_SCMD_IV_GEN_CTRL(0) |
289 	    V_SCMD_TLS_FRAG_ENABLE(0);
290 	if (tlsp->inline_key)
291 		tlsp->scmd0.ivgen_hdrlen |= V_SCMD_KEY_CTX_INLINE(1);
292 
293 	/*
294 	 * The SCMD fields used when encrypting a short TLS record
295 	 * (no trailer and possibly a truncated payload).
296 	 */
297 	tlsp->scmd0_short.seqno_numivs = V_SCMD_SEQ_NO_CTRL(0) |
298 	    V_SCMD_PROTO_VERSION(SCMD_PROTO_VERSION_GENERIC) |
299 	    V_SCMD_ENC_DEC_CTRL(SCMD_ENCDECCTRL_ENCRYPT) |
300 	    V_SCMD_CIPH_AUTH_SEQ_CTRL((mac_first == 0)) |
301 	    V_SCMD_AUTH_MODE(SCMD_AUTH_MODE_NOP) |
302 	    V_SCMD_HMAC_CTRL(SCMD_HMAC_CTRL_NOP) |
303 	    V_SCMD_IV_SIZE(AES_BLOCK_LEN / 2) | V_SCMD_NUM_IVS(0);
304 	if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_GCM)
305 		tlsp->scmd0_short.seqno_numivs |=
306 		    V_SCMD_CIPH_MODE(SCMD_CIPH_MODE_AES_CTR);
307 	else
308 		tlsp->scmd0_short.seqno_numivs |=
309 		    V_SCMD_CIPH_MODE(tlsp->enc_mode);
310 	tlsp->scmd0_short.seqno_numivs =
311 	    htobe32(tlsp->scmd0_short.seqno_numivs);
312 
313 	tlsp->scmd0_short.ivgen_hdrlen = V_SCMD_IV_GEN_CTRL(0) |
314 	    V_SCMD_TLS_FRAG_ENABLE(0) | V_SCMD_AADIVDROP(1);
315 	if (tlsp->inline_key)
316 		tlsp->scmd0_short.ivgen_hdrlen |= V_SCMD_KEY_CTX_INLINE(1);
317 
318 	/*
319 	 * The SCMD fields used when encrypting a short TLS record
320 	 * using a partial GHASH.
321 	 */
322 	tlsp->scmd0_partial.seqno_numivs = V_SCMD_SEQ_NO_CTRL(0) |
323 	    V_SCMD_PROTO_VERSION(SCMD_PROTO_VERSION_GENERIC) |
324 	    V_SCMD_ENC_DEC_CTRL(SCMD_ENCDECCTRL_ENCRYPT) |
325 	    V_SCMD_CIPH_AUTH_SEQ_CTRL((mac_first == 0)) |
326 	    V_SCMD_CIPH_MODE(tlsp->enc_mode) |
327 	    V_SCMD_AUTH_MODE(t4_tls_auth_mode(tls)) |
328 	    V_SCMD_HMAC_CTRL(t4_tls_hmac_ctrl(tls)) |
329 	    V_SCMD_IV_SIZE(AES_BLOCK_LEN / 2) | V_SCMD_NUM_IVS(1);
330 	tlsp->scmd0_partial.seqno_numivs =
331 	    htobe32(tlsp->scmd0_partial.seqno_numivs);
332 
333 	tlsp->scmd0_partial.ivgen_hdrlen = V_SCMD_IV_GEN_CTRL(0) |
334 	    V_SCMD_TLS_FRAG_ENABLE(0) | V_SCMD_AADIVDROP(1) |
335 	    V_SCMD_KEY_CTX_INLINE(1);
336 
337 	TXQ_LOCK(txq);
338 	if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_GCM)
339 		txq->kern_tls_gcm++;
340 	else
341 		txq->kern_tls_cbc++;
342 	TXQ_UNLOCK(txq);
343 	*pt = &tlsp->com;
344 	return (0);
345 
346 failed:
347 	m_snd_tag_rele(&tlsp->com);
348 	return (error);
349 }
350 
351 static int
352 ktls_setup_keys(struct tlspcb *tlsp, const struct ktls_session *tls,
353     struct sge_txq *txq)
354 {
355 	struct tls_key_req *kwr;
356 	struct tls_keyctx *kctx;
357 	void *items[1];
358 	struct mbuf *m;
359 	int error;
360 
361 	/*
362 	 * Store the salt and keys in the key context.  For
363 	 * connections with an inline key, this key context is passed
364 	 * as immediate data in each work request.  For connections
365 	 * storing the key in DDR, a work request is used to store a
366 	 * copy of the key context in DDR.
367 	 */
368 	t4_tls_key_ctx(tls, KTLS_TX, &tlsp->keyctx);
369 	if (tlsp->inline_key)
370 		return (0);
371 
372 	/* Populate key work request. */
373 	m = alloc_wr_mbuf(TLS_KEY_WR_SZ, M_NOWAIT);
374 	if (m == NULL) {
375 		CTR(KTR_CXGBE, "%s: %p failed to alloc WR mbuf", __func__,
376 		    tlsp);
377 		return (ENOMEM);
378 	}
379 	m->m_pkthdr.snd_tag = m_snd_tag_ref(&tlsp->com);
380 	m->m_pkthdr.csum_flags |= CSUM_SND_TAG;
381 	kwr = mtod(m, void *);
382 	memset(kwr, 0, TLS_KEY_WR_SZ);
383 
384 	t4_write_tlskey_wr(tls, KTLS_TX, 0, 0, tlsp->tx_key_addr, kwr);
385 	kctx = (struct tls_keyctx *)(kwr + 1);
386 	memcpy(kctx, &tlsp->keyctx, sizeof(*kctx));
387 
388 	/*
389 	 * Place the key work request in the transmit queue.  It
390 	 * should be sent to the NIC before any TLS packets using this
391 	 * session.
392 	 */
393 	items[0] = m;
394 	error = mp_ring_enqueue(txq->r, items, 1, 1);
395 	if (error)
396 		m_free(m);
397 	else
398 		CTR(KTR_CXGBE, "%s: %p sent key WR", __func__, tlsp);
399 	return (error);
400 }
401 
402 static u_int
403 ktls_base_wr_size(struct tlspcb *tlsp, bool inline_key)
404 {
405 	u_int wr_len;
406 
407 	wr_len = sizeof(struct fw_ulptx_wr);	// 16
408 	wr_len += sizeof(struct ulp_txpkt);	// 8
409 	wr_len += sizeof(struct ulptx_idata);	// 8
410 	wr_len += sizeof(struct cpl_tx_sec_pdu);// 32
411 	if (inline_key)
412 		wr_len += tlsp->tx_key_info_size;
413 	else {
414 		wr_len += sizeof(struct ulptx_sc_memrd);// 8
415 		wr_len += sizeof(struct ulptx_idata);	// 8
416 	}
417 	/* SplitMode CPL_RX_PHYS_DSGL here if needed. */
418 	/* CPL_TX_*_LSO here if needed. */
419 	wr_len += sizeof(struct cpl_tx_pkt_core);// 16
420 	return (wr_len);
421 }
422 
423 static u_int
424 ktls_sgl_size(u_int nsegs)
425 {
426 	u_int wr_len;
427 
428 	/* First segment is part of ulptx_sgl. */
429 	nsegs--;
430 
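	/*
	 * Added note: the remaining segments are packed into
	 * ulptx_sge_pair entries, 3 flits (24 bytes) per pair of
	 * segments, with an odd trailing segment consuming 2 flits.
	 * E.g. nsegs = 3 works out to 16 + 24 = 40 bytes.
	 */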
431 	wr_len = sizeof(struct ulptx_sgl);
432 	wr_len += 8 * ((3 * nsegs) / 2 + (nsegs & 1));
433 	return (wr_len);
434 }
435 
436 /*
437  * A request that doesn't need to generate the TLS trailer is a short
438  * record.  For these requests, part of the TLS record payload is
439  * encrypted without invoking the MAC.
440  *
441  * Returns true if this record should be sent as a short record.  In
442  * either case, the remaining outputs describe how much of the
443  * TLS record to send as input to the crypto block and the amount of
444  * crypto output to trim via SplitMode:
445  *
446  * *header_len - Number of bytes of TLS header to pass as immediate
447  *               data
448  *
449  * *offset - Start offset of TLS record payload to pass as DSGL data
450  *
451  * *plen - Length of TLS record payload to pass as DSGL data
452  *
453  * *leading_waste - amount of non-packet-header bytes to drop at the
454  *                  start of the crypto output
455  *
456  * *trailing_waste - amount of crypto output to drop from the end
457  * *trailing_waste - amount of crypto output to drop from the end
 */
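/*
 * Illustrative example (added, assumes short records are enabled): for
 * a TLS 1.2 AES-GCM record with a 1000-byte payload (13-byte header,
 * 16-byte trailer, rlen = 1029), an mbuf covering record bytes
 * 500-899 yields a short record with *header_len = 0, *offset = 480
 * (rounded down to an AES block), *plen = 407, *leading_waste = 7 and
 * *trailing_waste = 0.
 */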
458 static bool
459 ktls_is_short_record(struct tlspcb *tlsp, struct mbuf *m_tls, u_int tlen,
460     u_int rlen, u_int *header_len, u_int *offset, u_int *plen,
461     u_int *leading_waste, u_int *trailing_waste, bool send_partial_ghash,
462     bool request_ghash)
463 {
464 	u_int new_tlen, trailer_len;
465 
466 	MPASS(tlen > m_tls->m_epg_hdrlen);
467 
468 	/*
469 	 * For TLS 1.3 treat the inner record type stored as the first
470 	 * byte of the trailer as part of the payload rather than part
471 	 * of the trailer.
472 	 */
473 	trailer_len = m_tls->m_epg_trllen;
474 	if (tlsp->tls13)
475 		trailer_len--;
476 
477 	/*
478 	 * Default to sending the full record as input to the crypto
479 	 * engine and relying on SplitMode to drop any waste.
480 	 */
481 	*header_len = m_tls->m_epg_hdrlen;
482 	*offset = 0;
483 	*plen = rlen - (m_tls->m_epg_hdrlen + trailer_len);
484 	*leading_waste = mtod(m_tls, vm_offset_t);
485 	*trailing_waste = rlen - tlen;
486 	if (!tlsp->sc->tlst.short_records)
487 		return (false);
488 
489 	if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_CBC) {
490 		/*
491 		 * For AES-CBC we have to send input from the start of
492 		 * the TLS record payload that is a multiple of the
493 		 * block size.  new_tlen rounds up tlen to the end of
494 		 * the containing AES block.  If this last block
495 		 * overlaps with the trailer, send the full record to
496 		 * generate the MAC.
497 		 */
498 		new_tlen = TLS_HEADER_LENGTH +
499 		    roundup2(tlen - TLS_HEADER_LENGTH, AES_BLOCK_LEN);
500 		if (rlen - new_tlen < trailer_len)
501 			return (false);
502 
503 		*trailing_waste = new_tlen - tlen;
504 		*plen = new_tlen - m_tls->m_epg_hdrlen;
505 	} else {
506 		if (rlen - tlen < trailer_len ||
507 		    (rlen - tlen == trailer_len && request_ghash)) {
508 			/*
509 			 * For AES-GCM we have to send the full record
510 			 * if the end overlaps with the trailer and a
511 			 * partial GHASH isn't being sent.
512 			 */
513 			if (!send_partial_ghash)
514 				return (false);
515 
516 			/*
517 			 * Will need to treat any excess trailer bytes as
518 			 * trailing waste.  *trailing_waste is already
519 			 * correct.
520 			 */
521 		} else {
522 			/*
523 			 * We can use AES-CTR or AES-GCM in partial GHASH
524 			 * mode to encrypt a partial PDU.
525 			 *
526 			 * The last block can be partially encrypted
527 			 * without any trailing waste.
528 			 */
529 			*trailing_waste = 0;
530 			*plen = tlen - m_tls->m_epg_hdrlen;
531 		}
532 
533 		/*
534 		 * If this request starts at the first byte of the
535 		 * payload (so the previous request sent the full TLS
536 		 * header as a tunnel packet) and a partial GHASH is
537 		 * being requested, the full TLS header must be sent
538 		 * as input for the GHASH.
539 		 */
540 		if (mtod(m_tls, vm_offset_t) == m_tls->m_epg_hdrlen &&
541 		    request_ghash)
542 			return (true);
543 
544 		/*
545 		 * In addition, we can minimize leading waste by
546 		 * starting encryption at the start of the closest AES
547 		 * block.
548 		 */
549 		if (mtod(m_tls, vm_offset_t) >= m_tls->m_epg_hdrlen) {
550 			*header_len = 0;
551 			*offset = mtod(m_tls, vm_offset_t) -
552 			    m_tls->m_epg_hdrlen;
553 			if (*offset >= *plen)
554 				*offset = *plen;
555 			else
556 				*offset = rounddown2(*offset, AES_BLOCK_LEN);
557 
558 			/*
559 			 * If the request is just bytes from the trailer,
560 			 * trim the offset to the end of the payload.
561 			 */
562 			*offset = min(*offset, *plen);
563 			*plen -= *offset;
564 			*leading_waste -= (m_tls->m_epg_hdrlen + *offset);
565 		}
566 	}
567 	return (true);
568 }
569 
570 /* Size of the AES-GCM TLS AAD for a given connection. */
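/*
 * Added note: for TLS 1.2 the AAD (struct tls_aead_data) covers the
 * 64-bit record sequence number plus the 5-byte record header, while
 * the TLS 1.3 AAD (struct tls_aead_data_13) is just the record header.
 */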
571 static int
572 ktls_gcm_aad_len(struct tlspcb *tlsp)
573 {
574 	return (tlsp->tls13 ? sizeof(struct tls_aead_data_13) :
575 	    sizeof(struct tls_aead_data));
576 }
577 
578 static int
579 ktls_wr_len(struct tlspcb *tlsp, struct mbuf *m, struct mbuf *m_tls,
580     int *nsegsp)
581 {
582 	const struct tls_record_layer *hdr;
583 	u_int header_len, imm_len, offset, plen, rlen, tlen, wr_len;
584 	u_int leading_waste, trailing_waste;
585 	bool inline_key, last_ghash_frag, request_ghash, send_partial_ghash;
586 	bool short_record;
587 
588 	M_ASSERTEXTPG(m_tls);
589 
590 	/*
591 	 * The relative offset of the last byte to send from the TLS
592 	 * record.
593 	 */
594 	tlen = mtod(m_tls, vm_offset_t) + m_tls->m_len;
595 	if (tlen <= m_tls->m_epg_hdrlen) {
596 		/*
597 		 * For requests that only want to send the TLS header,
598 		 * send a tunnelled packet as immediate data.
599 		 */
600 		wr_len = sizeof(struct fw_eth_tx_pkt_wr) +
601 		    sizeof(struct cpl_tx_pkt_core) +
602 		    roundup2(m->m_len + m_tls->m_len, 16);
603 		if (wr_len > SGE_MAX_WR_LEN) {
604 			CTR(KTR_CXGBE,
605 		    "%s: %p TLS header-only packet too long (len %d)",
606 			    __func__, tlsp, m->m_len + m_tls->m_len);
607 		}
608 
609 		/* This should always be the last TLS record in a chain. */
610 		MPASS(m_tls->m_next == NULL);
611 		*nsegsp = 0;
612 		return (wr_len);
613 	}
614 
615 	hdr = (void *)m_tls->m_epg_hdr;
616 	rlen = TLS_HEADER_LENGTH + ntohs(hdr->tls_length);
617 
618 	/*
619 	 * See if this request might make use of GHASH state.  This
620 	 * errs on the side of over-budgeting the WR size.
621 	 */
622 	last_ghash_frag = false;
623 	request_ghash = false;
624 	send_partial_ghash = false;
625 	if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_GCM &&
626 	    tlsp->sc->tlst.partial_ghash && tlsp->sc->tlst.short_records) {
627 		u_int trailer_len;
628 
629 		trailer_len = m_tls->m_epg_trllen;
630 		if (tlsp->tls13)
631 			trailer_len--;
632 		KASSERT(trailer_len == AES_GMAC_HASH_LEN,
633 		    ("invalid trailer length for AES-GCM"));
634 
635 		/* Is this the start of a TLS record? */
636 		if (mtod(m_tls, vm_offset_t) <= m_tls->m_epg_hdrlen) {
637 			/*
638 			 * Might use partial GHASH if this doesn't
639 			 * send the full record.
640 			 */
641 			if (tlen < rlen) {
642 				if (tlen < (rlen - trailer_len))
643 					send_partial_ghash = true;
644 				request_ghash = true;
645 			}
646 		} else {
647 			send_partial_ghash = true;
648 			if (tlen < rlen)
649 				request_ghash = true;
650 			if (tlen >= (rlen - trailer_len))
651 				last_ghash_frag = true;
652 		}
653 	}
654 
655 	/*
656 	 * Assume not sending partial GHASH for this call to get the
657 	 * larger size.
658 	 */
659 	short_record = ktls_is_short_record(tlsp, m_tls, tlen, rlen,
660 	    &header_len, &offset, &plen, &leading_waste, &trailing_waste,
661 	    false, request_ghash);
662 
663 	inline_key = send_partial_ghash || tlsp->inline_key;
664 
665 	/* Calculate the size of the work request. */
666 	wr_len = ktls_base_wr_size(tlsp, inline_key);
667 
668 	if (send_partial_ghash)
669 		wr_len += AES_GMAC_HASH_LEN;
670 
671 	if (leading_waste != 0 || trailing_waste != 0) {
672 		/*
673 		 * Partial records might require a SplitMode
674 		 * CPL_RX_PHYS_DSGL.
675 		 */
676 		wr_len += sizeof(struct cpl_t7_rx_phys_dsgl);
677 	}
678 
679 	/* Budget for an LSO header even if we don't use it. */
680 	wr_len += sizeof(struct cpl_tx_pkt_lso_core);
681 
682 	/*
683 	 * Headers (including the TLS header) are always sent as
684 	 * immediate data.  Short records include a raw AES IV as
685 	 * immediate data.  TLS 1.3 non-short records include a
686 	 * placeholder for the sequence number as immediate data.
687 	 * Short records using a partial hash may also need to send
688 	 * TLS AAD.  If a partial hash might be sent, assume a short
689 	 * record to get the larger size.
690 	 */
691 	imm_len = m->m_len + header_len;
692 	if (short_record || send_partial_ghash) {
693 		imm_len += AES_BLOCK_LEN;
694 		if (send_partial_ghash && header_len != 0)
695 			imm_len += ktls_gcm_aad_len(tlsp);
696 	} else if (tlsp->tls13)
697 		imm_len += sizeof(uint64_t);
698 	wr_len += roundup2(imm_len, 16);
699 
700 	/*
701 	 * TLS record payload via DSGL.  For partial GCM mode we
702 	 * might need an extra SG entry for a placeholder.
703 	 */
704 	*nsegsp = sglist_count_mbuf_epg(m_tls, m_tls->m_epg_hdrlen + offset,
705 	    plen);
706 	wr_len += ktls_sgl_size(*nsegsp + (last_ghash_frag ? 1 : 0));
707 
708 	if (request_ghash) {
709 		/* AES-GCM records might return a partial hash. */
710 		wr_len += sizeof(struct ulp_txpkt);
711 		wr_len += sizeof(struct ulptx_idata);
712 		wr_len += sizeof(struct cpl_tx_tls_ack);
713 		wr_len += sizeof(struct rss_header) +
714 		    sizeof(struct cpl_fw6_pld);
715 		wr_len += AES_GMAC_HASH_LEN;
716 	}
717 
718 	wr_len = roundup2(wr_len, 16);
719 	return (wr_len);
720 }
721 
722 /* Queue the next pending packet. */
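/*
 * Added note: with enqueue_only set, the mbuf is pushed with
 * mp_ring_enqueue_only(), which (as the name suggests) only enqueues
 * and does not service the ring from this context; otherwise the txq
 * lock is dropped around a normal, possibly blocking,
 * mp_ring_enqueue().
 */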
723 static void
724 ktls_queue_next_packet(struct tlspcb *tlsp, bool enqueue_only)
725 {
726 #ifdef KTR
727 	struct ether_header *eh;
728 	struct tcphdr *tcp;
729 	tcp_seq tcp_seqno;
730 #endif
731 	struct mbuf *m;
732 	void *items[1];
733 	int rc;
734 
735 	TXQ_LOCK_ASSERT_OWNED(tlsp->txq);
736 	KASSERT(tlsp->queue_mbufs, ("%s: mbufs not being queued for %p",
737 	    __func__, tlsp));
738 	for (;;) {
739 		m = mbufq_dequeue(&tlsp->pending_mbufs);
740 		if (m == NULL) {
741 			tlsp->queue_mbufs = false;
742 			return;
743 		}
744 
745 #ifdef KTR
746 		eh = mtod(m, struct ether_header *);
747 		tcp = (struct tcphdr *)((char *)eh + m->m_pkthdr.l2hlen +
748 		    m->m_pkthdr.l3hlen);
749 		tcp_seqno = ntohl(tcp->th_seq);
750 #ifdef VERBOSE_TRACES
751 		CTR(KTR_CXGBE, "%s: pkt len %d TCP seq %u", __func__,
752 		    m->m_pkthdr.len, tcp_seqno);
753 #endif
754 #endif
755 
756 		items[0] = m;
757 		if (enqueue_only)
758 			rc = mp_ring_enqueue_only(tlsp->txq->r, items, 1);
759 		else {
760 			TXQ_UNLOCK(tlsp->txq);
761 			rc = mp_ring_enqueue(tlsp->txq->r, items, 1, 256);
762 			TXQ_LOCK(tlsp->txq);
763 		}
764 		if (__predict_true(rc == 0))
765 			return;
766 
767 		CTR(KTR_CXGBE, "%s: pkt len %d TCP seq %u dropped", __func__,
768 		    m->m_pkthdr.len, tcp_seqno);
769 		m_freem(m);
770 	}
771 }
772 
773 int
774 t7_ktls_parse_pkt(struct mbuf *m)
775 {
776 	struct tlspcb *tlsp;
777 	struct ether_header *eh;
778 	struct ip *ip;
779 	struct ip6_hdr *ip6;
780 	struct tcphdr *tcp;
781 	struct mbuf *m_tls;
782 	void *items[1];
783 	int error, nsegs;
784 	u_int wr_len, tot_len;
785 	uint16_t eh_type;
786 
787 	/*
788 	 * Locate headers in initial mbuf.
789 	 *
790 	 * XXX: This assumes all of the headers are in the initial mbuf.
791 	 * Could perhaps use m_advance() like parse_pkt() if that turns
792 	 * out to not be true.
793 	 */
794 	M_ASSERTPKTHDR(m);
795 	MPASS(m->m_pkthdr.snd_tag != NULL);
796 	tlsp = mst_to_tls(m->m_pkthdr.snd_tag);
797 
798 	if (m->m_len <= sizeof(*eh) + sizeof(*ip)) {
799 		CTR(KTR_CXGBE, "%s: %p header mbuf too short", __func__, tlsp);
800 		return (EINVAL);
801 	}
802 	eh = mtod(m, struct ether_header *);
803 	eh_type = ntohs(eh->ether_type);
804 	if (eh_type == ETHERTYPE_VLAN) {
805 		struct ether_vlan_header *evh = (void *)eh;
806 
807 		eh_type = ntohs(evh->evl_proto);
808 		m->m_pkthdr.l2hlen = sizeof(*evh);
809 	} else
810 		m->m_pkthdr.l2hlen = sizeof(*eh);
811 
812 	switch (eh_type) {
813 	case ETHERTYPE_IP:
814 		ip = (struct ip *)(eh + 1);
815 		if (ip->ip_p != IPPROTO_TCP) {
816 			CTR(KTR_CXGBE, "%s: %p mbuf not IPPROTO_TCP", __func__,
817 			    tlsp);
818 			return (EINVAL);
819 		}
820 		m->m_pkthdr.l3hlen = ip->ip_hl * 4;
821 		break;
822 	case ETHERTYPE_IPV6:
823 		ip6 = (struct ip6_hdr *)(eh + 1);
824 		if (ip6->ip6_nxt != IPPROTO_TCP) {
825 			CTR(KTR_CXGBE, "%s: %p, mbuf not IPPROTO_TCP (%u)",
826 			    __func__, tlsp, ip6->ip6_nxt);
827 			return (EINVAL);
828 		}
829 		m->m_pkthdr.l3hlen = sizeof(struct ip6_hdr);
830 		break;
831 	default:
832 		CTR(KTR_CXGBE, "%s: %p mbuf not ETHERTYPE_IP{,V6}", __func__,
833 		    tlsp);
834 		return (EINVAL);
835 	}
836 	if (m->m_len < m->m_pkthdr.l2hlen + m->m_pkthdr.l3hlen +
837 	    sizeof(*tcp)) {
838 		CTR(KTR_CXGBE, "%s: %p header mbuf too short (2)", __func__,
839 		    tlsp);
840 		return (EINVAL);
841 	}
842 	tcp = (struct tcphdr *)((char *)(eh + 1) + m->m_pkthdr.l3hlen);
843 	m->m_pkthdr.l4hlen = tcp->th_off * 4;
844 
845 	/* Bail if there is TCP payload before the TLS record. */
846 	if (m->m_len != m->m_pkthdr.l2hlen + m->m_pkthdr.l3hlen +
847 	    m->m_pkthdr.l4hlen) {
848 		CTR(KTR_CXGBE,
849 		    "%s: %p header mbuf bad length (%d + %d + %d != %d)",
850 		    __func__, tlsp, m->m_pkthdr.l2hlen, m->m_pkthdr.l3hlen,
851 		    m->m_pkthdr.l4hlen, m->m_len);
852 		return (EINVAL);
853 	}
854 
855 	/* Assume all headers are in 'm' for now. */
856 	MPASS(m->m_next != NULL);
857 	MPASS(m->m_next->m_flags & M_EXTPG);
858 
859 	tot_len = 0;
860 
861 	/*
862 	 * Each of the remaining mbufs in the chain should reference a
863 	 * TLS record.
864 	 */
865 	for (m_tls = m->m_next; m_tls != NULL; m_tls = m_tls->m_next) {
866 		MPASS(m_tls->m_flags & M_EXTPG);
867 
868 		wr_len = ktls_wr_len(tlsp, m, m_tls, &nsegs);
869 #ifdef VERBOSE_TRACES
870 		CTR(KTR_CXGBE, "%s: %p wr_len %d nsegs %d", __func__, tlsp,
871 		    wr_len, nsegs);
872 #endif
873 		if (wr_len > SGE_MAX_WR_LEN || nsegs > TX_SGL_SEGS)
874 			return (EFBIG);
875 		tot_len += roundup2(wr_len, EQ_ESIZE);
876 
877 		/*
878 		 * Store 'nsegs' for the first TLS record in the
879 		 * header mbuf's metadata.
880 		 */
881 		if (m_tls == m->m_next)
882 			set_mbuf_nsegs(m, nsegs);
883 	}
884 
885 	MPASS(tot_len != 0);
886 	set_mbuf_len16(m, tot_len / 16);
887 
888 	if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_GCM) {
889 		/* Defer packets beyond what has been sent so far. */
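		/*
		 * Added note: this serializes AES-GCM packets for the
		 * session.  While queue_mbufs is set, later packets are
		 * parked on pending_mbufs and released one at a time by
		 * ktls_queue_next_packet(), so a work request that may
		 * depend on returned partial GHASH state is not built
		 * until the preceding request has been handled.
		 */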
890 		TXQ_LOCK(tlsp->txq);
891 		if (tlsp->queue_mbufs) {
892 			error = mbufq_enqueue(&tlsp->pending_mbufs, m);
893 			if (error == 0) {
894 #ifdef VERBOSE_TRACES
895 				CTR(KTR_CXGBE,
896 				    "%s: %p len16 %d nsegs %d TCP seq %u deferred",
897 				    __func__, tlsp, mbuf_len16(m),
898 				    mbuf_nsegs(m), ntohl(tcp->th_seq));
899 #endif
900 			}
901 			TXQ_UNLOCK(tlsp->txq);
902 			return (error);
903 		}
904 		tlsp->queue_mbufs = true;
905 		TXQ_UNLOCK(tlsp->txq);
906 	}
907 
908 #ifdef VERBOSE_TRACES
909 	CTR(KTR_CXGBE, "%s: %p len16 %d nsegs %d", __func__, tlsp,
910 	    mbuf_len16(m), mbuf_nsegs(m));
911 #endif
912 	items[0] = m;
913 	error = mp_ring_enqueue(tlsp->txq->r, items, 1, 256);
914 	if (__predict_false(error != 0)) {
915 		if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_GCM) {
916 			TXQ_LOCK(tlsp->txq);
917 			ktls_queue_next_packet(tlsp, false);
918 			TXQ_UNLOCK(tlsp->txq);
919 		}
920 	}
921 	return (error);
922 }
923 
924 static inline bool
925 needs_vlan_insertion(struct mbuf *m)
926 {
927 
928 	M_ASSERTPKTHDR(m);
929 
930 	return (m->m_flags & M_VLANTAG);
931 }
932 
933 static inline uint64_t
934 pkt_ctrl1(struct sge_txq *txq, struct mbuf *m, uint16_t eh_type)
935 {
936 	uint64_t ctrl1;
937 
938 	/* Checksums are always offloaded */
939 	if (eh_type == ETHERTYPE_IP) {
940 		ctrl1 = V_TXPKT_CSUM_TYPE(TX_CSUM_TCPIP) |
941 		    V_T6_TXPKT_ETHHDR_LEN(m->m_pkthdr.l2hlen - ETHER_HDR_LEN) |
942 		    V_TXPKT_IPHDR_LEN(m->m_pkthdr.l3hlen);
943 	} else {
944 		MPASS(m->m_pkthdr.l3hlen == sizeof(struct ip6_hdr));
945 		ctrl1 = V_TXPKT_CSUM_TYPE(TX_CSUM_TCPIP6) |
946 		    V_T6_TXPKT_ETHHDR_LEN(m->m_pkthdr.l2hlen - ETHER_HDR_LEN) |
947 		    V_TXPKT_IPHDR_LEN(m->m_pkthdr.l3hlen);
948 	}
949 	txq->txcsum++;
950 
951 	/* VLAN tag insertion */
952 	if (needs_vlan_insertion(m)) {
953 		ctrl1 |= F_TXPKT_VLAN_VLD |
954 		    V_TXPKT_VLAN(m->m_pkthdr.ether_vtag);
955 		txq->vlan_insertion++;
956 	}
957 
958 	return (ctrl1);
959 }
960 
961 static inline void *
962 write_lso_cpl(void *cpl, struct mbuf *m0, uint16_t mss, uint16_t eh_type,
963     int total_len)
964 {
965 	struct cpl_tx_pkt_lso_core *lso;
966 	uint32_t ctrl;
967 
968 	KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 &&
969 	    m0->m_pkthdr.l4hlen > 0,
970 	    ("%s: mbuf %p needs TSO but missing header lengths",
971 		__func__, m0));
972 
973 	ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) |
974 	    F_LSO_FIRST_SLICE | F_LSO_LAST_SLICE |
975 	    V_LSO_ETHHDR_LEN((m0->m_pkthdr.l2hlen - ETHER_HDR_LEN) >> 2) |
976 	    V_LSO_IPHDR_LEN(m0->m_pkthdr.l3hlen >> 2) |
977 	    V_LSO_TCPHDR_LEN(m0->m_pkthdr.l4hlen >> 2);
978 	if (eh_type == ETHERTYPE_IPV6)
979 		ctrl |= F_LSO_IPV6;
980 
981 	lso = cpl;
982 	lso->lso_ctrl = htobe32(ctrl);
983 	lso->ipid_ofst = htobe16(0);
984 	lso->mss = htobe16(mss);
985 	lso->seqno_offset = htobe32(0);
986 	lso->len = htobe32(total_len);
987 
988 	return (lso + 1);
989 }
990 
991 static inline void *
992 write_tx_tls_ack(void *dst, u_int rx_chid, u_int hash_len, bool ghash_lcb)
993 {
994 	struct cpl_tx_tls_ack *cpl;
995 	uint32_t flags;
996 
997 	flags = ghash_lcb ? F_CPL_TX_TLS_ACK_LCB : F_CPL_TX_TLS_ACK_PHASH;
998 	cpl = dst;
999 	cpl->op_to_Rsvd2 = htobe32(V_CPL_TX_TLS_ACK_OPCODE(CPL_TX_TLS_ACK) |
1000 	    V_T7_CPL_TX_TLS_ACK_RXCHID(rx_chid) | F_CPL_TX_TLS_ACK_ULPTXLPBK |
1001 	    flags);
1002 
1003 	/* 32 == AckEncCpl, 16 == LCB */
1004 	cpl->PldLen = htobe32(V_CPL_TX_TLS_ACK_PLDLEN(32 + 16 + hash_len));
1005 	cpl->Rsvd3 = 0;
1006 
1007 	return (cpl + 1);
1008 }
1009 
1010 static inline void *
1011 write_fw6_pld(void *dst, u_int rx_chid, u_int rx_qid, u_int hash_len,
1012     uint64_t cookie)
1013 {
1014 	struct rss_header *rss;
1015 	struct cpl_fw6_pld *cpl;
1016 
1017 	rss = dst;
1018 	memset(rss, 0, sizeof(*rss));
1019 	rss->opcode = CPL_FW6_PLD;
1020 	rss->qid = htobe16(rx_qid);
1021 	rss->channel = rx_chid;
1022 
1023 	cpl = (void *)(rss + 1);
1024 	memset(cpl, 0, sizeof(*cpl));
1025 	cpl->opcode = CPL_FW6_PLD;
1026 	cpl->len = htobe16(hash_len);
1027 	cpl->data[1] = htobe64(cookie);
1028 
1029 	return (cpl + 1);
1030 }
1031 
1032 static inline void *
1033 write_split_mode_rx_phys(void *dst, struct mbuf *m, struct mbuf *m_tls,
1034     u_int crypto_hdr_len, u_int leading_waste, u_int trailing_waste)
1035 {
1036 	struct cpl_t7_rx_phys_dsgl *cpl;
1037 	uint16_t *len;
1038 	uint8_t numsge;
1039 
1040 	/* Forward first (3) and third (1) segments. */
1041 	numsge = 0xa;
1042 
1043 	cpl = dst;
1044 	cpl->ot.opcode = CPL_RX_PHYS_DSGL;
1045 	cpl->PhysAddrFields_lo_to_NumSGE =
1046 	    htobe32(F_CPL_T7_RX_PHYS_DSGL_SPLITMODE |
1047 	    V_CPL_T7_RX_PHYS_DSGL_NUMSGE(numsge));
1048 
1049 	len = (uint16_t *)(cpl->RSSCopy);
1050 
1051 	/*
1052 	 * First segment always contains packet headers as well as
1053 	 * transmit-related CPLs.
1054 	 */
1055 	len[0] = htobe16(crypto_hdr_len);
1056 
1057 	/*
1058 	 * Second segment is "gap" of data to drop at the front of the
1059 	 * TLS record.
1060 	 */
1061 	len[1] = htobe16(leading_waste);
1062 
1063 	/* Third segment is how much of the TLS record to send. */
1064 	len[2] = htobe16(m_tls->m_len);
1065 
1066 	/* Fourth segment is how much data to drop at the end. */
1067 	len[3] = htobe16(trailing_waste);
1068 
1069 #ifdef VERBOSE_TRACES
1070 	CTR(KTR_CXGBE, "%s: forward %u skip %u forward %u skip %u",
1071 	    __func__, be16toh(len[0]), be16toh(len[1]), be16toh(len[2]),
1072 	    be16toh(len[3]));
1073 #endif
1074 	return (cpl + 1);
1075 }
1076 
1077 /*
1078  * If the SGL ends on an address that is not 16 byte aligned, this function will
1079  * add a 0 filled flit at the end.
1080  */
1081 static void *
1082 write_gl_to_buf(struct sglist *gl, caddr_t to)
1083 {
1084 	struct sglist_seg *seg;
1085 	__be64 *flitp;
1086 	struct ulptx_sgl *usgl;
1087 	int i, nflits, nsegs;
1088 
1089 	KASSERT(((uintptr_t)to & 0xf) == 0,
1090 	    ("%s: SGL must start at a 16 byte boundary: %p", __func__, to));
1091 
1092 	nsegs = gl->sg_nseg;
1093 	MPASS(nsegs > 0);
1094 
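	/*
	 * Added note: 2 flits cover the ulptx_sgl header (cmd_nsge,
	 * len0, addr0); each additional pair of segments needs 3 flits
	 * and a leftover odd segment needs 2, matching ktls_sgl_size().
	 */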
1095 	nflits = (3 * (nsegs - 1)) / 2 + ((nsegs - 1) & 1) + 2;
1096 	flitp = (__be64 *)to;
1097 	seg = &gl->sg_segs[0];
1098 	usgl = (void *)flitp;
1099 
1100 	usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) |
1101 	    V_ULPTX_NSGE(nsegs));
1102 	usgl->len0 = htobe32(seg->ss_len);
1103 	usgl->addr0 = htobe64(seg->ss_paddr);
1104 	seg++;
1105 
1106 	for (i = 0; i < nsegs - 1; i++, seg++) {
1107 		usgl->sge[i / 2].len[i & 1] = htobe32(seg->ss_len);
1108 		usgl->sge[i / 2].addr[i & 1] = htobe64(seg->ss_paddr);
1109 	}
1110 	if (i & 1)
1111 		usgl->sge[i / 2].len[1] = htobe32(0);
1112 	flitp += nflits;
1113 
1114 	if (nflits & 1) {
1115 		MPASS(((uintptr_t)flitp) & 0xf);
1116 		*flitp++ = 0;
1117 	}
1118 
1119 	MPASS((((uintptr_t)flitp) & 0xf) == 0);
1120 	return (flitp);
1121 }
1122 
1123 static inline void
1124 copy_to_txd(struct sge_eq *eq, const char *from, caddr_t *to, int len)
1125 {
1126 
1127 	MPASS((uintptr_t)(*to) >= (uintptr_t)&eq->desc[0]);
1128 	MPASS((uintptr_t)(*to) < (uintptr_t)&eq->desc[eq->sidx]);
1129 
1130 	if (__predict_true((uintptr_t)(*to) + len <=
1131 	    (uintptr_t)&eq->desc[eq->sidx])) {
1132 		bcopy(from, *to, len);
1133 		(*to) += len;
1134 		if ((uintptr_t)(*to) == (uintptr_t)&eq->desc[eq->sidx])
1135 			(*to) = (caddr_t)eq->desc;
1136 	} else {
1137 		int portion = (uintptr_t)&eq->desc[eq->sidx] - (uintptr_t)(*to);
1138 
1139 		bcopy(from, *to, portion);
1140 		from += portion;
1141 		portion = len - portion;	/* remaining */
1142 		bcopy(from, (void *)eq->desc, portion);
1143 		(*to) = (caddr_t)eq->desc + portion;
1144 	}
1145 }
1146 
1147 static int
1148 ktls_write_tunnel_packet(struct sge_txq *txq, void *dst, struct mbuf *m,
1149     const void *src, u_int len, u_int available, tcp_seq tcp_seqno, u_int pidx,
1150     uint16_t eh_type, bool last_wr)
1151 {
1152 	struct tx_sdesc *txsd;
1153 	struct fw_eth_tx_pkt_wr *wr;
1154 	struct cpl_tx_pkt_core *cpl;
1155 	uint32_t ctrl;
1156 	int len16, ndesc, pktlen;
1157 	struct ether_header *eh;
1158 	struct ip *ip, newip;
1159 	struct ip6_hdr *ip6, newip6;
1160 	struct tcphdr *tcp, newtcp;
1161 	caddr_t out;
1162 
1163 	TXQ_LOCK_ASSERT_OWNED(txq);
1164 	M_ASSERTPKTHDR(m);
1165 
1166 	wr = dst;
1167 	pktlen = m->m_len + len;
1168 	ctrl = sizeof(struct cpl_tx_pkt_core) + pktlen;
1169 	len16 = howmany(sizeof(struct fw_eth_tx_pkt_wr) + ctrl, 16);
1170 	ndesc = tx_len16_to_desc(len16);
1171 	MPASS(ndesc <= available);
1172 
1173 	/* Firmware work request header */
1174 	/* TODO: Handle VF work request. */
1175 	wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) |
1176 	    V_FW_ETH_TX_PKT_WR_IMMDLEN(ctrl));
1177 
1178 	ctrl = V_FW_WR_LEN16(len16);
1179 	wr->equiq_to_len16 = htobe32(ctrl);
1180 	wr->r3 = 0;
1181 
1182 	cpl = (void *)(wr + 1);
1183 
1184 	/* CPL header */
1185 	cpl->ctrl0 = txq->cpl_ctrl0;
1186 	cpl->pack = 0;
1187 	cpl->len = htobe16(pktlen);
1188 
1189 	out = (void *)(cpl + 1);
1190 
1191 	/* Copy over Ethernet header. */
1192 	eh = mtod(m, struct ether_header *);
1193 	copy_to_txd(&txq->eq, (caddr_t)eh, &out, m->m_pkthdr.l2hlen);
1194 
1195 	/* Fixup length in IP header and copy out. */
1196 	if (eh_type == ETHERTYPE_IP) {
1197 		ip = (void *)((char *)eh + m->m_pkthdr.l2hlen);
1198 		newip = *ip;
1199 		newip.ip_len = htons(pktlen - m->m_pkthdr.l2hlen);
1200 		copy_to_txd(&txq->eq, (caddr_t)&newip, &out, sizeof(newip));
1201 		if (m->m_pkthdr.l3hlen > sizeof(*ip))
1202 			copy_to_txd(&txq->eq, (caddr_t)(ip + 1), &out,
1203 			    m->m_pkthdr.l3hlen - sizeof(*ip));
1204 	} else {
1205 		ip6 = (void *)((char *)eh + m->m_pkthdr.l2hlen);
1206 		newip6 = *ip6;
1207 		newip6.ip6_plen = htons(pktlen - m->m_pkthdr.l2hlen -
1208 		    sizeof(*ip6));
1209 		copy_to_txd(&txq->eq, (caddr_t)&newip6, &out, sizeof(newip6));
1210 		MPASS(m->m_pkthdr.l3hlen == sizeof(*ip6));
1211 	}
1212 	cpl->ctrl1 = htobe64(pkt_ctrl1(txq, m, eh_type));
1213 
1214 	/* Set sequence number in TCP header. */
1215 	tcp = (void *)((char *)eh + m->m_pkthdr.l2hlen + m->m_pkthdr.l3hlen);
1216 	newtcp = *tcp;
1217 	newtcp.th_seq = htonl(tcp_seqno);
1218 	copy_to_txd(&txq->eq, (caddr_t)&newtcp, &out, sizeof(newtcp));
1219 
1220 	/* Copy rest of TCP header. */
1221 	copy_to_txd(&txq->eq, (caddr_t)(tcp + 1), &out, m->m_len -
1222 	    (m->m_pkthdr.l2hlen + m->m_pkthdr.l3hlen + sizeof(*tcp)));
1223 
1224 	/* Copy the payload data. */
1225 	copy_to_txd(&txq->eq, src, &out, len);
1226 	txq->imm_wrs++;
1227 
1228 	txq->txpkt_wrs++;
1229 
1230 	txsd = &txq->sdesc[pidx];
1231 	if (last_wr)
1232 		txsd->m = m;
1233 	else
1234 		txsd->m = NULL;
1235 	txsd->desc_used = ndesc;
1236 
1237 	return (ndesc);
1238 }
1239 
1240 static int
1241 ktls_write_tls_wr(struct tlspcb *tlsp, struct sge_txq *txq,
1242     void *dst, struct mbuf *m, struct tcphdr *tcp, struct mbuf *m_tls,
1243     u_int available, tcp_seq tcp_seqno, u_int pidx, uint16_t eh_type,
1244     uint16_t mss)
1245 {
1246 	struct sge_eq *eq = &txq->eq;
1247 	struct tx_sdesc *txsd;
1248 	struct fw_ulptx_wr *wr;
1249 	struct ulp_txpkt *txpkt;
1250 	struct ulptx_sc_memrd *memrd;
1251 	struct ulptx_idata *idata;
1252 	struct cpl_tx_sec_pdu *sec_pdu;
1253 	struct cpl_tx_pkt_core *tx_pkt;
1254 	const struct tls_record_layer *hdr;
1255 	struct ip *ip;
1256 	struct ip6_hdr *ip6;
1257 	struct tcphdr *newtcp;
1258 	char *iv, *out;
1259 	u_int aad_start, aad_stop;
1260 	u_int auth_start, auth_stop, auth_insert;
1261 	u_int cipher_start, cipher_stop, iv_offset;
1262 	u_int header_len, offset, plen, rlen, tlen;
1263 	u_int imm_len, ndesc, nsegs, txpkt_lens[2], wr_len;
1264 	u_int cpl_len, crypto_hdr_len, post_key_context_len;
1265 	u_int leading_waste, trailing_waste;
1266 	u_short ip_len;
1267 	bool inline_key, ghash_lcb, last_ghash_frag, last_wr, need_lso;
1268 	bool request_ghash, send_partial_ghash, short_record, split_mode;
1269 	bool using_scratch;
1270 
1271 	MPASS(tlsp->txq == txq);
1272 	M_ASSERTEXTPG(m_tls);
1273 
1274 	/* Final work request for this mbuf chain? */
1275 	last_wr = (m_tls->m_next == NULL);
1276 
1277 	/*
1278 	 * The relative offset of the last byte to send from the TLS
1279 	 * record.
1280 	 */
1281 	tlen = mtod(m_tls, vm_offset_t) + m_tls->m_len;
1282 	if (tlen <= m_tls->m_epg_hdrlen) {
1283 		/*
1284 		 * For requests that only want to send the TLS header,
1285 		 * send a tunnelled packet as immediate data.
1286 		 */
1287 #ifdef VERBOSE_TRACES
1288 		CTR(KTR_CXGBE, "%s: %p header-only TLS record %u", __func__,
1289 		    tlsp, (u_int)m_tls->m_epg_seqno);
1290 #endif
1291 		/* This should always be the last TLS record in a chain. */
1292 		MPASS(last_wr);
1293 
1294 		txq->kern_tls_header++;
1295 
1296 		return (ktls_write_tunnel_packet(txq, dst, m,
1297 		    (char *)m_tls->m_epg_hdr + mtod(m_tls, vm_offset_t),
1298 		    m_tls->m_len, available, tcp_seqno, pidx, eh_type,
1299 		    last_wr));
1300 	}
1301 
1302 	/* Locate the TLS header. */
1303 	hdr = (void *)m_tls->m_epg_hdr;
1304 	rlen = TLS_HEADER_LENGTH + ntohs(hdr->tls_length);
1305 
1306 #ifdef VERBOSE_TRACES
1307 	CTR(KTR_CXGBE, "%s: offset %lu len %u TCP seq %u TLS record %u",
1308 	    __func__, mtod(m_tls, vm_offset_t), m_tls->m_len, tcp_seqno,
1309 	    (u_int)m_tls->m_epg_seqno);
1310 #endif
1311 
1312 	/* Should this request make use of GHASH state? */
1313 	ghash_lcb = false;
1314 	last_ghash_frag = false;
1315 	request_ghash = false;
1316 	send_partial_ghash = false;
1317 	if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_GCM &&
1318 	    tlsp->sc->tlst.partial_ghash && tlsp->sc->tlst.short_records) {
1319 		u_int trailer_len;
1320 
1321 		trailer_len = m_tls->m_epg_trllen;
1322 		if (tlsp->tls13)
1323 			trailer_len--;
1324 		KASSERT(trailer_len == AES_GMAC_HASH_LEN,
1325 		    ("invalid trailer length for AES-GCM"));
1326 
1327 		/* Is this the start of a TLS record? */
1328 		if (mtod(m_tls, vm_offset_t) <= m_tls->m_epg_hdrlen) {
1329 			/*
1330 			 * If this is the very first TLS record, or a newer
1331 			 * record than the one we last requested a hash for,
1332 			 * request a partial hash, but not if we are going
1333 			 * to send the whole record.
1334 			 */
1335 			if ((tlsp->ghash_tls_seqno == 0 ||
1336 			    tlsp->ghash_tls_seqno < m_tls->m_epg_seqno) &&
1337 			    tlen < rlen) {
1338 				/*
1339 				 * If we are only missing part or all
1340 				 * of the trailer, send a normal full
1341 				 * record but request the hash.
1342 				 * Otherwise, use partial GHASH mode.
1343 				 */
1344 				if (tlen >= (rlen - trailer_len))
1345 					ghash_lcb = true;
1346 				else
1347 					send_partial_ghash = true;
1348 				request_ghash = true;
1349 				tlsp->ghash_tls_seqno = m_tls->m_epg_seqno;
1350 			}
1351 		} else if (tlsp->ghash_tls_seqno == m_tls->m_epg_seqno &&
1352 		    tlsp->ghash_valid) {
1353 			/*
1354 			 * Compute the offset of the first AES block as
1355 			 * is done in ktls_is_short_record.
1356 			 */
1357 			if (rlen - tlen < trailer_len)
1358 				plen = rlen - (m_tls->m_epg_hdrlen +
1359 				    trailer_len);
1360 			else
1361 				plen = tlen - m_tls->m_epg_hdrlen;
1362 			offset = mtod(m_tls, vm_offset_t) - m_tls->m_epg_hdrlen;
1363 			if (offset >= plen)
1364 				offset = plen;
1365 			else
1366 				offset = rounddown2(offset, AES_BLOCK_LEN);
1367 			if (tlsp->ghash_offset == offset) {
1368 				if (offset == plen) {
1369 					/*
1370 					 * Send a partial trailer as a
1371 					 * tunnelled packet as
1372 					 * immediate data.
1373 					 */
1374 #ifdef VERBOSE_TRACES
1375 					CTR(KTR_CXGBE,
1376 					    "%s: %p trailer-only TLS record %u",
1377 					    __func__, tlsp,
1378 					    (u_int)m_tls->m_epg_seqno);
1379 #endif
1380 
1381 					txq->kern_tls_trailer++;
1382 
1383 					offset = mtod(m_tls, vm_offset_t) -
1384 					    (m_tls->m_epg_hdrlen + plen);
1385 					KASSERT(offset <= AES_GMAC_HASH_LEN,
1386 					    ("offset outside of trailer"));
1387 					return (ktls_write_tunnel_packet(txq,
1388 					    dst, m, tlsp->ghash + offset,
1389 					    m_tls->m_len, available, tcp_seqno,
1390 					    pidx, eh_type, last_wr));
1391 				}
1392 
1393 				/*
1394 				 * If this request sends the end of
1395 				 * the payload, it is the last
1396 				 * fragment.
1397 				 */
1398 				if (tlen >= (rlen - trailer_len)) {
1399 					last_ghash_frag = true;
1400 					ghash_lcb = true;
1401 				}
1402 
1403 				/*
1404 				 * Only use partial GCM mode (rather
1405 				 * than an AES-CTR short record) if
1406 				 * there is input auth data to pass to
1407 				 * the GHASH.  That is true so long as
1408 				 * there is at least one full block of
1409 				 * payload data, or if the remaining
1410 				 * payload data is the final partial
1411 				 * block.
1412 				 */
1413 				if (plen - offset >= GMAC_BLOCK_LEN ||
1414 				    last_ghash_frag) {
1415 					send_partial_ghash = true;
1416 
1417 					/*
1418 					 * If not sending the complete
1419 					 * end of the record, this is
1420 					 * a middle request so needs
1421 					 * to request an updated
1422 					 * partial hash.
1423 					 */
1424 					if (tlen < rlen)
1425 						request_ghash = true;
1426 				}
1427 			}
1428 		}
1429 	}
1430 
1431 	short_record = ktls_is_short_record(tlsp, m_tls, tlen, rlen,
1432 	    &header_len, &offset, &plen, &leading_waste, &trailing_waste,
1433 	    send_partial_ghash, request_ghash);
1434 
1435 	if (short_record) {
1436 #ifdef VERBOSE_TRACES
1437 		CTR(KTR_CXGBE,
1438 		    "%s: %p short TLS record %u hdr %u offs %u plen %u",
1439 		    __func__, tlsp, (u_int)m_tls->m_epg_seqno, header_len,
1440 		    offset, plen);
1441 		if (send_partial_ghash) {
1442 			if (header_len != 0)
1443 				CTR(KTR_CXGBE, "%s: %p sending initial GHASH",
1444 				    __func__, tlsp);
1445 			else
1446 				CTR(KTR_CXGBE, "%s: %p sending partial GHASH for offset %u%s",
1447 				    __func__, tlsp, tlsp->ghash_offset,
1448 				    last_ghash_frag ? ", last_frag" : "");
1449 		}
1450 #endif
1451 		KASSERT(send_partial_ghash || !request_ghash,
1452 		    ("requesting but not sending partial hash for short record"));
1453 	} else {
1454 		KASSERT(!send_partial_ghash,
1455 		    ("sending partial hash with full record"));
1456 	}
1457 
1458 	if (tlen < rlen && m_tls->m_next == NULL &&
1459 	    (tcp->th_flags & TH_FIN) != 0) {
1460 		txq->kern_tls_fin_short++;
1461 #ifdef INVARIANTS
1462 		panic("%s: FIN on short TLS record", __func__);
1463 #endif
1464 	}
1465 
1466 	/*
1467 	 * Use cached value for first record in chain if not using
1468 	 * partial GCM mode.  t7_ktls_parse_pkt() calculates nsegs based
1469 	 * on send_partial_ghash being false.
1470 	 */
1471 	if (m->m_next == m_tls && !send_partial_ghash)
1472 		nsegs = mbuf_nsegs(m);
1473 	else
1474 		nsegs = sglist_count_mbuf_epg(m_tls,
1475 		    m_tls->m_epg_hdrlen + offset, plen);
1476 
1477 	/* Determine if we need an LSO header. */
1478 	need_lso = (m_tls->m_len > mss);
1479 
1480 	/* Calculate the size of the TLS work request. */
1481 	inline_key = send_partial_ghash || tlsp->inline_key;
1482 	wr_len = ktls_base_wr_size(tlsp, inline_key);
1483 
1484 	if (send_partial_ghash) {
1485 		/* Inline key context includes partial hash in OPAD. */
1486 		wr_len += AES_GMAC_HASH_LEN;
1487 	}
1488 
1489 	/*
1490 	 * SplitMode is required if there is anything we need to trim
1491 	 * from the crypto output, either at the front or end of the
1492 	 * record.  Note that short records might not need trimming.
1493 	 */
1494 	split_mode = leading_waste != 0 || trailing_waste != 0;
1495 	if (split_mode) {
1496 		/*
1497 		 * Partial records require a SplitMode
1498 		 * CPL_RX_PHYS_DSGL.
1499 		 */
1500 		wr_len += sizeof(struct cpl_t7_rx_phys_dsgl);
1501 	}
1502 
1503 	if (need_lso)
1504 		wr_len += sizeof(struct cpl_tx_pkt_lso_core);
1505 
1506 	imm_len = m->m_len + header_len;
1507 	if (short_record) {
1508 		imm_len += AES_BLOCK_LEN;
1509 		if (send_partial_ghash && header_len != 0)
1510 			imm_len += ktls_gcm_aad_len(tlsp);
1511 	} else if (tlsp->tls13)
1512 		imm_len += sizeof(uint64_t);
1513 	wr_len += roundup2(imm_len, 16);
1514 	wr_len += ktls_sgl_size(nsegs + (last_ghash_frag ? 1 : 0));
1515 	wr_len = roundup2(wr_len, 16);
1516 	txpkt_lens[0] = wr_len - sizeof(*wr);
1517 
1518 	if (request_ghash) {
1519 		/*
1520 		 * Requesting the hash entails a second ULP_TX_PKT
1521 		 * containing CPL_TX_TLS_ACK, CPL_FW6_PLD, and space
1522 		 * for the hash.
1523 		 */
1524 		txpkt_lens[1] = sizeof(struct ulp_txpkt);
1525 		txpkt_lens[1] += sizeof(struct ulptx_idata);
1526 		txpkt_lens[1] += sizeof(struct cpl_tx_tls_ack);
1527 		txpkt_lens[1] += sizeof(struct rss_header) +
1528 		    sizeof(struct cpl_fw6_pld);
1529 		txpkt_lens[1] += AES_GMAC_HASH_LEN;
1530 		wr_len += txpkt_lens[1];
1531 	} else
1532 		txpkt_lens[1] = 0;
1533 
1534 	ndesc = howmany(wr_len, EQ_ESIZE);
1535 	MPASS(ndesc <= available);
1536 
1537 	/*
1538 	 * Use the per-txq scratch pad if near the end of the ring to
1539 	 * simplify handling of wrap-around.
1540 	 */
1541 	using_scratch = (eq->sidx - pidx < ndesc);
1542 	if (using_scratch)
1543 		wr = (void *)txq->ss;
1544 	else
1545 		wr = dst;
1546 
1547 	/* FW_ULPTX_WR */
1548 	wr->op_to_compl = htobe32(V_FW_WR_OP(FW_ULPTX_WR));
1549 	wr->flowid_len16 = htobe32(F_FW_ULPTX_WR_DATA |
1550 	    V_FW_WR_LEN16(wr_len / 16));
1551 	wr->cookie = 0;
1552 
1553 	/* ULP_TXPKT */
1554 	txpkt = (void *)(wr + 1);
1555 	txpkt->cmd_dest = htobe32(V_ULPTX_CMD(ULP_TX_PKT) |
1556 	    V_ULP_TXPKT_DATAMODIFY(0) |
1557 	    V_T7_ULP_TXPKT_CHANNELID(tlsp->vi->pi->port_id) |
1558 	    V_ULP_TXPKT_DEST(0) |
1559 	    V_ULP_TXPKT_CMDMORE(request_ghash ? 1 : 0) |
1560 	    V_ULP_TXPKT_FID(txq->eq.cntxt_id) | V_ULP_TXPKT_RO(1));
1561 	txpkt->len = htobe32(howmany(txpkt_lens[0], 16));
1562 
1563 	/* ULPTX_IDATA sub-command */
1564 	idata = (void *)(txpkt + 1);
1565 	idata->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM) |
1566 	    V_ULP_TX_SC_MORE(1));
1567 	idata->len = sizeof(struct cpl_tx_sec_pdu);
1568 
1569 	/*
1570 	 * After the key context comes CPL_RX_PHYS_DSGL, CPL_TX_*, and
1571 	 * immediate data containing headers.  When using an inline
1572 	 * key, these are counted as part of this ULPTX_IDATA.  When
1573 	 * reading the key from memory, these are part of a separate
1574 	 * ULPTX_IDATA.
1575 	 */
1576 	cpl_len = sizeof(struct cpl_tx_pkt_core);
1577 	if (need_lso)
1578 		cpl_len += sizeof(struct cpl_tx_pkt_lso_core);
1579 	if (split_mode)
1580 		cpl_len += sizeof(struct cpl_t7_rx_phys_dsgl);
1581 	post_key_context_len = cpl_len + imm_len;
1582 
1583 	if (inline_key) {
1584 		idata->len += tlsp->tx_key_info_size + post_key_context_len;
1585 		if (send_partial_ghash) {
1586 			/* Partial GHASH in key context. */
1587 			idata->len += AES_GMAC_HASH_LEN;
1588 		}
1589 	}
1590 	idata->len = htobe32(idata->len);
1591 
1592 	/* CPL_TX_SEC_PDU */
1593 	sec_pdu = (void *)(idata + 1);
1594 
1595 	/*
1596 	 * Packet headers are passed through unchanged by the crypto
1597 	 * engine by marking them as header data in SCMD0.
1598 	 */
1599 	crypto_hdr_len = m->m_len;
1600 
1601 	if (send_partial_ghash) {
1602 		/*
1603 		 * For short records using a partial hash, the TLS
1604 		 * header is counted as header data in SCMD0.  TLS AAD
1605 		 * is next (if AAD is present) followed by the AES-CTR
1606 		 * IV.  Last is the cipher region for the payload.
1607 		 */
1608 		if (header_len != 0) {
1609 			aad_start = 1;
1610 			aad_stop = ktls_gcm_aad_len(tlsp);
1611 		} else {
1612 			aad_start = 0;
1613 			aad_stop = 0;
1614 		}
1615 		iv_offset = aad_stop + 1;
1616 		cipher_start = iv_offset + AES_BLOCK_LEN;
1617 		cipher_stop = 0;
1618 		if (last_ghash_frag) {
1619 			auth_start = cipher_start;
1620 			auth_stop = AES_GMAC_HASH_LEN;
1621 			auth_insert = auth_stop;
1622 		} else if (plen < GMAC_BLOCK_LEN) {
1623 			/*
1624 			 * A request that sends part of the first AES
1625 			 * block will only have AAD.
1626 			 */
1627 			KASSERT(header_len != 0,
1628 			    ("%s: partial GHASH with no auth", __func__));
1629 			auth_start = 0;
1630 			auth_stop = 0;
1631 			auth_insert = 0;
1632 		} else {
1633 			auth_start = cipher_start;
1634 			auth_stop = plen % GMAC_BLOCK_LEN;
1635 			auth_insert = 0;
1636 		}
1637 
1638 		sec_pdu->pldlen = htobe32(aad_stop + AES_BLOCK_LEN + plen +
1639 		    (last_ghash_frag ? AES_GMAC_HASH_LEN : 0));
1640 
1641 		/*
1642 		 * For short records, the TLS header is treated as
1643 		 * header data.
1644 		 */
1645 		crypto_hdr_len += header_len;
1646 
1647 		/* These two flits are actually a CPL_TLS_TX_SCMD_FMT. */
1648 		sec_pdu->seqno_numivs = tlsp->scmd0_partial.seqno_numivs;
1649 		sec_pdu->ivgen_hdrlen = tlsp->scmd0_partial.ivgen_hdrlen;
1650 		if (last_ghash_frag)
1651 			sec_pdu->ivgen_hdrlen |= V_SCMD_LAST_FRAG(1);
1652 		else
1653 			sec_pdu->ivgen_hdrlen |= V_SCMD_MORE_FRAGS(1);
1654 		sec_pdu->ivgen_hdrlen = htobe32(sec_pdu->ivgen_hdrlen |
1655 		    V_SCMD_HDR_LEN(crypto_hdr_len));
1656 
1657 		txq->kern_tls_partial_ghash++;
1658 	} else if (short_record) {
1659 		/*
1660 		 * For short records without a partial hash, the TLS
1661 		 * header is counted as header data in SCMD0 and the
1662 		 * IV is next, followed by a cipher region for the
1663 		 * payload.
1664 		 */
1665 		aad_start = 0;
1666 		aad_stop = 0;
1667 		iv_offset = 1;
1668 		auth_start = 0;
1669 		auth_stop = 0;
1670 		auth_insert = 0;
1671 		cipher_start = AES_BLOCK_LEN + 1;
1672 		cipher_stop = 0;
1673 
1674 		sec_pdu->pldlen = htobe32(AES_BLOCK_LEN + plen);
1675 
1676 		/*
1677 		 * For short records, the TLS header is treated as
1678 		 * header data.
1679 		 */
1680 		crypto_hdr_len += header_len;
1681 
1682 		/* These two flits are actually a CPL_TLS_TX_SCMD_FMT. */
1683 		sec_pdu->seqno_numivs = tlsp->scmd0_short.seqno_numivs;
1684 		sec_pdu->ivgen_hdrlen = htobe32(
1685 		    tlsp->scmd0_short.ivgen_hdrlen |
1686 		    V_SCMD_HDR_LEN(crypto_hdr_len));
1687 
1688 		txq->kern_tls_short++;
1689 	} else {
1690 		/*
1691 		 * AAD is TLS header.  IV is after AAD for TLS < 1.3.
1692 		 * For TLS 1.3, a placeholder for the TLS sequence
1693 		 * number is provided as an IV before the AAD.  The
1694 		 * cipher region starts after the AAD and IV.  See
1695 		 * comments in ccr_authenc() and ccr_gmac() in
1696 		 * t4_crypto.c regarding cipher and auth start/stop
1697 		 * values.
1698 		 */
1699 		if (tlsp->tls13) {
1700 			iv_offset = 1;
1701 			aad_start = 1 + sizeof(uint64_t);
1702 			aad_stop = sizeof(uint64_t) + TLS_HEADER_LENGTH;
1703 			cipher_start = aad_stop + 1;
1704 		} else {
1705 			aad_start = 1;
1706 			aad_stop = TLS_HEADER_LENGTH;
1707 			iv_offset = TLS_HEADER_LENGTH + 1;
1708 			cipher_start = m_tls->m_epg_hdrlen + 1;
1709 		}
1710 		if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_GCM) {
1711 			cipher_stop = 0;
1712 			auth_start = cipher_start;
1713 			auth_stop = 0;
1714 			auth_insert = 0;
1715 		} else {
1716 			cipher_stop = 0;
1717 			auth_start = cipher_start;
1718 			auth_stop = 0;
1719 			auth_insert = 0;
1720 		}
1721 
1722 		sec_pdu->pldlen = htobe32((tlsp->tls13 ? sizeof(uint64_t) : 0) +
1723 		    m_tls->m_epg_hdrlen + plen);
1724 
1725 		/* These two flits are actually a CPL_TLS_TX_SCMD_FMT. */
1726 		sec_pdu->seqno_numivs = tlsp->scmd0.seqno_numivs;
1727 		sec_pdu->ivgen_hdrlen = htobe32(tlsp->scmd0.ivgen_hdrlen |
1728 		    V_SCMD_HDR_LEN(crypto_hdr_len));
1729 
1730 		if (split_mode)
1731 			txq->kern_tls_partial++;
1732 		else
1733 			txq->kern_tls_full++;
1734 	}
1735 	sec_pdu->op_ivinsrtofst = htobe32(
1736 	    V_CPL_TX_SEC_PDU_OPCODE(CPL_TX_SEC_PDU) |
1737 	    V_CPL_TX_SEC_PDU_CPLLEN(cpl_len / 8) |
1738 	    V_CPL_TX_SEC_PDU_PLACEHOLDER(send_partial_ghash ? 1 : 0) |
1739 	    V_CPL_TX_SEC_PDU_IVINSRTOFST(iv_offset));
1740 	sec_pdu->aadstart_cipherstop_hi = htobe32(
1741 	    V_CPL_TX_SEC_PDU_AADSTART(aad_start) |
1742 	    V_CPL_TX_SEC_PDU_AADSTOP(aad_stop) |
1743 	    V_CPL_TX_SEC_PDU_CIPHERSTART(cipher_start) |
1744 	    V_CPL_TX_SEC_PDU_CIPHERSTOP_HI(cipher_stop >> 4));
1745 	sec_pdu->cipherstop_lo_authinsert = htobe32(
1746 	    V_CPL_TX_SEC_PDU_CIPHERSTOP_LO(cipher_stop & 0xf) |
1747 	    V_CPL_TX_SEC_PDU_AUTHSTART(auth_start) |
1748 	    V_CPL_TX_SEC_PDU_AUTHSTOP(auth_stop) |
1749 	    V_CPL_TX_SEC_PDU_AUTHINSERT(auth_insert));
1750 
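	/*
	 * SCMD1 normally holds the TLS record sequence number.  For
	 * the final GHASH fragment it instead packs the AAD length in
	 * the upper bits and the total cipher length in the low 44
	 * bits.
	 */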
1751 	if (send_partial_ghash && last_ghash_frag) {
1752 		uint64_t aad_len, cipher_len;
1753 
1754 		aad_len = ktls_gcm_aad_len(tlsp);
1755 		cipher_len = rlen - (m_tls->m_epg_hdrlen + AES_GMAC_HASH_LEN);
1756 		sec_pdu->scmd1 = htobe64(aad_len << 44 | cipher_len);
1757 	} else
1758 		sec_pdu->scmd1 = htobe64(m_tls->m_epg_seqno);
1759 
1760 	/* Key context */
1761 	out = (void *)(sec_pdu + 1);
1762 	if (inline_key) {
1763 		memcpy(out, &tlsp->keyctx, tlsp->tx_key_info_size);
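		/*
		 * Extend the key context to cover the GHASH state
		 * appended below: bump the context length, clear the
		 * MAC key size, and mark an OPAD as present.
		 */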
1764 		if (send_partial_ghash) {
1765 			struct tls_keyctx *keyctx = (void *)out;
1766 
1767 			keyctx->u.txhdr.ctxlen++;
1768 			keyctx->u.txhdr.dualck_to_txvalid &= ~htobe16(
1769 			    V_KEY_CONTEXT_MK_SIZE(M_KEY_CONTEXT_MK_SIZE));
1770 			keyctx->u.txhdr.dualck_to_txvalid |= htobe16(
1771 			    F_KEY_CONTEXT_OPAD_PRESENT |
1772 			    V_KEY_CONTEXT_MK_SIZE(0));
1773 		}
1774 		out += tlsp->tx_key_info_size;
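		/*
		 * Seed the GHASH: a request covering the record header
		 * starts a new hash from zero, while a continuation
		 * resumes from the partial hash saved by
		 * ktls_fw6_pld().
		 */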
1775 		if (send_partial_ghash) {
1776 			if (header_len != 0)
1777 				memset(out, 0, AES_GMAC_HASH_LEN);
1778 			else
1779 				memcpy(out, tlsp->ghash, AES_GMAC_HASH_LEN);
1780 			out += AES_GMAC_HASH_LEN;
1781 		}
1782 	} else {
1783 		/* ULPTX_SC_MEMRD to read key context. */
1784 		memrd = (void *)out;
1785 		memrd->cmd_to_len = htobe32(V_ULPTX_CMD(ULP_TX_SC_MEMRD) |
1786 		    V_ULP_TX_SC_MORE(1) |
1787 		    V_ULPTX_LEN16(tlsp->tx_key_info_size >> 4));
1788 		memrd->addr = htobe32(tlsp->tx_key_addr >> 5);
1789 
1790 		/* ULPTX_IDATA for CPL_TX_* and headers. */
1791 		idata = (void *)(memrd + 1);
1792 		idata->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM) |
1793 		    V_ULP_TX_SC_MORE(1));
1794 		idata->len = htobe32(post_key_context_len);
1795 
1796 		out = (void *)(idata + 1);
1797 	}
1798 
1799 	/* CPL_RX_PHYS_DSGL */
1800 	if (split_mode) {
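		/*
		 * Reuse crypto_hdr_len as the number of bytes in front
		 * of the TLS payload: the CPL_TX_PKT (and optional
		 * LSO) CPLs plus the copied packet headers.
		 */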
1801 		crypto_hdr_len = sizeof(struct cpl_tx_pkt_core);
1802 		if (need_lso)
1803 			crypto_hdr_len += sizeof(struct cpl_tx_pkt_lso_core);
1804 		crypto_hdr_len += m->m_len;
1805 		out = write_split_mode_rx_phys(out, m, m_tls, crypto_hdr_len,
1806 		    leading_waste, trailing_waste);
1807 	}
1808 
1809 	/* CPL_TX_PKT_LSO */
1810 	if (need_lso) {
1811 		out = write_lso_cpl(out, m, mss, eh_type, m->m_len +
1812 		    m_tls->m_len);
1813 		txq->tso_wrs++;
1814 	}
1815 
1816 	/* CPL_TX_PKT_XT */
1817 	tx_pkt = (void *)out;
1818 	tx_pkt->ctrl0 = txq->cpl_ctrl0;
1819 	tx_pkt->ctrl1 = htobe64(pkt_ctrl1(txq, m, eh_type));
1820 	tx_pkt->pack = 0;
1821 	tx_pkt->len = htobe16(m->m_len + m_tls->m_len);
1822 
1823 	/* Copy the packet headers. */
1824 	out = (void *)(tx_pkt + 1);
1825 	memcpy(out, mtod(m, char *), m->m_len);
1826 
1827 	/* Modify the packet length in the IP header. */
1828 	ip_len = m->m_len + m_tls->m_len - m->m_pkthdr.l2hlen;
1829 	if (eh_type == ETHERTYPE_IP) {
1830 		ip = (void *)(out + m->m_pkthdr.l2hlen);
1831 		be16enc(&ip->ip_len, ip_len);
1832 	} else {
1833 		ip6 = (void *)(out + m->m_pkthdr.l2hlen);
1834 		be16enc(&ip6->ip6_plen, ip_len - sizeof(*ip6));
1835 	}
1836 
1837 	/* Modify sequence number and flags in TCP header. */
1838 	newtcp = (void *)(out + m->m_pkthdr.l2hlen + m->m_pkthdr.l3hlen);
1839 	be32enc(&newtcp->th_seq, tcp_seqno);
1840 	if (!last_wr)
1841 		newtcp->th_flags = tcp->th_flags & ~(TH_PUSH | TH_FIN);
1842 	out += m->m_len;
1843 
1844 	/*
1845 	 * Insert placeholder for sequence number as IV for TLS 1.3
1846 	 * non-short records.
1847 	 */
1848 	if (tlsp->tls13 && !short_record) {
1849 		memset(out, 0, sizeof(uint64_t));
1850 		out += sizeof(uint64_t);
1851 	}
1852 
1853 	/* Populate the TLS header */
1854 	memcpy(out, m_tls->m_epg_hdr, header_len);
1855 	out += header_len;
1856 
1857 	/* TLS AAD for short records using a partial hash. */
1858 	if (send_partial_ghash && header_len != 0) {
1859 		if (tlsp->tls13) {
1860 			struct tls_aead_data_13 ad;
1861 
1862 			ad.type = hdr->tls_type;
1863 			ad.tls_vmajor = hdr->tls_vmajor;
1864 			ad.tls_vminor = hdr->tls_vminor;
1865 			ad.tls_length = hdr->tls_length;
1866 			memcpy(out, &ad, sizeof(ad));
1867 			out += sizeof(ad);
1868 		} else {
1869 			struct tls_aead_data ad;
1870 			uint16_t cipher_len;
1871 
1872 			cipher_len = rlen -
1873 			    (m_tls->m_epg_hdrlen + AES_GMAC_HASH_LEN);
1874 			ad.seq = htobe64(m_tls->m_epg_seqno);
1875 			ad.type = hdr->tls_type;
1876 			ad.tls_vmajor = hdr->tls_vmajor;
1877 			ad.tls_vminor = hdr->tls_vminor;
1878 			ad.tls_length = htons(cipher_len);
1879 			memcpy(out, &ad, sizeof(ad));
1880 			out += sizeof(ad);
1881 		}
1882 	}
1883 
1884 	/* AES IV for a short record. */
1885 	if (short_record) {
1886 		iv = out;
1887 		if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_GCM) {
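			/*
			 * Build the 16-byte GCM IV: the 4-byte salt,
			 * 8 bytes from the record's explicit nonce
			 * (TLS 1.2) or the key's implicit IV XORed
			 * with the sequence number (TLS 1.3), and a
			 * 4-byte block counter.  Encryption normally
			 * starts at counter 2 (counter 1 produces the
			 * tag); partial-GHASH requests start one block
			 * earlier.
			 */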
1888 			memcpy(iv, tlsp->keyctx.u.txhdr.txsalt, SALT_SIZE);
1889 			if (tlsp->tls13) {
1890 				uint64_t value;
1891 
1892 				value = be64dec(tlsp->keyctx.u.txhdr.txsalt +
1893 				    4);
1894 				value ^= m_tls->m_epg_seqno;
1895 				be64enc(iv + 4, value);
1896 			} else
1897 				memcpy(iv + 4, hdr + 1, 8);
1898 			if (send_partial_ghash)
1899 				be32enc(iv + 12, 1 + offset / AES_BLOCK_LEN);
1900 			else
1901 				be32enc(iv + 12, 2 + offset / AES_BLOCK_LEN);
1902 		} else
1903 			memcpy(iv, hdr + 1, AES_BLOCK_LEN);
1904 		out += AES_BLOCK_LEN;
1905 	}
1906 
1907 	if (imm_len % 16 != 0) {
1908 		if (imm_len % 8 != 0) {
1909 			/* Zero pad to an 8-byte boundary. */
1910 			memset(out, 0, 8 - (imm_len % 8));
1911 			out += 8 - (imm_len % 8);
1912 		}
1913 
1914 		/*
1915 		 * Insert a ULP_TX_SC_NOOP if needed so the SGL is
1916 		 * 16-byte aligned.
1917 		 */
1918 		if (imm_len % 16 <= 8) {
1919 			idata = (void *)out;
1920 			idata->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_NOOP) |
1921 			    V_ULP_TX_SC_MORE(1));
1922 			idata->len = htobe32(0);
1923 			out = (void *)(idata + 1);
1924 		}
1925 	}
1926 
1927 	/* SGL for record payload */
1928 	sglist_reset(txq->gl);
1929 	if (sglist_append_mbuf_epg(txq->gl, m_tls, m_tls->m_epg_hdrlen + offset,
1930 	    plen) != 0) {
1931 #ifdef INVARIANTS
1932 		panic("%s: failed to append sglist", __func__);
1933 #endif
1934 	}
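	/*
	 * The final GHASH fragment advertises AES_GMAC_HASH_LEN extra
	 * payload bytes in pldlen; back them with the zeroed buffer.
	 */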
1935 	if (last_ghash_frag) {
1936 		if (sglist_append_phys(txq->gl, zero_buffer_pa,
1937 		    AES_GMAC_HASH_LEN) != 0) {
1938 #ifdef INVARIANTS
1939 			panic("%s: failed to append sglist (2)", __func__);
1940 #endif
1941 		}
1942 	}
1943 	out = write_gl_to_buf(txq->gl, out);
1944 
1945 	if (request_ghash) {
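		/*
		 * Append a second ULP_TX_PKT requesting that the
		 * running GHASH be returned to rx_qid via CPL_FW6_PLD;
		 * ktls_fw6_pld() saves it for the next partial record.
		 */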
1946 		/* ULP_TXPKT */
1947 		txpkt = (void *)out;
1948 		txpkt->cmd_dest = htobe32(V_ULPTX_CMD(ULP_TX_PKT) |
1949 		    V_ULP_TXPKT_DATAMODIFY(0) |
1950 		    V_T7_ULP_TXPKT_CHANNELID(tlsp->vi->pi->port_id) |
1951 		    V_ULP_TXPKT_DEST(0) |
1952 		    V_ULP_TXPKT_FID(txq->eq.cntxt_id) | V_ULP_TXPKT_RO(1));
1953 		txpkt->len = htobe32(howmany(txpkt_lens[1], 16));
1954 
1955 		/* ULPTX_IDATA sub-command */
1956 		idata = (void *)(txpkt + 1);
1957 		idata->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM) |
1958 		    V_ULP_TX_SC_MORE(0));
1959 		idata->len = sizeof(struct cpl_tx_tls_ack);
1960 		idata->len += sizeof(struct rss_header) +
1961 		    sizeof(struct cpl_fw6_pld);
1962 		idata->len += AES_GMAC_HASH_LEN;
1963 		idata->len = htobe32(idata->len);
1964 		out = (void *)(idata + 1);
1965 
1966 		/* CPL_TX_TLS_ACK */
1967 		out = write_tx_tls_ack(out, tlsp->rx_chid, AES_GMAC_HASH_LEN,
1968 		    ghash_lcb);
1969 
1970 		/* CPL_FW6_PLD */
1971 		out = write_fw6_pld(out, tlsp->rx_chid, tlsp->rx_qid,
1972 		    AES_GMAC_HASH_LEN, (uintptr_t)tlsp | CPL_FW6_COOKIE_KTLS);
1973 
1974 		/* Space for partial hash. */
1975 		memset(out, 0, AES_GMAC_HASH_LEN);
1976 		out += AES_GMAC_HASH_LEN;
1977 
1978 		tlsp->ghash_pending = true;
1979 		tlsp->ghash_valid = false;
1980 		tlsp->ghash_lcb = ghash_lcb;
1981 		if (last_ghash_frag)
1982 			tlsp->ghash_offset = offset + plen;
1983 		else
1984 			tlsp->ghash_offset = rounddown2(offset + plen,
1985 			    GMAC_BLOCK_LEN);
1986 #ifdef VERBOSE_TRACES
1987 		CTR(KTR_CXGBE, "%s: %p requesting GHASH for offset %u",
1988 		    __func__, tlsp, tlsp->ghash_offset);
1989 #endif
1990 		m_snd_tag_ref(&tlsp->com);
1991 
1992 		txq->kern_tls_ghash_requested++;
1993 	}
1994 
1995 	if (using_scratch) {
1996 		out = dst;
1997 		copy_to_txd(eq, txq->ss, &out, wr_len);
1998 	}
1999 
2000 	txq->kern_tls_records++;
2001 	txq->kern_tls_octets += m_tls->m_len;
2002 	if (split_mode) {
2003 		txq->kern_tls_splitmode++;
2004 		txq->kern_tls_waste += leading_waste + trailing_waste;
2005 	}
2006 	if (need_lso)
2007 		txq->kern_tls_lso++;
2008 
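	/*
	 * Attach the mbuf chain only to the last work request so it
	 * is freed exactly once, when that request's descriptors are
	 * reclaimed.
	 */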
2009 	txsd = &txq->sdesc[pidx];
2010 	if (last_wr)
2011 		txsd->m = m;
2012 	else
2013 		txsd->m = NULL;
2014 	txsd->desc_used = ndesc;
2015 
2016 	return (ndesc);
2017 }
2018 
2019 int
2020 t7_ktls_write_wr(struct sge_txq *txq, void *dst, struct mbuf *m,
2021     u_int available)
2022 {
2023 	struct sge_eq *eq = &txq->eq;
2024 	struct tlspcb *tlsp;
2025 	struct tcphdr *tcp;
2026 	struct mbuf *m_tls;
2027 	struct ether_header *eh;
2028 	tcp_seq tcp_seqno;
2029 	u_int ndesc, pidx, totdesc;
2030 	uint16_t eh_type, mss;
2031 
2032 	TXQ_LOCK_ASSERT_OWNED(txq);
2033 	M_ASSERTPKTHDR(m);
2034 	MPASS(m->m_pkthdr.snd_tag != NULL);
2035 	tlsp = mst_to_tls(m->m_pkthdr.snd_tag);
2036 
2037 	totdesc = 0;
2038 	eh = mtod(m, struct ether_header *);
2039 	eh_type = ntohs(eh->ether_type);
2040 	if (eh_type == ETHERTYPE_VLAN) {
2041 		struct ether_vlan_header *evh = (void *)eh;
2042 
2043 		eh_type = ntohs(evh->evl_proto);
2044 	}
2045 
2046 	tcp = (struct tcphdr *)((char *)eh + m->m_pkthdr.l2hlen +
2047 	    m->m_pkthdr.l3hlen);
2048 	pidx = eq->pidx;
2049 
2050 	/* Determine MSS: TSO size, else last MSS, else MTU less headers. */
2051 	if (m->m_pkthdr.csum_flags & CSUM_TSO) {
2052 		mss = m->m_pkthdr.tso_segsz;
2053 		tlsp->prev_mss = mss;
2054 	} else if (tlsp->prev_mss != 0)
2055 		mss = tlsp->prev_mss;
2056 	else
2057 		mss = if_getmtu(tlsp->vi->ifp) -
2058 		    (m->m_pkthdr.l3hlen + m->m_pkthdr.l4hlen);
2059 
2060 	/* Fetch the starting TCP sequence number for this chain. */
2061 	tcp_seqno = ntohl(tcp->th_seq);
2062 #ifdef VERBOSE_TRACES
2063 	CTR(KTR_CXGBE, "%s: pkt len %d TCP seq %u", __func__, m->m_pkthdr.len,
2064 	    tcp_seqno);
2065 #endif
2066 	KASSERT(!tlsp->ghash_pending, ("%s: GHASH pending for send", __func__));
2067 
2068 	/*
2069 	 * Iterate over each TLS record constructing a work request
2070 	 * for that record.
2071 	 */
2072 	for (m_tls = m->m_next; m_tls != NULL; m_tls = m_tls->m_next) {
2073 		MPASS(m_tls->m_flags & M_EXTPG);
2074 
2075 		ndesc = ktls_write_tls_wr(tlsp, txq, dst, m, tcp, m_tls,
2076 		    available - totdesc, tcp_seqno, pidx, eh_type, mss);
2077 		totdesc += ndesc;
2078 		IDXINCR(pidx, ndesc, eq->sidx);
2079 		dst = &eq->desc[pidx];
2080 
2081 		tcp_seqno += m_tls->m_len;
2082 	}
2083 
2084 	/*
2085 	 * Queue another packet if this was a GCM request that didn't
2086 	 * request a GHASH response.
2087 	 */
2088 	if (tlsp->enc_mode == SCMD_CIPH_MODE_AES_GCM && !tlsp->ghash_pending)
2089 		ktls_queue_next_packet(tlsp, true);
2090 
2091 	MPASS(totdesc <= available);
2092 	return (totdesc);
2093 }
2094 
2095 static void
2096 t7_tls_tag_free(struct m_snd_tag *mst)
2097 {
2098 	struct adapter *sc;
2099 	struct tlspcb *tlsp;
2100 
2101 	tlsp = mst_to_tls(mst);
2102 	sc = tlsp->sc;
2103 
2104 	CTR2(KTR_CXGBE, "%s: %p", __func__, tlsp);
2105 
2106 	if (tlsp->tx_key_addr >= 0)
2107 		t4_free_tls_keyid(sc, tlsp->tx_key_addr);
2108 
2109 	KASSERT(mbufq_len(&tlsp->pending_mbufs) == 0,
2110 	    ("%s: pending mbufs", __func__));
2111 
2112 	zfree(tlsp, M_CXGBE);
2113 }
2114 
2115 static int
2116 ktls_fw6_pld(struct sge_iq *iq, const struct rss_header *rss,
2117     struct mbuf *m)
2118 {
2119 	const struct cpl_fw6_pld *cpl;
2120 	struct tlspcb *tlsp;
2121 	const void *ghash;
2122 
2123 	if (m != NULL)
2124 		cpl = mtod(m, const void *);
2125 	else
2126 		cpl = (const void *)(rss + 1);
2127 
2128 	tlsp = (struct tlspcb *)(uintptr_t)CPL_FW6_PLD_COOKIE(cpl);
2129 	KASSERT(cpl->data[0] == 0, ("%s: error status returned", __func__));
2130 
2131 	TXQ_LOCK(tlsp->txq);
2132 #ifdef VERBOSE_TRACES
2133 	CTR(KTR_CXGBE, "%s: %p received GHASH for offset %u%s", __func__, tlsp,
2134 	    tlsp->ghash_offset, tlsp->ghash_lcb ? " in LCB" : "");
2135 #endif
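	/*
	 * The hash is returned either inline in the CPL's data words
	 * (LCB) or in the payload following the CPL.
	 */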
2136 	if (tlsp->ghash_lcb)
2137 		ghash = &cpl->data[2];
2138 	else
2139 		ghash = cpl + 1;
2140 	memcpy(tlsp->ghash, ghash, AES_GMAC_HASH_LEN);
2141 	tlsp->ghash_valid = true;
2142 	tlsp->ghash_pending = false;
2143 	tlsp->txq->kern_tls_ghash_received++;
2144 
2145 	ktls_queue_next_packet(tlsp, false);
2146 	TXQ_UNLOCK(tlsp->txq);
2147 
2148 	m_snd_tag_rele(&tlsp->com);
2149 	m_freem(m);
2150 	return (0);
2151 }
2152 
2153 void
2154 t7_ktls_modload(void)
2155 {
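	/*
	 * A 16-byte block of zeroes with a known physical address,
	 * used to back the tag region of final GHASH fragments.
	 */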
2156 	zero_buffer = malloc_aligned(AES_GMAC_HASH_LEN, AES_GMAC_HASH_LEN,
2157 	    M_CXGBE, M_ZERO | M_WAITOK);
2158 	zero_buffer_pa = vtophys(zero_buffer);
2159 	t4_register_shared_cpl_handler(CPL_FW6_PLD, ktls_fw6_pld,
2160 	    CPL_FW6_COOKIE_KTLS);
2161 }
2162 
2163 void
2164 t7_ktls_modunload(void)
2165 {
2166 	free(zero_buffer, M_CXGBE);
2167 	t4_register_shared_cpl_handler(CPL_FW6_PLD, NULL, CPL_FW6_COOKIE_KTLS);
2168 }
2169 
2170 #else
2171 
2172 int
2173 t7_tls_tag_alloc(struct ifnet *ifp, union if_snd_tag_alloc_params *params,
2174     struct m_snd_tag **pt)
2175 {
2176 	return (ENXIO);
2177 }
2178 
2179 int
2180 t7_ktls_parse_pkt(struct mbuf *m)
2181 {
2182 	return (EINVAL);
2183 }
2184 
2185 int
2186 t7_ktls_write_wr(struct sge_txq *txq, void *dst, struct mbuf *m,
2187     u_int available)
2188 {
2189 	panic("can't happen");
2190 }
2191 
2192 void
2193 t7_ktls_modload(void)
2194 {
2195 }
2196 
2197 void
2198 t7_ktls_modunload(void)
2199 {
2200 }
2201 
2202 #endif
2203