xref: /freebsd/lib/libnvmf/nvmf_tcp.c (revision 06b2ed7a3adf023d8a8ce821616052f03dc18ed6)
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2022-2024 Chelsio Communications, Inc.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 */
72da066efSJohn Baldwin 
82da066efSJohn Baldwin #include <sys/endian.h>
92da066efSJohn Baldwin #include <sys/gsb_crc32.h>
102da066efSJohn Baldwin #include <sys/queue.h>
112da066efSJohn Baldwin #include <sys/uio.h>
122da066efSJohn Baldwin #include <assert.h>
132da066efSJohn Baldwin #include <errno.h>
142da066efSJohn Baldwin #include <stdio.h>
152da066efSJohn Baldwin #include <stdlib.h>
162da066efSJohn Baldwin #include <string.h>
172da066efSJohn Baldwin #include <unistd.h>
182da066efSJohn Baldwin 
192da066efSJohn Baldwin #include "libnvmf.h"
202da066efSJohn Baldwin #include "internal.h"
212da066efSJohn Baldwin #include "nvmf_tcp.h"
222da066efSJohn Baldwin 
232da066efSJohn Baldwin struct nvmf_tcp_qpair;
242da066efSJohn Baldwin 
/*
 * A data buffer registered for a command, identified by the pair
 * (cid, ttag).  Each buffer is linked on either the rx_buffers or the
 * tx_buffers list of its queue pair.
 */
struct nvmf_tcp_command_buffer {
	struct nvmf_tcp_qpair *qp;	/* queue pair the buffer was allocated for */

	void	*data;			/* start of the buffer */
	size_t	data_len;		/* size of the buffer in bytes */
	size_t	data_xfered;		/* bytes transferred so far */
	uint32_t data_offset;		/* offset of data[0] in the command's data */

	uint16_t cid;
	uint16_t ttag;

	LIST_ENTRY(nvmf_tcp_command_buffer) link;
};

/* List head type used for the per-queue-pair buffer lists. */
LIST_HEAD(nvmf_tcp_command_buffer_list, nvmf_tcp_command_buffer);
402da066efSJohn Baldwin 
412da066efSJohn Baldwin struct nvmf_tcp_association {
422da066efSJohn Baldwin 	struct nvmf_association na;
432da066efSJohn Baldwin 
442da066efSJohn Baldwin 	uint32_t ioccsz;
452da066efSJohn Baldwin };
462da066efSJohn Baldwin 
/*
 * A received PDU: a pointer to its buffer (which begins with the
 * common header) and the length of its data payload as determined
 * during validation.
 */
struct nvmf_tcp_rxpdu {
	struct nvme_tcp_common_pdu_hdr *hdr;
	uint32_t data_len;
};
512da066efSJohn Baldwin 
522da066efSJohn Baldwin struct nvmf_tcp_capsule {
532da066efSJohn Baldwin 	struct nvmf_capsule nc;
542da066efSJohn Baldwin 
552da066efSJohn Baldwin 	struct nvmf_tcp_rxpdu rx_pdu;
562da066efSJohn Baldwin 	struct nvmf_tcp_command_buffer *cb;
572da066efSJohn Baldwin 
582da066efSJohn Baldwin 	TAILQ_ENTRY(nvmf_tcp_capsule) link;
592da066efSJohn Baldwin };
602da066efSJohn Baldwin 
612da066efSJohn Baldwin struct nvmf_tcp_qpair {
622da066efSJohn Baldwin 	struct nvmf_qpair qp;
632da066efSJohn Baldwin 	int s;
642da066efSJohn Baldwin 
652da066efSJohn Baldwin 	uint8_t	txpda;
662da066efSJohn Baldwin 	uint8_t rxpda;
672da066efSJohn Baldwin 	bool header_digests;
682da066efSJohn Baldwin 	bool data_digests;
692da066efSJohn Baldwin 	uint32_t maxr2t;
702da066efSJohn Baldwin 	uint32_t maxh2cdata;
712da066efSJohn Baldwin 	uint32_t max_icd;	/* Host only */
722da066efSJohn Baldwin 	uint16_t next_ttag;	/* Controller only */
732da066efSJohn Baldwin 
742da066efSJohn Baldwin 	struct nvmf_tcp_command_buffer_list tx_buffers;
752da066efSJohn Baldwin 	struct nvmf_tcp_command_buffer_list rx_buffers;
762da066efSJohn Baldwin 	TAILQ_HEAD(, nvmf_tcp_capsule) rx_capsules;
772da066efSJohn Baldwin };
782da066efSJohn Baldwin 
/*
 * Downcasts from the generic libnvmf objects to their TCP-specific
 * containers.  Each relies on the generic object being the first
 * member of the TCP structure.
 *
 * TASSOC previously named its parameter "nc" while expanding "(na)",
 * so it ignored its argument and silently captured whatever variable
 * called "na" happened to be in scope at the call site.
 */
#define	TASSOC(na)	((struct nvmf_tcp_association *)(na))
#define	TCAP(nc)	((struct nvmf_tcp_capsule *)(nc))
#define	CTCAP(nc)	((const struct nvmf_tcp_capsule *)(nc))
#define	TQP(qp)		((struct nvmf_tcp_qpair *)(qp))
832da066efSJohn Baldwin 
842da066efSJohn Baldwin static const char zero_padding[NVME_TCP_PDU_PDO_MAX_OFFSET];
852da066efSJohn Baldwin 
/*
 * Compute the digest of 'len' bytes at 'buf': a CRC32C seeded with
 * all ones whose result is inverted before being returned.
 */
static uint32_t
compute_digest(const void *buf, size_t len)
{
	uint32_t crc;

	crc = calculate_crc32c(0xffffffff, buf, len);
	return (crc ^ 0xffffffff);
}
912da066efSJohn Baldwin 
922da066efSJohn Baldwin static struct nvmf_tcp_command_buffer *
tcp_alloc_command_buffer(struct nvmf_tcp_qpair * qp,void * data,uint32_t data_offset,size_t data_len,uint16_t cid,uint16_t ttag,bool receive)932da066efSJohn Baldwin tcp_alloc_command_buffer(struct nvmf_tcp_qpair *qp, void *data,
942da066efSJohn Baldwin     uint32_t data_offset, size_t data_len, uint16_t cid, uint16_t ttag,
952da066efSJohn Baldwin     bool receive)
962da066efSJohn Baldwin {
972da066efSJohn Baldwin 	struct nvmf_tcp_command_buffer *cb;
982da066efSJohn Baldwin 
992da066efSJohn Baldwin 	cb = malloc(sizeof(*cb));
1002da066efSJohn Baldwin 	cb->qp = qp;
1012da066efSJohn Baldwin 	cb->data = data;
1022da066efSJohn Baldwin 	cb->data_offset = data_offset;
1032da066efSJohn Baldwin 	cb->data_len = data_len;
1042da066efSJohn Baldwin 	cb->data_xfered = 0;
1052da066efSJohn Baldwin 	cb->cid = cid;
1062da066efSJohn Baldwin 	cb->ttag = ttag;
1072da066efSJohn Baldwin 
1082da066efSJohn Baldwin 	if (receive)
1092da066efSJohn Baldwin 		LIST_INSERT_HEAD(&qp->rx_buffers, cb, link);
1102da066efSJohn Baldwin 	else
1112da066efSJohn Baldwin 		LIST_INSERT_HEAD(&qp->tx_buffers, cb, link);
1122da066efSJohn Baldwin 	return (cb);
1132da066efSJohn Baldwin }
1142da066efSJohn Baldwin 
1152da066efSJohn Baldwin static struct nvmf_tcp_command_buffer *
tcp_find_command_buffer(struct nvmf_tcp_qpair * qp,uint16_t cid,uint16_t ttag,bool receive)1162da066efSJohn Baldwin tcp_find_command_buffer(struct nvmf_tcp_qpair *qp, uint16_t cid, uint16_t ttag,
1172da066efSJohn Baldwin     bool receive)
1182da066efSJohn Baldwin {
1192da066efSJohn Baldwin 	struct nvmf_tcp_command_buffer_list *list;
1202da066efSJohn Baldwin 	struct nvmf_tcp_command_buffer *cb;
1212da066efSJohn Baldwin 
1222da066efSJohn Baldwin 	list = receive ? &qp->rx_buffers : &qp->tx_buffers;
1232da066efSJohn Baldwin 	LIST_FOREACH(cb, list, link) {
1242da066efSJohn Baldwin 		if (cb->cid == cid && cb->ttag == ttag)
1252da066efSJohn Baldwin 			return (cb);
1262da066efSJohn Baldwin 	}
1272da066efSJohn Baldwin 	return (NULL);
1282da066efSJohn Baldwin }
1292da066efSJohn Baldwin 
1302da066efSJohn Baldwin static void
tcp_purge_command_buffer(struct nvmf_tcp_qpair * qp,uint16_t cid,uint16_t ttag,bool receive)1312da066efSJohn Baldwin tcp_purge_command_buffer(struct nvmf_tcp_qpair *qp, uint16_t cid, uint16_t ttag,
1322da066efSJohn Baldwin     bool receive)
1332da066efSJohn Baldwin {
1342da066efSJohn Baldwin 	struct nvmf_tcp_command_buffer *cb;
1352da066efSJohn Baldwin 
1362da066efSJohn Baldwin 	cb = tcp_find_command_buffer(qp, cid, ttag, receive);
1372da066efSJohn Baldwin 	if (cb != NULL)
1382da066efSJohn Baldwin 		LIST_REMOVE(cb, link);
1392da066efSJohn Baldwin }
1402da066efSJohn Baldwin 
1412da066efSJohn Baldwin static void
tcp_free_command_buffer(struct nvmf_tcp_command_buffer * cb)1422da066efSJohn Baldwin tcp_free_command_buffer(struct nvmf_tcp_command_buffer *cb)
1432da066efSJohn Baldwin {
1442da066efSJohn Baldwin 	LIST_REMOVE(cb, link);
1452da066efSJohn Baldwin 	free(cb);
1462da066efSJohn Baldwin }
1472da066efSJohn Baldwin 
1482da066efSJohn Baldwin static int
nvmf_tcp_write_pdu(struct nvmf_tcp_qpair * qp,const void * pdu,size_t len)1492da066efSJohn Baldwin nvmf_tcp_write_pdu(struct nvmf_tcp_qpair *qp, const void *pdu, size_t len)
1502da066efSJohn Baldwin {
1512da066efSJohn Baldwin 	ssize_t nwritten;
1522da066efSJohn Baldwin 	const char *cp;
1532da066efSJohn Baldwin 
1542da066efSJohn Baldwin 	cp = pdu;
1552da066efSJohn Baldwin 	while (len != 0) {
1562da066efSJohn Baldwin 		nwritten = write(qp->s, cp, len);
1572da066efSJohn Baldwin 		if (nwritten < 0)
1582da066efSJohn Baldwin 			return (errno);
1592da066efSJohn Baldwin 		len -= nwritten;
1602da066efSJohn Baldwin 		cp += nwritten;
1612da066efSJohn Baldwin 	}
1622da066efSJohn Baldwin 	return (0);
1632da066efSJohn Baldwin }
1642da066efSJohn Baldwin 
1652da066efSJohn Baldwin static int
nvmf_tcp_write_pdu_iov(struct nvmf_tcp_qpair * qp,struct iovec * iov,u_int iovcnt,size_t len)1662da066efSJohn Baldwin nvmf_tcp_write_pdu_iov(struct nvmf_tcp_qpair *qp, struct iovec *iov,
1672da066efSJohn Baldwin     u_int iovcnt, size_t len)
1682da066efSJohn Baldwin {
1692da066efSJohn Baldwin 	ssize_t nwritten;
1702da066efSJohn Baldwin 
1712da066efSJohn Baldwin 	for (;;) {
1722da066efSJohn Baldwin 		nwritten = writev(qp->s, iov, iovcnt);
1732da066efSJohn Baldwin 		if (nwritten < 0)
1742da066efSJohn Baldwin 			return (errno);
1752da066efSJohn Baldwin 
1762da066efSJohn Baldwin 		len -= nwritten;
1772da066efSJohn Baldwin 		if (len == 0)
1782da066efSJohn Baldwin 			return (0);
1792da066efSJohn Baldwin 
1802da066efSJohn Baldwin 		while (iov->iov_len <= (size_t)nwritten) {
1812da066efSJohn Baldwin 			nwritten -= iov->iov_len;
1822da066efSJohn Baldwin 			iovcnt--;
1832da066efSJohn Baldwin 			iov++;
1842da066efSJohn Baldwin 		}
1852da066efSJohn Baldwin 
1862da066efSJohn Baldwin 		iov->iov_base = (char *)iov->iov_base + nwritten;
1872da066efSJohn Baldwin 		iov->iov_len -= nwritten;
1882da066efSJohn Baldwin 	}
1892da066efSJohn Baldwin }
1902da066efSJohn Baldwin 
1912da066efSJohn Baldwin static void
nvmf_tcp_report_error(struct nvmf_association * na,struct nvmf_tcp_qpair * qp,uint16_t fes,uint32_t fei,const void * rx_pdu,size_t pdu_len,u_int hlen)1922da066efSJohn Baldwin nvmf_tcp_report_error(struct nvmf_association *na, struct nvmf_tcp_qpair *qp,
1932da066efSJohn Baldwin     uint16_t fes, uint32_t fei, const void *rx_pdu, size_t pdu_len, u_int hlen)
1942da066efSJohn Baldwin {
1952da066efSJohn Baldwin 	struct nvme_tcp_term_req_hdr hdr;
1962da066efSJohn Baldwin 	struct iovec iov[2];
1972da066efSJohn Baldwin 
1982da066efSJohn Baldwin 	if (hlen != 0) {
1992da066efSJohn Baldwin 		if (hlen > NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE)
2002da066efSJohn Baldwin 			hlen = NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE;
2012da066efSJohn Baldwin 		if (hlen > pdu_len)
2022da066efSJohn Baldwin 			hlen = pdu_len;
2032da066efSJohn Baldwin 	}
2042da066efSJohn Baldwin 
2052da066efSJohn Baldwin 	memset(&hdr, 0, sizeof(hdr));
2062da066efSJohn Baldwin 	hdr.common.pdu_type = na->na_controller ?
2072da066efSJohn Baldwin 	    NVME_TCP_PDU_TYPE_C2H_TERM_REQ : NVME_TCP_PDU_TYPE_H2C_TERM_REQ;
2082da066efSJohn Baldwin 	hdr.common.hlen = sizeof(hdr);
2092da066efSJohn Baldwin 	hdr.common.plen = sizeof(hdr) + hlen;
2102da066efSJohn Baldwin 	hdr.fes = htole16(fes);
2112da066efSJohn Baldwin 	le32enc(hdr.fei, fei);
2122da066efSJohn Baldwin 	iov[0].iov_base = &hdr;
2132da066efSJohn Baldwin 	iov[0].iov_len = sizeof(hdr);
2142da066efSJohn Baldwin 	iov[1].iov_base = __DECONST(void *, rx_pdu);
2152da066efSJohn Baldwin 	iov[1].iov_len = hlen;
2162da066efSJohn Baldwin 
2172da066efSJohn Baldwin 	(void)nvmf_tcp_write_pdu_iov(qp, iov, nitems(iov), sizeof(hdr) + hlen);
2182da066efSJohn Baldwin 	close(qp->s);
2192da066efSJohn Baldwin 	qp->s = -1;
2202da066efSJohn Baldwin }
2212da066efSJohn Baldwin 
2222da066efSJohn Baldwin static int
nvmf_tcp_validate_pdu(struct nvmf_tcp_qpair * qp,struct nvmf_tcp_rxpdu * pdu,size_t pdu_len)2232da066efSJohn Baldwin nvmf_tcp_validate_pdu(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_rxpdu *pdu,
2242da066efSJohn Baldwin     size_t pdu_len)
2252da066efSJohn Baldwin {
2262da066efSJohn Baldwin 	const struct nvme_tcp_common_pdu_hdr *ch;
2272da066efSJohn Baldwin 	uint32_t data_len, fei, plen;
2282da066efSJohn Baldwin 	uint32_t digest, rx_digest;
2292da066efSJohn Baldwin 	u_int hlen;
2302da066efSJohn Baldwin 	int error;
2312da066efSJohn Baldwin 	uint16_t fes;
2322da066efSJohn Baldwin 
2332da066efSJohn Baldwin 	/* Determine how large of a PDU header to return for errors. */
2342da066efSJohn Baldwin 	ch = pdu->hdr;
2352da066efSJohn Baldwin 	hlen = ch->hlen;
2362da066efSJohn Baldwin 	plen = le32toh(ch->plen);
2372da066efSJohn Baldwin 	if (hlen < sizeof(*ch) || hlen > plen)
2382da066efSJohn Baldwin 		hlen = sizeof(*ch);
2392da066efSJohn Baldwin 
2402da066efSJohn Baldwin 	error = nvmf_tcp_validate_pdu_header(ch,
2412da066efSJohn Baldwin 	    qp->qp.nq_association->na_controller, qp->header_digests,
2422da066efSJohn Baldwin 	    qp->data_digests, qp->rxpda, &data_len, &fes, &fei);
2432da066efSJohn Baldwin 	if (error != 0) {
2442da066efSJohn Baldwin 		if (error == ECONNRESET) {
2452da066efSJohn Baldwin 			close(qp->s);
2462da066efSJohn Baldwin 			qp->s = -1;
2472da066efSJohn Baldwin 		} else {
2482da066efSJohn Baldwin 			nvmf_tcp_report_error(qp->qp.nq_association, qp,
2492da066efSJohn Baldwin 			    fes, fei, ch, pdu_len, hlen);
2502da066efSJohn Baldwin 		}
2512da066efSJohn Baldwin 		return (error);
2522da066efSJohn Baldwin 	}
2532da066efSJohn Baldwin 
2542da066efSJohn Baldwin 	/* Check header digest if present. */
2552da066efSJohn Baldwin 	if ((ch->flags & NVME_TCP_CH_FLAGS_HDGSTF) != 0) {
2562da066efSJohn Baldwin 		digest = compute_digest(ch, ch->hlen);
2572da066efSJohn Baldwin 		memcpy(&rx_digest, (const char *)ch + ch->hlen,
2582da066efSJohn Baldwin 		    sizeof(rx_digest));
2592da066efSJohn Baldwin 		if (digest != rx_digest) {
2602da066efSJohn Baldwin 			printf("NVMe/TCP: Header digest mismatch\n");
2612da066efSJohn Baldwin 			nvmf_tcp_report_error(qp->qp.nq_association, qp,
2622da066efSJohn Baldwin 			    NVME_TCP_TERM_REQ_FES_HDGST_ERROR, rx_digest, ch,
2632da066efSJohn Baldwin 			    pdu_len, hlen);
2642da066efSJohn Baldwin 			return (EBADMSG);
2652da066efSJohn Baldwin 		}
2662da066efSJohn Baldwin 	}
2672da066efSJohn Baldwin 
2682da066efSJohn Baldwin 	/* Check data digest if present. */
2692da066efSJohn Baldwin 	if ((ch->flags & NVME_TCP_CH_FLAGS_DDGSTF) != 0) {
2702da066efSJohn Baldwin 		digest = compute_digest((const char *)ch + ch->pdo, data_len);
2712da066efSJohn Baldwin 		memcpy(&rx_digest, (const char *)ch + plen - sizeof(rx_digest),
2722da066efSJohn Baldwin 		    sizeof(rx_digest));
2732da066efSJohn Baldwin 		if (digest != rx_digest) {
2742da066efSJohn Baldwin 			printf("NVMe/TCP: Data digest mismatch\n");
2752da066efSJohn Baldwin 			return (EBADMSG);
2762da066efSJohn Baldwin 		}
2772da066efSJohn Baldwin 	}
2782da066efSJohn Baldwin 
2792da066efSJohn Baldwin 	pdu->data_len = data_len;
2802da066efSJohn Baldwin 	return (0);
2812da066efSJohn Baldwin }
2822da066efSJohn Baldwin 
/*
 * Fill 'buf' with exactly 'len' bytes from descriptor 's', issuing as
 * many read() calls as it takes.  Returns 0 on success, the errno
 * value if a read() fails, or ECONNRESET if end-of-file is reached
 * before the buffer is full.
 */
static int
nvmf_tcp_read_buffer(int s, void *buf, size_t len)
{
	char *dst = buf;
	size_t resid;
	ssize_t n;

	for (resid = len; resid != 0; resid -= n, dst += n) {
		n = read(s, dst, resid);
		if (n == 0)
			return (ECONNRESET);
		if (n < 0)
			return (errno);
	}
	return (0);
}
3052da066efSJohn Baldwin 
3062da066efSJohn Baldwin static int
nvmf_tcp_read_pdu(struct nvmf_tcp_qpair * qp,struct nvmf_tcp_rxpdu * pdu)3072da066efSJohn Baldwin nvmf_tcp_read_pdu(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_rxpdu *pdu)
3082da066efSJohn Baldwin {
3092da066efSJohn Baldwin 	struct nvme_tcp_common_pdu_hdr ch;
3102da066efSJohn Baldwin 	uint32_t plen;
3112da066efSJohn Baldwin 	int error;
3122da066efSJohn Baldwin 
3132da066efSJohn Baldwin 	memset(pdu, 0, sizeof(*pdu));
3142da066efSJohn Baldwin 	error = nvmf_tcp_read_buffer(qp->s, &ch, sizeof(ch));
3152da066efSJohn Baldwin 	if (error != 0)
3162da066efSJohn Baldwin 		return (error);
3172da066efSJohn Baldwin 
3182da066efSJohn Baldwin 	plen = le32toh(ch.plen);
3192da066efSJohn Baldwin 
3202da066efSJohn Baldwin 	/*
3212da066efSJohn Baldwin 	 * Validate a header with garbage lengths to trigger
3222da066efSJohn Baldwin 	 * an error message without reading more.
3232da066efSJohn Baldwin 	 */
3242da066efSJohn Baldwin 	if (plen < sizeof(ch) || ch.hlen > plen) {
3252da066efSJohn Baldwin 		pdu->hdr = &ch;
3262da066efSJohn Baldwin 		error = nvmf_tcp_validate_pdu(qp, pdu, sizeof(ch));
3272da066efSJohn Baldwin 		pdu->hdr = NULL;
3282da066efSJohn Baldwin 		assert(error != 0);
3292da066efSJohn Baldwin 		return (error);
3302da066efSJohn Baldwin 	}
3312da066efSJohn Baldwin 
3322da066efSJohn Baldwin 	/* Read the rest of the PDU. */
3332da066efSJohn Baldwin 	pdu->hdr = malloc(plen);
3342da066efSJohn Baldwin 	memcpy(pdu->hdr, &ch, sizeof(ch));
3352da066efSJohn Baldwin 	error = nvmf_tcp_read_buffer(qp->s, pdu->hdr + 1, plen - sizeof(ch));
3362da066efSJohn Baldwin 	if (error != 0)
3372da066efSJohn Baldwin 		return (error);
3382da066efSJohn Baldwin 	error = nvmf_tcp_validate_pdu(qp, pdu, plen);
3392da066efSJohn Baldwin 	if (error != 0) {
3402da066efSJohn Baldwin 		free(pdu->hdr);
3412da066efSJohn Baldwin 		pdu->hdr = NULL;
3422da066efSJohn Baldwin 	}
3432da066efSJohn Baldwin 	return (error);
3442da066efSJohn Baldwin }
3452da066efSJohn Baldwin 
3462da066efSJohn Baldwin static void
nvmf_tcp_free_pdu(struct nvmf_tcp_rxpdu * pdu)3472da066efSJohn Baldwin nvmf_tcp_free_pdu(struct nvmf_tcp_rxpdu *pdu)
3482da066efSJohn Baldwin {
3492da066efSJohn Baldwin 	free(pdu->hdr);
3502da066efSJohn Baldwin 	pdu->hdr = NULL;
3512da066efSJohn Baldwin }
3522da066efSJohn Baldwin 
3532da066efSJohn Baldwin static int
nvmf_tcp_handle_term_req(struct nvmf_tcp_rxpdu * pdu)3542da066efSJohn Baldwin nvmf_tcp_handle_term_req(struct nvmf_tcp_rxpdu *pdu)
3552da066efSJohn Baldwin {
3562da066efSJohn Baldwin 	struct nvme_tcp_term_req_hdr *hdr;
3572da066efSJohn Baldwin 
3582da066efSJohn Baldwin 	hdr = (void *)pdu->hdr;
3592da066efSJohn Baldwin 
3602da066efSJohn Baldwin 	printf("NVMe/TCP: Received termination request: fes %#x fei %#x\n",
3612da066efSJohn Baldwin 	    le16toh(hdr->fes), le32dec(hdr->fei));
3622da066efSJohn Baldwin 	nvmf_tcp_free_pdu(pdu);
3632da066efSJohn Baldwin 	return (ECONNRESET);
3642da066efSJohn Baldwin }
3652da066efSJohn Baldwin 
3662da066efSJohn Baldwin static int
nvmf_tcp_save_command_capsule(struct nvmf_tcp_qpair * qp,struct nvmf_tcp_rxpdu * pdu)3672da066efSJohn Baldwin nvmf_tcp_save_command_capsule(struct nvmf_tcp_qpair *qp,
3682da066efSJohn Baldwin     struct nvmf_tcp_rxpdu *pdu)
3692da066efSJohn Baldwin {
3702da066efSJohn Baldwin 	struct nvme_tcp_cmd *cmd;
3712da066efSJohn Baldwin 	struct nvmf_capsule *nc;
3722da066efSJohn Baldwin 	struct nvmf_tcp_capsule *tc;
3732da066efSJohn Baldwin 
3742da066efSJohn Baldwin 	cmd = (void *)pdu->hdr;
3752da066efSJohn Baldwin 
3762da066efSJohn Baldwin 	nc = nvmf_allocate_command(&qp->qp, &cmd->ccsqe);
3772da066efSJohn Baldwin 	if (nc == NULL)
3782da066efSJohn Baldwin 		return (ENOMEM);
3792da066efSJohn Baldwin 
3802da066efSJohn Baldwin 	tc = TCAP(nc);
3812da066efSJohn Baldwin 	tc->rx_pdu = *pdu;
3822da066efSJohn Baldwin 
3832da066efSJohn Baldwin 	TAILQ_INSERT_TAIL(&qp->rx_capsules, tc, link);
3842da066efSJohn Baldwin 	return (0);
3852da066efSJohn Baldwin }
3862da066efSJohn Baldwin 
3872da066efSJohn Baldwin static int
nvmf_tcp_save_response_capsule(struct nvmf_tcp_qpair * qp,struct nvmf_tcp_rxpdu * pdu)3882da066efSJohn Baldwin nvmf_tcp_save_response_capsule(struct nvmf_tcp_qpair *qp,
3892da066efSJohn Baldwin     struct nvmf_tcp_rxpdu *pdu)
3902da066efSJohn Baldwin {
3912da066efSJohn Baldwin 	struct nvme_tcp_rsp *rsp;
3922da066efSJohn Baldwin 	struct nvmf_capsule *nc;
3932da066efSJohn Baldwin 	struct nvmf_tcp_capsule *tc;
3942da066efSJohn Baldwin 
3952da066efSJohn Baldwin 	rsp = (void *)pdu->hdr;
3962da066efSJohn Baldwin 
3972da066efSJohn Baldwin 	nc = nvmf_allocate_response(&qp->qp, &rsp->rccqe);
3982da066efSJohn Baldwin 	if (nc == NULL)
3992da066efSJohn Baldwin 		return (ENOMEM);
4002da066efSJohn Baldwin 
4012da066efSJohn Baldwin 	nc->nc_sqhd_valid = true;
4022da066efSJohn Baldwin 	tc = TCAP(nc);
4032da066efSJohn Baldwin 	tc->rx_pdu = *pdu;
4042da066efSJohn Baldwin 
4052da066efSJohn Baldwin 	TAILQ_INSERT_TAIL(&qp->rx_capsules, tc, link);
4062da066efSJohn Baldwin 
4072da066efSJohn Baldwin 	/*
4082da066efSJohn Baldwin 	 * Once the CQE has been received, no further transfers to the
4092da066efSJohn Baldwin 	 * command buffer for the associated CID can occur.
4102da066efSJohn Baldwin 	 */
4112da066efSJohn Baldwin 	tcp_purge_command_buffer(qp, rsp->rccqe.cid, 0, true);
4122da066efSJohn Baldwin 	tcp_purge_command_buffer(qp, rsp->rccqe.cid, 0, false);
4132da066efSJohn Baldwin 
4142da066efSJohn Baldwin 	return (0);
4152da066efSJohn Baldwin }
4162da066efSJohn Baldwin 
4172da066efSJohn Baldwin /*
4182da066efSJohn Baldwin  * Construct and send a PDU that contains an optional data payload.
4192da066efSJohn Baldwin  * This includes dealing with digests and the length fields in the
4202da066efSJohn Baldwin  * common header.
4212da066efSJohn Baldwin  */
4222da066efSJohn Baldwin static int
nvmf_tcp_construct_pdu(struct nvmf_tcp_qpair * qp,void * hdr,size_t hlen,void * data,uint32_t data_len)4232da066efSJohn Baldwin nvmf_tcp_construct_pdu(struct nvmf_tcp_qpair *qp, void *hdr, size_t hlen,
4242da066efSJohn Baldwin     void *data, uint32_t data_len)
4252da066efSJohn Baldwin {
4262da066efSJohn Baldwin 	struct nvme_tcp_common_pdu_hdr *ch;
4272da066efSJohn Baldwin 	struct iovec iov[5];
4282da066efSJohn Baldwin 	u_int iovcnt;
4292da066efSJohn Baldwin 	uint32_t header_digest, data_digest, pad, pdo, plen;
4302da066efSJohn Baldwin 
4312da066efSJohn Baldwin 	plen = hlen;
4322da066efSJohn Baldwin 	if (qp->header_digests)
4332da066efSJohn Baldwin 		plen += sizeof(header_digest);
4342da066efSJohn Baldwin 	if (data_len != 0) {
435*06b2ed7aSJohn Baldwin 		pdo = roundup(plen, qp->txpda);
4362da066efSJohn Baldwin 		pad = pdo - plen;
4372da066efSJohn Baldwin 		plen = pdo + data_len;
4382da066efSJohn Baldwin 		if (qp->data_digests)
4392da066efSJohn Baldwin 			plen += sizeof(data_digest);
4402da066efSJohn Baldwin 	} else {
4412da066efSJohn Baldwin 		assert(data == NULL);
4422da066efSJohn Baldwin 		pdo = 0;
4432da066efSJohn Baldwin 		pad = 0;
4442da066efSJohn Baldwin 	}
4452da066efSJohn Baldwin 
4462da066efSJohn Baldwin 	ch = hdr;
4472da066efSJohn Baldwin 	ch->hlen = hlen;
4482da066efSJohn Baldwin 	if (qp->header_digests)
4492da066efSJohn Baldwin 		ch->flags |= NVME_TCP_CH_FLAGS_HDGSTF;
4502da066efSJohn Baldwin 	if (qp->data_digests && data_len != 0)
4512da066efSJohn Baldwin 		ch->flags |= NVME_TCP_CH_FLAGS_DDGSTF;
4522da066efSJohn Baldwin 	ch->pdo = pdo;
4532da066efSJohn Baldwin 	ch->plen = htole32(plen);
4542da066efSJohn Baldwin 
4552da066efSJohn Baldwin 	/* CH + PSH */
4562da066efSJohn Baldwin 	iov[0].iov_base = hdr;
4572da066efSJohn Baldwin 	iov[0].iov_len = hlen;
4582da066efSJohn Baldwin 	iovcnt = 1;
4592da066efSJohn Baldwin 
4602da066efSJohn Baldwin 	/* HDGST */
4612da066efSJohn Baldwin 	if (qp->header_digests) {
4622da066efSJohn Baldwin 		header_digest = compute_digest(hdr, hlen);
4632da066efSJohn Baldwin 		iov[iovcnt].iov_base = &header_digest;
4642da066efSJohn Baldwin 		iov[iovcnt].iov_len = sizeof(header_digest);
4652da066efSJohn Baldwin 		iovcnt++;
4662da066efSJohn Baldwin 	}
4672da066efSJohn Baldwin 
4682da066efSJohn Baldwin 	if (pad != 0) {
4692da066efSJohn Baldwin 		/* PAD */
4702da066efSJohn Baldwin 		iov[iovcnt].iov_base = __DECONST(char *, zero_padding);
4712da066efSJohn Baldwin 		iov[iovcnt].iov_len = pad;
4722da066efSJohn Baldwin 		iovcnt++;
4732da066efSJohn Baldwin 	}
4742da066efSJohn Baldwin 
4752da066efSJohn Baldwin 	if (data_len != 0) {
4762da066efSJohn Baldwin 		/* DATA */
4772da066efSJohn Baldwin 		iov[iovcnt].iov_base = data;
4782da066efSJohn Baldwin 		iov[iovcnt].iov_len = data_len;
4792da066efSJohn Baldwin 		iovcnt++;
4802da066efSJohn Baldwin 
4812da066efSJohn Baldwin 		/* DDGST */
4822da066efSJohn Baldwin 		if (qp->data_digests) {
4832da066efSJohn Baldwin 			data_digest = compute_digest(data, data_len);
4842da066efSJohn Baldwin 			iov[iovcnt].iov_base = &data_digest;
4852da066efSJohn Baldwin 			iov[iovcnt].iov_len = sizeof(data_digest);
4862da066efSJohn Baldwin 			iovcnt++;
4872da066efSJohn Baldwin 		}
4882da066efSJohn Baldwin 	}
4892da066efSJohn Baldwin 
4902da066efSJohn Baldwin 	return (nvmf_tcp_write_pdu_iov(qp, iov, iovcnt, plen));
4912da066efSJohn Baldwin }
4922da066efSJohn Baldwin 
4932da066efSJohn Baldwin static int
nvmf_tcp_handle_h2c_data(struct nvmf_tcp_qpair * qp,struct nvmf_tcp_rxpdu * pdu)4942da066efSJohn Baldwin nvmf_tcp_handle_h2c_data(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_rxpdu *pdu)
4952da066efSJohn Baldwin {
4962da066efSJohn Baldwin 	struct nvme_tcp_h2c_data_hdr *h2c;
4972da066efSJohn Baldwin 	struct nvmf_tcp_command_buffer *cb;
4982da066efSJohn Baldwin 	uint32_t data_len, data_offset;
4992da066efSJohn Baldwin 	const char *icd;
5002da066efSJohn Baldwin 
5012da066efSJohn Baldwin 	h2c = (void *)pdu->hdr;
5022da066efSJohn Baldwin 	if (le32toh(h2c->datal) > qp->maxh2cdata) {
5032da066efSJohn Baldwin 		nvmf_tcp_report_error(qp->qp.nq_association, qp,
5042da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_LIMIT_EXCEEDED, 0,
5052da066efSJohn Baldwin 		    pdu->hdr, le32toh(pdu->hdr->plen), pdu->hdr->hlen);
5062da066efSJohn Baldwin 		nvmf_tcp_free_pdu(pdu);
5072da066efSJohn Baldwin 		return (EBADMSG);
5082da066efSJohn Baldwin 	}
5092da066efSJohn Baldwin 
5102da066efSJohn Baldwin 	cb = tcp_find_command_buffer(qp, h2c->cccid, h2c->ttag, true);
5112da066efSJohn Baldwin 	if (cb == NULL) {
5122da066efSJohn Baldwin 		nvmf_tcp_report_error(qp->qp.nq_association, qp,
5132da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD,
5142da066efSJohn Baldwin 		    offsetof(struct nvme_tcp_h2c_data_hdr, ttag), pdu->hdr,
5152da066efSJohn Baldwin 		    le32toh(pdu->hdr->plen), pdu->hdr->hlen);
5162da066efSJohn Baldwin 		nvmf_tcp_free_pdu(pdu);
5172da066efSJohn Baldwin 		return (EBADMSG);
5182da066efSJohn Baldwin 	}
5192da066efSJohn Baldwin 
5202da066efSJohn Baldwin 	data_len = le32toh(h2c->datal);
5212da066efSJohn Baldwin 	if (data_len != pdu->data_len) {
5222da066efSJohn Baldwin 		nvmf_tcp_report_error(qp->qp.nq_association, qp,
5232da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD,
5242da066efSJohn Baldwin 		    offsetof(struct nvme_tcp_h2c_data_hdr, datal), pdu->hdr,
5252da066efSJohn Baldwin 		    le32toh(pdu->hdr->plen), pdu->hdr->hlen);
5262da066efSJohn Baldwin 		nvmf_tcp_free_pdu(pdu);
5272da066efSJohn Baldwin 		return (EBADMSG);
5282da066efSJohn Baldwin 	}
5292da066efSJohn Baldwin 
5302da066efSJohn Baldwin 	data_offset = le32toh(h2c->datao);
5312da066efSJohn Baldwin 	if (data_offset < cb->data_offset ||
5322da066efSJohn Baldwin 	    data_offset + data_len > cb->data_offset + cb->data_len) {
5332da066efSJohn Baldwin 		nvmf_tcp_report_error(qp->qp.nq_association, qp,
5342da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE, 0,
5352da066efSJohn Baldwin 		    pdu->hdr, le32toh(pdu->hdr->plen), pdu->hdr->hlen);
5362da066efSJohn Baldwin 		nvmf_tcp_free_pdu(pdu);
5372da066efSJohn Baldwin 		return (EBADMSG);
5382da066efSJohn Baldwin 	}
5392da066efSJohn Baldwin 
5402da066efSJohn Baldwin 	if (data_offset != cb->data_offset + cb->data_xfered) {
5412da066efSJohn Baldwin 		nvmf_tcp_report_error(qp->qp.nq_association, qp,
5422da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->hdr,
5432da066efSJohn Baldwin 		    le32toh(pdu->hdr->plen), pdu->hdr->hlen);
5442da066efSJohn Baldwin 		nvmf_tcp_free_pdu(pdu);
5452da066efSJohn Baldwin 		return (EBADMSG);
5462da066efSJohn Baldwin 	}
5472da066efSJohn Baldwin 
5482da066efSJohn Baldwin 	if ((cb->data_xfered + data_len == cb->data_len) !=
5492da066efSJohn Baldwin 	    ((pdu->hdr->flags & NVME_TCP_H2C_DATA_FLAGS_LAST_PDU) != 0)) {
5502da066efSJohn Baldwin 		nvmf_tcp_report_error(qp->qp.nq_association, qp,
5512da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->hdr,
5522da066efSJohn Baldwin 		    le32toh(pdu->hdr->plen), pdu->hdr->hlen);
5532da066efSJohn Baldwin 		nvmf_tcp_free_pdu(pdu);
5542da066efSJohn Baldwin 		return (EBADMSG);
5552da066efSJohn Baldwin 	}
5562da066efSJohn Baldwin 
5572da066efSJohn Baldwin 	cb->data_xfered += data_len;
5582da066efSJohn Baldwin 	data_offset -= cb->data_offset;
5592da066efSJohn Baldwin 	icd = (const char *)pdu->hdr + pdu->hdr->pdo;
5602da066efSJohn Baldwin 	memcpy((char *)cb->data + data_offset, icd, data_len);
5612da066efSJohn Baldwin 
5622da066efSJohn Baldwin 	nvmf_tcp_free_pdu(pdu);
5632da066efSJohn Baldwin 	return (0);
5642da066efSJohn Baldwin }
5652da066efSJohn Baldwin 
5662da066efSJohn Baldwin static int
nvmf_tcp_handle_c2h_data(struct nvmf_tcp_qpair * qp,struct nvmf_tcp_rxpdu * pdu)5672da066efSJohn Baldwin nvmf_tcp_handle_c2h_data(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_rxpdu *pdu)
5682da066efSJohn Baldwin {
5692da066efSJohn Baldwin 	struct nvme_tcp_c2h_data_hdr *c2h;
5702da066efSJohn Baldwin 	struct nvmf_tcp_command_buffer *cb;
5712da066efSJohn Baldwin 	uint32_t data_len, data_offset;
5722da066efSJohn Baldwin 	const char *icd;
5732da066efSJohn Baldwin 
5742da066efSJohn Baldwin 	c2h = (void *)pdu->hdr;
5752da066efSJohn Baldwin 
5762da066efSJohn Baldwin 	cb = tcp_find_command_buffer(qp, c2h->cccid, 0, true);
5772da066efSJohn Baldwin 	if (cb == NULL) {
5782da066efSJohn Baldwin 		/*
5792da066efSJohn Baldwin 		 * XXX: Could be PDU sequence error if cccid is for a
5802da066efSJohn Baldwin 		 * command that doesn't use a command buffer.
5812da066efSJohn Baldwin 		 */
5822da066efSJohn Baldwin 		nvmf_tcp_report_error(qp->qp.nq_association, qp,
5832da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD,
5842da066efSJohn Baldwin 		    offsetof(struct nvme_tcp_c2h_data_hdr, cccid), pdu->hdr,
5852da066efSJohn Baldwin 		    le32toh(pdu->hdr->plen), pdu->hdr->hlen);
5862da066efSJohn Baldwin 		nvmf_tcp_free_pdu(pdu);
5872da066efSJohn Baldwin 		return (EBADMSG);
5882da066efSJohn Baldwin 	}
5892da066efSJohn Baldwin 
5902da066efSJohn Baldwin 	data_len = le32toh(c2h->datal);
5912da066efSJohn Baldwin 	if (data_len != pdu->data_len) {
5922da066efSJohn Baldwin 		nvmf_tcp_report_error(qp->qp.nq_association, qp,
5932da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD,
5942da066efSJohn Baldwin 		    offsetof(struct nvme_tcp_c2h_data_hdr, datal), pdu->hdr,
5952da066efSJohn Baldwin 		    le32toh(pdu->hdr->plen), pdu->hdr->hlen);
5962da066efSJohn Baldwin 		nvmf_tcp_free_pdu(pdu);
5972da066efSJohn Baldwin 		return (EBADMSG);
5982da066efSJohn Baldwin 	}
5992da066efSJohn Baldwin 
6002da066efSJohn Baldwin 	data_offset = le32toh(c2h->datao);
6012da066efSJohn Baldwin 	if (data_offset < cb->data_offset ||
6022da066efSJohn Baldwin 	    data_offset + data_len > cb->data_offset + cb->data_len) {
6032da066efSJohn Baldwin 		nvmf_tcp_report_error(qp->qp.nq_association, qp,
6042da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE, 0,
6052da066efSJohn Baldwin 		    pdu->hdr, le32toh(pdu->hdr->plen), pdu->hdr->hlen);
6062da066efSJohn Baldwin 		nvmf_tcp_free_pdu(pdu);
6072da066efSJohn Baldwin 		return (EBADMSG);
6082da066efSJohn Baldwin 	}
6092da066efSJohn Baldwin 
6102da066efSJohn Baldwin 	if (data_offset != cb->data_offset + cb->data_xfered) {
6112da066efSJohn Baldwin 		nvmf_tcp_report_error(qp->qp.nq_association, qp,
6122da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->hdr,
6132da066efSJohn Baldwin 		    le32toh(pdu->hdr->plen), pdu->hdr->hlen);
6142da066efSJohn Baldwin 		nvmf_tcp_free_pdu(pdu);
6152da066efSJohn Baldwin 		return (EBADMSG);
6162da066efSJohn Baldwin 	}
6172da066efSJohn Baldwin 
6182da066efSJohn Baldwin 	if ((cb->data_xfered + data_len == cb->data_len) !=
6192da066efSJohn Baldwin 	    ((pdu->hdr->flags & NVME_TCP_C2H_DATA_FLAGS_LAST_PDU) != 0)) {
6202da066efSJohn Baldwin 		nvmf_tcp_report_error(qp->qp.nq_association, qp,
6212da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->hdr,
6222da066efSJohn Baldwin 		    le32toh(pdu->hdr->plen), pdu->hdr->hlen);
6232da066efSJohn Baldwin 		nvmf_tcp_free_pdu(pdu);
6242da066efSJohn Baldwin 		return (EBADMSG);
6252da066efSJohn Baldwin 	}
6262da066efSJohn Baldwin 
6272da066efSJohn Baldwin 	cb->data_xfered += data_len;
6282da066efSJohn Baldwin 	data_offset -= cb->data_offset;
6292da066efSJohn Baldwin 	icd = (const char *)pdu->hdr + pdu->hdr->pdo;
6302da066efSJohn Baldwin 	memcpy((char *)cb->data + data_offset, icd, data_len);
6312da066efSJohn Baldwin 
6322da066efSJohn Baldwin 	if ((pdu->hdr->flags & NVME_TCP_C2H_DATA_FLAGS_SUCCESS) != 0) {
6332da066efSJohn Baldwin 		struct nvme_completion cqe;
6342da066efSJohn Baldwin 		struct nvmf_tcp_capsule *tc;
6352da066efSJohn Baldwin 		struct nvmf_capsule *nc;
6362da066efSJohn Baldwin 
6372da066efSJohn Baldwin 		memset(&cqe, 0, sizeof(cqe));
6382da066efSJohn Baldwin 		cqe.cid = cb->cid;
6392da066efSJohn Baldwin 
6402da066efSJohn Baldwin 		nc = nvmf_allocate_response(&qp->qp, &cqe);
6412da066efSJohn Baldwin 		if (nc == NULL) {
6422da066efSJohn Baldwin 			nvmf_tcp_free_pdu(pdu);
6432da066efSJohn Baldwin 			return (ENOMEM);
6442da066efSJohn Baldwin 		}
6452da066efSJohn Baldwin 		nc->nc_sqhd_valid = false;
6462da066efSJohn Baldwin 
6472da066efSJohn Baldwin 		tc = TCAP(nc);
6482da066efSJohn Baldwin 		TAILQ_INSERT_TAIL(&qp->rx_capsules, tc, link);
6492da066efSJohn Baldwin 	}
6502da066efSJohn Baldwin 
6512da066efSJohn Baldwin 	nvmf_tcp_free_pdu(pdu);
6522da066efSJohn Baldwin 	return (0);
6532da066efSJohn Baldwin }
6542da066efSJohn Baldwin 
6552da066efSJohn Baldwin /* NB: cid and ttag and little-endian already. */
6562da066efSJohn Baldwin static int
tcp_send_h2c_pdu(struct nvmf_tcp_qpair * qp,uint16_t cid,uint16_t ttag,uint32_t data_offset,void * buf,size_t len,bool last_pdu)6572da066efSJohn Baldwin tcp_send_h2c_pdu(struct nvmf_tcp_qpair *qp, uint16_t cid, uint16_t ttag,
6582da066efSJohn Baldwin     uint32_t data_offset, void *buf, size_t len, bool last_pdu)
6592da066efSJohn Baldwin {
6602da066efSJohn Baldwin 	struct nvme_tcp_h2c_data_hdr h2c;
6612da066efSJohn Baldwin 
6622da066efSJohn Baldwin 	memset(&h2c, 0, sizeof(h2c));
6632da066efSJohn Baldwin 	h2c.common.pdu_type = NVME_TCP_PDU_TYPE_H2C_DATA;
6642da066efSJohn Baldwin 	if (last_pdu)
6652da066efSJohn Baldwin 		h2c.common.flags |= NVME_TCP_H2C_DATA_FLAGS_LAST_PDU;
6662da066efSJohn Baldwin 	h2c.cccid = cid;
6672da066efSJohn Baldwin 	h2c.ttag = ttag;
6682da066efSJohn Baldwin 	h2c.datao = htole32(data_offset);
6692da066efSJohn Baldwin 	h2c.datal = htole32(len);
6702da066efSJohn Baldwin 
6712da066efSJohn Baldwin 	return (nvmf_tcp_construct_pdu(qp, &h2c, sizeof(h2c), buf, len));
6722da066efSJohn Baldwin }
6732da066efSJohn Baldwin 
6742da066efSJohn Baldwin /* Sends one or more H2C_DATA PDUs, subject to MAXH2CDATA. */
6752da066efSJohn Baldwin static int
tcp_send_h2c_pdus(struct nvmf_tcp_qpair * qp,uint16_t cid,uint16_t ttag,uint32_t data_offset,void * buf,size_t len,bool last_pdu)6762da066efSJohn Baldwin tcp_send_h2c_pdus(struct nvmf_tcp_qpair *qp, uint16_t cid, uint16_t ttag,
6772da066efSJohn Baldwin     uint32_t data_offset, void *buf, size_t len, bool last_pdu)
6782da066efSJohn Baldwin {
6792da066efSJohn Baldwin 	char *p;
6802da066efSJohn Baldwin 
6812da066efSJohn Baldwin 	p = buf;
6822da066efSJohn Baldwin 	while (len != 0) {
6832da066efSJohn Baldwin 		size_t todo;
6842da066efSJohn Baldwin 		int error;
6852da066efSJohn Baldwin 
6862da066efSJohn Baldwin 		todo = len;
6872da066efSJohn Baldwin 		if (todo > qp->maxh2cdata)
6882da066efSJohn Baldwin 			todo = qp->maxh2cdata;
6892da066efSJohn Baldwin 		error = tcp_send_h2c_pdu(qp, cid, ttag, data_offset, p, todo,
6902da066efSJohn Baldwin 		    last_pdu && todo == len);
6912da066efSJohn Baldwin 		if (error != 0)
6922da066efSJohn Baldwin 			return (error);
6932da066efSJohn Baldwin 		p += todo;
6942da066efSJohn Baldwin 		len -= todo;
6952da066efSJohn Baldwin 	}
6962da066efSJohn Baldwin 	return (0);
6972da066efSJohn Baldwin }
6982da066efSJohn Baldwin 
6992da066efSJohn Baldwin static int
nvmf_tcp_handle_r2t(struct nvmf_tcp_qpair * qp,struct nvmf_tcp_rxpdu * pdu)7002da066efSJohn Baldwin nvmf_tcp_handle_r2t(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_rxpdu *pdu)
7012da066efSJohn Baldwin {
7022da066efSJohn Baldwin 	struct nvmf_tcp_command_buffer *cb;
7032da066efSJohn Baldwin 	struct nvme_tcp_r2t_hdr *r2t;
7042da066efSJohn Baldwin 	uint32_t data_len, data_offset;
7052da066efSJohn Baldwin 	int error;
7062da066efSJohn Baldwin 
7072da066efSJohn Baldwin 	r2t = (void *)pdu->hdr;
7082da066efSJohn Baldwin 
7092da066efSJohn Baldwin 	cb = tcp_find_command_buffer(qp, r2t->cccid, 0, false);
7102da066efSJohn Baldwin 	if (cb == NULL) {
7112da066efSJohn Baldwin 		nvmf_tcp_report_error(qp->qp.nq_association, qp,
7122da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD,
7132da066efSJohn Baldwin 		    offsetof(struct nvme_tcp_r2t_hdr, cccid), pdu->hdr,
7142da066efSJohn Baldwin 		    le32toh(pdu->hdr->plen), pdu->hdr->hlen);
7152da066efSJohn Baldwin 		nvmf_tcp_free_pdu(pdu);
7162da066efSJohn Baldwin 		return (EBADMSG);
7172da066efSJohn Baldwin 	}
7182da066efSJohn Baldwin 
7192da066efSJohn Baldwin 	data_offset = le32toh(r2t->r2to);
7202da066efSJohn Baldwin 	if (data_offset != cb->data_xfered) {
7212da066efSJohn Baldwin 		nvmf_tcp_report_error(qp->qp.nq_association, qp,
7222da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->hdr,
7232da066efSJohn Baldwin 		    le32toh(pdu->hdr->plen), pdu->hdr->hlen);
7242da066efSJohn Baldwin 		nvmf_tcp_free_pdu(pdu);
7252da066efSJohn Baldwin 		return (EBADMSG);
7262da066efSJohn Baldwin 	}
7272da066efSJohn Baldwin 
7282da066efSJohn Baldwin 	/*
7292da066efSJohn Baldwin 	 * XXX: The spec does not specify how to handle R2T tranfers
7302da066efSJohn Baldwin 	 * out of range of the original command.
7312da066efSJohn Baldwin 	 */
7322da066efSJohn Baldwin 	data_len = le32toh(r2t->r2tl);
7332da066efSJohn Baldwin 	if (data_offset + data_len > cb->data_len) {
7342da066efSJohn Baldwin 		nvmf_tcp_report_error(qp->qp.nq_association, qp,
7352da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE, 0,
7362da066efSJohn Baldwin 		    pdu->hdr, le32toh(pdu->hdr->plen), pdu->hdr->hlen);
7372da066efSJohn Baldwin 		nvmf_tcp_free_pdu(pdu);
7382da066efSJohn Baldwin 		return (EBADMSG);
7392da066efSJohn Baldwin 	}
7402da066efSJohn Baldwin 
7412da066efSJohn Baldwin 	cb->data_xfered += data_len;
7422da066efSJohn Baldwin 
7432da066efSJohn Baldwin 	/*
7442da066efSJohn Baldwin 	 * Write out one or more H2C_DATA PDUs containing the
7452da066efSJohn Baldwin 	 * requested data.
7462da066efSJohn Baldwin 	 */
7472da066efSJohn Baldwin 	error = tcp_send_h2c_pdus(qp, r2t->cccid, r2t->ttag,
7482da066efSJohn Baldwin 	    data_offset, (char *)cb->data + data_offset, data_len, true);
7492da066efSJohn Baldwin 
7502da066efSJohn Baldwin 	nvmf_tcp_free_pdu(pdu);
7512da066efSJohn Baldwin 	return (error);
7522da066efSJohn Baldwin }
7532da066efSJohn Baldwin 
7542da066efSJohn Baldwin static int
nvmf_tcp_receive_pdu(struct nvmf_tcp_qpair * qp)7552da066efSJohn Baldwin nvmf_tcp_receive_pdu(struct nvmf_tcp_qpair *qp)
7562da066efSJohn Baldwin {
7572da066efSJohn Baldwin 	struct nvmf_tcp_rxpdu pdu;
7582da066efSJohn Baldwin 	int error;
7592da066efSJohn Baldwin 
7602da066efSJohn Baldwin 	error = nvmf_tcp_read_pdu(qp, &pdu);
7612da066efSJohn Baldwin 	if (error != 0)
7622da066efSJohn Baldwin 		return (error);
7632da066efSJohn Baldwin 
7642da066efSJohn Baldwin 	switch (pdu.hdr->pdu_type) {
7652da066efSJohn Baldwin 	default:
7662da066efSJohn Baldwin 		__unreachable();
7672da066efSJohn Baldwin 		break;
7682da066efSJohn Baldwin 	case NVME_TCP_PDU_TYPE_H2C_TERM_REQ:
7692da066efSJohn Baldwin 	case NVME_TCP_PDU_TYPE_C2H_TERM_REQ:
7702da066efSJohn Baldwin 		return (nvmf_tcp_handle_term_req(&pdu));
7712da066efSJohn Baldwin 	case NVME_TCP_PDU_TYPE_CAPSULE_CMD:
7722da066efSJohn Baldwin 		return (nvmf_tcp_save_command_capsule(qp, &pdu));
7732da066efSJohn Baldwin 	case NVME_TCP_PDU_TYPE_CAPSULE_RESP:
7742da066efSJohn Baldwin 		return (nvmf_tcp_save_response_capsule(qp, &pdu));
7752da066efSJohn Baldwin 	case NVME_TCP_PDU_TYPE_H2C_DATA:
7762da066efSJohn Baldwin 		return (nvmf_tcp_handle_h2c_data(qp, &pdu));
7772da066efSJohn Baldwin 	case NVME_TCP_PDU_TYPE_C2H_DATA:
7782da066efSJohn Baldwin 		return (nvmf_tcp_handle_c2h_data(qp, &pdu));
7792da066efSJohn Baldwin 	case NVME_TCP_PDU_TYPE_R2T:
7802da066efSJohn Baldwin 		return (nvmf_tcp_handle_r2t(qp, &pdu));
7812da066efSJohn Baldwin 	}
7822da066efSJohn Baldwin }
7832da066efSJohn Baldwin 
7842da066efSJohn Baldwin static bool
nvmf_tcp_validate_ic_pdu(struct nvmf_association * na,struct nvmf_tcp_qpair * qp,const struct nvme_tcp_common_pdu_hdr * ch,size_t pdu_len)7852da066efSJohn Baldwin nvmf_tcp_validate_ic_pdu(struct nvmf_association *na, struct nvmf_tcp_qpair *qp,
7862da066efSJohn Baldwin     const struct nvme_tcp_common_pdu_hdr *ch, size_t pdu_len)
7872da066efSJohn Baldwin {
7882da066efSJohn Baldwin 	const struct nvme_tcp_ic_req *pdu;
7892da066efSJohn Baldwin 	uint32_t plen;
7902da066efSJohn Baldwin 	u_int hlen;
7912da066efSJohn Baldwin 
7922da066efSJohn Baldwin 	/* Determine how large of a PDU header to return for errors. */
7932da066efSJohn Baldwin 	hlen = ch->hlen;
7942da066efSJohn Baldwin 	plen = le32toh(ch->plen);
7952da066efSJohn Baldwin 	if (hlen < sizeof(*ch) || hlen > plen)
7962da066efSJohn Baldwin 		hlen = sizeof(*ch);
7972da066efSJohn Baldwin 
7982da066efSJohn Baldwin 	/*
7992da066efSJohn Baldwin 	 * Errors must be reported for the lowest incorrect field
8002da066efSJohn Baldwin 	 * first, so validate fields in order.
8012da066efSJohn Baldwin 	 */
8022da066efSJohn Baldwin 
8032da066efSJohn Baldwin 	/* Validate pdu_type. */
8042da066efSJohn Baldwin 
8052da066efSJohn Baldwin 	/* Controllers only receive PDUs with a PDU direction of 0. */
806a7db82cfSJohn Baldwin 	if (na->na_controller != ((ch->pdu_type & 0x01) == 0)) {
8072da066efSJohn Baldwin 		na_error(na, "NVMe/TCP: Invalid PDU type %u", ch->pdu_type);
8082da066efSJohn Baldwin 		nvmf_tcp_report_error(na, qp,
8092da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 0, ch, pdu_len,
8102da066efSJohn Baldwin 		    hlen);
8112da066efSJohn Baldwin 		return (false);
8122da066efSJohn Baldwin 	}
8132da066efSJohn Baldwin 
8142da066efSJohn Baldwin 	switch (ch->pdu_type) {
8152da066efSJohn Baldwin 	case NVME_TCP_PDU_TYPE_IC_REQ:
8162da066efSJohn Baldwin 	case NVME_TCP_PDU_TYPE_IC_RESP:
8172da066efSJohn Baldwin 		break;
8182da066efSJohn Baldwin 	default:
8192da066efSJohn Baldwin 		na_error(na, "NVMe/TCP: Invalid PDU type %u", ch->pdu_type);
8202da066efSJohn Baldwin 		nvmf_tcp_report_error(na, qp,
8212da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 0, ch, pdu_len,
8222da066efSJohn Baldwin 		    hlen);
8232da066efSJohn Baldwin 		return (false);
8242da066efSJohn Baldwin 	}
8252da066efSJohn Baldwin 
8262da066efSJohn Baldwin 	/* Validate flags. */
8272da066efSJohn Baldwin 	if (ch->flags != 0) {
8282da066efSJohn Baldwin 		na_error(na, "NVMe/TCP: Invalid PDU header flags %#x",
8292da066efSJohn Baldwin 		    ch->flags);
8302da066efSJohn Baldwin 		nvmf_tcp_report_error(na, qp,
8312da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 1, ch, pdu_len,
8322da066efSJohn Baldwin 		    hlen);
8332da066efSJohn Baldwin 		return (false);
8342da066efSJohn Baldwin 	}
8352da066efSJohn Baldwin 
8362da066efSJohn Baldwin 	/* Validate hlen. */
8372da066efSJohn Baldwin 	if (ch->hlen != 128) {
8382da066efSJohn Baldwin 		na_error(na, "NVMe/TCP: Invalid PDU header length %u",
8392da066efSJohn Baldwin 		    ch->hlen);
8402da066efSJohn Baldwin 		nvmf_tcp_report_error(na, qp,
8412da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 2, ch, pdu_len,
8422da066efSJohn Baldwin 		    hlen);
8432da066efSJohn Baldwin 		return (false);
8442da066efSJohn Baldwin 	}
8452da066efSJohn Baldwin 
8462da066efSJohn Baldwin 	/* Validate pdo. */
8472da066efSJohn Baldwin 	if (ch->pdo != 0) {
8482da066efSJohn Baldwin 		na_error(na, "NVMe/TCP: Invalid PDU data offset %u", ch->pdo);
8492da066efSJohn Baldwin 		nvmf_tcp_report_error(na, qp,
8502da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 3, ch, pdu_len,
8512da066efSJohn Baldwin 		    hlen);
8522da066efSJohn Baldwin 		return (false);
8532da066efSJohn Baldwin 	}
8542da066efSJohn Baldwin 
8552da066efSJohn Baldwin 	/* Validate plen. */
8562da066efSJohn Baldwin 	if (plen != 128) {
8572da066efSJohn Baldwin 		na_error(na, "NVMe/TCP: Invalid PDU length %u", plen);
8582da066efSJohn Baldwin 		nvmf_tcp_report_error(na, qp,
8592da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 4, ch, pdu_len,
8602da066efSJohn Baldwin 		    hlen);
8612da066efSJohn Baldwin 		return (false);
8622da066efSJohn Baldwin 	}
8632da066efSJohn Baldwin 
8642da066efSJohn Baldwin 	/* Validate fields common to both ICReq and ICResp. */
8652da066efSJohn Baldwin 	pdu = (const struct nvme_tcp_ic_req *)ch;
8662da066efSJohn Baldwin 	if (le16toh(pdu->pfv) != 0) {
8672da066efSJohn Baldwin 		na_error(na, "NVMe/TCP: Unsupported PDU version %u",
8682da066efSJohn Baldwin 		    le16toh(pdu->pfv));
8692da066efSJohn Baldwin 		nvmf_tcp_report_error(na, qp,
8702da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER,
8712da066efSJohn Baldwin 		    8, ch, pdu_len, hlen);
8722da066efSJohn Baldwin 		return (false);
8732da066efSJohn Baldwin 	}
8742da066efSJohn Baldwin 
8752da066efSJohn Baldwin 	if (pdu->hpda > NVME_TCP_HPDA_MAX) {
8762da066efSJohn Baldwin 		na_error(na, "NVMe/TCP: Unsupported PDA %u", pdu->hpda);
8772da066efSJohn Baldwin 		nvmf_tcp_report_error(na, qp,
8782da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 10, ch, pdu_len,
8792da066efSJohn Baldwin 		    hlen);
8802da066efSJohn Baldwin 		return (false);
8812da066efSJohn Baldwin 	}
8822da066efSJohn Baldwin 
8832da066efSJohn Baldwin 	if (pdu->dgst.bits.reserved != 0) {
8842da066efSJohn Baldwin 		na_error(na, "NVMe/TCP: Invalid digest settings");
8852da066efSJohn Baldwin 		nvmf_tcp_report_error(na, qp,
8862da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 11, ch, pdu_len,
8872da066efSJohn Baldwin 		    hlen);
8882da066efSJohn Baldwin 		return (false);
8892da066efSJohn Baldwin 	}
8902da066efSJohn Baldwin 
8912da066efSJohn Baldwin 	return (true);
8922da066efSJohn Baldwin }
8932da066efSJohn Baldwin 
8942da066efSJohn Baldwin static bool
nvmf_tcp_read_ic_req(struct nvmf_association * na,struct nvmf_tcp_qpair * qp,struct nvme_tcp_ic_req * pdu)8952da066efSJohn Baldwin nvmf_tcp_read_ic_req(struct nvmf_association *na, struct nvmf_tcp_qpair *qp,
8962da066efSJohn Baldwin     struct nvme_tcp_ic_req *pdu)
8972da066efSJohn Baldwin {
8982da066efSJohn Baldwin 	int error;
8992da066efSJohn Baldwin 
9002da066efSJohn Baldwin 	error = nvmf_tcp_read_buffer(qp->s, pdu, sizeof(*pdu));
9012da066efSJohn Baldwin 	if (error != 0) {
9022da066efSJohn Baldwin 		na_error(na, "NVMe/TCP: Failed to read IC request: %s",
9032da066efSJohn Baldwin 		    strerror(error));
9042da066efSJohn Baldwin 		return (false);
9052da066efSJohn Baldwin 	}
9062da066efSJohn Baldwin 
9072da066efSJohn Baldwin 	return (nvmf_tcp_validate_ic_pdu(na, qp, &pdu->common, sizeof(*pdu)));
9082da066efSJohn Baldwin }
9092da066efSJohn Baldwin 
9102da066efSJohn Baldwin static bool
nvmf_tcp_read_ic_resp(struct nvmf_association * na,struct nvmf_tcp_qpair * qp,struct nvme_tcp_ic_resp * pdu)9112da066efSJohn Baldwin nvmf_tcp_read_ic_resp(struct nvmf_association *na, struct nvmf_tcp_qpair *qp,
9122da066efSJohn Baldwin     struct nvme_tcp_ic_resp *pdu)
9132da066efSJohn Baldwin {
9142da066efSJohn Baldwin 	int error;
9152da066efSJohn Baldwin 
9162da066efSJohn Baldwin 	error = nvmf_tcp_read_buffer(qp->s, pdu, sizeof(*pdu));
9172da066efSJohn Baldwin 	if (error != 0) {
9182da066efSJohn Baldwin 		na_error(na, "NVMe/TCP: Failed to read IC response: %s",
9192da066efSJohn Baldwin 		    strerror(error));
9202da066efSJohn Baldwin 		return (false);
9212da066efSJohn Baldwin 	}
9222da066efSJohn Baldwin 
9232da066efSJohn Baldwin 	return (nvmf_tcp_validate_ic_pdu(na, qp, &pdu->common, sizeof(*pdu)));
9242da066efSJohn Baldwin }
9252da066efSJohn Baldwin 
9262da066efSJohn Baldwin static struct nvmf_association *
tcp_allocate_association(bool controller,const struct nvmf_association_params * params)927846d702fSJohn Baldwin tcp_allocate_association(bool controller,
928846d702fSJohn Baldwin     const struct nvmf_association_params *params)
9292da066efSJohn Baldwin {
9302da066efSJohn Baldwin 	struct nvmf_tcp_association *ta;
9312da066efSJohn Baldwin 
932846d702fSJohn Baldwin 	if (controller) {
933846d702fSJohn Baldwin 		/* 7.4.10.3 */
934846d702fSJohn Baldwin 		if (params->tcp.maxh2cdata < 4096 ||
935846d702fSJohn Baldwin 		    params->tcp.maxh2cdata % 4 != 0)
936846d702fSJohn Baldwin 			return (NULL);
937846d702fSJohn Baldwin 	}
938846d702fSJohn Baldwin 
9392da066efSJohn Baldwin 	ta = calloc(1, sizeof(*ta));
9402da066efSJohn Baldwin 
9412da066efSJohn Baldwin 	return (&ta->na);
9422da066efSJohn Baldwin }
9432da066efSJohn Baldwin 
9442da066efSJohn Baldwin static void
tcp_update_association(struct nvmf_association * na,const struct nvme_controller_data * cdata)9452da066efSJohn Baldwin tcp_update_association(struct nvmf_association *na,
9462da066efSJohn Baldwin     const struct nvme_controller_data *cdata)
9472da066efSJohn Baldwin {
9482da066efSJohn Baldwin 	struct nvmf_tcp_association *ta = TASSOC(na);
9492da066efSJohn Baldwin 
9502da066efSJohn Baldwin 	ta->ioccsz = le32toh(cdata->ioccsz);
9512da066efSJohn Baldwin }
9522da066efSJohn Baldwin 
/*
 * Release an association.  'na' is the first member of
 * struct nvmf_tcp_association, so it is passed to free() directly.
 */
static void
tcp_free_association(struct nvmf_association *na)
{
	free(na);
}
9582da066efSJohn Baldwin 
9592da066efSJohn Baldwin static bool
tcp_connect(struct nvmf_tcp_qpair * qp,struct nvmf_association * na,bool admin)9602da066efSJohn Baldwin tcp_connect(struct nvmf_tcp_qpair *qp, struct nvmf_association *na, bool admin)
9612da066efSJohn Baldwin {
9622da066efSJohn Baldwin 	const struct nvmf_association_params *params = &na->na_params;
9632da066efSJohn Baldwin 	struct nvmf_tcp_association *ta = TASSOC(na);
9642da066efSJohn Baldwin 	struct nvme_tcp_ic_req ic_req;
9652da066efSJohn Baldwin 	struct nvme_tcp_ic_resp ic_resp;
966fd0e6af5SJohn Baldwin 	uint32_t maxh2cdata;
9672da066efSJohn Baldwin 	int error;
9682da066efSJohn Baldwin 
9692da066efSJohn Baldwin 	if (!admin) {
9702da066efSJohn Baldwin 		if (ta->ioccsz == 0) {
9712da066efSJohn Baldwin 			na_error(na, "TCP I/O queues require cdata");
9722da066efSJohn Baldwin 			return (false);
9732da066efSJohn Baldwin 		}
9742da066efSJohn Baldwin 		if (ta->ioccsz < 4) {
9752da066efSJohn Baldwin 			na_error(na, "Invalid IOCCSZ %u", ta->ioccsz);
9762da066efSJohn Baldwin 			return (false);
9772da066efSJohn Baldwin 		}
9782da066efSJohn Baldwin 	}
9792da066efSJohn Baldwin 
9802da066efSJohn Baldwin 	memset(&ic_req, 0, sizeof(ic_req));
9812da066efSJohn Baldwin 	ic_req.common.pdu_type = NVME_TCP_PDU_TYPE_IC_REQ;
9822da066efSJohn Baldwin 	ic_req.common.hlen = sizeof(ic_req);
9832da066efSJohn Baldwin 	ic_req.common.plen = htole32(sizeof(ic_req));
9842da066efSJohn Baldwin 	ic_req.pfv = htole16(0);
9852da066efSJohn Baldwin 	ic_req.hpda = params->tcp.pda;
9862da066efSJohn Baldwin 	if (params->tcp.header_digests)
9872da066efSJohn Baldwin 		ic_req.dgst.bits.hdgst_enable = 1;
9882da066efSJohn Baldwin 	if (params->tcp.data_digests)
9892da066efSJohn Baldwin 		ic_req.dgst.bits.ddgst_enable = 1;
9902da066efSJohn Baldwin 	ic_req.maxr2t = htole32(params->tcp.maxr2t);
9912da066efSJohn Baldwin 
9922da066efSJohn Baldwin 	error = nvmf_tcp_write_pdu(qp, &ic_req, sizeof(ic_req));
9932da066efSJohn Baldwin 	if (error != 0) {
9942da066efSJohn Baldwin 		na_error(na, "Failed to write IC request: %s", strerror(error));
9952da066efSJohn Baldwin 		return (false);
9962da066efSJohn Baldwin 	}
9972da066efSJohn Baldwin 
9982da066efSJohn Baldwin 	if (!nvmf_tcp_read_ic_resp(na, qp, &ic_resp))
9992da066efSJohn Baldwin 		return (false);
10002da066efSJohn Baldwin 
10012da066efSJohn Baldwin 	/* Ensure the controller didn't enable digests we didn't request. */
10022da066efSJohn Baldwin 	if ((!params->tcp.header_digests &&
10032da066efSJohn Baldwin 	    ic_resp.dgst.bits.hdgst_enable != 0) ||
10042da066efSJohn Baldwin 	    (!params->tcp.data_digests &&
10052da066efSJohn Baldwin 	    ic_resp.dgst.bits.ddgst_enable != 0)) {
10062da066efSJohn Baldwin 		na_error(na, "Controller enabled unrequested digests");
10072da066efSJohn Baldwin 		nvmf_tcp_report_error(na, qp,
10082da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER,
10092da066efSJohn Baldwin 		    11, &ic_resp, sizeof(ic_resp), sizeof(ic_resp));
10102da066efSJohn Baldwin 		return (false);
10112da066efSJohn Baldwin 	}
10122da066efSJohn Baldwin 
10132da066efSJohn Baldwin 	/*
10142da066efSJohn Baldwin 	 * XXX: Is there an upper-bound to enforce here?  Perhaps pick
10152da066efSJohn Baldwin 	 * some large value and report larger values as an unsupported
10162da066efSJohn Baldwin 	 * parameter?
10172da066efSJohn Baldwin 	 */
1018fd0e6af5SJohn Baldwin 	maxh2cdata = le32toh(ic_resp.maxh2cdata);
1019fd0e6af5SJohn Baldwin 	if (maxh2cdata < 4096 || maxh2cdata % 4 != 0) {
1020fd0e6af5SJohn Baldwin 		na_error(na, "Invalid MAXH2CDATA %u", maxh2cdata);
10212da066efSJohn Baldwin 		nvmf_tcp_report_error(na, qp,
10222da066efSJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 12, &ic_resp,
10232da066efSJohn Baldwin 		    sizeof(ic_resp), sizeof(ic_resp));
10242da066efSJohn Baldwin 		return (false);
10252da066efSJohn Baldwin 	}
10262da066efSJohn Baldwin 
10277b8dd078SJohn Baldwin 	qp->rxpda = (params->tcp.pda + 1) * 4;
10287b8dd078SJohn Baldwin 	qp->txpda = (ic_resp.cpda + 1) * 4;
10292da066efSJohn Baldwin 	qp->header_digests = ic_resp.dgst.bits.hdgst_enable != 0;
10302da066efSJohn Baldwin 	qp->data_digests = ic_resp.dgst.bits.ddgst_enable != 0;
10312da066efSJohn Baldwin 	qp->maxr2t = params->tcp.maxr2t;
1032fd0e6af5SJohn Baldwin 	qp->maxh2cdata = maxh2cdata;
10332da066efSJohn Baldwin 	if (admin)
10342da066efSJohn Baldwin 		/* 7.4.3 */
10352da066efSJohn Baldwin 		qp->max_icd = 8192;
10362da066efSJohn Baldwin 	else
10372da066efSJohn Baldwin 		qp->max_icd = (ta->ioccsz - 4) * 16;
10382da066efSJohn Baldwin 
10392da066efSJohn Baldwin 	return (0);
10402da066efSJohn Baldwin }
10412da066efSJohn Baldwin 
10422da066efSJohn Baldwin static bool
tcp_accept(struct nvmf_tcp_qpair * qp,struct nvmf_association * na)10432da066efSJohn Baldwin tcp_accept(struct nvmf_tcp_qpair *qp, struct nvmf_association *na)
10442da066efSJohn Baldwin {
10452da066efSJohn Baldwin 	const struct nvmf_association_params *params = &na->na_params;
10462da066efSJohn Baldwin 	struct nvme_tcp_ic_req ic_req;
10472da066efSJohn Baldwin 	struct nvme_tcp_ic_resp ic_resp;
10482da066efSJohn Baldwin 	int error;
10492da066efSJohn Baldwin 
10502da066efSJohn Baldwin 	if (!nvmf_tcp_read_ic_req(na, qp, &ic_req))
10512da066efSJohn Baldwin 		return (false);
10522da066efSJohn Baldwin 
10532da066efSJohn Baldwin 	memset(&ic_resp, 0, sizeof(ic_resp));
10542da066efSJohn Baldwin 	ic_resp.common.pdu_type = NVME_TCP_PDU_TYPE_IC_RESP;
10552da066efSJohn Baldwin 	ic_resp.common.hlen = sizeof(ic_req);
10562da066efSJohn Baldwin 	ic_resp.common.plen = htole32(sizeof(ic_req));
10572da066efSJohn Baldwin 	ic_resp.pfv = htole16(0);
10582da066efSJohn Baldwin 	ic_resp.cpda = params->tcp.pda;
10592da066efSJohn Baldwin 	if (params->tcp.header_digests && ic_req.dgst.bits.hdgst_enable != 0)
10602da066efSJohn Baldwin 		ic_resp.dgst.bits.hdgst_enable = 1;
10612da066efSJohn Baldwin 	if (params->tcp.data_digests && ic_req.dgst.bits.ddgst_enable != 0)
10622da066efSJohn Baldwin 		ic_resp.dgst.bits.ddgst_enable = 1;
10632da066efSJohn Baldwin 	ic_resp.maxh2cdata = htole32(params->tcp.maxh2cdata);
10642da066efSJohn Baldwin 
10652da066efSJohn Baldwin 	error = nvmf_tcp_write_pdu(qp, &ic_resp, sizeof(ic_resp));
10662da066efSJohn Baldwin 	if (error != 0) {
10672da066efSJohn Baldwin 		na_error(na, "Failed to write IC response: %s",
10682da066efSJohn Baldwin 		    strerror(error));
10692da066efSJohn Baldwin 		return (false);
10702da066efSJohn Baldwin 	}
10712da066efSJohn Baldwin 
10727b8dd078SJohn Baldwin 	qp->rxpda = (params->tcp.pda + 1) * 4;
10737b8dd078SJohn Baldwin 	qp->txpda = (ic_req.hpda + 1) * 4;
10742da066efSJohn Baldwin 	qp->header_digests = ic_resp.dgst.bits.hdgst_enable != 0;
10752da066efSJohn Baldwin 	qp->data_digests = ic_resp.dgst.bits.ddgst_enable != 0;
10762da066efSJohn Baldwin 	qp->maxr2t = le32toh(ic_req.maxr2t);
10772da066efSJohn Baldwin 	qp->maxh2cdata = params->tcp.maxh2cdata;
10782da066efSJohn Baldwin 	qp->max_icd = 0;	/* XXX */
10792da066efSJohn Baldwin 	return (0);
10802da066efSJohn Baldwin }
10812da066efSJohn Baldwin 
10822da066efSJohn Baldwin static struct nvmf_qpair *
tcp_allocate_qpair(struct nvmf_association * na,const struct nvmf_qpair_params * qparams)10832da066efSJohn Baldwin tcp_allocate_qpair(struct nvmf_association *na,
10842da066efSJohn Baldwin     const struct nvmf_qpair_params *qparams)
10852da066efSJohn Baldwin {
10862da066efSJohn Baldwin 	const struct nvmf_association_params *aparams = &na->na_params;
10872da066efSJohn Baldwin 	struct nvmf_tcp_qpair *qp;
10882da066efSJohn Baldwin 	int error;
10892da066efSJohn Baldwin 
10902da066efSJohn Baldwin 	if (aparams->tcp.pda > NVME_TCP_CPDA_MAX) {
10912da066efSJohn Baldwin 		na_error(na, "Invalid PDA");
10922da066efSJohn Baldwin 		return (NULL);
10932da066efSJohn Baldwin 	}
10942da066efSJohn Baldwin 
10952da066efSJohn Baldwin 	qp = calloc(1, sizeof(*qp));
10962da066efSJohn Baldwin 	qp->s = qparams->tcp.fd;
10972da066efSJohn Baldwin 	LIST_INIT(&qp->rx_buffers);
10982da066efSJohn Baldwin 	LIST_INIT(&qp->tx_buffers);
10992da066efSJohn Baldwin 	TAILQ_INIT(&qp->rx_capsules);
11002da066efSJohn Baldwin 	if (na->na_controller)
11012da066efSJohn Baldwin 		error = tcp_accept(qp, na);
11022da066efSJohn Baldwin 	else
11032da066efSJohn Baldwin 		error = tcp_connect(qp, na, qparams->admin);
11042da066efSJohn Baldwin 	if (error != 0) {
11052da066efSJohn Baldwin 		free(qp);
11062da066efSJohn Baldwin 		return (NULL);
11072da066efSJohn Baldwin 	}
11082da066efSJohn Baldwin 
11092da066efSJohn Baldwin 	return (&qp->qp);
11102da066efSJohn Baldwin }
11112da066efSJohn Baldwin 
11122da066efSJohn Baldwin static void
tcp_free_qpair(struct nvmf_qpair * nq)11132da066efSJohn Baldwin tcp_free_qpair(struct nvmf_qpair *nq)
11142da066efSJohn Baldwin {
11152da066efSJohn Baldwin 	struct nvmf_tcp_qpair *qp = TQP(nq);
11162da066efSJohn Baldwin 	struct nvmf_tcp_capsule *ntc, *tc;
11172da066efSJohn Baldwin 	struct nvmf_tcp_command_buffer *ncb, *cb;
11182da066efSJohn Baldwin 
11192da066efSJohn Baldwin 	TAILQ_FOREACH_SAFE(tc, &qp->rx_capsules, link, ntc) {
11202da066efSJohn Baldwin 		TAILQ_REMOVE(&qp->rx_capsules, tc, link);
11212da066efSJohn Baldwin 		nvmf_free_capsule(&tc->nc);
11222da066efSJohn Baldwin 	}
11232da066efSJohn Baldwin 	LIST_FOREACH_SAFE(cb, &qp->rx_buffers, link, ncb) {
11242da066efSJohn Baldwin 		tcp_free_command_buffer(cb);
11252da066efSJohn Baldwin 	}
11262da066efSJohn Baldwin 	LIST_FOREACH_SAFE(cb, &qp->tx_buffers, link, ncb) {
11272da066efSJohn Baldwin 		tcp_free_command_buffer(cb);
11282da066efSJohn Baldwin 	}
11292da066efSJohn Baldwin 	free(qp);
11302da066efSJohn Baldwin }
11312da066efSJohn Baldwin 
11322da066efSJohn Baldwin static int
tcp_kernel_handoff_params(struct nvmf_qpair * nq,struct nvmf_handoff_qpair_params * qparams)11332da066efSJohn Baldwin tcp_kernel_handoff_params(struct nvmf_qpair *nq,
11342da066efSJohn Baldwin     struct nvmf_handoff_qpair_params *qparams)
11352da066efSJohn Baldwin {
11362da066efSJohn Baldwin 	struct nvmf_tcp_qpair *qp = TQP(nq);
11372da066efSJohn Baldwin 
11382da066efSJohn Baldwin 	qparams->tcp.fd = qp->s;
11392da066efSJohn Baldwin 	qparams->tcp.rxpda = qp->rxpda;
11402da066efSJohn Baldwin 	qparams->tcp.txpda = qp->txpda;
11412da066efSJohn Baldwin 	qparams->tcp.header_digests = qp->header_digests;
11422da066efSJohn Baldwin 	qparams->tcp.data_digests = qp->data_digests;
11432da066efSJohn Baldwin 	qparams->tcp.maxr2t = qp->maxr2t;
11442da066efSJohn Baldwin 	qparams->tcp.maxh2cdata = qp->maxh2cdata;
11452da066efSJohn Baldwin 	qparams->tcp.max_icd = qp->max_icd;
11462da066efSJohn Baldwin 
11472da066efSJohn Baldwin 	return (0);
11482da066efSJohn Baldwin }
11492da066efSJohn Baldwin 
11502da066efSJohn Baldwin static struct nvmf_capsule *
tcp_allocate_capsule(struct nvmf_qpair * qp __unused)11512da066efSJohn Baldwin tcp_allocate_capsule(struct nvmf_qpair *qp __unused)
11522da066efSJohn Baldwin {
11532da066efSJohn Baldwin 	struct nvmf_tcp_capsule *nc;
11542da066efSJohn Baldwin 
11552da066efSJohn Baldwin 	nc = calloc(1, sizeof(*nc));
11562da066efSJohn Baldwin 	return (&nc->nc);
11572da066efSJohn Baldwin }
11582da066efSJohn Baldwin 
11592da066efSJohn Baldwin static void
tcp_free_capsule(struct nvmf_capsule * nc)11602da066efSJohn Baldwin tcp_free_capsule(struct nvmf_capsule *nc)
11612da066efSJohn Baldwin {
11622da066efSJohn Baldwin 	struct nvmf_tcp_capsule *tc = TCAP(nc);
11632da066efSJohn Baldwin 
11642da066efSJohn Baldwin 	nvmf_tcp_free_pdu(&tc->rx_pdu);
11652da066efSJohn Baldwin 	if (tc->cb != NULL)
11662da066efSJohn Baldwin 		tcp_free_command_buffer(tc->cb);
11672da066efSJohn Baldwin 	free(tc);
11682da066efSJohn Baldwin }
11692da066efSJohn Baldwin 
11702da066efSJohn Baldwin static int
tcp_transmit_command(struct nvmf_capsule * nc)11712da066efSJohn Baldwin tcp_transmit_command(struct nvmf_capsule *nc)
11722da066efSJohn Baldwin {
11732da066efSJohn Baldwin 	struct nvmf_tcp_qpair *qp = TQP(nc->nc_qpair);
11742da066efSJohn Baldwin 	struct nvmf_tcp_capsule *tc = TCAP(nc);
11752da066efSJohn Baldwin 	struct nvme_tcp_cmd cmd;
11762da066efSJohn Baldwin 	struct nvme_sgl_descriptor *sgl;
11772da066efSJohn Baldwin 	int error;
11782da066efSJohn Baldwin 	bool use_icd;
11792da066efSJohn Baldwin 
11802da066efSJohn Baldwin 	use_icd = false;
11812da066efSJohn Baldwin 	if (nc->nc_data_len != 0 && nc->nc_send_data &&
11822da066efSJohn Baldwin 	    nc->nc_data_len <= qp->max_icd)
11832da066efSJohn Baldwin 		use_icd = true;
11842da066efSJohn Baldwin 
11852da066efSJohn Baldwin 	memset(&cmd, 0, sizeof(cmd));
11862da066efSJohn Baldwin 	cmd.common.pdu_type = NVME_TCP_PDU_TYPE_CAPSULE_CMD;
11872da066efSJohn Baldwin 	cmd.ccsqe = nc->nc_sqe;
11882da066efSJohn Baldwin 
11892da066efSJohn Baldwin 	/* Populate SGL in SQE. */
11902da066efSJohn Baldwin 	sgl = &cmd.ccsqe.sgl;
11912da066efSJohn Baldwin 	memset(sgl, 0, sizeof(*sgl));
11922da066efSJohn Baldwin 	sgl->address = 0;
11932da066efSJohn Baldwin 	sgl->length = htole32(nc->nc_data_len);
11942da066efSJohn Baldwin 	if (use_icd) {
11952da066efSJohn Baldwin 		/* Use in-capsule data. */
11962da066efSJohn Baldwin 		sgl->type = NVME_SGL_TYPE_ICD;
11972da066efSJohn Baldwin 	} else {
11982da066efSJohn Baldwin 		/* Use a command buffer. */
11992da066efSJohn Baldwin 		sgl->type = NVME_SGL_TYPE_COMMAND_BUFFER;
12002da066efSJohn Baldwin 	}
12012da066efSJohn Baldwin 
12022da066efSJohn Baldwin 	/* Send command capsule. */
12032da066efSJohn Baldwin 	error = nvmf_tcp_construct_pdu(qp, &cmd, sizeof(cmd), use_icd ?
12042da066efSJohn Baldwin 	    nc->nc_data : NULL, use_icd ? nc->nc_data_len : 0);
12052da066efSJohn Baldwin 	if (error != 0)
12062da066efSJohn Baldwin 		return (error);
12072da066efSJohn Baldwin 
12082da066efSJohn Baldwin 	/*
12092da066efSJohn Baldwin 	 * If data will be transferred using a command buffer, allocate a
12102da066efSJohn Baldwin 	 * buffer structure and queue it.
12112da066efSJohn Baldwin 	 */
12122da066efSJohn Baldwin 	if (nc->nc_data_len != 0 && !use_icd)
12132da066efSJohn Baldwin 		tc->cb = tcp_alloc_command_buffer(qp, nc->nc_data, 0,
12142da066efSJohn Baldwin 		    nc->nc_data_len, cmd.ccsqe.cid, 0, !nc->nc_send_data);
12152da066efSJohn Baldwin 
12162da066efSJohn Baldwin 	return (0);
12172da066efSJohn Baldwin }
12182da066efSJohn Baldwin 
12192da066efSJohn Baldwin static int
tcp_transmit_response(struct nvmf_capsule * nc)12202da066efSJohn Baldwin tcp_transmit_response(struct nvmf_capsule *nc)
12212da066efSJohn Baldwin {
12222da066efSJohn Baldwin 	struct nvmf_tcp_qpair *qp = TQP(nc->nc_qpair);
12232da066efSJohn Baldwin 	struct nvme_tcp_rsp rsp;
12242da066efSJohn Baldwin 
12252da066efSJohn Baldwin 	memset(&rsp, 0, sizeof(rsp));
12262da066efSJohn Baldwin 	rsp.common.pdu_type = NVME_TCP_PDU_TYPE_CAPSULE_RESP;
12272da066efSJohn Baldwin 	rsp.rccqe = nc->nc_cqe;
12282da066efSJohn Baldwin 
12292da066efSJohn Baldwin 	return (nvmf_tcp_construct_pdu(qp, &rsp, sizeof(rsp), NULL, 0));
12302da066efSJohn Baldwin }
12312da066efSJohn Baldwin 
12322da066efSJohn Baldwin static int
tcp_transmit_capsule(struct nvmf_capsule * nc)12332da066efSJohn Baldwin tcp_transmit_capsule(struct nvmf_capsule *nc)
12342da066efSJohn Baldwin {
12352da066efSJohn Baldwin 	if (nc->nc_qe_len == sizeof(struct nvme_command))
12362da066efSJohn Baldwin 		return (tcp_transmit_command(nc));
12372da066efSJohn Baldwin 	else
12382da066efSJohn Baldwin 		return (tcp_transmit_response(nc));
12392da066efSJohn Baldwin }
12402da066efSJohn Baldwin 
12412da066efSJohn Baldwin static int
tcp_receive_capsule(struct nvmf_qpair * nq,struct nvmf_capsule ** ncp)12422da066efSJohn Baldwin tcp_receive_capsule(struct nvmf_qpair *nq, struct nvmf_capsule **ncp)
12432da066efSJohn Baldwin {
12442da066efSJohn Baldwin 	struct nvmf_tcp_qpair *qp = TQP(nq);
12452da066efSJohn Baldwin 	struct nvmf_tcp_capsule *tc;
12462da066efSJohn Baldwin 	int error;
12472da066efSJohn Baldwin 
12482da066efSJohn Baldwin 	while (TAILQ_EMPTY(&qp->rx_capsules)) {
12492da066efSJohn Baldwin 		error = nvmf_tcp_receive_pdu(qp);
12502da066efSJohn Baldwin 		if (error != 0)
12512da066efSJohn Baldwin 			return (error);
12522da066efSJohn Baldwin 	}
12532da066efSJohn Baldwin 	tc = TAILQ_FIRST(&qp->rx_capsules);
12542da066efSJohn Baldwin 	TAILQ_REMOVE(&qp->rx_capsules, tc, link);
12552da066efSJohn Baldwin 	*ncp = &tc->nc;
12562da066efSJohn Baldwin 	return (0);
12572da066efSJohn Baldwin }
12582da066efSJohn Baldwin 
12592da066efSJohn Baldwin static uint8_t
tcp_validate_command_capsule(const struct nvmf_capsule * nc)12602da066efSJohn Baldwin tcp_validate_command_capsule(const struct nvmf_capsule *nc)
12612da066efSJohn Baldwin {
12622da066efSJohn Baldwin 	const struct nvmf_tcp_capsule *tc = CTCAP(nc);
12632da066efSJohn Baldwin 	const struct nvme_sgl_descriptor *sgl;
12642da066efSJohn Baldwin 
12652da066efSJohn Baldwin 	assert(tc->rx_pdu.hdr != NULL);
12662da066efSJohn Baldwin 
12672da066efSJohn Baldwin 	sgl = &nc->nc_sqe.sgl;
12682da066efSJohn Baldwin 	switch (sgl->type) {
12692da066efSJohn Baldwin 	case NVME_SGL_TYPE_ICD:
12702da066efSJohn Baldwin 		if (tc->rx_pdu.data_len != le32toh(sgl->length)) {
12712da066efSJohn Baldwin 			printf("NVMe/TCP: Command Capsule with mismatched ICD length\n");
12722da066efSJohn Baldwin 			return (NVME_SC_DATA_SGL_LENGTH_INVALID);
12732da066efSJohn Baldwin 		}
12742da066efSJohn Baldwin 		break;
12752da066efSJohn Baldwin 	case NVME_SGL_TYPE_COMMAND_BUFFER:
12762da066efSJohn Baldwin 		if (tc->rx_pdu.data_len != 0) {
12772da066efSJohn Baldwin 			printf("NVMe/TCP: Command Buffer SGL with ICD\n");
12782da066efSJohn Baldwin 			return (NVME_SC_INVALID_FIELD);
12792da066efSJohn Baldwin 		}
12802da066efSJohn Baldwin 		break;
12812da066efSJohn Baldwin 	default:
12822da066efSJohn Baldwin 		printf("NVMe/TCP: Invalid SGL type in Command Capsule\n");
12832da066efSJohn Baldwin 		return (NVME_SC_SGL_DESCRIPTOR_TYPE_INVALID);
12842da066efSJohn Baldwin 	}
12852da066efSJohn Baldwin 
12862da066efSJohn Baldwin 	if (sgl->address != 0) {
12872da066efSJohn Baldwin 		printf("NVMe/TCP: Invalid SGL offset in Command Capsule\n");
12882da066efSJohn Baldwin 		return (NVME_SC_SGL_OFFSET_INVALID);
12892da066efSJohn Baldwin 	}
12902da066efSJohn Baldwin 
12912da066efSJohn Baldwin 	return (NVME_SC_SUCCESS);
12922da066efSJohn Baldwin }
12932da066efSJohn Baldwin 
12942da066efSJohn Baldwin static size_t
tcp_capsule_data_len(const struct nvmf_capsule * nc)12952da066efSJohn Baldwin tcp_capsule_data_len(const struct nvmf_capsule *nc)
12962da066efSJohn Baldwin {
12972da066efSJohn Baldwin 	assert(nc->nc_qe_len == sizeof(struct nvme_command));
12982da066efSJohn Baldwin 	return (le32toh(nc->nc_sqe.sgl.length));
12992da066efSJohn Baldwin }
13002da066efSJohn Baldwin 
13012da066efSJohn Baldwin /* NB: cid and ttag are both little-endian already. */
13022da066efSJohn Baldwin static int
tcp_send_r2t(struct nvmf_tcp_qpair * qp,uint16_t cid,uint16_t ttag,uint32_t data_offset,uint32_t data_len)13032da066efSJohn Baldwin tcp_send_r2t(struct nvmf_tcp_qpair *qp, uint16_t cid, uint16_t ttag,
13042da066efSJohn Baldwin     uint32_t data_offset, uint32_t data_len)
13052da066efSJohn Baldwin {
13062da066efSJohn Baldwin 	struct nvme_tcp_r2t_hdr r2t;
13072da066efSJohn Baldwin 
13082da066efSJohn Baldwin 	memset(&r2t, 0, sizeof(r2t));
13092da066efSJohn Baldwin 	r2t.common.pdu_type = NVME_TCP_PDU_TYPE_R2T;
13102da066efSJohn Baldwin 	r2t.cccid = cid;
13112da066efSJohn Baldwin 	r2t.ttag = ttag;
13122da066efSJohn Baldwin 	r2t.r2to = htole32(data_offset);
13132da066efSJohn Baldwin 	r2t.r2tl = htole32(data_len);
13142da066efSJohn Baldwin 
13152da066efSJohn Baldwin 	return (nvmf_tcp_construct_pdu(qp, &r2t, sizeof(r2t), NULL, 0));
13162da066efSJohn Baldwin }
13172da066efSJohn Baldwin 
13182da066efSJohn Baldwin static int
tcp_receive_r2t_data(const struct nvmf_capsule * nc,uint32_t data_offset,void * buf,size_t len)13192da066efSJohn Baldwin tcp_receive_r2t_data(const struct nvmf_capsule *nc, uint32_t data_offset,
13202da066efSJohn Baldwin     void *buf, size_t len)
13212da066efSJohn Baldwin {
13222da066efSJohn Baldwin 	struct nvmf_tcp_qpair *qp = TQP(nc->nc_qpair);
13232da066efSJohn Baldwin 	struct nvmf_tcp_command_buffer *cb;
13242da066efSJohn Baldwin 	int error;
13252da066efSJohn Baldwin 	uint16_t ttag;
13262da066efSJohn Baldwin 
13272da066efSJohn Baldwin 	/*
13282da066efSJohn Baldwin 	 * Don't bother byte-swapping ttag as it is just a cookie
13292da066efSJohn Baldwin 	 * value returned by the other end as-is.
13302da066efSJohn Baldwin 	 */
13312da066efSJohn Baldwin 	ttag = qp->next_ttag++;
13322da066efSJohn Baldwin 
13332da066efSJohn Baldwin 	error = tcp_send_r2t(qp, nc->nc_sqe.cid, ttag, data_offset, len);
13342da066efSJohn Baldwin 	if (error != 0)
13352da066efSJohn Baldwin 		return (error);
13362da066efSJohn Baldwin 
13372da066efSJohn Baldwin 	cb = tcp_alloc_command_buffer(qp, buf, data_offset, len,
13382da066efSJohn Baldwin 	    nc->nc_sqe.cid, ttag, true);
13392da066efSJohn Baldwin 
13402da066efSJohn Baldwin 	/* Parse received PDUs until the data transfer is complete. */
13412da066efSJohn Baldwin 	while (cb->data_xfered < cb->data_len) {
13422da066efSJohn Baldwin 		error = nvmf_tcp_receive_pdu(qp);
13432da066efSJohn Baldwin 		if (error != 0)
13442da066efSJohn Baldwin 			break;
13452da066efSJohn Baldwin 	}
13462da066efSJohn Baldwin 	tcp_free_command_buffer(cb);
13472da066efSJohn Baldwin 	return (error);
13482da066efSJohn Baldwin }
13492da066efSJohn Baldwin 
13502da066efSJohn Baldwin static int
tcp_receive_icd_data(const struct nvmf_capsule * nc,uint32_t data_offset,void * buf,size_t len)13512da066efSJohn Baldwin tcp_receive_icd_data(const struct nvmf_capsule *nc, uint32_t data_offset,
13522da066efSJohn Baldwin     void *buf, size_t len)
13532da066efSJohn Baldwin {
13542da066efSJohn Baldwin 	const struct nvmf_tcp_capsule *tc = CTCAP(nc);
13552da066efSJohn Baldwin 	const char *icd;
13562da066efSJohn Baldwin 
13572da066efSJohn Baldwin 	icd = (const char *)tc->rx_pdu.hdr + tc->rx_pdu.hdr->pdo + data_offset;
13582da066efSJohn Baldwin 	memcpy(buf, icd, len);
13592da066efSJohn Baldwin 	return (0);
13602da066efSJohn Baldwin }
13612da066efSJohn Baldwin 
13622da066efSJohn Baldwin static int
tcp_receive_controller_data(const struct nvmf_capsule * nc,uint32_t data_offset,void * buf,size_t len)13632da066efSJohn Baldwin tcp_receive_controller_data(const struct nvmf_capsule *nc, uint32_t data_offset,
13642da066efSJohn Baldwin     void *buf, size_t len)
13652da066efSJohn Baldwin {
13662da066efSJohn Baldwin 	struct nvmf_association *na = nc->nc_qpair->nq_association;
13672da066efSJohn Baldwin 	const struct nvme_sgl_descriptor *sgl;
13682da066efSJohn Baldwin 	size_t data_len;
13692da066efSJohn Baldwin 
13702da066efSJohn Baldwin 	if (nc->nc_qe_len != sizeof(struct nvme_command) || !na->na_controller)
13712da066efSJohn Baldwin 		return (EINVAL);
13722da066efSJohn Baldwin 
13732da066efSJohn Baldwin 	sgl = &nc->nc_sqe.sgl;
13742da066efSJohn Baldwin 	data_len = le32toh(sgl->length);
13752da066efSJohn Baldwin 	if (data_offset + len > data_len)
13762da066efSJohn Baldwin 		return (EFBIG);
13772da066efSJohn Baldwin 
13782da066efSJohn Baldwin 	if (sgl->type == NVME_SGL_TYPE_ICD)
13792da066efSJohn Baldwin 		return (tcp_receive_icd_data(nc, data_offset, buf, len));
13802da066efSJohn Baldwin 	else
13812da066efSJohn Baldwin 		return (tcp_receive_r2t_data(nc, data_offset, buf, len));
13822da066efSJohn Baldwin }
13832da066efSJohn Baldwin 
13842da066efSJohn Baldwin /* NB: cid is little-endian already. */
13852da066efSJohn Baldwin static int
tcp_send_c2h_pdu(struct nvmf_tcp_qpair * qp,uint16_t cid,uint32_t data_offset,const void * buf,size_t len,bool last_pdu,bool success)13862da066efSJohn Baldwin tcp_send_c2h_pdu(struct nvmf_tcp_qpair *qp, uint16_t cid,
13872da066efSJohn Baldwin     uint32_t data_offset, const void *buf, size_t len, bool last_pdu,
13882da066efSJohn Baldwin     bool success)
13892da066efSJohn Baldwin {
13902da066efSJohn Baldwin 	struct nvme_tcp_c2h_data_hdr c2h;
13912da066efSJohn Baldwin 
13922da066efSJohn Baldwin 	memset(&c2h, 0, sizeof(c2h));
13932da066efSJohn Baldwin 	c2h.common.pdu_type = NVME_TCP_PDU_TYPE_C2H_DATA;
13942da066efSJohn Baldwin 	if (last_pdu)
13952da066efSJohn Baldwin 		c2h.common.flags |= NVME_TCP_C2H_DATA_FLAGS_LAST_PDU;
13962da066efSJohn Baldwin 	if (success)
13972da066efSJohn Baldwin 		c2h.common.flags |= NVME_TCP_C2H_DATA_FLAGS_SUCCESS;
13982da066efSJohn Baldwin 	c2h.cccid = cid;
13992da066efSJohn Baldwin 	c2h.datao = htole32(data_offset);
14002da066efSJohn Baldwin 	c2h.datal = htole32(len);
14012da066efSJohn Baldwin 
14022da066efSJohn Baldwin 	return (nvmf_tcp_construct_pdu(qp, &c2h, sizeof(c2h),
14032da066efSJohn Baldwin 	    __DECONST(void *, buf), len));
14042da066efSJohn Baldwin }
14052da066efSJohn Baldwin 
14062da066efSJohn Baldwin static int
tcp_send_controller_data(const struct nvmf_capsule * nc,const void * buf,size_t len)14072da066efSJohn Baldwin tcp_send_controller_data(const struct nvmf_capsule *nc, const void *buf,
14082da066efSJohn Baldwin     size_t len)
14092da066efSJohn Baldwin {
14102da066efSJohn Baldwin 	struct nvmf_association *na = nc->nc_qpair->nq_association;
14112da066efSJohn Baldwin 	struct nvmf_tcp_qpair *qp = TQP(nc->nc_qpair);
14122da066efSJohn Baldwin 	const struct nvme_sgl_descriptor *sgl;
14132da066efSJohn Baldwin 	const char *src;
14142da066efSJohn Baldwin 	size_t todo;
14152da066efSJohn Baldwin 	uint32_t data_len, data_offset;
14162da066efSJohn Baldwin 	int error;
14172da066efSJohn Baldwin 	bool last_pdu, send_success_flag;
14182da066efSJohn Baldwin 
14192da066efSJohn Baldwin 	if (nc->nc_qe_len != sizeof(struct nvme_command) || !na->na_controller)
14202da066efSJohn Baldwin 		return (EINVAL);
14212da066efSJohn Baldwin 
14222da066efSJohn Baldwin 	sgl = &nc->nc_sqe.sgl;
14232da066efSJohn Baldwin 	data_len = le32toh(sgl->length);
14242da066efSJohn Baldwin 	if (len != data_len) {
14252da066efSJohn Baldwin 		nvmf_send_generic_error(nc, NVME_SC_INVALID_FIELD);
14262da066efSJohn Baldwin 		return (EFBIG);
14272da066efSJohn Baldwin 	}
14282da066efSJohn Baldwin 
14292da066efSJohn Baldwin 	if (sgl->type != NVME_SGL_TYPE_COMMAND_BUFFER) {
14302da066efSJohn Baldwin 		nvmf_send_generic_error(nc, NVME_SC_INVALID_FIELD);
14312da066efSJohn Baldwin 		return (EINVAL);
14322da066efSJohn Baldwin 	}
14332da066efSJohn Baldwin 
14342da066efSJohn Baldwin 	/* Use the SUCCESS flag if SQ flow control is disabled. */
14352da066efSJohn Baldwin 	send_success_flag = !qp->qp.nq_flow_control;
14362da066efSJohn Baldwin 
14372da066efSJohn Baldwin 	/*
14382da066efSJohn Baldwin 	 * Write out one or more C2H_DATA PDUs containing the data.
14392da066efSJohn Baldwin 	 * Each PDU is arbitrarily capped at 256k.
14402da066efSJohn Baldwin 	 */
14412da066efSJohn Baldwin 	data_offset = 0;
14422da066efSJohn Baldwin 	src = buf;
14432da066efSJohn Baldwin 	while (len > 0) {
14442da066efSJohn Baldwin 		if (len > 256 * 1024) {
14452da066efSJohn Baldwin 			todo = 256 * 1024;
14462da066efSJohn Baldwin 			last_pdu = false;
14472da066efSJohn Baldwin 		} else {
14482da066efSJohn Baldwin 			todo = len;
14492da066efSJohn Baldwin 			last_pdu = true;
14502da066efSJohn Baldwin 		}
14512da066efSJohn Baldwin 		error = tcp_send_c2h_pdu(qp, nc->nc_sqe.cid, data_offset,
14522da066efSJohn Baldwin 		    src, todo, last_pdu, last_pdu && send_success_flag);
14532da066efSJohn Baldwin 		if (error != 0) {
14542da066efSJohn Baldwin 			nvmf_send_generic_error(nc,
14552da066efSJohn Baldwin 			    NVME_SC_TRANSIENT_TRANSPORT_ERROR);
14562da066efSJohn Baldwin 			return (error);
14572da066efSJohn Baldwin 		}
14582da066efSJohn Baldwin 		data_offset += todo;
14592da066efSJohn Baldwin 		src += todo;
14602da066efSJohn Baldwin 		len -= todo;
14612da066efSJohn Baldwin 	}
14622da066efSJohn Baldwin 	if (!send_success_flag)
14632da066efSJohn Baldwin 		nvmf_send_success(nc);
14642da066efSJohn Baldwin 	return (0);
14652da066efSJohn Baldwin }
14662da066efSJohn Baldwin 
14672da066efSJohn Baldwin struct nvmf_transport_ops tcp_ops = {
14682da066efSJohn Baldwin 	.allocate_association = tcp_allocate_association,
14692da066efSJohn Baldwin 	.update_association = tcp_update_association,
14702da066efSJohn Baldwin 	.free_association = tcp_free_association,
14712da066efSJohn Baldwin 	.allocate_qpair = tcp_allocate_qpair,
14722da066efSJohn Baldwin 	.free_qpair = tcp_free_qpair,
14732da066efSJohn Baldwin 	.kernel_handoff_params = tcp_kernel_handoff_params,
14742da066efSJohn Baldwin 	.allocate_capsule = tcp_allocate_capsule,
14752da066efSJohn Baldwin 	.free_capsule = tcp_free_capsule,
14762da066efSJohn Baldwin 	.transmit_capsule = tcp_transmit_capsule,
14772da066efSJohn Baldwin 	.receive_capsule = tcp_receive_capsule,
14782da066efSJohn Baldwin 	.validate_command_capsule = tcp_validate_command_capsule,
14792da066efSJohn Baldwin 	.capsule_data_len = tcp_capsule_data_len,
14802da066efSJohn Baldwin 	.receive_controller_data = tcp_receive_controller_data,
14812da066efSJohn Baldwin 	.send_controller_data = tcp_send_controller_data,
14822da066efSJohn Baldwin };
1483