/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2023 Chelsio Communications, Inc.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "opt_inet.h"

#include <sys/param.h>
#include <sys/libkern.h>
#include <sys/kernel.h>
#include <sys/module.h>

#ifdef TCP_OFFLOAD
#include <sys/bitset.h>
#include <sys/capsicum.h>
#include <sys/file.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/nv.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/tcp_var.h>
#include <netinet/toecore.h>

#include <dev/nvmf/nvmf.h>
#include <dev/nvmf/nvmf_proto.h>
#include <dev/nvmf/nvmf_tcp.h>
#include <dev/nvmf/nvmf_transport.h>
#include <dev/nvmf/nvmf_transport_internal.h>

#include <vm/pmap.h>
#include <vm/vm_page.h>

#include "common/common.h"
#include "common/t4_regs.h"
#include "common/t4_tcb.h"
#include "tom/t4_tom.h"

/* Status code values in CPL_NVMT_CMP. */
#define	CMP_STATUS_ERROR_MASK		0x7f
#define	CMP_STATUS_NO_ERROR		0
#define	CMP_STATUS_HEADER_DIGEST	1
#define	CMP_STATUS_DIRECTION_MISMATCH	2
#define	CMP_STATUS_DIGEST_FLAG_MISMATCH	3
#define	CMP_STATUS_SUCCESS_NOT_LAST	4
#define	CMP_STATUS_BAD_DATA_LENGTH	5
#define	CMP_STATUS_USER_MODE_UNALLOCATED	6
#define	CMP_STATUS_RQT_LIMIT		7
#define	CMP_STATUS_RQT_WRAP		8
#define	CMP_STATUS_RQT_BOUND		9
#define	CMP_STATUS_TPT_LIMIT		16
#define	CMP_STATUS_TPT_INVALID		17
#define	CMP_STATUS_TPT_COLOUR_MISMATCH	18
#define	CMP_STATUS_TPT_MISC		19
#define	CMP_STATUS_TPT_WRAP		20
#define	CMP_STATUS_TPT_BOUND		21
#define	CMP_STATUS_TPT_LAST_PDU_UNALIGNED	22
#define	CMP_STATUS_PBL_LIMIT		24
#define	CMP_STATUS_DATA_DIGEST		25
#define	CMP_STATUS_DDP			0x80

/*
 * Transfer tags and CIDs with the MSB set are "unallocated" tags that
 * pass data through to the freelist without using DDP.
 */
#define	CHE_FL_TAG_MASK		0x8000
#define	CHE_MAX_FL_TAG		0x7fff
#define	CHE_NUM_FL_TAGS		(CHE_MAX_FL_TAG + 1)

#define	CHE_TAG_IS_FL(ttag)	(((ttag) & CHE_FL_TAG_MASK) == CHE_FL_TAG_MASK)
#define	CHE_RAW_FL_TAG(ttag)	((ttag) & ~CHE_FL_TAG_MASK)
#define	CHE_DDP_TAG(stag_idx, color)	((stag_idx) << 4 | (color))
#define	CHE_STAG_COLOR(stag)	((stag) & 0xf)
#define	CHE_STAG_IDX(stag)	((stag) >> 4)
#define	CHE_DDP_MAX_COLOR	0xf

#define	CHE_DDP_NO_TAG		0xffff

/*
 * A bitmap of non-DDP CIDs in use on the host.  Since there is no
 * _BIT_FFC (find first clear), the bitset is inverted so that a clear
 * bit indicates an in-use CID.
 */
BITSET_DEFINE(fl_cid_set, CHE_NUM_FL_TAGS);
#define	FL_CID_INIT(p)		__BIT_FILL(CHE_NUM_FL_TAGS, p)
#define	FL_CID_BUSY(n, p)	__BIT_CLR(CHE_NUM_FL_TAGS, n, p)
#define	FL_CID_ISACTIVE(n, p)	!__BIT_ISSET(CHE_NUM_FL_TAGS, n, p)
#define	FL_CID_FREE(n, p)	__BIT_SET(CHE_NUM_FL_TAGS, n, p)
#define	FL_CID_FINDFREE_AT(p, start)	__BIT_FFS_AT(CHE_NUM_FL_TAGS, p, start)

/*
 * The TCP sequence number of both CPL_NVMT_DATA and CPL_NVMT_CMP
 * mbufs is saved here while the mbuf is in qp->rx_data or qp->rx_pdus.
 */
#define	nvmf_tcp_seq	PH_loc.thirtytwo[0]

/*
 * The CPL status of a CPL_NVMT_CMP mbuf is saved here while the mbuf
 * is in qp->rx_pdus.
 */
#define	nvmf_cpl_status	PH_loc.eight[4]

struct nvmf_che_capsule;
struct nvmf_che_qpair;

struct nvmf_che_adapter {
	struct adapter *sc;

	u_int ddp_threshold;
	u_int max_transmit_pdu;
	u_int max_receive_pdu;
	bool nvmt_data_iqe;

	struct sysctl_ctx_list ctx;	/* from uld_activate to deactivate */
};

struct nvmf_che_command_buffer {
	struct nvmf_che_qpair *qp;

	struct nvmf_io_request io;
	size_t	data_len;
	size_t	data_xfered;
	uint32_t data_offset;

	u_int	refs;
	int	error;

	bool	ddp_ok;
	uint16_t cid;
	uint16_t ttag;
	uint16_t original_cid;	/* Host only */

	TAILQ_ENTRY(nvmf_che_command_buffer) link;

	/* Fields used for DDP. */
	struct fw_ri_tpte tpte;
	uint64_t *pbl;
	uint32_t pbl_addr;
	uint32_t pbl_len;

	/* Controller only */
	struct nvmf_che_capsule *cc;
};

struct nvmf_che_command_buffer_list {
	TAILQ_HEAD(, nvmf_che_command_buffer) head;
	struct mtx lock;
};

struct nvmf_che_qpair {
	struct nvmf_qpair qp;

	struct socket *so;
	struct toepcb *toep;
	struct nvmf_che_adapter *nca;

	volatile u_int refs;	/* Every allocated capsule holds a reference */
	uint8_t	txpda;
	uint8_t rxpda;
	bool header_digests;
	bool data_digests;
	uint32_t maxr2t;
	uint32_t maxh2cdata;	/* Controller only */
	uint32_t max_rx_data;
	uint32_t max_tx_data;
	uint32_t max_icd;	/* Host only */
	uint32_t max_ioccsz;	/* Controller only */
	union {
		uint16_t next_fl_ttag;	/* Controller only */
		uint16_t next_cid;	/* Host only */
	};
	uint16_t next_ddp_tag;
	u_int num_fl_ttags;	/* Controller only */
	u_int active_fl_ttags;	/* Controller only */
	u_int num_ddp_tags;
	u_int active_ddp_tags;
	bool send_success;	/* Controller only */
	uint8_t ddp_color;
	uint32_t tpt_offset;

	/* Receive state. */
	struct thread *rx_thread;
	struct cv rx_cv;
	bool	rx_shutdown;
	int	rx_error;
	struct mbufq rx_data;	/* Data received via CPL_NVMT_DATA. */
	struct mbufq rx_pdus;	/* PDU headers received via CPL_NVMT_CMP. */

	/* Transmit state. */
	struct thread *tx_thread;
	struct cv tx_cv;
	bool	tx_shutdown;
	STAILQ_HEAD(, nvmf_che_capsule) tx_capsules;

	struct nvmf_che_command_buffer_list tx_buffers;
	struct nvmf_che_command_buffer_list rx_buffers;

	/*
	 * For the controller, an RX command buffer can be in one of
	 * three locations, all protected by the rx_buffers.lock.  If
	 * a receive request is waiting for either an R2T slot for its
	 * command (due to exceeding MAXR2T) or a transfer tag, it is
	 * placed on the rx_buffers list.  When a request is allocated
	 * an active transfer tag, it moves to either the
	 * open_ddp_tags[] or open_fl_ttags[] array (indexed by the
	 * tag) until it completes.
	 *
	 * For the host, an RX command buffer using DDP is in
	 * open_ddp_tags[], otherwise it is in rx_buffers.
	 */
	struct nvmf_che_command_buffer **open_ddp_tags;
	struct nvmf_che_command_buffer **open_fl_ttags;	/* Controller only */

	/*
	 * For the host, CIDs submitted by nvmf(4) must be rewritten
	 * to either use or not use DDP.  The CID in a response
	 * capsule must be restored to its original value.  For
	 * DDP, the original CID is stored in the command buffer.
	 * These variables manage non-DDP CIDs.
	 */
	uint16_t *fl_cids;		/* Host only */
	struct fl_cid_set *fl_cid_set;	/* Host only */
	struct mtx fl_cid_lock;		/* Host only */
};

struct nvmf_che_rxpdu {
	struct mbuf *m;
	const struct nvme_tcp_common_pdu_hdr *hdr;
	uint32_t data_len;
	bool data_digest_mismatch;
	bool ddp;
};

struct nvmf_che_capsule {
	struct nvmf_capsule nc;

	volatile u_int refs;

	struct nvmf_che_rxpdu rx_pdu;

	uint32_t active_r2ts;		/* Controller only */
#ifdef INVARIANTS
	uint32_t tx_data_offset;	/* Controller only */
	u_int pending_r2ts;		/* Controller only */
#endif

	STAILQ_ENTRY(nvmf_che_capsule) link;
};

#define	CCAP(nc)	((struct nvmf_che_capsule *)(nc))
#define	CQP(qp)		((struct nvmf_che_qpair *)(qp))

static void	che_release_capsule(struct nvmf_che_capsule *cc);
static void	che_free_qpair(struct nvmf_qpair *nq);

SYSCTL_NODE(_kern_nvmf, OID_AUTO, che, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "Chelsio TCP offload transport");

static u_int che_max_transmit_pdu = 32 * 1024;
SYSCTL_UINT(_kern_nvmf_che, OID_AUTO, max_transmit_pdu, CTLFLAG_RWTUN,
    &che_max_transmit_pdu, 0,
    "Maximum size of a transmitted PDU");

static u_int che_max_receive_pdu = 32 * 1024;
SYSCTL_UINT(_kern_nvmf_che, OID_AUTO, max_receive_pdu, CTLFLAG_RWTUN,
    &che_max_receive_pdu, 0,
    "Maximum size of a received PDU");

static int use_dsgl = 1;
SYSCTL_INT(_kern_nvmf_che, OID_AUTO, use_dsgl, CTLFLAG_RWTUN, &use_dsgl, 0,
    "Use DSGL for PBL/FastReg (default=1)");

static int inline_threshold = 256;
SYSCTL_INT(_kern_nvmf_che, OID_AUTO, inline_threshold, CTLFLAG_RWTUN,
    &inline_threshold, 0,
    "inline vs dsgl threshold (default=256)");

static int ddp_tags_per_qp = 128;
SYSCTL_INT(_kern_nvmf_che, OID_AUTO, ddp_tags_per_qp, CTLFLAG_RWTUN,
    &ddp_tags_per_qp, 0,
    "Number of DDP tags to reserve for each queue pair");

static MALLOC_DEFINE(M_NVMF_CHE, "nvmf_che", "Chelsio NVMe-TCP offload");

/*
 * PBL regions consist of N full-sized pages.  TPT entries support an
 * initial offset into the first page (FBO) and can handle a partial
 * length on the last page.
 */
static bool
che_ddp_io_check(struct nvmf_che_qpair *qp, const struct nvmf_io_request *io)
{
	const struct memdesc *mem = &io->io_mem;
	struct bus_dma_segment *ds;
	int i;

	if (io->io_len < qp->nca->ddp_threshold) {
		return (false);
	}

	switch (mem->md_type) {
	case MEMDESC_VADDR:
	case MEMDESC_PADDR:
	case MEMDESC_VMPAGES:
		return (true);
	case MEMDESC_VLIST:
	case MEMDESC_PLIST:
		/*
		 * Require all but the first segment to start on a
		 * page boundary.  Require all but the last segment to
		 * end on a page boundary.
		 */
		ds = mem->u.md_list;
		for (i = 0; i < mem->md_nseg; i++, ds++) {
			if (i != 0 && ds->ds_addr % PAGE_SIZE != 0)
				return (false);
			if (i != mem->md_nseg - 1 &&
			    (ds->ds_addr + ds->ds_len) % PAGE_SIZE != 0)
				return (false);
		}
		return (true);
	default:
		/*
		 * Other types could be validated with more work, but
		 * they aren't used currently by nvmf(4) or nvmft(4).
		 */
		return (false);
	}
}

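/*
 * First byte offset (FBO): the offset of the start of the I/O buffer
 * within its first page.
 */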
static u_int
che_fbo(struct nvmf_che_command_buffer *cb)
{
	struct memdesc *mem = &cb->io.io_mem;

	switch (mem->md_type) {
	case MEMDESC_VADDR:
		return ((uintptr_t)mem->u.md_vaddr & PAGE_MASK);
	case MEMDESC_PADDR:
		return (mem->u.md_paddr & PAGE_MASK);
	case MEMDESC_VMPAGES:
		return (mem->md_offset);
	case MEMDESC_VLIST:
	case MEMDESC_PLIST:
		return (mem->u.md_list[0].ds_addr & PAGE_MASK);
	default:
		__assert_unreachable();
	}
}

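/* Number of pages spanned by the I/O buffer, including the FBO. */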
static u_int
che_npages(struct nvmf_che_command_buffer *cb)
{
	return (howmany(che_fbo(cb) + cb->io.io_len, PAGE_SIZE));
}

static struct nvmf_che_command_buffer *
che_alloc_command_buffer(struct nvmf_che_qpair *qp,
    const struct nvmf_io_request *io, uint32_t data_offset, size_t data_len,
    uint16_t cid)
{
	struct nvmf_che_command_buffer *cb;

	cb = malloc(sizeof(*cb), M_NVMF_CHE, M_WAITOK);
	cb->qp = qp;
	cb->io = *io;
	cb->data_offset = data_offset;
	cb->data_len = data_len;
	cb->data_xfered = 0;
	refcount_init(&cb->refs, 1);
	cb->error = 0;
	cb->ddp_ok = che_ddp_io_check(qp, io);
	cb->cid = cid;
	cb->ttag = 0;
	cb->original_cid = 0;
	cb->cc = NULL;
	cb->pbl = NULL;

	return (cb);
}

static void
che_hold_command_buffer(struct nvmf_che_command_buffer *cb)
{
	refcount_acquire(&cb->refs);
}

static void
che_free_command_buffer(struct nvmf_che_command_buffer *cb)
{
	nvmf_complete_io_request(&cb->io, cb->data_xfered, cb->error);
	if (cb->cc != NULL)
		che_release_capsule(cb->cc);
	MPASS(cb->pbl == NULL);
	free(cb, M_NVMF_CHE);
}

static void
che_release_command_buffer(struct nvmf_che_command_buffer *cb)
{
	if (refcount_release(&cb->refs))
		che_free_command_buffer(cb);
}

static void
che_add_command_buffer(struct nvmf_che_command_buffer_list *list,
    struct nvmf_che_command_buffer *cb)
{
	mtx_assert(&list->lock, MA_OWNED);
	TAILQ_INSERT_HEAD(&list->head, cb, link);
}

static struct nvmf_che_command_buffer *
che_find_command_buffer(struct nvmf_che_command_buffer_list *list,
    uint16_t cid)
{
	struct nvmf_che_command_buffer *cb;

	mtx_assert(&list->lock, MA_OWNED);
	TAILQ_FOREACH(cb, &list->head, link) {
		if (cb->cid == cid)
			return (cb);
	}
	return (NULL);
}

static void
che_remove_command_buffer(struct nvmf_che_command_buffer_list *list,
    struct nvmf_che_command_buffer *cb)
{
	mtx_assert(&list->lock, MA_OWNED);
	TAILQ_REMOVE(&list->head, cb, link);
}

static void
che_purge_command_buffer(struct nvmf_che_command_buffer_list *list,
    uint16_t cid)
{
	struct nvmf_che_command_buffer *cb;

	mtx_lock(&list->lock);
	cb = che_find_command_buffer(list, cid);
	if (cb != NULL) {
		che_remove_command_buffer(list, cb);
		mtx_unlock(&list->lock);
		che_release_command_buffer(cb);
	} else
		mtx_unlock(&list->lock);
}

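/*
 * Write 'len' bytes to adapter memory at 'addr' (expressed in 32-byte
 * units) using inline memory-write work requests, split into
 * T4_MAX_INLINE_SIZE chunks.  A NULL 'data' pointer is used when
 * clearing adapter memory (see che_clear_tpt_entry()).
 */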
static int
che_write_mem_inline(struct adapter *sc, struct toepcb *toep, uint32_t addr,
    uint32_t len, void *data, struct mbufq *wrq)
{
	struct mbuf *m;
	char *cp;
	int copy_len, i, num_wqe, wr_len;

#ifdef VERBOSE_TRACES
	CTR(KTR_CXGBE, "%s: addr 0x%x len %u", __func__, addr << 5, len);
#endif
	num_wqe = DIV_ROUND_UP(len, T4_MAX_INLINE_SIZE);
	cp = data;
	for (i = 0; i < num_wqe; i++) {
		copy_len = min(len, T4_MAX_INLINE_SIZE);
		wr_len = T4_WRITE_MEM_INLINE_LEN(copy_len);

		m = alloc_raw_wr_mbuf(wr_len);
		if (m == NULL)
			return (ENOMEM);
		t4_write_mem_inline_wr(sc, mtod(m, void *), wr_len, toep->tid,
		    addr, copy_len, cp, 0);
		if (cp != NULL)
			cp += T4_MAX_INLINE_SIZE;
		addr += T4_MAX_INLINE_SIZE >> 5;
		len -= T4_MAX_INLINE_SIZE;

		mbufq_enqueue(wrq, m);
	}
	return (0);
}

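/*
 * Write 'len' bytes of kernel virtual memory to adapter memory at
 * 'addr' using DMA work requests, one page at a time so that each
 * request references physically contiguous memory.
 */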
static int
che_write_mem_dma_aligned(struct adapter *sc, struct toepcb *toep,
    uint32_t addr, uint32_t len, void *data, struct mbufq *wrq)
{
	struct mbuf *m;
	vm_offset_t va;
	u_int todo;
	int wr_len;

	/* First page. */
	va = (vm_offset_t)data;
	todo = min(PAGE_SIZE - (va % PAGE_SIZE), len);
	wr_len = T4_WRITE_MEM_DMA_LEN;
	m = alloc_raw_wr_mbuf(wr_len);
	if (m == NULL)
		return (ENOMEM);
	t4_write_mem_dma_wr(sc, mtod(m, void *), wr_len, toep->tid, addr,
	    todo, pmap_kextract(va), 0);
	mbufq_enqueue(wrq, m);
	len -= todo;
	addr += todo >> 5;
	va += todo;

	while (len > 0) {
		MPASS(va == trunc_page(va));
		todo = min(PAGE_SIZE, len);
		m = alloc_raw_wr_mbuf(wr_len);
		if (m == NULL)
			return (ENOMEM);
		t4_write_mem_dma_wr(sc, mtod(m, void *), wr_len, toep->tid,
		    addr, todo, pmap_kextract(va), 0);
		mbufq_enqueue(wrq, m);
		len -= todo;
		addr += todo >> 5;
		va += todo;
	}
	return (0);
}

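/*
 * Write (or clear, when 'data' is NULL) a region of adapter memory,
 * choosing between inline and DSGL work requests based on the use_dsgl
 * and inline_threshold tunables, and queue the work requests on the
 * connection's pending PDU queue.
 */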
static int
che_write_adapter_mem(struct nvmf_che_qpair *qp, uint32_t addr, uint32_t len,
    void *data)
{
	struct adapter *sc = qp->nca->sc;
	struct toepcb *toep = qp->toep;
	struct socket *so = qp->so;
	struct inpcb *inp = sotoinpcb(so);
	struct mbufq mq;
	int error;

	mbufq_init(&mq, INT_MAX);
	if (!use_dsgl || len < inline_threshold || data == NULL)
		error = che_write_mem_inline(sc, toep, addr, len, data, &mq);
	else
		error = che_write_mem_dma_aligned(sc, toep, addr, len, data,
		    &mq);
	if (__predict_false(error != 0))
		goto error;

	INP_WLOCK(inp);
	if ((inp->inp_flags & INP_DROPPED) != 0) {
		INP_WUNLOCK(inp);
		error = ECONNRESET;
		goto error;
	}
	mbufq_concat(&toep->ulp_pduq, &mq);
	INP_WUNLOCK(inp);
	return (0);

error:
	mbufq_drain(&mq);
	return (error);
}

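/*
 * Allocate a page list (PBL) from the adapter's PBL pool describing
 * the pages backing this command buffer and write it to adapter
 * memory.
 */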
static bool
che_alloc_pbl(struct nvmf_che_qpair *qp, struct nvmf_che_command_buffer *cb)
{
	struct adapter *sc = qp->nca->sc;
	struct memdesc *mem = &cb->io.io_mem;
	uint64_t *pbl;
	uint32_t addr, len;
	u_int i, npages;
	int error;

	MPASS(cb->pbl == NULL);
	MPASS(cb->ddp_ok);

	/* Hardware limit?  iWARP only enforces this for T5. */
	if (cb->io.io_len >= (8 * 1024 * 1024 * 1024ULL))
		return (false);

	npages = che_npages(cb);
	len = roundup2(npages, 4) * sizeof(*cb->pbl);
	addr = t4_pblpool_alloc(sc, len);
	if (addr == 0)
		return (false);

	pbl = malloc(len, M_NVMF_CHE, M_NOWAIT | M_ZERO);
	if (pbl == NULL) {
		t4_pblpool_free(sc, addr, len);
		return (false);
	}

	switch (mem->md_type) {
	case MEMDESC_VADDR:
	{
		vm_offset_t va;

		va = trunc_page((uintptr_t)mem->u.md_vaddr);
		for (i = 0; i < npages; i++)
			pbl[i] = htobe64(pmap_kextract(va + i * PAGE_SIZE));
		break;
	}
	case MEMDESC_PADDR:
	{
		vm_paddr_t pa;

		pa = trunc_page(mem->u.md_paddr);
		for (i = 0; i < npages; i++)
			pbl[i] = htobe64(pa + i * PAGE_SIZE);
		break;
	}
	case MEMDESC_VMPAGES:
		for (i = 0; i < npages; i++)
			pbl[i] = htobe64(VM_PAGE_TO_PHYS(mem->u.md_ma[i]));
		break;
	case MEMDESC_VLIST:
	{
		struct bus_dma_segment *ds;
		vm_offset_t va;
		vm_size_t len;
		u_int j, k;

		i = 0;
		ds = mem->u.md_list;
		for (j = 0; j < mem->md_nseg; j++, ds++) {
			va = trunc_page((uintptr_t)ds->ds_addr);
			len = ds->ds_len;
			if (ds->ds_addr % PAGE_SIZE != 0)
				len += ds->ds_addr % PAGE_SIZE;
			for (k = 0; k < howmany(len, PAGE_SIZE); k++) {
				pbl[i] = htobe64(pmap_kextract(va +
					k * PAGE_SIZE));
				i++;
			}
		}
		MPASS(i == npages);
		break;
	}
	case MEMDESC_PLIST:
	{
		struct bus_dma_segment *ds;
		vm_paddr_t pa;
		vm_size_t len;
		u_int j, k;

		i = 0;
		ds = mem->u.md_list;
		for (j = 0; j < mem->md_nseg; j++, ds++) {
			pa = trunc_page((vm_paddr_t)ds->ds_addr);
			len = ds->ds_len;
			if (ds->ds_addr % PAGE_SIZE != 0)
				len += ds->ds_addr % PAGE_SIZE;
			for (k = 0; k < howmany(len, PAGE_SIZE); k++) {
				pbl[i] = htobe64(pa + k * PAGE_SIZE);
				i++;
			}
		}
		MPASS(i == npages);
		break;
	}
	default:
		__assert_unreachable();
	}

	error = che_write_adapter_mem(qp, addr >> 5, len, pbl);
	if (error != 0) {
		t4_pblpool_free(sc, addr, len);
		free(pbl, M_NVMF_CHE);
		return (false);
	}

	cb->pbl = pbl;
	cb->pbl_addr = addr;
	cb->pbl_len = len;

	return (true);
}

static void
che_free_pbl(struct nvmf_che_command_buffer *cb)
{
	free(cb->pbl, M_NVMF_CHE);
	t4_pblpool_free(cb->qp->nca->sc, cb->pbl_addr, cb->pbl_len);
	cb->pbl = NULL;
	cb->pbl_addr = 0;
	cb->pbl_len = 0;
}

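/*
 * Construct the TPT entry for 'stag' describing this command buffer
 * (permissions, page size, FBO, length, and the PBL address written by
 * che_alloc_pbl()) and write it to adapter memory.
 */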
static bool
che_write_tpt_entry(struct nvmf_che_qpair *qp,
    struct nvmf_che_command_buffer *cb, uint16_t stag)
{
	uint32_t tpt_addr;
	int error;

	cb->tpte.valid_to_pdid = htobe32(F_FW_RI_TPTE_VALID |
	    V_FW_RI_TPTE_STAGKEY(CHE_STAG_COLOR(stag)) |
	    F_FW_RI_TPTE_STAGSTATE |
	    V_FW_RI_TPTE_STAGTYPE(FW_RI_STAG_NSMR) |
	    V_FW_RI_TPTE_PDID(0));
	cb->tpte.locread_to_qpid = htobe32(
	    V_FW_RI_TPTE_PERM(FW_RI_MEM_ACCESS_REM_WRITE) |
	    V_FW_RI_TPTE_ADDRTYPE(FW_RI_ZERO_BASED_TO) |
	    V_FW_RI_TPTE_PS(PAGE_SIZE) |
	    V_FW_RI_TPTE_QPID(qp->toep->tid));
#define PBL_OFF(qp, a)	((a) - (qp)->nca->sc->vres.pbl.start)
	cb->tpte.nosnoop_pbladdr =
	    htobe32(V_FW_RI_TPTE_PBLADDR(PBL_OFF(qp, cb->pbl_addr) >> 3));
	cb->tpte.len_lo = htobe32(cb->data_len);
	cb->tpte.va_hi = 0;
	cb->tpte.va_lo_fbo = htobe32(che_fbo(cb));
	cb->tpte.dca_mwbcnt_pstag = 0;
	cb->tpte.len_hi = htobe32(cb->data_offset);

	tpt_addr = qp->tpt_offset + CHE_STAG_IDX(stag) +
	    (qp->nca->sc->vres.stag.start >> 5);

	error = che_write_adapter_mem(qp, tpt_addr, sizeof(cb->tpte),
	    &cb->tpte);
	return (error == 0);
}

static void
che_clear_tpt_entry(struct nvmf_che_qpair *qp, uint16_t stag)
{
	uint32_t tpt_addr;

	tpt_addr = qp->tpt_offset + CHE_STAG_IDX(stag) +
	    (qp->nca->sc->vres.stag.start >> 5);

	(void)che_write_adapter_mem(qp, tpt_addr, sizeof(struct fw_ri_tpte),
	    NULL);
}

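/*
 * Reserve a free STAG index in open_ddp_tags[], searching round-robin
 * from next_ddp_tag and advancing the color each time the search
 * wraps, and return the corresponding DDP tag.
 */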
static uint16_t
che_alloc_ddp_stag(struct nvmf_che_qpair *qp,
    struct nvmf_che_command_buffer *cb)
{
	uint16_t stag_idx;

	mtx_assert(&qp->rx_buffers.lock, MA_OWNED);
	MPASS(cb->ddp_ok);

	if (qp->active_ddp_tags == qp->num_ddp_tags)
		return (CHE_DDP_NO_TAG);

	MPASS(qp->num_ddp_tags != 0);

	stag_idx = qp->next_ddp_tag;
	for (;;) {
		if (qp->open_ddp_tags[stag_idx] == NULL)
			break;
		if (stag_idx == qp->num_ddp_tags - 1) {
			stag_idx = 0;
			if (qp->ddp_color == CHE_DDP_MAX_COLOR)
				qp->ddp_color = 0;
			else
				qp->ddp_color++;
		} else
			stag_idx++;
		MPASS(stag_idx != qp->next_ddp_tag);
	}
	if (stag_idx == qp->num_ddp_tags - 1)
		qp->next_ddp_tag = 0;
	else
		qp->next_ddp_tag = stag_idx + 1;

	qp->active_ddp_tags++;
	qp->open_ddp_tags[stag_idx] = cb;

	return (CHE_DDP_TAG(stag_idx, qp->ddp_color));
}

static void
che_free_ddp_stag(struct nvmf_che_qpair *qp, struct nvmf_che_command_buffer *cb,
    uint16_t stag)
{
	MPASS(!CHE_TAG_IS_FL(stag));

	mtx_assert(&qp->rx_buffers.lock, MA_OWNED);

	MPASS(qp->open_ddp_tags[CHE_STAG_IDX(stag)] == cb);

	qp->open_ddp_tags[CHE_STAG_IDX(stag)] = NULL;
	qp->active_ddp_tags--;
}

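/*
 * Set up DDP for a command buffer: reserve an STAG, build and write
 * the PBL, and write the TPT entry.  Returns CHE_DDP_NO_TAG if any
 * step fails.
 */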
static uint16_t
che_alloc_ddp_tag(struct nvmf_che_qpair *qp,
    struct nvmf_che_command_buffer *cb)
{
	uint16_t stag;

	mtx_assert(&qp->rx_buffers.lock, MA_OWNED);

	if (!cb->ddp_ok)
		return (CHE_DDP_NO_TAG);

	stag = che_alloc_ddp_stag(qp, cb);
	if (stag == CHE_DDP_NO_TAG) {
		counter_u64_add(qp->toep->ofld_rxq->rx_nvme_ddp_setup_no_stag,
		    1);
		return (CHE_DDP_NO_TAG);
	}

	if (!che_alloc_pbl(qp, cb)) {
		che_free_ddp_stag(qp, cb, stag);
		counter_u64_add(qp->toep->ofld_rxq->rx_nvme_ddp_setup_error, 1);
		return (CHE_DDP_NO_TAG);
	}

	if (!che_write_tpt_entry(qp, cb, stag)) {
		che_free_pbl(cb);
		che_free_ddp_stag(qp, cb, stag);
		counter_u64_add(qp->toep->ofld_rxq->rx_nvme_ddp_setup_error, 1);
		return (CHE_DDP_NO_TAG);
	}

	counter_u64_add(qp->toep->ofld_rxq->rx_nvme_ddp_setup_ok, 1);
	return (stag);
}

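/*
 * Tear down DDP for a command buffer: invalidate the TPT entry, free
 * the PBL, and release the STAG.
 */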
static void
che_free_ddp_tag(struct nvmf_che_qpair *qp, struct nvmf_che_command_buffer *cb,
    uint16_t stag)
{
	MPASS(!CHE_TAG_IS_FL(stag));

	mtx_assert(&qp->rx_buffers.lock, MA_OWNED);

	MPASS(qp->open_ddp_tags[CHE_STAG_IDX(stag)] == cb);

	che_clear_tpt_entry(qp, stag);
	che_free_pbl(cb);
	che_free_ddp_stag(qp, cb, stag);
}

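/* Queue a PDU for transmission and kick the TOE connection. */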
static void
nvmf_che_write_pdu(struct nvmf_che_qpair *qp, struct mbuf *m)
{
	struct epoch_tracker et;
	struct socket *so = qp->so;
	struct inpcb *inp = sotoinpcb(so);
	struct toepcb *toep = qp->toep;

	CURVNET_SET(so->so_vnet);
	NET_EPOCH_ENTER(et);
	INP_WLOCK(inp);
	if (__predict_false(inp->inp_flags & INP_DROPPED) ||
	    __predict_false((toep->flags & TPF_ATTACHED) == 0)) {
		m_freem(m);
	} else {
		mbufq_enqueue(&toep->ulp_pduq, m);
		t4_push_pdus(toep->vi->adapter, toep, 0);
	}
	INP_WUNLOCK(inp);
	NET_EPOCH_EXIT(et);
	CURVNET_RESTORE();
}

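/*
 * Build and send a termination request PDU, echoing up to 'hlen' bytes
 * of the offending PDU header.
 */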
static void
nvmf_che_report_error(struct nvmf_che_qpair *qp, uint16_t fes, uint32_t fei,
    struct mbuf *rx_pdu, u_int hlen)
{
	struct nvme_tcp_term_req_hdr *hdr;
	struct mbuf *m;

	if (hlen != 0) {
		hlen = min(hlen, NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE);
		hlen = min(hlen, m_length(rx_pdu, NULL));
	}

	m = m_get2(sizeof(*hdr) + hlen, M_WAITOK, MT_DATA, M_PKTHDR);
	m->m_len = sizeof(*hdr) + hlen;
	m->m_pkthdr.len = m->m_len;
	hdr = mtod(m, void *);
	memset(hdr, 0, sizeof(*hdr));
	hdr->common.pdu_type = qp->qp.nq_controller ?
	    NVME_TCP_PDU_TYPE_C2H_TERM_REQ : NVME_TCP_PDU_TYPE_H2C_TERM_REQ;
	hdr->common.hlen = sizeof(*hdr);
	hdr->common.plen = sizeof(*hdr) + hlen;
	hdr->fes = htole16(fes);
	le32enc(hdr->fei, fei);
	if (hlen != 0)
		m_copydata(rx_pdu, 0, hlen, (caddr_t)(hdr + 1));

	nvmf_che_write_pdu(qp, m);
}

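/*
 * Map the CPL_NVMT_CMP status for a received PDU onto NVMe/TCP errors
 * (sending a termination request where appropriate) before performing
 * the common software validation of the PDU header.
 */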
910*ec0cd287SJohn Baldwin static int
nvmf_che_validate_pdu(struct nvmf_che_qpair * qp,struct nvmf_che_rxpdu * pdu)911*ec0cd287SJohn Baldwin nvmf_che_validate_pdu(struct nvmf_che_qpair *qp, struct nvmf_che_rxpdu *pdu)
912*ec0cd287SJohn Baldwin {
913*ec0cd287SJohn Baldwin 	const struct nvme_tcp_common_pdu_hdr *ch;
914*ec0cd287SJohn Baldwin 	struct mbuf *m = pdu->m;
915*ec0cd287SJohn Baldwin 	uint32_t data_len, fei, plen, rx_digest;
916*ec0cd287SJohn Baldwin 	u_int hlen, cpl_error;
917*ec0cd287SJohn Baldwin 	int error;
918*ec0cd287SJohn Baldwin 	uint16_t fes;
919*ec0cd287SJohn Baldwin 
920*ec0cd287SJohn Baldwin 	/* Determine how large of a PDU header to return for errors. */
921*ec0cd287SJohn Baldwin 	ch = pdu->hdr;
922*ec0cd287SJohn Baldwin 	hlen = ch->hlen;
923*ec0cd287SJohn Baldwin 	plen = le32toh(ch->plen);
924*ec0cd287SJohn Baldwin 	if (hlen < sizeof(*ch) || hlen > plen)
925*ec0cd287SJohn Baldwin 		hlen = sizeof(*ch);
926*ec0cd287SJohn Baldwin 
927*ec0cd287SJohn Baldwin 	cpl_error = m->m_pkthdr.nvmf_cpl_status & CMP_STATUS_ERROR_MASK;
928*ec0cd287SJohn Baldwin 	switch (cpl_error) {
929*ec0cd287SJohn Baldwin 	case CMP_STATUS_NO_ERROR:
930*ec0cd287SJohn Baldwin 		break;
931*ec0cd287SJohn Baldwin 	case CMP_STATUS_HEADER_DIGEST:
932*ec0cd287SJohn Baldwin 		counter_u64_add(
933*ec0cd287SJohn Baldwin 		    qp->toep->ofld_rxq->rx_nvme_header_digest_errors, 1);
934*ec0cd287SJohn Baldwin 		printf("NVMe/TCP: Header digest mismatch\n");
935*ec0cd287SJohn Baldwin 		rx_digest = le32dec(mtodo(m, ch->hlen));
936*ec0cd287SJohn Baldwin 		nvmf_che_report_error(qp,
937*ec0cd287SJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_HDGST_ERROR, rx_digest, m,
938*ec0cd287SJohn Baldwin 		    hlen);
939*ec0cd287SJohn Baldwin 		return (EBADMSG);
940*ec0cd287SJohn Baldwin 	case CMP_STATUS_DIRECTION_MISMATCH:
941*ec0cd287SJohn Baldwin 		counter_u64_add(qp->toep->ofld_rxq->rx_nvme_invalid_headers, 1);
942*ec0cd287SJohn Baldwin 		printf("NVMe/TCP: Invalid PDU type %u\n", ch->pdu_type);
943*ec0cd287SJohn Baldwin 		nvmf_che_report_error(qp,
944*ec0cd287SJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD,
945*ec0cd287SJohn Baldwin 		    offsetof(struct nvme_tcp_common_pdu_hdr, pdu_type), m,
946*ec0cd287SJohn Baldwin 		    hlen);
947*ec0cd287SJohn Baldwin 		return (EBADMSG);
948*ec0cd287SJohn Baldwin 	case CMP_STATUS_SUCCESS_NOT_LAST:
949*ec0cd287SJohn Baldwin 	case CMP_STATUS_DIGEST_FLAG_MISMATCH:
950*ec0cd287SJohn Baldwin 		counter_u64_add(qp->toep->ofld_rxq->rx_nvme_invalid_headers, 1);
951*ec0cd287SJohn Baldwin 		printf("NVMe/TCP: Invalid PDU header flags %#x\n", ch->flags);
952*ec0cd287SJohn Baldwin 		nvmf_che_report_error(qp,
953*ec0cd287SJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD,
954*ec0cd287SJohn Baldwin 		    offsetof(struct nvme_tcp_common_pdu_hdr, flags), m, hlen);
955*ec0cd287SJohn Baldwin 		return (EBADMSG);
956*ec0cd287SJohn Baldwin 	case CMP_STATUS_BAD_DATA_LENGTH:
957*ec0cd287SJohn Baldwin 		counter_u64_add(qp->toep->ofld_rxq->rx_nvme_invalid_headers, 1);
958*ec0cd287SJohn Baldwin 		printf("NVMe/TCP: Invalid PDU length %u\n", plen);
959*ec0cd287SJohn Baldwin 		nvmf_che_report_error(qp,
960*ec0cd287SJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD,
961*ec0cd287SJohn Baldwin 		    offsetof(struct nvme_tcp_common_pdu_hdr, plen), m, hlen);
962*ec0cd287SJohn Baldwin 		return (EBADMSG);
963*ec0cd287SJohn Baldwin 	case CMP_STATUS_USER_MODE_UNALLOCATED:
964*ec0cd287SJohn Baldwin 	case CMP_STATUS_RQT_LIMIT:
965*ec0cd287SJohn Baldwin 	case CMP_STATUS_RQT_WRAP:
966*ec0cd287SJohn Baldwin 	case CMP_STATUS_RQT_BOUND:
967*ec0cd287SJohn Baldwin 		device_printf(qp->nca->sc->dev,
968*ec0cd287SJohn Baldwin 		    "received invalid NVMET error %u\n",
969*ec0cd287SJohn Baldwin 		    cpl_error);
970*ec0cd287SJohn Baldwin 		return (ECONNRESET);
971*ec0cd287SJohn Baldwin 	case CMP_STATUS_TPT_LIMIT:
972*ec0cd287SJohn Baldwin 	case CMP_STATUS_TPT_INVALID:
973*ec0cd287SJohn Baldwin 	case CMP_STATUS_TPT_COLOUR_MISMATCH:
974*ec0cd287SJohn Baldwin 	case CMP_STATUS_TPT_MISC:
975*ec0cd287SJohn Baldwin 	case CMP_STATUS_TPT_WRAP:
976*ec0cd287SJohn Baldwin 	case CMP_STATUS_TPT_BOUND:
977*ec0cd287SJohn Baldwin 		counter_u64_add(qp->toep->ofld_rxq->rx_nvme_invalid_headers, 1);
978*ec0cd287SJohn Baldwin 		switch (ch->pdu_type) {
979*ec0cd287SJohn Baldwin 		case NVME_TCP_PDU_TYPE_H2C_DATA:
980*ec0cd287SJohn Baldwin 			nvmf_che_report_error(qp,
981*ec0cd287SJohn Baldwin 			    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD,
982*ec0cd287SJohn Baldwin 			    offsetof(struct nvme_tcp_h2c_data_hdr, ttag),
983*ec0cd287SJohn Baldwin 			    pdu->m, pdu->hdr->hlen);
984*ec0cd287SJohn Baldwin 			return (EBADMSG);
985*ec0cd287SJohn Baldwin 		case NVME_TCP_PDU_TYPE_C2H_DATA:
986*ec0cd287SJohn Baldwin 			nvmf_che_report_error(qp,
987*ec0cd287SJohn Baldwin 			    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD,
988*ec0cd287SJohn Baldwin 			    offsetof(struct nvme_tcp_c2h_data_hdr, cccid), m,
989*ec0cd287SJohn Baldwin 			    hlen);
990*ec0cd287SJohn Baldwin 			return (EBADMSG);
991*ec0cd287SJohn Baldwin 		default:
992*ec0cd287SJohn Baldwin 			device_printf(qp->nca->sc->dev,
993*ec0cd287SJohn Baldwin 			    "received DDP NVMET error %u for PDU %u\n",
994*ec0cd287SJohn Baldwin 			    cpl_error, ch->pdu_type);
995*ec0cd287SJohn Baldwin 			return (ECONNRESET);
996*ec0cd287SJohn Baldwin 		}
997*ec0cd287SJohn Baldwin 	case CMP_STATUS_TPT_LAST_PDU_UNALIGNED:
998*ec0cd287SJohn Baldwin 		counter_u64_add(qp->toep->ofld_rxq->rx_nvme_invalid_headers, 1);
999*ec0cd287SJohn Baldwin 		nvmf_che_report_error(qp,
1000*ec0cd287SJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, m, hlen);
1001*ec0cd287SJohn Baldwin 		return (EBADMSG);
1002*ec0cd287SJohn Baldwin 	case CMP_STATUS_PBL_LIMIT:
1003*ec0cd287SJohn Baldwin 		counter_u64_add(qp->toep->ofld_rxq->rx_nvme_invalid_headers, 1);
1004*ec0cd287SJohn Baldwin 		nvmf_che_report_error(qp,
1005*ec0cd287SJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE, 0, m,
1006*ec0cd287SJohn Baldwin 		    hlen);
1007*ec0cd287SJohn Baldwin 		return (EBADMSG);
1008*ec0cd287SJohn Baldwin 	case CMP_STATUS_DATA_DIGEST:
1009*ec0cd287SJohn Baldwin 		/* Handled below. */
1010*ec0cd287SJohn Baldwin 		break;
1011*ec0cd287SJohn Baldwin 	default:
1012*ec0cd287SJohn Baldwin 		device_printf(qp->nca->sc->dev,
1013*ec0cd287SJohn Baldwin 		    "received unknown NVMET error %u\n",
1014*ec0cd287SJohn Baldwin 		    cpl_error);
1015*ec0cd287SJohn Baldwin 		return (ECONNRESET);
1016*ec0cd287SJohn Baldwin 	}
1017*ec0cd287SJohn Baldwin 
1018*ec0cd287SJohn Baldwin 	error = nvmf_tcp_validate_pdu_header(ch, qp->qp.nq_controller,
1019*ec0cd287SJohn Baldwin 	    qp->header_digests, qp->data_digests, qp->rxpda, &data_len, &fes,
1020*ec0cd287SJohn Baldwin 	    &fei);
1021*ec0cd287SJohn Baldwin 	if (error != 0) {
1022*ec0cd287SJohn Baldwin 		if (error != ECONNRESET)
1023*ec0cd287SJohn Baldwin 			nvmf_che_report_error(qp, fes, fei, m, hlen);
1024*ec0cd287SJohn Baldwin 		return (error);
1025*ec0cd287SJohn Baldwin 	}
1026*ec0cd287SJohn Baldwin 
1027*ec0cd287SJohn Baldwin 	/* Check data digest if present. */
1028*ec0cd287SJohn Baldwin 	pdu->data_digest_mismatch = false;
1029*ec0cd287SJohn Baldwin 	if ((ch->flags & NVME_TCP_CH_FLAGS_DDGSTF) != 0) {
1030*ec0cd287SJohn Baldwin 		if (cpl_error == CMP_STATUS_DATA_DIGEST) {
1031*ec0cd287SJohn Baldwin 			printf("NVMe/TCP: Data digest mismatch\n");
1032*ec0cd287SJohn Baldwin 			pdu->data_digest_mismatch = true;
1033*ec0cd287SJohn Baldwin 			counter_u64_add(
1034*ec0cd287SJohn Baldwin 			    qp->toep->ofld_rxq->rx_nvme_data_digest_errors, 1);
1035*ec0cd287SJohn Baldwin 		}
1036*ec0cd287SJohn Baldwin 	}
1037*ec0cd287SJohn Baldwin 
1038*ec0cd287SJohn Baldwin 	pdu->data_len = data_len;
1039*ec0cd287SJohn Baldwin 
1040*ec0cd287SJohn Baldwin 	return (0);
1041*ec0cd287SJohn Baldwin }
1042*ec0cd287SJohn Baldwin 
1043*ec0cd287SJohn Baldwin static void
1044*ec0cd287SJohn Baldwin nvmf_che_free_pdu(struct nvmf_che_rxpdu *pdu)
1045*ec0cd287SJohn Baldwin {
1046*ec0cd287SJohn Baldwin 	m_freem(pdu->m);
1047*ec0cd287SJohn Baldwin 	pdu->m = NULL;
1048*ec0cd287SJohn Baldwin 	pdu->hdr = NULL;
1049*ec0cd287SJohn Baldwin }
1050*ec0cd287SJohn Baldwin 
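/*
 * Handle a received termination request PDU: log the reported error
 * (fes/fei) and return ECONNRESET so the receive loop tears down the
 * connection.
 */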
1051*ec0cd287SJohn Baldwin static int
1052*ec0cd287SJohn Baldwin nvmf_che_handle_term_req(struct nvmf_che_rxpdu *pdu)
1053*ec0cd287SJohn Baldwin {
1054*ec0cd287SJohn Baldwin 	const struct nvme_tcp_term_req_hdr *hdr;
1055*ec0cd287SJohn Baldwin 
1056*ec0cd287SJohn Baldwin 	hdr = (const void *)pdu->hdr;
1057*ec0cd287SJohn Baldwin 
1058*ec0cd287SJohn Baldwin 	printf("NVMe/TCP: Received termination request: fes %#x fei %#x\n",
1059*ec0cd287SJohn Baldwin 	    le16toh(hdr->fes), le32dec(hdr->fei));
1060*ec0cd287SJohn Baldwin 	nvmf_che_free_pdu(pdu);
1061*ec0cd287SJohn Baldwin 	return (ECONNRESET);
1062*ec0cd287SJohn Baldwin }
1063*ec0cd287SJohn Baldwin 
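/*
 * Wrap a received command capsule PDU in a new capsule and hand it to
 * the transport layer.  The PDU mbuf chain is retained in the capsule
 * so that any data carried in the PDU remains available.
 */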
1064*ec0cd287SJohn Baldwin static int
1065*ec0cd287SJohn Baldwin nvmf_che_save_command_capsule(struct nvmf_che_qpair *qp,
1066*ec0cd287SJohn Baldwin     struct nvmf_che_rxpdu *pdu)
1067*ec0cd287SJohn Baldwin {
1068*ec0cd287SJohn Baldwin 	const struct nvme_tcp_cmd *cmd;
1069*ec0cd287SJohn Baldwin 	struct nvmf_capsule *nc;
1070*ec0cd287SJohn Baldwin 	struct nvmf_che_capsule *cc;
1071*ec0cd287SJohn Baldwin 
1072*ec0cd287SJohn Baldwin 	cmd = (const void *)pdu->hdr;
1073*ec0cd287SJohn Baldwin 
1074*ec0cd287SJohn Baldwin 	nc = nvmf_allocate_command(&qp->qp, &cmd->ccsqe, M_WAITOK);
1075*ec0cd287SJohn Baldwin 
1076*ec0cd287SJohn Baldwin 	cc = CCAP(nc);
1077*ec0cd287SJohn Baldwin 	cc->rx_pdu = *pdu;
1078*ec0cd287SJohn Baldwin 
1079*ec0cd287SJohn Baldwin 	nvmf_capsule_received(&qp->qp, nc);
1080*ec0cd287SJohn Baldwin 	return (0);
1081*ec0cd287SJohn Baldwin }
1082*ec0cd287SJohn Baldwin 
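/*
 * Handle a received response capsule on the host side: translate the
 * transport-level CID in the CQE back to the host's original CID and
 * release the freelist CID or DDP tag reserved for the command before
 * passing the completion up.
 */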
1083*ec0cd287SJohn Baldwin static int
1084*ec0cd287SJohn Baldwin nvmf_che_save_response_capsule(struct nvmf_che_qpair *qp,
1085*ec0cd287SJohn Baldwin     struct nvmf_che_rxpdu *pdu)
1086*ec0cd287SJohn Baldwin {
1087*ec0cd287SJohn Baldwin 	const struct nvme_tcp_rsp *rsp;
1088*ec0cd287SJohn Baldwin 	struct nvme_completion cpl;
1089*ec0cd287SJohn Baldwin 	struct nvmf_capsule *nc;
1090*ec0cd287SJohn Baldwin 	struct nvmf_che_capsule *cc;
1091*ec0cd287SJohn Baldwin 	uint16_t cid;
1092*ec0cd287SJohn Baldwin 
1093*ec0cd287SJohn Baldwin 	rsp = (const void *)pdu->hdr;
1094*ec0cd287SJohn Baldwin 
1095*ec0cd287SJohn Baldwin 	/*
1096*ec0cd287SJohn Baldwin 	 * Restore the original CID and ensure any command buffers
1097*ec0cd287SJohn Baldwin 	 * associated with this CID have been released.  Once the CQE
1098*ec0cd287SJohn Baldwin 	 * has been received, no further transfers to the command
1099*ec0cd287SJohn Baldwin 	 * buffer for the associated CID can occur.
1100*ec0cd287SJohn Baldwin 	 */
1101*ec0cd287SJohn Baldwin 	cpl = rsp->rccqe;
1102*ec0cd287SJohn Baldwin 	cid = le16toh(cpl.cid);
1103*ec0cd287SJohn Baldwin 	if (CHE_TAG_IS_FL(cid)) {
1104*ec0cd287SJohn Baldwin 		cid = CHE_RAW_FL_TAG(cid);
1105*ec0cd287SJohn Baldwin 		mtx_lock(&qp->fl_cid_lock);
1106*ec0cd287SJohn Baldwin 		MPASS(FL_CID_ISACTIVE(cid, qp->fl_cid_set));
1107*ec0cd287SJohn Baldwin 		cpl.cid = qp->fl_cids[cid];
1108*ec0cd287SJohn Baldwin 		FL_CID_FREE(cid, qp->fl_cid_set);
1109*ec0cd287SJohn Baldwin 		mtx_unlock(&qp->fl_cid_lock);
1110*ec0cd287SJohn Baldwin 
1111*ec0cd287SJohn Baldwin 		che_purge_command_buffer(&qp->rx_buffers, rsp->rccqe.cid);
1112*ec0cd287SJohn Baldwin 		che_purge_command_buffer(&qp->tx_buffers, rsp->rccqe.cid);
1113*ec0cd287SJohn Baldwin 	} else {
1114*ec0cd287SJohn Baldwin 		struct nvmf_che_command_buffer *cb;
1115*ec0cd287SJohn Baldwin 
1116*ec0cd287SJohn Baldwin 		mtx_lock(&qp->rx_buffers.lock);
1117*ec0cd287SJohn Baldwin 		cb = qp->open_ddp_tags[CHE_STAG_IDX(cid)];
1118*ec0cd287SJohn Baldwin 		MPASS(cb != NULL);
1119*ec0cd287SJohn Baldwin 		MPASS(cb->cid == rsp->rccqe.cid);
1120*ec0cd287SJohn Baldwin 		cpl.cid = cb->original_cid;
1121*ec0cd287SJohn Baldwin 		che_free_ddp_tag(qp, cb, cid);
1122*ec0cd287SJohn Baldwin 		mtx_unlock(&qp->rx_buffers.lock);
1123*ec0cd287SJohn Baldwin 		che_release_command_buffer(cb);
1124*ec0cd287SJohn Baldwin 	}
1125*ec0cd287SJohn Baldwin #ifdef VERBOSE_TRACES
1126*ec0cd287SJohn Baldwin 	CTR(KTR_CXGBE, "%s: tid %u freed cid 0x%04x for 0x%04x", __func__,
1127*ec0cd287SJohn Baldwin 	    qp->toep->tid, le16toh(rsp->rccqe.cid), cpl.cid);
1128*ec0cd287SJohn Baldwin #endif
1129*ec0cd287SJohn Baldwin 
1130*ec0cd287SJohn Baldwin 	nc = nvmf_allocate_response(&qp->qp, &cpl, M_WAITOK);
1131*ec0cd287SJohn Baldwin 
1132*ec0cd287SJohn Baldwin 	nc->nc_sqhd_valid = true;
1133*ec0cd287SJohn Baldwin 	cc = CCAP(nc);
1134*ec0cd287SJohn Baldwin 	cc->rx_pdu = *pdu;
1135*ec0cd287SJohn Baldwin 
1136*ec0cd287SJohn Baldwin 	nvmf_capsule_received(&qp->qp, nc);
1137*ec0cd287SJohn Baldwin 	return (0);
1138*ec0cd287SJohn Baldwin }
1139*ec0cd287SJohn Baldwin 
1140*ec0cd287SJohn Baldwin /*
1141*ec0cd287SJohn Baldwin  * Construct a PDU that contains an optional data payload.  This
1142*ec0cd287SJohn Baldwin  * includes dealing with the length fields in the common header.  The
1143*ec0cd287SJohn Baldwin  * adapter inserts digests and padding when the PDU is transmitted.
1144*ec0cd287SJohn Baldwin  */
1145*ec0cd287SJohn Baldwin static struct mbuf *
1146*ec0cd287SJohn Baldwin nvmf_che_construct_pdu(struct nvmf_che_qpair *qp, void *hdr, size_t hlen,
1147*ec0cd287SJohn Baldwin     struct mbuf *data, uint32_t data_len)
1148*ec0cd287SJohn Baldwin {
1149*ec0cd287SJohn Baldwin 	struct nvme_tcp_common_pdu_hdr *ch;
1150*ec0cd287SJohn Baldwin 	struct mbuf *top;
1151*ec0cd287SJohn Baldwin 	uint32_t pdo, plen;
1152*ec0cd287SJohn Baldwin 	uint8_t ulp_submode;
1153*ec0cd287SJohn Baldwin 
1154*ec0cd287SJohn Baldwin 	plen = hlen;
1155*ec0cd287SJohn Baldwin 	if (qp->header_digests)
1156*ec0cd287SJohn Baldwin 		plen += sizeof(uint32_t);
1157*ec0cd287SJohn Baldwin 	if (data_len != 0) {
1158*ec0cd287SJohn Baldwin 		KASSERT(m_length(data, NULL) == data_len, ("length mismatch"));
1159*ec0cd287SJohn Baldwin 		pdo = roundup(plen, qp->txpda);
1160*ec0cd287SJohn Baldwin 		plen = pdo + data_len;
1161*ec0cd287SJohn Baldwin 		if (qp->data_digests)
1162*ec0cd287SJohn Baldwin 			plen += sizeof(uint32_t);
1163*ec0cd287SJohn Baldwin 	} else {
1164*ec0cd287SJohn Baldwin 		KASSERT(data == NULL, ("payload mbuf with zero length"));
1165*ec0cd287SJohn Baldwin 		pdo = 0;
1166*ec0cd287SJohn Baldwin 	}
1167*ec0cd287SJohn Baldwin 
1168*ec0cd287SJohn Baldwin 	top = m_get2(hlen, M_WAITOK, MT_DATA, M_PKTHDR);
1169*ec0cd287SJohn Baldwin 	top->m_len = hlen;
1170*ec0cd287SJohn Baldwin 	top->m_pkthdr.len = hlen;
1171*ec0cd287SJohn Baldwin 	ch = mtod(top, void *);
1172*ec0cd287SJohn Baldwin 	memcpy(ch, hdr, hlen);
1173*ec0cd287SJohn Baldwin 	ch->hlen = hlen;
1174*ec0cd287SJohn Baldwin 	ulp_submode = 0;
1175*ec0cd287SJohn Baldwin 	if (qp->header_digests) {
1176*ec0cd287SJohn Baldwin 		ch->flags |= NVME_TCP_CH_FLAGS_HDGSTF;
1177*ec0cd287SJohn Baldwin 		ulp_submode |= ULP_CRC_HEADER;
1178*ec0cd287SJohn Baldwin 	}
1179*ec0cd287SJohn Baldwin 	if (qp->data_digests && data_len != 0) {
1180*ec0cd287SJohn Baldwin 		ch->flags |= NVME_TCP_CH_FLAGS_DDGSTF;
1181*ec0cd287SJohn Baldwin 		ulp_submode |= ULP_CRC_DATA;
1182*ec0cd287SJohn Baldwin 	}
1183*ec0cd287SJohn Baldwin 	ch->pdo = pdo;
1184*ec0cd287SJohn Baldwin 	ch->plen = htole32(plen);
1185*ec0cd287SJohn Baldwin 	set_mbuf_ulp_submode(top, ulp_submode);
1186*ec0cd287SJohn Baldwin 
1187*ec0cd287SJohn Baldwin 	if (data_len != 0) {
1188*ec0cd287SJohn Baldwin 		top->m_pkthdr.len += data_len;
1189*ec0cd287SJohn Baldwin 		top->m_next = data;
1190*ec0cd287SJohn Baldwin 	}
1191*ec0cd287SJohn Baldwin 
1192*ec0cd287SJohn Baldwin 	return (top);
1193*ec0cd287SJohn Baldwin }
1194*ec0cd287SJohn Baldwin 
1195*ec0cd287SJohn Baldwin /* Allocate the next free freelist transfer tag. */
1196*ec0cd287SJohn Baldwin static bool
1197*ec0cd287SJohn Baldwin nvmf_che_allocate_fl_ttag(struct nvmf_che_qpair *qp,
1198*ec0cd287SJohn Baldwin     struct nvmf_che_command_buffer *cb)
1199*ec0cd287SJohn Baldwin {
1200*ec0cd287SJohn Baldwin 	uint16_t ttag;
1201*ec0cd287SJohn Baldwin 
1202*ec0cd287SJohn Baldwin 	mtx_assert(&qp->rx_buffers.lock, MA_OWNED);
1203*ec0cd287SJohn Baldwin 
1204*ec0cd287SJohn Baldwin 	if (qp->active_fl_ttags == qp->num_fl_ttags)
1205*ec0cd287SJohn Baldwin 		return (false);
1206*ec0cd287SJohn Baldwin 
1207*ec0cd287SJohn Baldwin 	ttag = qp->next_fl_ttag;
1208*ec0cd287SJohn Baldwin 	for (;;) {
1209*ec0cd287SJohn Baldwin 		if (qp->open_fl_ttags[ttag] == NULL)
1210*ec0cd287SJohn Baldwin 			break;
1211*ec0cd287SJohn Baldwin 		if (ttag == qp->num_fl_ttags - 1)
1212*ec0cd287SJohn Baldwin 			ttag = 0;
1213*ec0cd287SJohn Baldwin 		else
1214*ec0cd287SJohn Baldwin 			ttag++;
1215*ec0cd287SJohn Baldwin 		MPASS(ttag != qp->next_fl_ttag);
1216*ec0cd287SJohn Baldwin 	}
1217*ec0cd287SJohn Baldwin 	if (ttag == qp->num_fl_ttags - 1)
1218*ec0cd287SJohn Baldwin 		qp->next_fl_ttag = 0;
1219*ec0cd287SJohn Baldwin 	else
1220*ec0cd287SJohn Baldwin 		qp->next_fl_ttag = ttag + 1;
1221*ec0cd287SJohn Baldwin 
1222*ec0cd287SJohn Baldwin 	qp->active_fl_ttags++;
1223*ec0cd287SJohn Baldwin 	qp->open_fl_ttags[ttag] = cb;
1224*ec0cd287SJohn Baldwin 
1225*ec0cd287SJohn Baldwin 	cb->ttag = ttag | CHE_FL_TAG_MASK;
1226*ec0cd287SJohn Baldwin 	return (true);
1227*ec0cd287SJohn Baldwin }
1228*ec0cd287SJohn Baldwin 
1229*ec0cd287SJohn Baldwin /* Attempt to allocate a free transfer tag and assign it to cb. */
1230*ec0cd287SJohn Baldwin static bool
1231*ec0cd287SJohn Baldwin nvmf_che_allocate_ttag(struct nvmf_che_qpair *qp,
1232*ec0cd287SJohn Baldwin     struct nvmf_che_command_buffer *cb)
1233*ec0cd287SJohn Baldwin {
1234*ec0cd287SJohn Baldwin 	uint16_t stag;
1235*ec0cd287SJohn Baldwin 
1236*ec0cd287SJohn Baldwin 	mtx_assert(&qp->rx_buffers.lock, MA_OWNED);
1237*ec0cd287SJohn Baldwin 
1238*ec0cd287SJohn Baldwin 	stag = che_alloc_ddp_tag(qp, cb);
1239*ec0cd287SJohn Baldwin 	if (stag == CHE_DDP_NO_TAG) {
1240*ec0cd287SJohn Baldwin 		if (!nvmf_che_allocate_fl_ttag(qp, cb))
1241*ec0cd287SJohn Baldwin 			return (false);
1242*ec0cd287SJohn Baldwin 	} else {
1243*ec0cd287SJohn Baldwin 		cb->ttag = stag;
1244*ec0cd287SJohn Baldwin 	}
1245*ec0cd287SJohn Baldwin #ifdef VERBOSE_TRACES
1246*ec0cd287SJohn Baldwin 	CTR(KTR_CXGBE, "%s: tid %u allocated ttag 0x%04x", __func__,
1247*ec0cd287SJohn Baldwin 	    qp->toep->tid, cb->ttag);
1248*ec0cd287SJohn Baldwin #endif
1249*ec0cd287SJohn Baldwin 	cb->cc->active_r2ts++;
1250*ec0cd287SJohn Baldwin 	return (true);
1251*ec0cd287SJohn Baldwin }
1252*ec0cd287SJohn Baldwin 
1253*ec0cd287SJohn Baldwin /* Find the next command buffer eligible to schedule for R2T. */
1254*ec0cd287SJohn Baldwin static struct nvmf_che_command_buffer *
1255*ec0cd287SJohn Baldwin nvmf_che_next_r2t(struct nvmf_che_qpair *qp)
1256*ec0cd287SJohn Baldwin {
1257*ec0cd287SJohn Baldwin 	struct nvmf_che_command_buffer *cb;
1258*ec0cd287SJohn Baldwin 
1259*ec0cd287SJohn Baldwin 	mtx_assert(&qp->rx_buffers.lock, MA_OWNED);
1260*ec0cd287SJohn Baldwin 
1261*ec0cd287SJohn Baldwin 	TAILQ_FOREACH(cb, &qp->rx_buffers.head, link) {
1262*ec0cd287SJohn Baldwin 		/* NB: maxr2t is 0's based. */
1263*ec0cd287SJohn Baldwin 		if (cb->cc->active_r2ts > qp->maxr2t)
1264*ec0cd287SJohn Baldwin 			continue;
1265*ec0cd287SJohn Baldwin 
1266*ec0cd287SJohn Baldwin 		if (!nvmf_che_allocate_ttag(qp, cb))
1267*ec0cd287SJohn Baldwin 			return (NULL);
1268*ec0cd287SJohn Baldwin #ifdef INVARIANTS
1269*ec0cd287SJohn Baldwin 		cb->cc->pending_r2ts--;
1270*ec0cd287SJohn Baldwin #endif
1271*ec0cd287SJohn Baldwin 		TAILQ_REMOVE(&qp->rx_buffers.head, cb, link);
1272*ec0cd287SJohn Baldwin 		return (cb);
1273*ec0cd287SJohn Baldwin 	}
1274*ec0cd287SJohn Baldwin 	return (NULL);
1275*ec0cd287SJohn Baldwin }
1276*ec0cd287SJohn Baldwin 
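/*
 * Build and queue an R2T PDU asking the host to transmit data_len
 * bytes starting at data_offset for the given command and transfer
 * tag.
 */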
1277*ec0cd287SJohn Baldwin /* NB: cid is little-endian already. */
1278*ec0cd287SJohn Baldwin static void
1279*ec0cd287SJohn Baldwin che_send_r2t(struct nvmf_che_qpair *qp, uint16_t cid, uint16_t ttag,
1280*ec0cd287SJohn Baldwin     uint32_t data_offset, uint32_t data_len)
1281*ec0cd287SJohn Baldwin {
1282*ec0cd287SJohn Baldwin 	struct nvme_tcp_r2t_hdr r2t;
1283*ec0cd287SJohn Baldwin 	struct mbuf *m;
1284*ec0cd287SJohn Baldwin 
1285*ec0cd287SJohn Baldwin 	memset(&r2t, 0, sizeof(r2t));
1286*ec0cd287SJohn Baldwin 	r2t.common.pdu_type = NVME_TCP_PDU_TYPE_R2T;
1287*ec0cd287SJohn Baldwin 	r2t.cccid = cid;
1288*ec0cd287SJohn Baldwin 	r2t.ttag = htole16(ttag);
1289*ec0cd287SJohn Baldwin 	r2t.r2to = htole32(data_offset);
1290*ec0cd287SJohn Baldwin 	r2t.r2tl = htole32(data_len);
1291*ec0cd287SJohn Baldwin 
1292*ec0cd287SJohn Baldwin 	m = nvmf_che_construct_pdu(qp, &r2t, sizeof(r2t), NULL, 0);
1293*ec0cd287SJohn Baldwin 	nvmf_che_write_pdu(qp, m);
1294*ec0cd287SJohn Baldwin }
1295*ec0cd287SJohn Baldwin 
1296*ec0cd287SJohn Baldwin /*
1297*ec0cd287SJohn Baldwin  * Release a transfer tag and schedule another R2T.
1298*ec0cd287SJohn Baldwin  *
1299*ec0cd287SJohn Baldwin  * NB: This drops the rx_buffers.lock mutex.
1300*ec0cd287SJohn Baldwin  */
1301*ec0cd287SJohn Baldwin static void
1302*ec0cd287SJohn Baldwin nvmf_che_send_next_r2t(struct nvmf_che_qpair *qp,
1303*ec0cd287SJohn Baldwin     struct nvmf_che_command_buffer *cb)
1304*ec0cd287SJohn Baldwin {
1305*ec0cd287SJohn Baldwin 	struct nvmf_che_command_buffer *ncb;
1306*ec0cd287SJohn Baldwin 
1307*ec0cd287SJohn Baldwin 	mtx_assert(&qp->rx_buffers.lock, MA_OWNED);
1308*ec0cd287SJohn Baldwin 
1309*ec0cd287SJohn Baldwin #ifdef VERBOSE_TRACES
1310*ec0cd287SJohn Baldwin 	CTR(KTR_CXGBE, "%s: tid %u freed ttag 0x%04x", __func__, qp->toep->tid,
1311*ec0cd287SJohn Baldwin 	    cb->ttag);
1312*ec0cd287SJohn Baldwin #endif
1313*ec0cd287SJohn Baldwin 	if (CHE_TAG_IS_FL(cb->ttag)) {
1314*ec0cd287SJohn Baldwin 		uint16_t ttag;
1315*ec0cd287SJohn Baldwin 
1316*ec0cd287SJohn Baldwin 		ttag = CHE_RAW_FL_TAG(cb->ttag);
1317*ec0cd287SJohn Baldwin 		MPASS(qp->open_fl_ttags[ttag] == cb);
1318*ec0cd287SJohn Baldwin 
1319*ec0cd287SJohn Baldwin 		/* Release this transfer tag. */
1320*ec0cd287SJohn Baldwin 		qp->open_fl_ttags[ttag] = NULL;
1321*ec0cd287SJohn Baldwin 		qp->active_fl_ttags--;
1322*ec0cd287SJohn Baldwin 	} else
1323*ec0cd287SJohn Baldwin 		che_free_ddp_tag(qp, cb, cb->ttag);
1324*ec0cd287SJohn Baldwin 
1325*ec0cd287SJohn Baldwin 	cb->cc->active_r2ts--;
1326*ec0cd287SJohn Baldwin 
1327*ec0cd287SJohn Baldwin 	/* Schedule another R2T. */
1328*ec0cd287SJohn Baldwin 	ncb = nvmf_che_next_r2t(qp);
1329*ec0cd287SJohn Baldwin 	mtx_unlock(&qp->rx_buffers.lock);
1330*ec0cd287SJohn Baldwin 	if (ncb != NULL)
1331*ec0cd287SJohn Baldwin 		che_send_r2t(qp, ncb->cid, ncb->ttag, ncb->data_offset,
1332*ec0cd287SJohn Baldwin 		    ncb->data_len);
1333*ec0cd287SJohn Baldwin }
1334*ec0cd287SJohn Baldwin 
1335*ec0cd287SJohn Baldwin /*
1336*ec0cd287SJohn Baldwin  * Copy len bytes starting at offset skip from an mbuf chain into an
1337*ec0cd287SJohn Baldwin  * I/O buffer at destination offset io_offset.
1338*ec0cd287SJohn Baldwin  */
1339*ec0cd287SJohn Baldwin static void
1340*ec0cd287SJohn Baldwin mbuf_copyto_io(struct mbuf *m, u_int skip, u_int len,
1341*ec0cd287SJohn Baldwin     struct nvmf_io_request *io, u_int io_offset)
1342*ec0cd287SJohn Baldwin {
1343*ec0cd287SJohn Baldwin 	u_int todo;
1344*ec0cd287SJohn Baldwin 
1345*ec0cd287SJohn Baldwin 	while (m->m_len <= skip) {
1346*ec0cd287SJohn Baldwin 		skip -= m->m_len;
1347*ec0cd287SJohn Baldwin 		m = m->m_next;
1348*ec0cd287SJohn Baldwin 	}
1349*ec0cd287SJohn Baldwin 	while (len != 0) {
1350*ec0cd287SJohn Baldwin 		MPASS((m->m_flags & M_EXTPG) == 0);
1351*ec0cd287SJohn Baldwin 
1352*ec0cd287SJohn Baldwin 		todo = min(m->m_len - skip, len);
1353*ec0cd287SJohn Baldwin 		memdesc_copyback(&io->io_mem, io_offset, todo, mtodo(m, skip));
1354*ec0cd287SJohn Baldwin 		skip = 0;
1355*ec0cd287SJohn Baldwin 		io_offset += todo;
1356*ec0cd287SJohn Baldwin 		len -= todo;
1357*ec0cd287SJohn Baldwin 		m = m->m_next;
1358*ec0cd287SJohn Baldwin 	}
1359*ec0cd287SJohn Baldwin }
1360*ec0cd287SJohn Baldwin 
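/*
 * Handle an H2C_DATA PDU on the controller side: look up the command
 * buffer for the transfer tag (freelist or DDP), validate the data
 * offset and length, copy freelist payloads into the I/O buffer, and
 * schedule the next R2T once the transfer is complete.  Data placed
 * directly by DDP is only accounted for here.
 */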
1361*ec0cd287SJohn Baldwin static int
1362*ec0cd287SJohn Baldwin nvmf_che_handle_h2c_data(struct nvmf_che_qpair *qp, struct nvmf_che_rxpdu *pdu)
1363*ec0cd287SJohn Baldwin {
1364*ec0cd287SJohn Baldwin 	const struct nvme_tcp_h2c_data_hdr *h2c;
1365*ec0cd287SJohn Baldwin 	struct nvmf_che_command_buffer *cb;
1366*ec0cd287SJohn Baldwin 	uint32_t data_len, data_offset;
1367*ec0cd287SJohn Baldwin 	uint16_t ttag, fl_ttag;
1368*ec0cd287SJohn Baldwin 
1369*ec0cd287SJohn Baldwin 	h2c = (const void *)pdu->hdr;
1370*ec0cd287SJohn Baldwin 	if (le32toh(h2c->datal) > qp->maxh2cdata) {
1371*ec0cd287SJohn Baldwin 		nvmf_che_report_error(qp,
1372*ec0cd287SJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_LIMIT_EXCEEDED, 0,
1373*ec0cd287SJohn Baldwin 		    pdu->m, pdu->hdr->hlen);
1374*ec0cd287SJohn Baldwin 		nvmf_che_free_pdu(pdu);
1375*ec0cd287SJohn Baldwin 		return (EBADMSG);
1376*ec0cd287SJohn Baldwin 	}
1377*ec0cd287SJohn Baldwin 
1378*ec0cd287SJohn Baldwin 	ttag = le16toh(h2c->ttag);
1379*ec0cd287SJohn Baldwin 	if (CHE_TAG_IS_FL(ttag)) {
1380*ec0cd287SJohn Baldwin 		fl_ttag = CHE_RAW_FL_TAG(ttag);
1381*ec0cd287SJohn Baldwin 		if (fl_ttag >= qp->num_fl_ttags) {
1382*ec0cd287SJohn Baldwin 			nvmf_che_report_error(qp,
1383*ec0cd287SJohn Baldwin 			    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD,
1384*ec0cd287SJohn Baldwin 			    offsetof(struct nvme_tcp_h2c_data_hdr, ttag),
1385*ec0cd287SJohn Baldwin 			    pdu->m, pdu->hdr->hlen);
1386*ec0cd287SJohn Baldwin 			nvmf_che_free_pdu(pdu);
1387*ec0cd287SJohn Baldwin 			return (EBADMSG);
1388*ec0cd287SJohn Baldwin 		}
1389*ec0cd287SJohn Baldwin 
1390*ec0cd287SJohn Baldwin 		mtx_lock(&qp->rx_buffers.lock);
1391*ec0cd287SJohn Baldwin 		cb = qp->open_fl_ttags[fl_ttag];
1392*ec0cd287SJohn Baldwin 	} else {
1393*ec0cd287SJohn Baldwin 		if (CHE_STAG_IDX(ttag) >= qp->num_ddp_tags) {
1394*ec0cd287SJohn Baldwin 			nvmf_che_report_error(qp,
1395*ec0cd287SJohn Baldwin 			    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD,
1396*ec0cd287SJohn Baldwin 			    offsetof(struct nvme_tcp_h2c_data_hdr, ttag),
1397*ec0cd287SJohn Baldwin 			    pdu->m, pdu->hdr->hlen);
1398*ec0cd287SJohn Baldwin 			nvmf_che_free_pdu(pdu);
1399*ec0cd287SJohn Baldwin 			return (EBADMSG);
1400*ec0cd287SJohn Baldwin 		}
1401*ec0cd287SJohn Baldwin 
1402*ec0cd287SJohn Baldwin 		mtx_lock(&qp->rx_buffers.lock);
1403*ec0cd287SJohn Baldwin 		cb = qp->open_ddp_tags[CHE_STAG_IDX(ttag)];
1404*ec0cd287SJohn Baldwin 	}
1405*ec0cd287SJohn Baldwin 
1406*ec0cd287SJohn Baldwin 	if (cb == NULL) {
1407*ec0cd287SJohn Baldwin 		mtx_unlock(&qp->rx_buffers.lock);
1408*ec0cd287SJohn Baldwin 		nvmf_che_report_error(qp,
1409*ec0cd287SJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD,
1410*ec0cd287SJohn Baldwin 		    offsetof(struct nvme_tcp_h2c_data_hdr, ttag), pdu->m,
1411*ec0cd287SJohn Baldwin 		    pdu->hdr->hlen);
1412*ec0cd287SJohn Baldwin 		nvmf_che_free_pdu(pdu);
1413*ec0cd287SJohn Baldwin 		return (EBADMSG);
1414*ec0cd287SJohn Baldwin 	}
1415*ec0cd287SJohn Baldwin 	MPASS(cb->ttag == ttag);
1416*ec0cd287SJohn Baldwin 
1417*ec0cd287SJohn Baldwin 	/* For a data digest mismatch, fail the I/O request. */
1418*ec0cd287SJohn Baldwin 	if (pdu->data_digest_mismatch) {
1419*ec0cd287SJohn Baldwin 		nvmf_che_send_next_r2t(qp, cb);
1420*ec0cd287SJohn Baldwin 		cb->error = EINTEGRITY;
1421*ec0cd287SJohn Baldwin 		che_release_command_buffer(cb);
1422*ec0cd287SJohn Baldwin 		nvmf_che_free_pdu(pdu);
1423*ec0cd287SJohn Baldwin 		return (0);
1424*ec0cd287SJohn Baldwin 	}
1425*ec0cd287SJohn Baldwin 
1426*ec0cd287SJohn Baldwin 	data_len = le32toh(h2c->datal);
1427*ec0cd287SJohn Baldwin 	if (data_len != pdu->data_len) {
1428*ec0cd287SJohn Baldwin 		mtx_unlock(&qp->rx_buffers.lock);
1429*ec0cd287SJohn Baldwin 		nvmf_che_report_error(qp,
1430*ec0cd287SJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD,
1431*ec0cd287SJohn Baldwin 		    offsetof(struct nvme_tcp_h2c_data_hdr, datal), pdu->m,
1432*ec0cd287SJohn Baldwin 		    pdu->hdr->hlen);
1433*ec0cd287SJohn Baldwin 		nvmf_che_free_pdu(pdu);
1434*ec0cd287SJohn Baldwin 		return (EBADMSG);
1435*ec0cd287SJohn Baldwin 	}
1436*ec0cd287SJohn Baldwin 
1437*ec0cd287SJohn Baldwin 	data_offset = le32toh(h2c->datao);
1438*ec0cd287SJohn Baldwin 	if (data_offset < cb->data_offset ||
1439*ec0cd287SJohn Baldwin 	    data_offset + data_len > cb->data_offset + cb->data_len) {
1440*ec0cd287SJohn Baldwin 		mtx_unlock(&qp->rx_buffers.lock);
1441*ec0cd287SJohn Baldwin 		nvmf_che_report_error(qp,
1442*ec0cd287SJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE, 0, pdu->m,
1443*ec0cd287SJohn Baldwin 		    pdu->hdr->hlen);
1444*ec0cd287SJohn Baldwin 		nvmf_che_free_pdu(pdu);
1445*ec0cd287SJohn Baldwin 		return (EBADMSG);
1446*ec0cd287SJohn Baldwin 	}
1447*ec0cd287SJohn Baldwin 
1448*ec0cd287SJohn Baldwin 	if (data_offset != cb->data_offset + cb->data_xfered) {
1449*ec0cd287SJohn Baldwin 		if (CHE_TAG_IS_FL(ttag)) {
1450*ec0cd287SJohn Baldwin 			mtx_unlock(&qp->rx_buffers.lock);
1451*ec0cd287SJohn Baldwin 			nvmf_che_report_error(qp,
1452*ec0cd287SJohn Baldwin 			    NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->m,
1453*ec0cd287SJohn Baldwin 			    pdu->hdr->hlen);
1454*ec0cd287SJohn Baldwin 			nvmf_che_free_pdu(pdu);
1455*ec0cd287SJohn Baldwin 			return (EBADMSG);
1456*ec0cd287SJohn Baldwin 		} else {
1457*ec0cd287SJohn Baldwin 			uint32_t ddp_bytes;
1458*ec0cd287SJohn Baldwin 
1459*ec0cd287SJohn Baldwin 			/* Account for PDUs silently received via DDP. */
1460*ec0cd287SJohn Baldwin 			ddp_bytes = data_offset -
1461*ec0cd287SJohn Baldwin 			    (cb->data_offset + cb->data_xfered);
1462*ec0cd287SJohn Baldwin 			cb->data_xfered += ddp_bytes;
1463*ec0cd287SJohn Baldwin #ifdef VERBOSE_TRACES
1464*ec0cd287SJohn Baldwin 			CTR(KTR_CXGBE, "%s: tid %u previous ddp_bytes %u",
1465*ec0cd287SJohn Baldwin 			    __func__, qp->toep->tid, ddp_bytes);
1466*ec0cd287SJohn Baldwin #endif
1467*ec0cd287SJohn Baldwin 			counter_u64_add(qp->toep->ofld_rxq->rx_nvme_ddp_octets,
1468*ec0cd287SJohn Baldwin 			    ddp_bytes);
1469*ec0cd287SJohn Baldwin 		}
1470*ec0cd287SJohn Baldwin 	}
1471*ec0cd287SJohn Baldwin 
1472*ec0cd287SJohn Baldwin 	if ((cb->data_xfered + data_len == cb->data_len) !=
1473*ec0cd287SJohn Baldwin 	    ((pdu->hdr->flags & NVME_TCP_H2C_DATA_FLAGS_LAST_PDU) != 0)) {
1474*ec0cd287SJohn Baldwin 		mtx_unlock(&qp->rx_buffers.lock);
1475*ec0cd287SJohn Baldwin 		nvmf_che_report_error(qp,
1476*ec0cd287SJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->m,
1477*ec0cd287SJohn Baldwin 		    pdu->hdr->hlen);
1478*ec0cd287SJohn Baldwin 		nvmf_che_free_pdu(pdu);
1479*ec0cd287SJohn Baldwin 		return (EBADMSG);
1480*ec0cd287SJohn Baldwin 	}
1481*ec0cd287SJohn Baldwin 
1482*ec0cd287SJohn Baldwin 	cb->data_xfered += data_len;
1483*ec0cd287SJohn Baldwin 	data_offset -= cb->data_offset;
1484*ec0cd287SJohn Baldwin 	if (cb->data_xfered == cb->data_len) {
1485*ec0cd287SJohn Baldwin 		nvmf_che_send_next_r2t(qp, cb);
1486*ec0cd287SJohn Baldwin 	} else {
1487*ec0cd287SJohn Baldwin 		che_hold_command_buffer(cb);
1488*ec0cd287SJohn Baldwin 		mtx_unlock(&qp->rx_buffers.lock);
1489*ec0cd287SJohn Baldwin 	}
1490*ec0cd287SJohn Baldwin 
1491*ec0cd287SJohn Baldwin 	if (CHE_TAG_IS_FL(ttag))
1492*ec0cd287SJohn Baldwin 		mbuf_copyto_io(pdu->m->m_next, 0, data_len, &cb->io,
1493*ec0cd287SJohn Baldwin 		    data_offset);
1494*ec0cd287SJohn Baldwin 
1495*ec0cd287SJohn Baldwin 	che_release_command_buffer(cb);
1496*ec0cd287SJohn Baldwin 	nvmf_che_free_pdu(pdu);
1497*ec0cd287SJohn Baldwin 	return (0);
1498*ec0cd287SJohn Baldwin }
1499*ec0cd287SJohn Baldwin 
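/*
 * Handle a C2H_DATA PDU on the host side: locate the command buffer
 * by CCCID, validate the data offset and length, copy freelist
 * payloads into the I/O buffer (DDP payloads are already in place),
 * and synthesize a completion if the SUCCESS flag is set.
 */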
1500*ec0cd287SJohn Baldwin static int
1501*ec0cd287SJohn Baldwin nvmf_che_handle_c2h_data(struct nvmf_che_qpair *qp, struct nvmf_che_rxpdu *pdu)
1502*ec0cd287SJohn Baldwin {
1503*ec0cd287SJohn Baldwin 	const struct nvme_tcp_c2h_data_hdr *c2h;
1504*ec0cd287SJohn Baldwin 	struct nvmf_che_command_buffer *cb;
1505*ec0cd287SJohn Baldwin 	uint32_t data_len, data_offset;
1506*ec0cd287SJohn Baldwin 	uint16_t cid, original_cid;
1507*ec0cd287SJohn Baldwin 
1508*ec0cd287SJohn Baldwin 	/*
1509*ec0cd287SJohn Baldwin 	 * Unlike freelist command buffers, DDP command buffers are
1510*ec0cd287SJohn Baldwin 	 * not released until the response capsule is received to keep
1511*ec0cd287SJohn Baldwin 	 * the STAG allocated until the command has completed.
1512*ec0cd287SJohn Baldwin 	 */
1513*ec0cd287SJohn Baldwin 	c2h = (const void *)pdu->hdr;
1514*ec0cd287SJohn Baldwin 
1515*ec0cd287SJohn Baldwin 	cid = le16toh(c2h->cccid);
1516*ec0cd287SJohn Baldwin 	if (CHE_TAG_IS_FL(cid)) {
1517*ec0cd287SJohn Baldwin 		mtx_lock(&qp->rx_buffers.lock);
1518*ec0cd287SJohn Baldwin 		cb = che_find_command_buffer(&qp->rx_buffers, c2h->cccid);
1519*ec0cd287SJohn Baldwin 	} else {
1520*ec0cd287SJohn Baldwin 		if (CHE_STAG_IDX(cid) >= qp->num_ddp_tags) {
1521*ec0cd287SJohn Baldwin 			nvmf_che_report_error(qp,
1522*ec0cd287SJohn Baldwin 			    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD,
1523*ec0cd287SJohn Baldwin 			    offsetof(struct nvme_tcp_c2h_data_hdr, cccid),
1524*ec0cd287SJohn Baldwin 			    pdu->m, pdu->hdr->hlen);
1525*ec0cd287SJohn Baldwin 			nvmf_che_free_pdu(pdu);
1526*ec0cd287SJohn Baldwin 			return (EBADMSG);
1527*ec0cd287SJohn Baldwin 		}
1528*ec0cd287SJohn Baldwin 
1529*ec0cd287SJohn Baldwin 		mtx_lock(&qp->rx_buffers.lock);
1530*ec0cd287SJohn Baldwin 		cb = qp->open_ddp_tags[CHE_STAG_IDX(cid)];
1531*ec0cd287SJohn Baldwin 	}
1532*ec0cd287SJohn Baldwin 
1533*ec0cd287SJohn Baldwin 	if (cb == NULL) {
1534*ec0cd287SJohn Baldwin 		mtx_unlock(&qp->rx_buffers.lock);
1535*ec0cd287SJohn Baldwin 		/*
1536*ec0cd287SJohn Baldwin 		 * XXX: Could be PDU sequence error if cccid is for a
1537*ec0cd287SJohn Baldwin 		 * command that doesn't use a command buffer.
1538*ec0cd287SJohn Baldwin 		 */
1539*ec0cd287SJohn Baldwin 		nvmf_che_report_error(qp,
1540*ec0cd287SJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD,
1541*ec0cd287SJohn Baldwin 		    offsetof(struct nvme_tcp_c2h_data_hdr, cccid), pdu->m,
1542*ec0cd287SJohn Baldwin 		    pdu->hdr->hlen);
1543*ec0cd287SJohn Baldwin 		nvmf_che_free_pdu(pdu);
1544*ec0cd287SJohn Baldwin 		return (EBADMSG);
1545*ec0cd287SJohn Baldwin 	}
1546*ec0cd287SJohn Baldwin 
1547*ec0cd287SJohn Baldwin 	/* For a data digest mismatch, fail the I/O request. */
1548*ec0cd287SJohn Baldwin 	if (pdu->data_digest_mismatch) {
1549*ec0cd287SJohn Baldwin 		cb->error = EINTEGRITY;
1550*ec0cd287SJohn Baldwin 		if (CHE_TAG_IS_FL(cid)) {
1551*ec0cd287SJohn Baldwin 			che_remove_command_buffer(&qp->rx_buffers, cb);
1552*ec0cd287SJohn Baldwin 			mtx_unlock(&qp->rx_buffers.lock);
1553*ec0cd287SJohn Baldwin 			che_release_command_buffer(cb);
1554*ec0cd287SJohn Baldwin 		} else
1555*ec0cd287SJohn Baldwin 			mtx_unlock(&qp->rx_buffers.lock);
1556*ec0cd287SJohn Baldwin 		nvmf_che_free_pdu(pdu);
1557*ec0cd287SJohn Baldwin 		return (0);
1558*ec0cd287SJohn Baldwin 	}
1559*ec0cd287SJohn Baldwin 
1560*ec0cd287SJohn Baldwin 	data_len = le32toh(c2h->datal);
1561*ec0cd287SJohn Baldwin 	if (data_len != pdu->data_len) {
1562*ec0cd287SJohn Baldwin 		mtx_unlock(&qp->rx_buffers.lock);
1563*ec0cd287SJohn Baldwin 		nvmf_che_report_error(qp,
1564*ec0cd287SJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD,
1565*ec0cd287SJohn Baldwin 		    offsetof(struct nvme_tcp_c2h_data_hdr, datal), pdu->m,
1566*ec0cd287SJohn Baldwin 		    pdu->hdr->hlen);
1567*ec0cd287SJohn Baldwin 		nvmf_che_free_pdu(pdu);
1568*ec0cd287SJohn Baldwin 		return (EBADMSG);
1569*ec0cd287SJohn Baldwin 	}
1570*ec0cd287SJohn Baldwin 
1571*ec0cd287SJohn Baldwin 	data_offset = le32toh(c2h->datao);
1572*ec0cd287SJohn Baldwin 	if (data_offset < cb->data_offset ||
1573*ec0cd287SJohn Baldwin 	    data_offset + data_len > cb->data_offset + cb->data_len) {
1574*ec0cd287SJohn Baldwin 		mtx_unlock(&qp->rx_buffers.lock);
1575*ec0cd287SJohn Baldwin 		nvmf_che_report_error(qp,
1576*ec0cd287SJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE, 0,
1577*ec0cd287SJohn Baldwin 		    pdu->m, pdu->hdr->hlen);
1578*ec0cd287SJohn Baldwin 		nvmf_che_free_pdu(pdu);
1579*ec0cd287SJohn Baldwin 		return (EBADMSG);
1580*ec0cd287SJohn Baldwin 	}
1581*ec0cd287SJohn Baldwin 
1582*ec0cd287SJohn Baldwin 	if (data_offset != cb->data_offset + cb->data_xfered) {
1583*ec0cd287SJohn Baldwin 		if (CHE_TAG_IS_FL(cid)) {
1584*ec0cd287SJohn Baldwin 			mtx_unlock(&qp->rx_buffers.lock);
1585*ec0cd287SJohn Baldwin 			nvmf_che_report_error(qp,
1586*ec0cd287SJohn Baldwin 			    NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->m,
1587*ec0cd287SJohn Baldwin 			    pdu->hdr->hlen);
1588*ec0cd287SJohn Baldwin 			nvmf_che_free_pdu(pdu);
1589*ec0cd287SJohn Baldwin 			return (EBADMSG);
1590*ec0cd287SJohn Baldwin 		} else {
1591*ec0cd287SJohn Baldwin 			uint32_t ddp_bytes;
1592*ec0cd287SJohn Baldwin 
1593*ec0cd287SJohn Baldwin 			/* Account for PDUs silently received via DDP. */
1594*ec0cd287SJohn Baldwin 			ddp_bytes = data_offset -
1595*ec0cd287SJohn Baldwin 			    (cb->data_offset + cb->data_xfered);
1596*ec0cd287SJohn Baldwin 			cb->data_xfered += ddp_bytes;
1597*ec0cd287SJohn Baldwin #ifdef VERBOSE_TRACES
1598*ec0cd287SJohn Baldwin 			CTR(KTR_CXGBE, "%s: tid %u previous ddp_bytes %u",
1599*ec0cd287SJohn Baldwin 			    __func__, qp->toep->tid, ddp_bytes);
1600*ec0cd287SJohn Baldwin #endif
1601*ec0cd287SJohn Baldwin 			counter_u64_add(qp->toep->ofld_rxq->rx_nvme_ddp_octets,
1602*ec0cd287SJohn Baldwin 			    ddp_bytes);
1603*ec0cd287SJohn Baldwin 		}
1604*ec0cd287SJohn Baldwin 	}
1605*ec0cd287SJohn Baldwin 
1606*ec0cd287SJohn Baldwin 	if ((cb->data_xfered + data_len == cb->data_len) !=
1607*ec0cd287SJohn Baldwin 	    ((pdu->hdr->flags & NVME_TCP_C2H_DATA_FLAGS_LAST_PDU) != 0)) {
1608*ec0cd287SJohn Baldwin 		mtx_unlock(&qp->rx_buffers.lock);
1609*ec0cd287SJohn Baldwin 		nvmf_che_report_error(qp,
1610*ec0cd287SJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->m,
1611*ec0cd287SJohn Baldwin 		    pdu->hdr->hlen);
1612*ec0cd287SJohn Baldwin 		nvmf_che_free_pdu(pdu);
1613*ec0cd287SJohn Baldwin 		return (EBADMSG);
1614*ec0cd287SJohn Baldwin 	}
1615*ec0cd287SJohn Baldwin 
1616*ec0cd287SJohn Baldwin 	cb->data_xfered += data_len;
1617*ec0cd287SJohn Baldwin 	original_cid = cb->original_cid;
1618*ec0cd287SJohn Baldwin 
1619*ec0cd287SJohn Baldwin 	if (CHE_TAG_IS_FL(cid)) {
1620*ec0cd287SJohn Baldwin 		data_offset -= cb->data_offset;
1621*ec0cd287SJohn Baldwin 		if (cb->data_xfered == cb->data_len)
1622*ec0cd287SJohn Baldwin 			che_remove_command_buffer(&qp->rx_buffers, cb);
1623*ec0cd287SJohn Baldwin 		else
1624*ec0cd287SJohn Baldwin 			che_hold_command_buffer(cb);
1625*ec0cd287SJohn Baldwin 		mtx_unlock(&qp->rx_buffers.lock);
1626*ec0cd287SJohn Baldwin 
1627*ec0cd287SJohn Baldwin 		if ((pdu->hdr->flags & NVME_TCP_C2H_DATA_FLAGS_SUCCESS) != 0) {
1628*ec0cd287SJohn Baldwin 			/*
1629*ec0cd287SJohn Baldwin 			 * Free the CID as the command has now been
1630*ec0cd287SJohn Baldwin 			 * completed.
1631*ec0cd287SJohn Baldwin 			 */
1632*ec0cd287SJohn Baldwin 			cid = CHE_RAW_FL_TAG(cid);
1633*ec0cd287SJohn Baldwin 			mtx_lock(&qp->fl_cid_lock);
1634*ec0cd287SJohn Baldwin 			MPASS(FL_CID_ISACTIVE(cid, qp->fl_cid_set));
1635*ec0cd287SJohn Baldwin 			MPASS(original_cid == qp->fl_cids[cid]);
1636*ec0cd287SJohn Baldwin 			FL_CID_FREE(cid, qp->fl_cid_set);
1637*ec0cd287SJohn Baldwin 			mtx_unlock(&qp->fl_cid_lock);
1638*ec0cd287SJohn Baldwin 		}
1639*ec0cd287SJohn Baldwin 
1640*ec0cd287SJohn Baldwin 		mbuf_copyto_io(pdu->m->m_next, 0, data_len, &cb->io,
1641*ec0cd287SJohn Baldwin 		    data_offset);
1642*ec0cd287SJohn Baldwin 
1643*ec0cd287SJohn Baldwin 		che_release_command_buffer(cb);
1644*ec0cd287SJohn Baldwin 	} else {
1645*ec0cd287SJohn Baldwin 		if ((pdu->hdr->flags & NVME_TCP_C2H_DATA_FLAGS_SUCCESS) != 0) {
1646*ec0cd287SJohn Baldwin 			/*
1647*ec0cd287SJohn Baldwin 			 * Free the command buffer and STAG as the
1648*ec0cd287SJohn Baldwin 			 * command has now been completed.
1649*ec0cd287SJohn Baldwin 			 */
1650*ec0cd287SJohn Baldwin 			che_free_ddp_tag(qp, cb, cid);
1651*ec0cd287SJohn Baldwin 			mtx_unlock(&qp->rx_buffers.lock);
1652*ec0cd287SJohn Baldwin 			che_release_command_buffer(cb);
1653*ec0cd287SJohn Baldwin 		} else
1654*ec0cd287SJohn Baldwin 			mtx_unlock(&qp->rx_buffers.lock);
1655*ec0cd287SJohn Baldwin 	}
1656*ec0cd287SJohn Baldwin 
1657*ec0cd287SJohn Baldwin 	if ((pdu->hdr->flags & NVME_TCP_C2H_DATA_FLAGS_SUCCESS) != 0) {
1658*ec0cd287SJohn Baldwin 		struct nvme_completion cqe;
1659*ec0cd287SJohn Baldwin 		struct nvmf_capsule *nc;
1660*ec0cd287SJohn Baldwin 
1661*ec0cd287SJohn Baldwin 		memset(&cqe, 0, sizeof(cqe));
1662*ec0cd287SJohn Baldwin 		cqe.cid = original_cid;
1663*ec0cd287SJohn Baldwin 
1664*ec0cd287SJohn Baldwin 		nc = nvmf_allocate_response(&qp->qp, &cqe, M_WAITOK);
1665*ec0cd287SJohn Baldwin 		nc->nc_sqhd_valid = false;
1666*ec0cd287SJohn Baldwin 
1667*ec0cd287SJohn Baldwin 		nvmf_capsule_received(&qp->qp, nc);
1668*ec0cd287SJohn Baldwin 	}
1669*ec0cd287SJohn Baldwin 
1670*ec0cd287SJohn Baldwin 	nvmf_che_free_pdu(pdu);
1671*ec0cd287SJohn Baldwin 	return (0);
1672*ec0cd287SJohn Baldwin }
1673*ec0cd287SJohn Baldwin 
1674*ec0cd287SJohn Baldwin /* Called when m_free drops refcount to 0. */
1675*ec0cd287SJohn Baldwin static void
1676*ec0cd287SJohn Baldwin nvmf_che_mbuf_done(struct mbuf *m)
1677*ec0cd287SJohn Baldwin {
1678*ec0cd287SJohn Baldwin 	struct nvmf_che_command_buffer *cb = m->m_ext.ext_arg1;
1679*ec0cd287SJohn Baldwin 
1680*ec0cd287SJohn Baldwin 	che_free_command_buffer(cb);
1681*ec0cd287SJohn Baldwin }
1682*ec0cd287SJohn Baldwin 
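/*
 * Constructor callback for memdesc_alloc_ext_mbufs(): wrap a range of
 * command buffer memory in a read-only external mbuf that holds a
 * reference on the command buffer.
 */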
1683*ec0cd287SJohn Baldwin static struct mbuf *
1684*ec0cd287SJohn Baldwin nvmf_che_mbuf(void *arg, int how, void *data, size_t len)
1685*ec0cd287SJohn Baldwin {
1686*ec0cd287SJohn Baldwin 	struct nvmf_che_command_buffer *cb = arg;
1687*ec0cd287SJohn Baldwin 	struct mbuf *m;
1688*ec0cd287SJohn Baldwin 
1689*ec0cd287SJohn Baldwin 	m = m_get(how, MT_DATA);
1690*ec0cd287SJohn Baldwin 	m->m_flags |= M_RDONLY;
1691*ec0cd287SJohn Baldwin 	m_extaddref(m, data, len, &cb->refs, nvmf_che_mbuf_done, cb, NULL);
1692*ec0cd287SJohn Baldwin 	m->m_len = len;
1693*ec0cd287SJohn Baldwin 	return (m);
1694*ec0cd287SJohn Baldwin }
1695*ec0cd287SJohn Baldwin 
1696*ec0cd287SJohn Baldwin static void
1697*ec0cd287SJohn Baldwin nvmf_che_free_mext_pg(struct mbuf *m)
1698*ec0cd287SJohn Baldwin {
1699*ec0cd287SJohn Baldwin 	struct nvmf_che_command_buffer *cb = m->m_ext.ext_arg1;
1700*ec0cd287SJohn Baldwin 
1701*ec0cd287SJohn Baldwin 	M_ASSERTEXTPG(m);
1702*ec0cd287SJohn Baldwin 	che_release_command_buffer(cb);
1703*ec0cd287SJohn Baldwin }
1704*ec0cd287SJohn Baldwin 
1705*ec0cd287SJohn Baldwin static struct mbuf *
1706*ec0cd287SJohn Baldwin nvmf_che_mext_pg(void *arg, int how)
1707*ec0cd287SJohn Baldwin {
1708*ec0cd287SJohn Baldwin 	struct nvmf_che_command_buffer *cb = arg;
1709*ec0cd287SJohn Baldwin 	struct mbuf *m;
1710*ec0cd287SJohn Baldwin 
1711*ec0cd287SJohn Baldwin 	m = mb_alloc_ext_pgs(how, nvmf_che_free_mext_pg, M_RDONLY);
1712*ec0cd287SJohn Baldwin 	m->m_ext.ext_arg1 = cb;
1713*ec0cd287SJohn Baldwin 	che_hold_command_buffer(cb);
1714*ec0cd287SJohn Baldwin 	return (m);
1715*ec0cd287SJohn Baldwin }
1716*ec0cd287SJohn Baldwin 
1717*ec0cd287SJohn Baldwin /*
1718*ec0cd287SJohn Baldwin  * Return an mbuf chain for a range of data belonging to a command
1719*ec0cd287SJohn Baldwin  * buffer.
1720*ec0cd287SJohn Baldwin  *
1721*ec0cd287SJohn Baldwin  * The mbuf chain uses M_EXT mbufs which hold references on the
1722*ec0cd287SJohn Baldwin  * command buffer so that it remains "alive" until the data has been
1723*ec0cd287SJohn Baldwin  * fully transmitted.  If can_truncate is true, then this function
1724*ec0cd287SJohn Baldwin  * might return a short chain to avoid gratuitously splitting up a
1725*ec0cd287SJohn Baldwin  * page.
1726*ec0cd287SJohn Baldwin  */
1727*ec0cd287SJohn Baldwin static struct mbuf *
1728*ec0cd287SJohn Baldwin nvmf_che_command_buffer_mbuf(struct nvmf_che_command_buffer *cb,
1729*ec0cd287SJohn Baldwin     uint32_t data_offset, uint32_t data_len, uint32_t *actual_len,
1730*ec0cd287SJohn Baldwin     bool can_truncate)
1731*ec0cd287SJohn Baldwin {
1732*ec0cd287SJohn Baldwin 	struct mbuf *m;
1733*ec0cd287SJohn Baldwin 	size_t len;
1734*ec0cd287SJohn Baldwin 
1735*ec0cd287SJohn Baldwin 	m = memdesc_alloc_ext_mbufs(&cb->io.io_mem, nvmf_che_mbuf,
1736*ec0cd287SJohn Baldwin 	    nvmf_che_mext_pg, cb, M_WAITOK, data_offset, data_len, &len,
1737*ec0cd287SJohn Baldwin 	    can_truncate);
1738*ec0cd287SJohn Baldwin 	if (actual_len != NULL)
1739*ec0cd287SJohn Baldwin 		*actual_len = len;
1740*ec0cd287SJohn Baldwin 	return (m);
1741*ec0cd287SJohn Baldwin }
1742*ec0cd287SJohn Baldwin 
1743*ec0cd287SJohn Baldwin /* NB: cid and ttag are little-endian already. */
1744*ec0cd287SJohn Baldwin static void
1745*ec0cd287SJohn Baldwin che_send_h2c_pdu(struct nvmf_che_qpair *qp, uint16_t cid, uint16_t ttag,
1746*ec0cd287SJohn Baldwin     uint32_t data_offset, struct mbuf *m, size_t len, bool last_pdu)
1747*ec0cd287SJohn Baldwin {
1748*ec0cd287SJohn Baldwin 	struct nvme_tcp_h2c_data_hdr h2c;
1749*ec0cd287SJohn Baldwin 	struct mbuf *top;
1750*ec0cd287SJohn Baldwin 
1751*ec0cd287SJohn Baldwin 	memset(&h2c, 0, sizeof(h2c));
1752*ec0cd287SJohn Baldwin 	h2c.common.pdu_type = NVME_TCP_PDU_TYPE_H2C_DATA;
1753*ec0cd287SJohn Baldwin 	if (last_pdu)
1754*ec0cd287SJohn Baldwin 		h2c.common.flags |= NVME_TCP_H2C_DATA_FLAGS_LAST_PDU;
1755*ec0cd287SJohn Baldwin 	h2c.cccid = cid;
1756*ec0cd287SJohn Baldwin 	h2c.ttag = ttag;
1757*ec0cd287SJohn Baldwin 	h2c.datao = htole32(data_offset);
1758*ec0cd287SJohn Baldwin 	h2c.datal = htole32(len);
1759*ec0cd287SJohn Baldwin 
1760*ec0cd287SJohn Baldwin 	top = nvmf_che_construct_pdu(qp, &h2c, sizeof(h2c), m, len);
1761*ec0cd287SJohn Baldwin 	nvmf_che_write_pdu(qp, top);
1762*ec0cd287SJohn Baldwin }
1763*ec0cd287SJohn Baldwin 
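/*
 * Handle an R2T PDU on the host side: find the pending transmit
 * command buffer by CCCID, validate the requested offset and length,
 * and send the data as one or more H2C_DATA PDUs bounded by
 * max_tx_data.
 */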
1764*ec0cd287SJohn Baldwin static int
1765*ec0cd287SJohn Baldwin nvmf_che_handle_r2t(struct nvmf_che_qpair *qp, struct nvmf_che_rxpdu *pdu)
1766*ec0cd287SJohn Baldwin {
1767*ec0cd287SJohn Baldwin 	const struct nvme_tcp_r2t_hdr *r2t;
1768*ec0cd287SJohn Baldwin 	struct nvmf_che_command_buffer *cb;
1769*ec0cd287SJohn Baldwin 	uint32_t data_len, data_offset;
1770*ec0cd287SJohn Baldwin 
1771*ec0cd287SJohn Baldwin 	r2t = (const void *)pdu->hdr;
1772*ec0cd287SJohn Baldwin 
1773*ec0cd287SJohn Baldwin 	mtx_lock(&qp->tx_buffers.lock);
1774*ec0cd287SJohn Baldwin 	cb = che_find_command_buffer(&qp->tx_buffers, r2t->cccid);
1775*ec0cd287SJohn Baldwin 	if (cb == NULL) {
1776*ec0cd287SJohn Baldwin 		mtx_unlock(&qp->tx_buffers.lock);
1777*ec0cd287SJohn Baldwin 		nvmf_che_report_error(qp,
1778*ec0cd287SJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD,
1779*ec0cd287SJohn Baldwin 		    offsetof(struct nvme_tcp_r2t_hdr, cccid), pdu->m,
1780*ec0cd287SJohn Baldwin 		    pdu->hdr->hlen);
1781*ec0cd287SJohn Baldwin 		nvmf_che_free_pdu(pdu);
1782*ec0cd287SJohn Baldwin 		return (EBADMSG);
1783*ec0cd287SJohn Baldwin 	}
1784*ec0cd287SJohn Baldwin 
1785*ec0cd287SJohn Baldwin 	data_offset = le32toh(r2t->r2to);
1786*ec0cd287SJohn Baldwin 	if (data_offset != cb->data_xfered) {
1787*ec0cd287SJohn Baldwin 		mtx_unlock(&qp->tx_buffers.lock);
1788*ec0cd287SJohn Baldwin 		nvmf_che_report_error(qp,
1789*ec0cd287SJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->m,
1790*ec0cd287SJohn Baldwin 		    pdu->hdr->hlen);
1791*ec0cd287SJohn Baldwin 		nvmf_che_free_pdu(pdu);
1792*ec0cd287SJohn Baldwin 		return (EBADMSG);
1793*ec0cd287SJohn Baldwin 	}
1794*ec0cd287SJohn Baldwin 
1795*ec0cd287SJohn Baldwin 	/*
1796*ec0cd287SJohn Baldwin 	 * XXX: The spec does not specify how to handle R2T transfers
1797*ec0cd287SJohn Baldwin 	 * out of range of the original command.
1798*ec0cd287SJohn Baldwin 	 */
1799*ec0cd287SJohn Baldwin 	data_len = le32toh(r2t->r2tl);
1800*ec0cd287SJohn Baldwin 	if (data_offset + data_len > cb->data_len) {
1801*ec0cd287SJohn Baldwin 		mtx_unlock(&qp->tx_buffers.lock);
1802*ec0cd287SJohn Baldwin 		nvmf_che_report_error(qp,
1803*ec0cd287SJohn Baldwin 		    NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE, 0,
1804*ec0cd287SJohn Baldwin 		    pdu->m, pdu->hdr->hlen);
1805*ec0cd287SJohn Baldwin 		nvmf_che_free_pdu(pdu);
1806*ec0cd287SJohn Baldwin 		return (EBADMSG);
1807*ec0cd287SJohn Baldwin 	}
1808*ec0cd287SJohn Baldwin 
1809*ec0cd287SJohn Baldwin 	cb->data_xfered += data_len;
1810*ec0cd287SJohn Baldwin 	if (cb->data_xfered == cb->data_len)
1811*ec0cd287SJohn Baldwin 		che_remove_command_buffer(&qp->tx_buffers, cb);
1812*ec0cd287SJohn Baldwin 	else
1813*ec0cd287SJohn Baldwin 		che_hold_command_buffer(cb);
1814*ec0cd287SJohn Baldwin 	mtx_unlock(&qp->tx_buffers.lock);
1815*ec0cd287SJohn Baldwin 
1816*ec0cd287SJohn Baldwin 	/*
1817*ec0cd287SJohn Baldwin 	 * Queue one or more H2C_DATA PDUs containing the requested
1818*ec0cd287SJohn Baldwin 	 * data.
1819*ec0cd287SJohn Baldwin 	 */
1820*ec0cd287SJohn Baldwin 	while (data_len > 0) {
1821*ec0cd287SJohn Baldwin 		struct mbuf *m;
1822*ec0cd287SJohn Baldwin 		uint32_t sent, todo;
1823*ec0cd287SJohn Baldwin 
1824*ec0cd287SJohn Baldwin 		todo = min(data_len, qp->max_tx_data);
1825*ec0cd287SJohn Baldwin 		m = nvmf_che_command_buffer_mbuf(cb, data_offset, todo, &sent,
1826*ec0cd287SJohn Baldwin 		    todo < data_len);
1827*ec0cd287SJohn Baldwin 		che_send_h2c_pdu(qp, r2t->cccid, r2t->ttag, data_offset, m,
1828*ec0cd287SJohn Baldwin 		    sent, sent == data_len);
1829*ec0cd287SJohn Baldwin 
1830*ec0cd287SJohn Baldwin 		data_offset += sent;
1831*ec0cd287SJohn Baldwin 		data_len -= sent;
1832*ec0cd287SJohn Baldwin 	}
1833*ec0cd287SJohn Baldwin 
1834*ec0cd287SJohn Baldwin 	che_release_command_buffer(cb);
1835*ec0cd287SJohn Baldwin 	nvmf_che_free_pdu(pdu);
1836*ec0cd287SJohn Baldwin 	return (0);
1837*ec0cd287SJohn Baldwin }
1838*ec0cd287SJohn Baldwin 
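/* Dispatch a validated PDU to the handler for its PDU type. */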
1839*ec0cd287SJohn Baldwin static int
1840*ec0cd287SJohn Baldwin nvmf_che_dispatch_pdu(struct nvmf_che_qpair *qp, struct nvmf_che_rxpdu *pdu)
1841*ec0cd287SJohn Baldwin {
1842*ec0cd287SJohn Baldwin 	/*
1843*ec0cd287SJohn Baldwin 	 * The PDU header should always be contiguous in the mbuf from
1844*ec0cd287SJohn Baldwin 	 * CPL_NVMT_CMP.
1845*ec0cd287SJohn Baldwin 	 */
1846*ec0cd287SJohn Baldwin 	pdu->hdr = mtod(pdu->m, void *);
1847*ec0cd287SJohn Baldwin 	KASSERT(pdu->m->m_len == pdu->hdr->hlen +
1848*ec0cd287SJohn Baldwin 	    ((pdu->hdr->flags & NVME_TCP_CH_FLAGS_HDGSTF) != 0 ?
1849*ec0cd287SJohn Baldwin 	    sizeof(uint32_t) : 0),
1850*ec0cd287SJohn Baldwin 	    ("%s: mismatched PDU header mbuf length", __func__));
1851*ec0cd287SJohn Baldwin 
1852*ec0cd287SJohn Baldwin 	switch (pdu->hdr->pdu_type) {
1853*ec0cd287SJohn Baldwin 	default:
1854*ec0cd287SJohn Baldwin 		__assert_unreachable();
1855*ec0cd287SJohn Baldwin 		break;
1856*ec0cd287SJohn Baldwin 	case NVME_TCP_PDU_TYPE_H2C_TERM_REQ:
1857*ec0cd287SJohn Baldwin 	case NVME_TCP_PDU_TYPE_C2H_TERM_REQ:
1858*ec0cd287SJohn Baldwin 		return (nvmf_che_handle_term_req(pdu));
1859*ec0cd287SJohn Baldwin 	case NVME_TCP_PDU_TYPE_CAPSULE_CMD:
1860*ec0cd287SJohn Baldwin 		return (nvmf_che_save_command_capsule(qp, pdu));
1861*ec0cd287SJohn Baldwin 	case NVME_TCP_PDU_TYPE_CAPSULE_RESP:
1862*ec0cd287SJohn Baldwin 		return (nvmf_che_save_response_capsule(qp, pdu));
1863*ec0cd287SJohn Baldwin 	case NVME_TCP_PDU_TYPE_H2C_DATA:
1864*ec0cd287SJohn Baldwin 		return (nvmf_che_handle_h2c_data(qp, pdu));
1865*ec0cd287SJohn Baldwin 	case NVME_TCP_PDU_TYPE_C2H_DATA:
1866*ec0cd287SJohn Baldwin 		return (nvmf_che_handle_c2h_data(qp, pdu));
1867*ec0cd287SJohn Baldwin 	case NVME_TCP_PDU_TYPE_R2T:
1868*ec0cd287SJohn Baldwin 		return (nvmf_che_handle_r2t(qp, pdu));
1869*ec0cd287SJohn Baldwin 	}
1870*ec0cd287SJohn Baldwin }
1871*ec0cd287SJohn Baldwin 
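/*
 * Attach the payload to a PDU header received via CPL_NVMT_CMP.  For
 * freelist PDUs the payload arrived separately via CPL_NVMT_DATA and
 * is matched up by TCP sequence number; DDP payloads were placed
 * directly in the host buffer, so only the counters are updated.
 */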
1872*ec0cd287SJohn Baldwin static int
1873*ec0cd287SJohn Baldwin nvmf_che_attach_pdu_data(struct nvmf_che_qpair *qp, struct nvmf_che_rxpdu *pdu)
1874*ec0cd287SJohn Baldwin {
1875*ec0cd287SJohn Baldwin 	struct socket *so = qp->so;
1876*ec0cd287SJohn Baldwin 	struct mbuf *m, *n;
1877*ec0cd287SJohn Baldwin 	uint32_t tcp_seq;
1878*ec0cd287SJohn Baldwin 	size_t len;
1879*ec0cd287SJohn Baldwin 	int error;
1880*ec0cd287SJohn Baldwin 
1881*ec0cd287SJohn Baldwin 	/* Check for DDP data. */
1882*ec0cd287SJohn Baldwin 	if (pdu->ddp) {
1883*ec0cd287SJohn Baldwin 		counter_u64_add(qp->toep->ofld_rxq->rx_nvme_ddp_pdus, 1);
1884*ec0cd287SJohn Baldwin 		counter_u64_add(qp->toep->ofld_rxq->rx_nvme_ddp_octets,
1885*ec0cd287SJohn Baldwin 		    pdu->data_len);
1886*ec0cd287SJohn Baldwin 		return (0);
1887*ec0cd287SJohn Baldwin 	}
1888*ec0cd287SJohn Baldwin 
1889*ec0cd287SJohn Baldwin 	error = 0;
1890*ec0cd287SJohn Baldwin 	len = pdu->data_len;
1891*ec0cd287SJohn Baldwin 	tcp_seq = pdu->m->m_pkthdr.nvmf_tcp_seq;
1892*ec0cd287SJohn Baldwin 	m = pdu->m;
1893*ec0cd287SJohn Baldwin 	SOCKBUF_LOCK(&so->so_rcv);
1894*ec0cd287SJohn Baldwin 	while (len > 0) {
1895*ec0cd287SJohn Baldwin 		n = mbufq_dequeue(&qp->rx_data);
1896*ec0cd287SJohn Baldwin 		KASSERT(n != NULL, ("%s: missing %zu data", __func__, len));
1897*ec0cd287SJohn Baldwin 		if (n == NULL) {
1898*ec0cd287SJohn Baldwin 			error = ENOBUFS;
1899*ec0cd287SJohn Baldwin 			break;
1900*ec0cd287SJohn Baldwin 		}
1901*ec0cd287SJohn Baldwin 
1902*ec0cd287SJohn Baldwin 		KASSERT(n->m_pkthdr.nvmf_tcp_seq == tcp_seq,
1903*ec0cd287SJohn Baldwin 		    ("%s: TCP seq mismatch", __func__));
1904*ec0cd287SJohn Baldwin 		KASSERT(n->m_pkthdr.len <= len,
1905*ec0cd287SJohn Baldwin 		    ("%s: too much data", __func__));
1906*ec0cd287SJohn Baldwin 		if (n->m_pkthdr.nvmf_tcp_seq != tcp_seq ||
1907*ec0cd287SJohn Baldwin 		    n->m_pkthdr.len > len) {
1908*ec0cd287SJohn Baldwin 			m_freem(n);
1909*ec0cd287SJohn Baldwin 			error = ENOBUFS;
1910*ec0cd287SJohn Baldwin 			break;
1911*ec0cd287SJohn Baldwin 		}
1912*ec0cd287SJohn Baldwin 
1913*ec0cd287SJohn Baldwin #ifdef VERBOSE_TRACES
1914*ec0cd287SJohn Baldwin 		CTR(KTR_CXGBE, "%s: tid %u len %d seq %u", __func__,
1915*ec0cd287SJohn Baldwin 		    qp->toep->tid, n->m_pkthdr.len, n->m_pkthdr.nvmf_tcp_seq);
1916*ec0cd287SJohn Baldwin #endif
1917*ec0cd287SJohn Baldwin 		pdu->m->m_pkthdr.len += n->m_pkthdr.len;
1918*ec0cd287SJohn Baldwin 		len -= n->m_pkthdr.len;
1919*ec0cd287SJohn Baldwin 		tcp_seq += n->m_pkthdr.len;
1920*ec0cd287SJohn Baldwin 		m_demote_pkthdr(n);
1921*ec0cd287SJohn Baldwin 		m->m_next = n;
1922*ec0cd287SJohn Baldwin 		m = m_last(n);
1923*ec0cd287SJohn Baldwin 	}
1924*ec0cd287SJohn Baldwin 	SOCKBUF_UNLOCK(&so->so_rcv);
1925*ec0cd287SJohn Baldwin 
1926*ec0cd287SJohn Baldwin 	if (error == 0) {
1927*ec0cd287SJohn Baldwin 		counter_u64_add(qp->toep->ofld_rxq->rx_nvme_fl_pdus, 1);
1928*ec0cd287SJohn Baldwin 		counter_u64_add(qp->toep->ofld_rxq->rx_nvme_fl_octets,
1929*ec0cd287SJohn Baldwin 		    pdu->data_len);
1930*ec0cd287SJohn Baldwin 	}
1931*ec0cd287SJohn Baldwin 	return (error);
1932*ec0cd287SJohn Baldwin }
1933*ec0cd287SJohn Baldwin 
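/*
 * Receive thread for a queue pair: wait for PDU headers queued by the
 * CPL handlers, validate each PDU, attach its payload, and dispatch
 * it.  Fatal errors are reported to the transport layer and the
 * thread then waits for the qpair to be shut down.
 */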
1934*ec0cd287SJohn Baldwin static void
1935*ec0cd287SJohn Baldwin nvmf_che_receive(void *arg)
1936*ec0cd287SJohn Baldwin {
1937*ec0cd287SJohn Baldwin 	struct nvmf_che_qpair *qp = arg;
1938*ec0cd287SJohn Baldwin 	struct socket *so = qp->so;
1939*ec0cd287SJohn Baldwin 	struct nvmf_che_rxpdu pdu;
1940*ec0cd287SJohn Baldwin 	struct mbuf *m;
1941*ec0cd287SJohn Baldwin 	int error, terror;
1942*ec0cd287SJohn Baldwin 
1943*ec0cd287SJohn Baldwin 	SOCKBUF_LOCK(&so->so_rcv);
1944*ec0cd287SJohn Baldwin 	while (!qp->rx_shutdown) {
1945*ec0cd287SJohn Baldwin 		/* Wait for a PDU. */
1946*ec0cd287SJohn Baldwin 		if (so->so_error != 0 || so->so_rerror != 0) {
1947*ec0cd287SJohn Baldwin 			if (so->so_error != 0)
1948*ec0cd287SJohn Baldwin 				error = so->so_error;
1949*ec0cd287SJohn Baldwin 			else
1950*ec0cd287SJohn Baldwin 				error = so->so_rerror;
1951*ec0cd287SJohn Baldwin 			SOCKBUF_UNLOCK(&so->so_rcv);
1952*ec0cd287SJohn Baldwin 		error:
1953*ec0cd287SJohn Baldwin 			nvmf_qpair_error(&qp->qp, error);
1954*ec0cd287SJohn Baldwin 			SOCKBUF_LOCK(&so->so_rcv);
1955*ec0cd287SJohn Baldwin 			while (!qp->rx_shutdown)
1956*ec0cd287SJohn Baldwin 				cv_wait(&qp->rx_cv, SOCKBUF_MTX(&so->so_rcv));
1957*ec0cd287SJohn Baldwin 			break;
1958*ec0cd287SJohn Baldwin 		}
1959*ec0cd287SJohn Baldwin 
1960*ec0cd287SJohn Baldwin 		m = mbufq_dequeue(&qp->rx_pdus);
1961*ec0cd287SJohn Baldwin 		if (m == NULL) {
1962*ec0cd287SJohn Baldwin 			if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) != 0) {
1963*ec0cd287SJohn Baldwin 				error = 0;
1964*ec0cd287SJohn Baldwin 				SOCKBUF_UNLOCK(&so->so_rcv);
1965*ec0cd287SJohn Baldwin 				goto error;
1966*ec0cd287SJohn Baldwin 			}
1967*ec0cd287SJohn Baldwin 			cv_wait(&qp->rx_cv, SOCKBUF_MTX(&so->so_rcv));
1968*ec0cd287SJohn Baldwin 			continue;
1969*ec0cd287SJohn Baldwin 		}
1970*ec0cd287SJohn Baldwin 		SOCKBUF_UNLOCK(&so->so_rcv);
1971*ec0cd287SJohn Baldwin 
1972*ec0cd287SJohn Baldwin 		pdu.m = m;
1973*ec0cd287SJohn Baldwin 		pdu.hdr = mtod(m, const void *);
1974*ec0cd287SJohn Baldwin 		pdu.ddp = (m->m_pkthdr.nvmf_cpl_status & CMP_STATUS_DDP) != 0;
1975*ec0cd287SJohn Baldwin 
1976*ec0cd287SJohn Baldwin 		error = nvmf_che_validate_pdu(qp, &pdu);
1977*ec0cd287SJohn Baldwin 		if (error == 0 && pdu.data_len != 0)
1978*ec0cd287SJohn Baldwin 			error = nvmf_che_attach_pdu_data(qp, &pdu);
1979*ec0cd287SJohn Baldwin 		if (error != 0)
1980*ec0cd287SJohn Baldwin 			nvmf_che_free_pdu(&pdu);
1981*ec0cd287SJohn Baldwin 		else
1982*ec0cd287SJohn Baldwin 			error = nvmf_che_dispatch_pdu(qp, &pdu);
1983*ec0cd287SJohn Baldwin 		if (error != 0) {
1984*ec0cd287SJohn Baldwin 			/*
1985*ec0cd287SJohn Baldwin 			 * If we received a termination request, close
1986*ec0cd287SJohn Baldwin 			 * the connection immediately.
1987*ec0cd287SJohn Baldwin 			 */
1988*ec0cd287SJohn Baldwin 			if (error == ECONNRESET)
1989*ec0cd287SJohn Baldwin 				goto error;
1990*ec0cd287SJohn Baldwin 
1991*ec0cd287SJohn Baldwin 			/*
1992*ec0cd287SJohn Baldwin 			 * Wait for up to 30 seconds for the socket to
1993*ec0cd287SJohn Baldwin 			 * be closed by the other end.
1994*ec0cd287SJohn Baldwin 			 */
1995*ec0cd287SJohn Baldwin 			SOCKBUF_LOCK(&so->so_rcv);
1996*ec0cd287SJohn Baldwin 			if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) == 0) {
1997*ec0cd287SJohn Baldwin 				terror = cv_timedwait(&qp->rx_cv,
1998*ec0cd287SJohn Baldwin 				    SOCKBUF_MTX(&so->so_rcv), 30 * hz);
1999*ec0cd287SJohn Baldwin 				if (terror == ETIMEDOUT)
2000*ec0cd287SJohn Baldwin 					printf("NVMe/TCP: Timed out after sending terminate request\n");
2001*ec0cd287SJohn Baldwin 			}
2002*ec0cd287SJohn Baldwin 			SOCKBUF_UNLOCK(&so->so_rcv);
2003*ec0cd287SJohn Baldwin 			goto error;
2004*ec0cd287SJohn Baldwin 		}
2005*ec0cd287SJohn Baldwin 
2006*ec0cd287SJohn Baldwin 		SOCKBUF_LOCK(&so->so_rcv);
2007*ec0cd287SJohn Baldwin 	}
2008*ec0cd287SJohn Baldwin 	SOCKBUF_UNLOCK(&so->so_rcv);
2009*ec0cd287SJohn Baldwin 	kthread_exit();
2010*ec0cd287SJohn Baldwin }
2011*ec0cd287SJohn Baldwin 
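/* Socket receive upcall that wakes the receive thread. */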
2012*ec0cd287SJohn Baldwin static int
2013*ec0cd287SJohn Baldwin nvmf_che_soupcall_receive(struct socket *so, void *arg, int waitflag)
2014*ec0cd287SJohn Baldwin {
2015*ec0cd287SJohn Baldwin 	struct nvmf_che_qpair *qp = arg;
2016*ec0cd287SJohn Baldwin 
2017*ec0cd287SJohn Baldwin 	cv_signal(&qp->rx_cv);
2018*ec0cd287SJohn Baldwin 	return (SU_OK);
2019*ec0cd287SJohn Baldwin }
2020*ec0cd287SJohn Baldwin 
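/*
 * Handle a CPL_NVMT_DATA message carrying PDU payload bytes.  Tag the
 * mbuf with its TCP sequence number and queue it on the qpair's
 * rx_data queue for later pairing with the matching CPL_NVMT_CMP.
 */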
2021*ec0cd287SJohn Baldwin static int
2022*ec0cd287SJohn Baldwin do_nvmt_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
2023*ec0cd287SJohn Baldwin {
2024*ec0cd287SJohn Baldwin 	struct adapter *sc = iq->adapter;
2025*ec0cd287SJohn Baldwin 	struct nvmf_che_adapter *nca = sc->nvme_ulp_softc;
2026*ec0cd287SJohn Baldwin 	const struct cpl_nvmt_data *cpl;
2027*ec0cd287SJohn Baldwin 	u_int tid;
2028*ec0cd287SJohn Baldwin 	struct toepcb *toep;
2029*ec0cd287SJohn Baldwin 	struct nvmf_che_qpair *qp;
2030*ec0cd287SJohn Baldwin 	struct socket *so;
2031*ec0cd287SJohn Baldwin 	struct inpcb *inp;
2032*ec0cd287SJohn Baldwin 	struct tcpcb *tp;
2033*ec0cd287SJohn Baldwin 	int len __diagused;
2034*ec0cd287SJohn Baldwin 
2035*ec0cd287SJohn Baldwin 	if (nca->nvmt_data_iqe) {
2036*ec0cd287SJohn Baldwin 		cpl = (const void *)(rss + 1);
2037*ec0cd287SJohn Baldwin 	} else {
2038*ec0cd287SJohn Baldwin 		cpl = mtod(m, const void *);
2039*ec0cd287SJohn Baldwin 
2040*ec0cd287SJohn Baldwin 		/* strip off CPL header */
2041*ec0cd287SJohn Baldwin 		m_adj(m, sizeof(*cpl));
2042*ec0cd287SJohn Baldwin 	}
2043*ec0cd287SJohn Baldwin 	tid = GET_TID(cpl);
2044*ec0cd287SJohn Baldwin 	toep = lookup_tid(sc, tid);
2045*ec0cd287SJohn Baldwin 
2046*ec0cd287SJohn Baldwin 	KASSERT(toep->tid == tid, ("%s: toep tid/atid mismatch", __func__));
2047*ec0cd287SJohn Baldwin 
2048*ec0cd287SJohn Baldwin 	len = m->m_pkthdr.len;
2049*ec0cd287SJohn Baldwin 
2050*ec0cd287SJohn Baldwin 	KASSERT(len == be16toh(cpl->length),
2051*ec0cd287SJohn Baldwin 	    ("%s: payload length mismatch", __func__));
2052*ec0cd287SJohn Baldwin 
2053*ec0cd287SJohn Baldwin 	inp = toep->inp;
2054*ec0cd287SJohn Baldwin 	INP_WLOCK(inp);
2055*ec0cd287SJohn Baldwin 	if (inp->inp_flags & INP_DROPPED) {
2056*ec0cd287SJohn Baldwin 		CTR(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x",
2057*ec0cd287SJohn Baldwin 		    __func__, tid, len, inp->inp_flags);
2058*ec0cd287SJohn Baldwin 		INP_WUNLOCK(inp);
2059*ec0cd287SJohn Baldwin 		m_freem(m);
2060*ec0cd287SJohn Baldwin 		return (0);
2061*ec0cd287SJohn Baldwin 	}
2062*ec0cd287SJohn Baldwin 
2063*ec0cd287SJohn Baldwin 	/* Save TCP sequence number. */
2064*ec0cd287SJohn Baldwin 	m->m_pkthdr.nvmf_tcp_seq = be32toh(cpl->seq);
2065*ec0cd287SJohn Baldwin 
2066*ec0cd287SJohn Baldwin 	qp = toep->ulpcb;
2067*ec0cd287SJohn Baldwin 	so = qp->so;
2068*ec0cd287SJohn Baldwin 	SOCKBUF_LOCK(&so->so_rcv);
2069*ec0cd287SJohn Baldwin 	mbufq_enqueue(&qp->rx_data, m);
2070*ec0cd287SJohn Baldwin 	SOCKBUF_UNLOCK(&so->so_rcv);
2071*ec0cd287SJohn Baldwin 
2072*ec0cd287SJohn Baldwin 	tp = intotcpcb(inp);
2073*ec0cd287SJohn Baldwin 	tp->t_rcvtime = ticks;
2074*ec0cd287SJohn Baldwin 
2075*ec0cd287SJohn Baldwin #ifdef VERBOSE_TRACES
2076*ec0cd287SJohn Baldwin 	CTR(KTR_CXGBE, "%s: tid %u len %d seq %u", __func__, tid, len,
2077*ec0cd287SJohn Baldwin 	    be32toh(cpl->seq));
2078*ec0cd287SJohn Baldwin #endif
2079*ec0cd287SJohn Baldwin 
2080*ec0cd287SJohn Baldwin 	INP_WUNLOCK(inp);
2081*ec0cd287SJohn Baldwin 	return (0);
2082*ec0cd287SJohn Baldwin }
2083*ec0cd287SJohn Baldwin 
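/*
 * Handle a CPL_NVMT_CMP message carrying a PDU header and its digest
 * status.  Queue it on the qpair's rx_pdus queue and wake the receive
 * thread if the queue was previously empty.
 */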
2084*ec0cd287SJohn Baldwin static int
2085*ec0cd287SJohn Baldwin do_nvmt_cmp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
2086*ec0cd287SJohn Baldwin {
2087*ec0cd287SJohn Baldwin 	struct adapter *sc = iq->adapter;
2088*ec0cd287SJohn Baldwin 	const struct cpl_nvmt_cmp *cpl = mtod(m, const void *);
2089*ec0cd287SJohn Baldwin 	u_int tid = GET_TID(cpl);
2090*ec0cd287SJohn Baldwin 	struct toepcb *toep = lookup_tid(sc, tid);
2091*ec0cd287SJohn Baldwin 	struct nvmf_che_qpair *qp = toep->ulpcb;
2092*ec0cd287SJohn Baldwin 	struct socket *so = qp->so;
2093*ec0cd287SJohn Baldwin 	struct inpcb *inp = toep->inp;
2094*ec0cd287SJohn Baldwin 	u_int hlen __diagused;
2095*ec0cd287SJohn Baldwin 	bool empty;
2096*ec0cd287SJohn Baldwin 
2097*ec0cd287SJohn Baldwin 	KASSERT(toep->tid == tid, ("%s: toep tid/atid mismatch", __func__));
2098*ec0cd287SJohn Baldwin 	KASSERT(!(toep->flags & TPF_SYNQE),
2099*ec0cd287SJohn Baldwin 	    ("%s: toep %p claims to be a synq entry", __func__, toep));
2100*ec0cd287SJohn Baldwin 
2101*ec0cd287SJohn Baldwin 	/* strip off CPL header */
2102*ec0cd287SJohn Baldwin 	m_adj(m, sizeof(*cpl));
2103*ec0cd287SJohn Baldwin 	hlen = m->m_pkthdr.len;
2104*ec0cd287SJohn Baldwin 
2105*ec0cd287SJohn Baldwin 	KASSERT(hlen == be16toh(cpl->length),
2106*ec0cd287SJohn Baldwin 	    ("%s: payload length mismatch", __func__));
2107*ec0cd287SJohn Baldwin 
2108*ec0cd287SJohn Baldwin 	INP_WLOCK(inp);
2109*ec0cd287SJohn Baldwin 	if (inp->inp_flags & INP_DROPPED) {
2110*ec0cd287SJohn Baldwin 		CTR(KTR_CXGBE, "%s: tid %u, rx (hlen %u), inp_flags 0x%x",
2111*ec0cd287SJohn Baldwin 		    __func__, tid, hlen, inp->inp_flags);
2112*ec0cd287SJohn Baldwin 		INP_WUNLOCK(inp);
2113*ec0cd287SJohn Baldwin 		m_freem(m);
2114*ec0cd287SJohn Baldwin 		return (0);
2115*ec0cd287SJohn Baldwin 	}
2116*ec0cd287SJohn Baldwin 
2117*ec0cd287SJohn Baldwin #ifdef VERBOSE_TRACES
2118*ec0cd287SJohn Baldwin 	CTR(KTR_CXGBE, "%s: tid %u hlen %u seq %u status %u", __func__, tid,
2119*ec0cd287SJohn Baldwin 	    hlen, be32toh(cpl->seq), cpl->status);
2120*ec0cd287SJohn Baldwin #endif
2121*ec0cd287SJohn Baldwin 
2122*ec0cd287SJohn Baldwin 	/* Save TCP sequence number and CPL status. */
2123*ec0cd287SJohn Baldwin 	m->m_pkthdr.nvmf_tcp_seq = be32toh(cpl->seq);
2124*ec0cd287SJohn Baldwin 	m->m_pkthdr.nvmf_cpl_status = cpl->status;
2125*ec0cd287SJohn Baldwin 
2126*ec0cd287SJohn Baldwin 	SOCKBUF_LOCK(&so->so_rcv);
2127*ec0cd287SJohn Baldwin 	empty = mbufq_len(&qp->rx_pdus) == 0;
2128*ec0cd287SJohn Baldwin 	mbufq_enqueue(&qp->rx_pdus, m);
2129*ec0cd287SJohn Baldwin 	SOCKBUF_UNLOCK(&so->so_rcv);
2130*ec0cd287SJohn Baldwin 	INP_WUNLOCK(inp);
2131*ec0cd287SJohn Baldwin 	if (empty)
2132*ec0cd287SJohn Baldwin 		cv_signal(&qp->rx_cv);
2133*ec0cd287SJohn Baldwin 	return (0);
2134*ec0cd287SJohn Baldwin }
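/*
 * do_nvmt_cmp() above delivers the PDU header and completion status
 * for a received PDU.  The TCP sequence number and CPL status are
 * stashed in the mbuf packet header so the receive thread can match
 * the header with the payload mbufs queued by do_nvmt_data() and act
 * on any error status reported by the hardware.  The condition
 * variable is only signalled on an empty -> non-empty transition,
 * presumably to avoid redundant wakeups.
 */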
2135*ec0cd287SJohn Baldwin 
2136*ec0cd287SJohn Baldwin static uint16_t
2137*ec0cd287SJohn Baldwin che_alloc_fl_cid(struct nvmf_che_qpair *qp, uint16_t original_cid)
2138*ec0cd287SJohn Baldwin {
2139*ec0cd287SJohn Baldwin 	uint16_t new_cid;
2140*ec0cd287SJohn Baldwin 
2141*ec0cd287SJohn Baldwin 	mtx_lock(&qp->fl_cid_lock);
2142*ec0cd287SJohn Baldwin 	new_cid = FL_CID_FINDFREE_AT(qp->fl_cid_set, qp->next_cid);
2143*ec0cd287SJohn Baldwin 	if (new_cid == 0) {
2144*ec0cd287SJohn Baldwin 		new_cid = FL_CID_FINDFREE_AT(qp->fl_cid_set, 0);
2145*ec0cd287SJohn Baldwin 		MPASS(new_cid != 0);
2146*ec0cd287SJohn Baldwin 	}
2147*ec0cd287SJohn Baldwin 	new_cid--;
2148*ec0cd287SJohn Baldwin 	FL_CID_BUSY(new_cid, qp->fl_cid_set);
2149*ec0cd287SJohn Baldwin 	if (new_cid == CHE_MAX_FL_TAG)
2150*ec0cd287SJohn Baldwin 		qp->next_cid = 0;
2151*ec0cd287SJohn Baldwin 	else
2152*ec0cd287SJohn Baldwin 		qp->next_cid = new_cid + 1;
2153*ec0cd287SJohn Baldwin 	qp->fl_cids[new_cid] = original_cid;
2154*ec0cd287SJohn Baldwin 	mtx_unlock(&qp->fl_cid_lock);
2155*ec0cd287SJohn Baldwin 
2156*ec0cd287SJohn Baldwin 	return (new_cid | CHE_FL_TAG_MASK);
2157*ec0cd287SJohn Baldwin }
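/*
 * che_alloc_fl_cid() above hands out "freelist" command IDs for
 * commands that do not use DDP.  FL_CID_FINDFREE_AT() appears to
 * return a 1-based bit index (0 meaning nothing free at or after the
 * hint), so the search wraps to 0 before the result is converted back
 * to a 0-based CID.  The original host-assigned CID is remembered in
 * qp->fl_cids[] so it can be restored when the response returns, and
 * CHE_FL_TAG_MASK is set in the wire CID, presumably to distinguish
 * freelist CIDs from DDP tags on receive.
 *
 * Illustrative example (hypothetical values): with next_cid == 5 and
 * bit 5 free, FINDFREE returns 6, new_cid becomes 5, next_cid advances
 * to 6, and the CID placed on the wire is (5 | CHE_FL_TAG_MASK).
 */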
2158*ec0cd287SJohn Baldwin 
2159*ec0cd287SJohn Baldwin static uint16_t
2160*ec0cd287SJohn Baldwin che_alloc_ddp_cid(struct nvmf_che_qpair *qp, struct nvmf_che_command_buffer *cb)
2161*ec0cd287SJohn Baldwin {
2162*ec0cd287SJohn Baldwin 	mtx_assert(&qp->rx_buffers.lock, MA_OWNED);
2163*ec0cd287SJohn Baldwin 
2164*ec0cd287SJohn Baldwin 	return (che_alloc_ddp_tag(qp, cb));
2165*ec0cd287SJohn Baldwin }
2166*ec0cd287SJohn Baldwin 
2167*ec0cd287SJohn Baldwin static struct mbuf *
2168*ec0cd287SJohn Baldwin che_command_pdu(struct nvmf_che_qpair *qp, struct nvmf_che_capsule *cc)
2169*ec0cd287SJohn Baldwin {
2170*ec0cd287SJohn Baldwin 	struct nvmf_capsule *nc = &cc->nc;
2171*ec0cd287SJohn Baldwin 	struct nvmf_che_command_buffer *cb;
2172*ec0cd287SJohn Baldwin 	struct nvme_sgl_descriptor *sgl;
2173*ec0cd287SJohn Baldwin 	struct nvme_tcp_cmd cmd;
2174*ec0cd287SJohn Baldwin 	struct mbuf *top, *m;
2175*ec0cd287SJohn Baldwin 	uint16_t cid;
2176*ec0cd287SJohn Baldwin 	bool use_icd;
2177*ec0cd287SJohn Baldwin 
2178*ec0cd287SJohn Baldwin 	use_icd = false;
2179*ec0cd287SJohn Baldwin 	cb = NULL;
2180*ec0cd287SJohn Baldwin 	m = NULL;
2181*ec0cd287SJohn Baldwin 
2182*ec0cd287SJohn Baldwin 	if (nc->nc_data.io_len != 0) {
2183*ec0cd287SJohn Baldwin 		cb = che_alloc_command_buffer(qp, &nc->nc_data, 0,
2184*ec0cd287SJohn Baldwin 		    nc->nc_data.io_len, nc->nc_sqe.cid);
2185*ec0cd287SJohn Baldwin 		cb->original_cid = nc->nc_sqe.cid;
2186*ec0cd287SJohn Baldwin 
2187*ec0cd287SJohn Baldwin 		if (nc->nc_send_data && nc->nc_data.io_len <= qp->max_icd) {
2188*ec0cd287SJohn Baldwin 			cid = che_alloc_fl_cid(qp, nc->nc_sqe.cid);
2189*ec0cd287SJohn Baldwin 			use_icd = true;
2190*ec0cd287SJohn Baldwin 			m = nvmf_che_command_buffer_mbuf(cb, 0,
2191*ec0cd287SJohn Baldwin 			    nc->nc_data.io_len, NULL, false);
2192*ec0cd287SJohn Baldwin 			cb->data_xfered = nc->nc_data.io_len;
2193*ec0cd287SJohn Baldwin 			che_release_command_buffer(cb);
2194*ec0cd287SJohn Baldwin 		} else if (nc->nc_send_data) {
2195*ec0cd287SJohn Baldwin 			cid = che_alloc_fl_cid(qp, nc->nc_sqe.cid);
2196*ec0cd287SJohn Baldwin 			cb->cid = htole16(cid);
2197*ec0cd287SJohn Baldwin 			mtx_lock(&qp->tx_buffers.lock);
2198*ec0cd287SJohn Baldwin 			che_add_command_buffer(&qp->tx_buffers, cb);
2199*ec0cd287SJohn Baldwin 			mtx_unlock(&qp->tx_buffers.lock);
2200*ec0cd287SJohn Baldwin 		} else {
2201*ec0cd287SJohn Baldwin 			mtx_lock(&qp->rx_buffers.lock);
2202*ec0cd287SJohn Baldwin 			cid = che_alloc_ddp_cid(qp, cb);
2203*ec0cd287SJohn Baldwin 			if (cid == CHE_DDP_NO_TAG) {
2204*ec0cd287SJohn Baldwin 				cid = che_alloc_fl_cid(qp, nc->nc_sqe.cid);
2205*ec0cd287SJohn Baldwin 				che_add_command_buffer(&qp->rx_buffers, cb);
2206*ec0cd287SJohn Baldwin 			}
2207*ec0cd287SJohn Baldwin 			cb->cid = htole16(cid);
2208*ec0cd287SJohn Baldwin 			mtx_unlock(&qp->rx_buffers.lock);
2209*ec0cd287SJohn Baldwin 		}
2210*ec0cd287SJohn Baldwin 	} else
2211*ec0cd287SJohn Baldwin 		cid = che_alloc_fl_cid(qp, nc->nc_sqe.cid);
2212*ec0cd287SJohn Baldwin 
2213*ec0cd287SJohn Baldwin #ifdef VERBOSE_TRACES
2214*ec0cd287SJohn Baldwin 	CTR(KTR_CXGBE, "%s: tid %u allocated cid 0x%04x for 0x%04x", __func__,
2215*ec0cd287SJohn Baldwin 	    qp->toep->tid, cid, nc->nc_sqe.cid);
2216*ec0cd287SJohn Baldwin #endif
2217*ec0cd287SJohn Baldwin 	memset(&cmd, 0, sizeof(cmd));
2218*ec0cd287SJohn Baldwin 	cmd.common.pdu_type = NVME_TCP_PDU_TYPE_CAPSULE_CMD;
2219*ec0cd287SJohn Baldwin 	cmd.ccsqe = nc->nc_sqe;
2220*ec0cd287SJohn Baldwin 	cmd.ccsqe.cid = htole16(cid);
2221*ec0cd287SJohn Baldwin 
2222*ec0cd287SJohn Baldwin 	/* Populate SGL in SQE. */
2223*ec0cd287SJohn Baldwin 	sgl = &cmd.ccsqe.sgl;
2224*ec0cd287SJohn Baldwin 	memset(sgl, 0, sizeof(*sgl));
2225*ec0cd287SJohn Baldwin 	sgl->address = 0;
2226*ec0cd287SJohn Baldwin 	sgl->length = htole32(nc->nc_data.io_len);
2227*ec0cd287SJohn Baldwin 	if (use_icd) {
2228*ec0cd287SJohn Baldwin 		/* Use in-capsule data. */
2229*ec0cd287SJohn Baldwin 		sgl->type = NVME_SGL_TYPE_ICD;
2230*ec0cd287SJohn Baldwin 	} else {
2231*ec0cd287SJohn Baldwin 		/* Use a command buffer. */
2232*ec0cd287SJohn Baldwin 		sgl->type = NVME_SGL_TYPE_COMMAND_BUFFER;
2233*ec0cd287SJohn Baldwin 	}
2234*ec0cd287SJohn Baldwin 
2235*ec0cd287SJohn Baldwin 	top = nvmf_che_construct_pdu(qp, &cmd, sizeof(cmd), m, m != NULL ?
2236*ec0cd287SJohn Baldwin 	    nc->nc_data.io_len : 0);
2237*ec0cd287SJohn Baldwin 	return (top);
2238*ec0cd287SJohn Baldwin }
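/*
 * che_command_pdu() above chooses one of three data paths for a host
 * command:
 *  - transmit data small enough for in-capsule data (ICD): the payload
 *    is appended to the command PDU and the command buffer is released
 *    immediately;
 *  - larger transmit data: the buffer is parked on qp->tx_buffers to
 *    be sent as the controller issues R2Ts;
 *  - receive data: a DDP tag is preferred so the adapter can place the
 *    data directly, falling back to a freelist CID with the buffer
 *    queued on qp->rx_buffers when no tag is available.
 * In every case the SQE's CID is rewritten with the locally allocated
 * tag and the SGL is rebuilt as either an ICD or command-buffer
 * descriptor.
 */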
2239*ec0cd287SJohn Baldwin 
2240*ec0cd287SJohn Baldwin static struct mbuf *
2241*ec0cd287SJohn Baldwin che_response_pdu(struct nvmf_che_qpair *qp, struct nvmf_che_capsule *cc)
2242*ec0cd287SJohn Baldwin {
2243*ec0cd287SJohn Baldwin 	struct nvmf_capsule *nc = &cc->nc;
2244*ec0cd287SJohn Baldwin 	struct nvme_tcp_rsp rsp;
2245*ec0cd287SJohn Baldwin 
2246*ec0cd287SJohn Baldwin 	memset(&rsp, 0, sizeof(rsp));
2247*ec0cd287SJohn Baldwin 	rsp.common.pdu_type = NVME_TCP_PDU_TYPE_CAPSULE_RESP;
2248*ec0cd287SJohn Baldwin 	rsp.rccqe = nc->nc_cqe;
2249*ec0cd287SJohn Baldwin 
2250*ec0cd287SJohn Baldwin 	return (nvmf_che_construct_pdu(qp, &rsp, sizeof(rsp), NULL, 0));
2251*ec0cd287SJohn Baldwin }
2252*ec0cd287SJohn Baldwin 
2253*ec0cd287SJohn Baldwin static struct mbuf *
2254*ec0cd287SJohn Baldwin capsule_to_pdu(struct nvmf_che_qpair *qp, struct nvmf_che_capsule *cc)
2255*ec0cd287SJohn Baldwin {
2256*ec0cd287SJohn Baldwin 	if (cc->nc.nc_qe_len == sizeof(struct nvme_command))
2257*ec0cd287SJohn Baldwin 		return (che_command_pdu(qp, cc));
2258*ec0cd287SJohn Baldwin 	else
2259*ec0cd287SJohn Baldwin 		return (che_response_pdu(qp, cc));
2260*ec0cd287SJohn Baldwin }
2261*ec0cd287SJohn Baldwin 
2262*ec0cd287SJohn Baldwin static void
2263*ec0cd287SJohn Baldwin nvmf_che_send(void *arg)
2264*ec0cd287SJohn Baldwin {
2265*ec0cd287SJohn Baldwin 	struct nvmf_che_qpair *qp = arg;
2266*ec0cd287SJohn Baldwin 	struct nvmf_che_capsule *cc;
2267*ec0cd287SJohn Baldwin 	struct socket *so = qp->so;
2268*ec0cd287SJohn Baldwin 	struct mbuf *m;
2269*ec0cd287SJohn Baldwin 	int error;
2270*ec0cd287SJohn Baldwin 
2271*ec0cd287SJohn Baldwin 	m = NULL;
2272*ec0cd287SJohn Baldwin 	SOCKBUF_LOCK(&so->so_snd);
2273*ec0cd287SJohn Baldwin 	while (!qp->tx_shutdown) {
2274*ec0cd287SJohn Baldwin 		if (so->so_error != 0) {
2275*ec0cd287SJohn Baldwin 			error = so->so_error;
2276*ec0cd287SJohn Baldwin 			SOCKBUF_UNLOCK(&so->so_snd);
2277*ec0cd287SJohn Baldwin 			m_freem(m);
2278*ec0cd287SJohn Baldwin 			nvmf_qpair_error(&qp->qp, error);
2279*ec0cd287SJohn Baldwin 			SOCKBUF_LOCK(&so->so_snd);
2280*ec0cd287SJohn Baldwin 			while (!qp->tx_shutdown)
2281*ec0cd287SJohn Baldwin 				cv_wait(&qp->tx_cv, SOCKBUF_MTX(&so->so_snd));
2282*ec0cd287SJohn Baldwin 			break;
2283*ec0cd287SJohn Baldwin 		}
2284*ec0cd287SJohn Baldwin 
2285*ec0cd287SJohn Baldwin 		if (STAILQ_EMPTY(&qp->tx_capsules)) {
2286*ec0cd287SJohn Baldwin 			cv_wait(&qp->tx_cv, SOCKBUF_MTX(&so->so_snd));
2287*ec0cd287SJohn Baldwin 			continue;
2288*ec0cd287SJohn Baldwin 		}
2289*ec0cd287SJohn Baldwin 
2290*ec0cd287SJohn Baldwin 		/* Convert a capsule into a PDU. */
2291*ec0cd287SJohn Baldwin 		cc = STAILQ_FIRST(&qp->tx_capsules);
2292*ec0cd287SJohn Baldwin 		STAILQ_REMOVE_HEAD(&qp->tx_capsules, link);
2293*ec0cd287SJohn Baldwin 		SOCKBUF_UNLOCK(&so->so_snd);
2294*ec0cd287SJohn Baldwin 
2295*ec0cd287SJohn Baldwin 		m = capsule_to_pdu(qp, cc);
2296*ec0cd287SJohn Baldwin 		che_release_capsule(cc);
2297*ec0cd287SJohn Baldwin 
2298*ec0cd287SJohn Baldwin 		nvmf_che_write_pdu(qp, m);
2299*ec0cd287SJohn Baldwin 
2300*ec0cd287SJohn Baldwin 		SOCKBUF_LOCK(&so->so_snd);
2301*ec0cd287SJohn Baldwin 	}
2302*ec0cd287SJohn Baldwin 	SOCKBUF_UNLOCK(&so->so_snd);
2303*ec0cd287SJohn Baldwin 	kthread_exit();
2304*ec0cd287SJohn Baldwin }
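/*
 * nvmf_che_send() above is the single transmit kthread for the queue
 * pair.  Pending capsules on qp->tx_capsules are protected by the send
 * socket buffer lock; each capsule is dequeued, converted to a PDU
 * outside the lock, and handed to nvmf_che_write_pdu().  On a socket
 * error the error is reported once via nvmf_qpair_error() and the
 * thread then parks on tx_cv until che_free_qpair() sets tx_shutdown.
 */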
2305*ec0cd287SJohn Baldwin 
2306*ec0cd287SJohn Baldwin static int
2307*ec0cd287SJohn Baldwin nvmf_che_setsockopt(struct socket *so, u_int sspace, u_int rspace)
2308*ec0cd287SJohn Baldwin {
2309*ec0cd287SJohn Baldwin 	struct sockopt opt;
2310*ec0cd287SJohn Baldwin 	int error, one = 1;
2311*ec0cd287SJohn Baldwin 
2312*ec0cd287SJohn Baldwin 	/* Don't lower the buffer sizes, just enforce a minimum. */
2313*ec0cd287SJohn Baldwin 	SOCKBUF_LOCK(&so->so_snd);
2314*ec0cd287SJohn Baldwin 	if (sspace < so->so_snd.sb_hiwat)
2315*ec0cd287SJohn Baldwin 		sspace = so->so_snd.sb_hiwat;
2316*ec0cd287SJohn Baldwin 	SOCKBUF_UNLOCK(&so->so_snd);
2317*ec0cd287SJohn Baldwin 	SOCKBUF_LOCK(&so->so_rcv);
2318*ec0cd287SJohn Baldwin 	if (rspace < so->so_rcv.sb_hiwat)
2319*ec0cd287SJohn Baldwin 		rspace = so->so_rcv.sb_hiwat;
2320*ec0cd287SJohn Baldwin 	SOCKBUF_UNLOCK(&so->so_rcv);
2321*ec0cd287SJohn Baldwin 
2322*ec0cd287SJohn Baldwin 	error = soreserve(so, sspace, rspace);
2323*ec0cd287SJohn Baldwin 	if (error != 0)
2324*ec0cd287SJohn Baldwin 		return (error);
2325*ec0cd287SJohn Baldwin 	SOCKBUF_LOCK(&so->so_snd);
2326*ec0cd287SJohn Baldwin 	so->so_snd.sb_flags |= SB_AUTOSIZE;
2327*ec0cd287SJohn Baldwin 	SOCKBUF_UNLOCK(&so->so_snd);
2328*ec0cd287SJohn Baldwin 	SOCKBUF_LOCK(&so->so_rcv);
2329*ec0cd287SJohn Baldwin 	so->so_rcv.sb_flags |= SB_AUTOSIZE;
2330*ec0cd287SJohn Baldwin 	SOCKBUF_UNLOCK(&so->so_rcv);
2331*ec0cd287SJohn Baldwin 
2332*ec0cd287SJohn Baldwin 	/*
2333*ec0cd287SJohn Baldwin 	 * Disable Nagle.
2334*ec0cd287SJohn Baldwin 	 */
2335*ec0cd287SJohn Baldwin 	bzero(&opt, sizeof(opt));
2336*ec0cd287SJohn Baldwin 	opt.sopt_dir = SOPT_SET;
2337*ec0cd287SJohn Baldwin 	opt.sopt_level = IPPROTO_TCP;
2338*ec0cd287SJohn Baldwin 	opt.sopt_name = TCP_NODELAY;
2339*ec0cd287SJohn Baldwin 	opt.sopt_val = &one;
2340*ec0cd287SJohn Baldwin 	opt.sopt_valsize = sizeof(one);
2341*ec0cd287SJohn Baldwin 	error = sosetopt(so, &opt);
2342*ec0cd287SJohn Baldwin 	if (error != 0)
2343*ec0cd287SJohn Baldwin 		return (error);
2344*ec0cd287SJohn Baldwin 
2345*ec0cd287SJohn Baldwin 	return (0);
2346*ec0cd287SJohn Baldwin }
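/*
 * nvmf_che_setsockopt() above only ever grows the socket buffers: the
 * requested sizes are raised to at least the current high-water marks,
 * soreserve() applies them, autosizing stays enabled, and TCP_NODELAY
 * is set so small PDUs are not delayed by Nagle.
 */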
2347*ec0cd287SJohn Baldwin 
2348*ec0cd287SJohn Baldwin static void
2349*ec0cd287SJohn Baldwin t4_nvme_set_tcb_field(struct toepcb *toep, uint16_t word, uint64_t mask,
2350*ec0cd287SJohn Baldwin     uint64_t val)
2351*ec0cd287SJohn Baldwin {
2352*ec0cd287SJohn Baldwin 	struct adapter *sc = td_adapter(toep->td);
2353*ec0cd287SJohn Baldwin 
2354*ec0cd287SJohn Baldwin 	t4_set_tcb_field(sc, &toep->ofld_txq->wrq, toep, word, mask, val, 0, 0);
2355*ec0cd287SJohn Baldwin }
2356*ec0cd287SJohn Baldwin 
2357*ec0cd287SJohn Baldwin static void
2358*ec0cd287SJohn Baldwin set_ulp_mode_nvme(struct toepcb *toep, u_int ulp_submode, uint8_t rxpda)
2359*ec0cd287SJohn Baldwin {
2360*ec0cd287SJohn Baldwin 	uint64_t val;
2361*ec0cd287SJohn Baldwin 
2362*ec0cd287SJohn Baldwin 	CTR(KTR_CXGBE, "%s: tid %u, ULP_MODE_NVMET, submode=%#x, rxpda=%u",
2363*ec0cd287SJohn Baldwin 	    __func__, toep->tid, ulp_submode, rxpda);
2364*ec0cd287SJohn Baldwin 
2365*ec0cd287SJohn Baldwin 	val = V_TCB_ULP_TYPE(ULP_MODE_NVMET) | V_TCB_ULP_RAW(ulp_submode);
2366*ec0cd287SJohn Baldwin 	t4_nvme_set_tcb_field(toep, W_TCB_ULP_TYPE,
2367*ec0cd287SJohn Baldwin 	    V_TCB_ULP_TYPE(M_TCB_ULP_TYPE) | V_TCB_ULP_RAW(M_TCB_ULP_RAW), val);
2368*ec0cd287SJohn Baldwin 
2369*ec0cd287SJohn Baldwin 	val = V_TF_RX_FLOW_CONTROL_DISABLE(1ULL);
2370*ec0cd287SJohn Baldwin 	t4_nvme_set_tcb_field(toep, W_TCB_T_FLAGS, val, val);
2371*ec0cd287SJohn Baldwin 
2372*ec0cd287SJohn Baldwin 	val = V_TCB_RSVD((rxpda / 4) - 1);
2373*ec0cd287SJohn Baldwin 	t4_nvme_set_tcb_field(toep, W_TCB_RSVD, V_TCB_RSVD(M_TCB_RSVD), val);
2374*ec0cd287SJohn Baldwin 
2375*ec0cd287SJohn Baldwin 	/* 0 disables CPL_NVMT_CMP_IMM which is not useful in this driver. */
2376*ec0cd287SJohn Baldwin 	val = 0;
2377*ec0cd287SJohn Baldwin 	t4_nvme_set_tcb_field(toep, W_TCB_CMP_IMM_SZ,
2378*ec0cd287SJohn Baldwin 	    V_TCB_CMP_IMM_SZ(M_TCB_CMP_IMM_SZ), val);
2379*ec0cd287SJohn Baldwin }
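/*
 * set_ulp_mode_nvme() above programs the connection's TCB for NVMe
 * offload: ULP type NVMET with the submode bits supplied by the caller
 * (header/data CRC, ingress direction), RX flow control disabled, and
 * what appears to be the receive PDU data alignment encoded in
 * W_TCB_RSVD as (rxpda / 4) - 1, i.e. in 4-byte units, zero-based (an
 * rxpda of 4 bytes encodes as 0).  CMP_IMM_SZ is cleared to disable
 * CPL_NVMT_CMP_IMM.
 */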
2380*ec0cd287SJohn Baldwin 
2381*ec0cd287SJohn Baldwin static u_int
2382*ec0cd287SJohn Baldwin pdu_max_data_len(const nvlist_t *nvl, u_int max_pdu_len, u_int hlen,
2383*ec0cd287SJohn Baldwin     uint8_t pda)
2384*ec0cd287SJohn Baldwin {
2385*ec0cd287SJohn Baldwin 	u_int max_data_len;
2386*ec0cd287SJohn Baldwin 
2387*ec0cd287SJohn Baldwin 	if (nvlist_get_bool(nvl, "header_digests"))
2388*ec0cd287SJohn Baldwin 		hlen += sizeof(uint32_t);
2389*ec0cd287SJohn Baldwin 	hlen = roundup(hlen, pda);
2390*ec0cd287SJohn Baldwin 	max_data_len = max_pdu_len - hlen;
2391*ec0cd287SJohn Baldwin 	if (nvlist_get_bool(nvl, "data_digests"))
2392*ec0cd287SJohn Baldwin 		max_data_len -= sizeof(uint32_t);
2393*ec0cd287SJohn Baldwin 	return (max_data_len);
2394*ec0cd287SJohn Baldwin }
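/*
 * pdu_max_data_len() above computes how much payload fits in a PDU of
 * max_pdu_len bytes once the header (plus optional header digest),
 * padding to the PDU data alignment, and optional data digest are
 * accounted for.  Illustrative example with assumed values: for an
 * 8192-byte PDU carrying C2H_DATA (24-byte header), header and data
 * digests enabled and a PDA of 8, hlen becomes 24 + 4 = 28, rounded up
 * to 32, leaving 8192 - 32 - 4 = 8156 bytes of data.
 */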
2395*ec0cd287SJohn Baldwin 
2396*ec0cd287SJohn Baldwin static struct nvmf_qpair *
2397*ec0cd287SJohn Baldwin che_allocate_qpair(bool controller, const nvlist_t *nvl)
2398*ec0cd287SJohn Baldwin {
2399*ec0cd287SJohn Baldwin 	struct nvmf_che_adapter *nca;
2400*ec0cd287SJohn Baldwin 	struct nvmf_che_qpair *qp;
2401*ec0cd287SJohn Baldwin 	struct adapter *sc;
2402*ec0cd287SJohn Baldwin 	struct file *fp;
2403*ec0cd287SJohn Baldwin 	struct socket *so;
2404*ec0cd287SJohn Baldwin 	struct inpcb *inp;
2405*ec0cd287SJohn Baldwin 	struct tcpcb *tp;
2406*ec0cd287SJohn Baldwin 	struct toepcb *toep;
2407*ec0cd287SJohn Baldwin 	cap_rights_t rights;
2408*ec0cd287SJohn Baldwin 	u_int max_tx_pdu_len, num_ddp_tags;
2409*ec0cd287SJohn Baldwin 	int error, ulp_submode;
2410*ec0cd287SJohn Baldwin 
2411*ec0cd287SJohn Baldwin 	if (!nvlist_exists_number(nvl, "fd") ||
2412*ec0cd287SJohn Baldwin 	    !nvlist_exists_number(nvl, "rxpda") ||
2413*ec0cd287SJohn Baldwin 	    !nvlist_exists_number(nvl, "txpda") ||
2414*ec0cd287SJohn Baldwin 	    !nvlist_exists_bool(nvl, "header_digests") ||
2415*ec0cd287SJohn Baldwin 	    !nvlist_exists_bool(nvl, "data_digests") ||
2416*ec0cd287SJohn Baldwin 	    !nvlist_exists_number(nvl, "maxr2t") ||
2417*ec0cd287SJohn Baldwin 	    !nvlist_exists_number(nvl, "maxh2cdata") ||
2418*ec0cd287SJohn Baldwin 	    !nvlist_exists_number(nvl, "max_icd"))
2419*ec0cd287SJohn Baldwin 		return (NULL);
2420*ec0cd287SJohn Baldwin 
2421*ec0cd287SJohn Baldwin 	error = fget(curthread, nvlist_get_number(nvl, "fd"),
2422*ec0cd287SJohn Baldwin 	    cap_rights_init_one(&rights, CAP_SOCK_CLIENT), &fp);
2423*ec0cd287SJohn Baldwin 	if (error != 0)
2424*ec0cd287SJohn Baldwin 		return (NULL);
2425*ec0cd287SJohn Baldwin 	if (fp->f_type != DTYPE_SOCKET) {
2426*ec0cd287SJohn Baldwin 		fdrop(fp, curthread);
2427*ec0cd287SJohn Baldwin 		return (NULL);
2428*ec0cd287SJohn Baldwin 	}
2429*ec0cd287SJohn Baldwin 	so = fp->f_data;
2430*ec0cd287SJohn Baldwin 	if (so->so_type != SOCK_STREAM ||
2431*ec0cd287SJohn Baldwin 	    so->so_proto->pr_protocol != IPPROTO_TCP) {
2432*ec0cd287SJohn Baldwin 		fdrop(fp, curthread);
2433*ec0cd287SJohn Baldwin 		return (NULL);
2434*ec0cd287SJohn Baldwin 	}
2435*ec0cd287SJohn Baldwin 
2436*ec0cd287SJohn Baldwin 	sc = find_offload_adapter(so);
2437*ec0cd287SJohn Baldwin 	if (sc == NULL) {
2438*ec0cd287SJohn Baldwin 		fdrop(fp, curthread);
2439*ec0cd287SJohn Baldwin 		return (NULL);
2440*ec0cd287SJohn Baldwin 	}
2441*ec0cd287SJohn Baldwin 	nca = sc->nvme_ulp_softc;
2442*ec0cd287SJohn Baldwin 
2443*ec0cd287SJohn Baldwin 	/*
2444*ec0cd287SJohn Baldwin 	 * Controller: Require advertised MAXH2CDATA to be small
2445*ec0cd287SJohn Baldwin 	 * enough.
2446*ec0cd287SJohn Baldwin 	 */
2447*ec0cd287SJohn Baldwin 	if (controller) {
2448*ec0cd287SJohn Baldwin 		u_int max_rx_data;
2449*ec0cd287SJohn Baldwin 
2450*ec0cd287SJohn Baldwin 		max_rx_data = pdu_max_data_len(nvl, nca->max_receive_pdu,
2451*ec0cd287SJohn Baldwin 		    sizeof(struct nvme_tcp_h2c_data_hdr),
2452*ec0cd287SJohn Baldwin 		    nvlist_get_number(nvl, "rxpda"));
2453*ec0cd287SJohn Baldwin 		if (nvlist_get_number(nvl, "maxh2cdata") > max_rx_data) {
2454*ec0cd287SJohn Baldwin 			fdrop(fp, curthread);
2455*ec0cd287SJohn Baldwin 			return (NULL);
2456*ec0cd287SJohn Baldwin 		}
2457*ec0cd287SJohn Baldwin 	}
2458*ec0cd287SJohn Baldwin 
2459*ec0cd287SJohn Baldwin 	/*
2460*ec0cd287SJohn Baldwin 	 * Host: Require the queue size to be small enough that all of
2461*ec0cd287SJohn Baldwin 	 * the command ids allocated by nvmf(4) will fit in the
2462*ec0cd287SJohn Baldwin 	 * unallocated range.
2463*ec0cd287SJohn Baldwin 	 *
2464*ec0cd287SJohn Baldwin 	 * XXX: Alternatively this driver could just queue commands
2465*ec0cd287SJohn Baldwin 	 * when an unallocated ID isn't available.
2466*ec0cd287SJohn Baldwin 	 */
2467*ec0cd287SJohn Baldwin 	if (!controller) {
2468*ec0cd287SJohn Baldwin 		u_int num_commands;
2469*ec0cd287SJohn Baldwin 
2470*ec0cd287SJohn Baldwin 		num_commands = nvlist_get_number(nvl, "qsize") - 1;
2471*ec0cd287SJohn Baldwin 		if (nvlist_get_bool(nvl, "admin"))
2472*ec0cd287SJohn Baldwin 			num_commands += 8;	/* Max AER */
2473*ec0cd287SJohn Baldwin 		if (num_commands > CHE_NUM_FL_TAGS) {
2474*ec0cd287SJohn Baldwin 			fdrop(fp, curthread);
2475*ec0cd287SJohn Baldwin 			return (NULL);
2476*ec0cd287SJohn Baldwin 		}
2477*ec0cd287SJohn Baldwin 	}
2478*ec0cd287SJohn Baldwin 
2479*ec0cd287SJohn Baldwin 	qp = malloc(sizeof(*qp), M_NVMF_CHE, M_WAITOK | M_ZERO);
2480*ec0cd287SJohn Baldwin 	qp->txpda = nvlist_get_number(nvl, "txpda");
2481*ec0cd287SJohn Baldwin 	qp->rxpda = nvlist_get_number(nvl, "rxpda");
2482*ec0cd287SJohn Baldwin 	qp->header_digests = nvlist_get_bool(nvl, "header_digests");
2483*ec0cd287SJohn Baldwin 	qp->data_digests = nvlist_get_bool(nvl, "data_digests");
2484*ec0cd287SJohn Baldwin 	qp->maxr2t = nvlist_get_number(nvl, "maxr2t");
2485*ec0cd287SJohn Baldwin 	if (controller)
2486*ec0cd287SJohn Baldwin 		qp->maxh2cdata = nvlist_get_number(nvl, "maxh2cdata");
2487*ec0cd287SJohn Baldwin 
2488*ec0cd287SJohn Baldwin 	if (controller) {
2489*ec0cd287SJohn Baldwin 		/* NB: maxr2t is 0's based. */
2490*ec0cd287SJohn Baldwin 		qp->num_fl_ttags = MIN(CHE_NUM_FL_TAGS,
2491*ec0cd287SJohn Baldwin 		    nvlist_get_number(nvl, "qsize") *
2492*ec0cd287SJohn Baldwin 		    ((uint64_t)qp->maxr2t + 1));
2493*ec0cd287SJohn Baldwin 		qp->open_fl_ttags = mallocarray(qp->num_fl_ttags,
2494*ec0cd287SJohn Baldwin 		    sizeof(*qp->open_fl_ttags), M_NVMF_CHE, M_WAITOK | M_ZERO);
2495*ec0cd287SJohn Baldwin 	} else {
2496*ec0cd287SJohn Baldwin 		qp->fl_cids = mallocarray(CHE_NUM_FL_TAGS,
2497*ec0cd287SJohn Baldwin 		    sizeof(*qp->fl_cids), M_NVMF_CHE, M_WAITOK | M_ZERO);
2498*ec0cd287SJohn Baldwin 		qp->fl_cid_set = malloc(sizeof(*qp->fl_cid_set), M_NVMF_CHE,
2499*ec0cd287SJohn Baldwin 		    M_WAITOK);
2500*ec0cd287SJohn Baldwin 		FL_CID_INIT(qp->fl_cid_set);
2501*ec0cd287SJohn Baldwin 		mtx_init(&qp->fl_cid_lock,  "nvmf/che fl cids", NULL, MTX_DEF);
2502*ec0cd287SJohn Baldwin 	}
2503*ec0cd287SJohn Baldwin 
2504*ec0cd287SJohn Baldwin 	inp = sotoinpcb(so);
2505*ec0cd287SJohn Baldwin 	INP_WLOCK(inp);
2506*ec0cd287SJohn Baldwin 	tp = intotcpcb(inp);
2507*ec0cd287SJohn Baldwin 	if (inp->inp_flags & INP_DROPPED) {
2508*ec0cd287SJohn Baldwin 		INP_WUNLOCK(inp);
2509*ec0cd287SJohn Baldwin 		free(qp->fl_cid_set, M_NVMF_CHE);
2510*ec0cd287SJohn Baldwin 		free(qp->fl_cids, M_NVMF_CHE);
2511*ec0cd287SJohn Baldwin 		free(qp->open_fl_ttags, M_NVMF_CHE);
2512*ec0cd287SJohn Baldwin 		free(qp, M_NVMF_CHE);
2513*ec0cd287SJohn Baldwin 		fdrop(fp, curthread);
2514*ec0cd287SJohn Baldwin 		return (NULL);
2515*ec0cd287SJohn Baldwin 	}
2516*ec0cd287SJohn Baldwin 
2517*ec0cd287SJohn Baldwin 	MPASS(tp->t_flags & TF_TOE);
2518*ec0cd287SJohn Baldwin 	MPASS(tp->tod != NULL);
2519*ec0cd287SJohn Baldwin 	MPASS(tp->t_toe != NULL);
2520*ec0cd287SJohn Baldwin 	toep = tp->t_toe;
2521*ec0cd287SJohn Baldwin 	MPASS(toep->vi->adapter == sc);
2522*ec0cd287SJohn Baldwin 
2523*ec0cd287SJohn Baldwin 	if (ulp_mode(toep) != ULP_MODE_NONE) {
2524*ec0cd287SJohn Baldwin 		INP_WUNLOCK(inp);
2525*ec0cd287SJohn Baldwin 		free(qp->fl_cid_set, M_NVMF_CHE);
2526*ec0cd287SJohn Baldwin 		free(qp->fl_cids, M_NVMF_CHE);
2527*ec0cd287SJohn Baldwin 		free(qp->open_fl_ttags, M_NVMF_CHE);
2528*ec0cd287SJohn Baldwin 		free(qp, M_NVMF_CHE);
2529*ec0cd287SJohn Baldwin 		fdrop(fp, curthread);
2530*ec0cd287SJohn Baldwin 		return (NULL);
2531*ec0cd287SJohn Baldwin 	}
2532*ec0cd287SJohn Baldwin 
2533*ec0cd287SJohn Baldwin 	/* Claim socket from file descriptor. */
2534*ec0cd287SJohn Baldwin 	fp->f_ops = &badfileops;
2535*ec0cd287SJohn Baldwin 	fp->f_data = NULL;
2536*ec0cd287SJohn Baldwin 
2537*ec0cd287SJohn Baldwin 	qp->so = so;
2538*ec0cd287SJohn Baldwin 	qp->toep = toep;
2539*ec0cd287SJohn Baldwin 	qp->nca = nca;
2540*ec0cd287SJohn Baldwin 	refcount_init(&qp->refs, 1);
2541*ec0cd287SJohn Baldwin 
2542*ec0cd287SJohn Baldwin 	/* NB: C2H and H2C headers are the same size. */
2543*ec0cd287SJohn Baldwin 	qp->max_rx_data = pdu_max_data_len(nvl, nca->max_receive_pdu,
2544*ec0cd287SJohn Baldwin 	    sizeof(struct nvme_tcp_c2h_data_hdr), qp->rxpda);
2545*ec0cd287SJohn Baldwin 	qp->max_tx_data = pdu_max_data_len(nvl, nca->max_transmit_pdu,
2546*ec0cd287SJohn Baldwin 	    sizeof(struct nvme_tcp_c2h_data_hdr), qp->txpda);
2547*ec0cd287SJohn Baldwin 	if (!controller) {
2548*ec0cd287SJohn Baldwin 		qp->max_tx_data = min(qp->max_tx_data,
2549*ec0cd287SJohn Baldwin 		    nvlist_get_number(nvl, "maxh2cdata"));
2550*ec0cd287SJohn Baldwin 		qp->max_icd = min(nvlist_get_number(nvl, "max_icd"),
2551*ec0cd287SJohn Baldwin 		    pdu_max_data_len(nvl, nca->max_transmit_pdu,
2552*ec0cd287SJohn Baldwin 		    sizeof(struct nvme_tcp_cmd), qp->txpda));
2553*ec0cd287SJohn Baldwin 	} else {
2554*ec0cd287SJohn Baldwin 		/*
2555*ec0cd287SJohn Baldwin 		 * IOCCSZ represents the size of a logical command
2556*ec0cd287SJohn Baldwin 		 * capsule including the 64 byte SQE and the
2557*ec0cd287SJohn Baldwin 		 * in-capsule data.  Use pdu_max_data_len to compute
2558*ec0cd287SJohn Baldwin 		 * the maximum supported ICD length.
2559*ec0cd287SJohn Baldwin 		 */
2560*ec0cd287SJohn Baldwin 		qp->max_ioccsz = rounddown(pdu_max_data_len(nvl,
2561*ec0cd287SJohn Baldwin 		    nca->max_receive_pdu, sizeof(struct nvme_tcp_cmd),
2562*ec0cd287SJohn Baldwin 		    qp->rxpda), 16) + sizeof(struct nvme_command);
2563*ec0cd287SJohn Baldwin 	}
2564*ec0cd287SJohn Baldwin 
2565*ec0cd287SJohn Baldwin 	ulp_submode = 0;
2566*ec0cd287SJohn Baldwin 	if (qp->header_digests)
2567*ec0cd287SJohn Baldwin 		ulp_submode |= FW_NVMET_ULPSUBMODE_HCRC;
2568*ec0cd287SJohn Baldwin 	if (qp->data_digests)
2569*ec0cd287SJohn Baldwin 		ulp_submode |= FW_NVMET_ULPSUBMODE_DCRC;
2570*ec0cd287SJohn Baldwin 	if (!controller)
2571*ec0cd287SJohn Baldwin 		ulp_submode |= FW_NVMET_ULPSUBMODE_ING_DIR;
2572*ec0cd287SJohn Baldwin 
2573*ec0cd287SJohn Baldwin 	max_tx_pdu_len = sizeof(struct nvme_tcp_h2c_data_hdr);
2574*ec0cd287SJohn Baldwin 	if (qp->header_digests)
2575*ec0cd287SJohn Baldwin 		max_tx_pdu_len += sizeof(uint32_t);
2576*ec0cd287SJohn Baldwin 	max_tx_pdu_len = roundup(max_tx_pdu_len, qp->txpda);
2577*ec0cd287SJohn Baldwin 	max_tx_pdu_len += qp->max_tx_data;
2578*ec0cd287SJohn Baldwin 	if (qp->data_digests)
2579*ec0cd287SJohn Baldwin 		max_tx_pdu_len += sizeof(uint32_t);
2580*ec0cd287SJohn Baldwin 
2581*ec0cd287SJohn Baldwin 	/* TODO: ISO limits */
2582*ec0cd287SJohn Baldwin 
2583*ec0cd287SJohn Baldwin 	if (controller) {
2584*ec0cd287SJohn Baldwin 		/* Use the SUCCESS flag if SQ flow control is disabled. */
2585*ec0cd287SJohn Baldwin 		qp->send_success = !nvlist_get_bool(nvl, "sq_flow_control");
2586*ec0cd287SJohn Baldwin 	}
2587*ec0cd287SJohn Baldwin 
2588*ec0cd287SJohn Baldwin 	toep->params.ulp_mode = ULP_MODE_NVMET;
2589*ec0cd287SJohn Baldwin 	toep->ulpcb = qp;
2590*ec0cd287SJohn Baldwin 
2591*ec0cd287SJohn Baldwin 	send_txdataplen_max_flowc_wr(sc, toep,
2592*ec0cd287SJohn Baldwin 	    roundup(/* max_iso_pdus * */ max_tx_pdu_len, tp->t_maxseg));
2593*ec0cd287SJohn Baldwin 	set_ulp_mode_nvme(toep, ulp_submode, qp->rxpda);
2594*ec0cd287SJohn Baldwin 	INP_WUNLOCK(inp);
2595*ec0cd287SJohn Baldwin 
2596*ec0cd287SJohn Baldwin 	fdrop(fp, curthread);
2597*ec0cd287SJohn Baldwin 
2598*ec0cd287SJohn Baldwin 	error = nvmf_che_setsockopt(so, max_tx_pdu_len, nca->max_receive_pdu);
2599*ec0cd287SJohn Baldwin 	if (error != 0) {
2600*ec0cd287SJohn Baldwin 		free(qp->fl_cid_set, M_NVMF_CHE);
2601*ec0cd287SJohn Baldwin 		free(qp->fl_cids, M_NVMF_CHE);
2602*ec0cd287SJohn Baldwin 		free(qp->open_fl_ttags, M_NVMF_CHE);
2603*ec0cd287SJohn Baldwin 		free(qp, M_NVMF_CHE);
2604*ec0cd287SJohn Baldwin 		return (NULL);
2605*ec0cd287SJohn Baldwin 	}
2606*ec0cd287SJohn Baldwin 
2607*ec0cd287SJohn Baldwin 	num_ddp_tags = ddp_tags_per_qp;
2608*ec0cd287SJohn Baldwin 	if (num_ddp_tags > 0) {
2609*ec0cd287SJohn Baldwin 		qp->tpt_offset = t4_stag_alloc(sc, num_ddp_tags);
2610*ec0cd287SJohn Baldwin 		if (qp->tpt_offset != T4_STAG_UNSET) {
2611*ec0cd287SJohn Baldwin #ifdef VERBOSE_TRACES
2612*ec0cd287SJohn Baldwin 			CTR(KTR_CXGBE,
2613*ec0cd287SJohn Baldwin 			    "%s: tid %u using %u tags at offset 0x%x",
2614*ec0cd287SJohn Baldwin 			    __func__, toep->tid, num_ddp_tags, qp->tpt_offset);
2615*ec0cd287SJohn Baldwin #endif
2616*ec0cd287SJohn Baldwin 			qp->num_ddp_tags = num_ddp_tags;
2617*ec0cd287SJohn Baldwin 			qp->open_ddp_tags = mallocarray(qp->num_ddp_tags,
2618*ec0cd287SJohn Baldwin 			    sizeof(*qp->open_ddp_tags), M_NVMF_CHE, M_WAITOK |
2619*ec0cd287SJohn Baldwin 			    M_ZERO);
2620*ec0cd287SJohn Baldwin 
2621*ec0cd287SJohn Baldwin 			t4_nvme_set_tcb_field(toep, W_TCB_TPT_OFFSET,
2622*ec0cd287SJohn Baldwin 			    M_TCB_TPT_OFFSET, V_TCB_TPT_OFFSET(qp->tpt_offset));
2623*ec0cd287SJohn Baldwin 		}
2624*ec0cd287SJohn Baldwin 	}
2625*ec0cd287SJohn Baldwin 
2626*ec0cd287SJohn Baldwin 	TAILQ_INIT(&qp->rx_buffers.head);
2627*ec0cd287SJohn Baldwin 	TAILQ_INIT(&qp->tx_buffers.head);
2628*ec0cd287SJohn Baldwin 	mtx_init(&qp->rx_buffers.lock, "nvmf/che rx buffers", NULL, MTX_DEF);
2629*ec0cd287SJohn Baldwin 	mtx_init(&qp->tx_buffers.lock, "nvmf/che tx buffers", NULL, MTX_DEF);
2630*ec0cd287SJohn Baldwin 
2631*ec0cd287SJohn Baldwin 	cv_init(&qp->rx_cv, "-");
2632*ec0cd287SJohn Baldwin 	cv_init(&qp->tx_cv, "-");
2633*ec0cd287SJohn Baldwin 	mbufq_init(&qp->rx_data, 0);
2634*ec0cd287SJohn Baldwin 	mbufq_init(&qp->rx_pdus, 0);
2635*ec0cd287SJohn Baldwin 	STAILQ_INIT(&qp->tx_capsules);
2636*ec0cd287SJohn Baldwin 
2637*ec0cd287SJohn Baldwin 	/* Register socket upcall for receive to handle remote FIN. */
2638*ec0cd287SJohn Baldwin 	SOCKBUF_LOCK(&so->so_rcv);
2639*ec0cd287SJohn Baldwin 	soupcall_set(so, SO_RCV, nvmf_che_soupcall_receive, qp);
2640*ec0cd287SJohn Baldwin 	SOCKBUF_UNLOCK(&so->so_rcv);
2641*ec0cd287SJohn Baldwin 
2642*ec0cd287SJohn Baldwin 	/* Spin up kthreads. */
2643*ec0cd287SJohn Baldwin 	error = kthread_add(nvmf_che_receive, qp, NULL, &qp->rx_thread, 0, 0,
2644*ec0cd287SJohn Baldwin 	    "nvmef che rx");
2645*ec0cd287SJohn Baldwin 	if (error != 0) {
2646*ec0cd287SJohn Baldwin 		che_free_qpair(&qp->qp);
2647*ec0cd287SJohn Baldwin 		return (NULL);
2648*ec0cd287SJohn Baldwin 	}
2649*ec0cd287SJohn Baldwin 	error = kthread_add(nvmf_che_send, qp, NULL, &qp->tx_thread, 0, 0,
2650*ec0cd287SJohn Baldwin 	    "nvmef che tx");
2651*ec0cd287SJohn Baldwin 	if (error != 0) {
2652*ec0cd287SJohn Baldwin 		che_free_qpair(&qp->qp);
2653*ec0cd287SJohn Baldwin 		return (NULL);
2654*ec0cd287SJohn Baldwin 	}
2655*ec0cd287SJohn Baldwin 
2656*ec0cd287SJohn Baldwin 	return (&qp->qp);
2657*ec0cd287SJohn Baldwin }
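/*
 * che_allocate_qpair() above adopts a TOE-offloaded TCP socket passed
 * in as a file descriptor and turns it into an offloaded NVMe queue
 * pair: it validates the negotiated parameters against the adapter's
 * PDU limits, claims the socket from the file, switches the connection
 * to ULP_MODE_NVMET, sizes the socket buffers, optionally reserves a
 * range of DDP tags (STAGs), and finally starts the rx/tx kthreads.
 * Failures once the kthreads are being started are funneled through
 * che_free_qpair() so teardown follows a single path.
 */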
2658*ec0cd287SJohn Baldwin 
2659*ec0cd287SJohn Baldwin static void
2660*ec0cd287SJohn Baldwin che_release_qpair(struct nvmf_che_qpair *qp)
2661*ec0cd287SJohn Baldwin {
2662*ec0cd287SJohn Baldwin 	if (refcount_release(&qp->refs))
2663*ec0cd287SJohn Baldwin 		free(qp, M_NVMF_CHE);
2664*ec0cd287SJohn Baldwin }
2665*ec0cd287SJohn Baldwin 
2666*ec0cd287SJohn Baldwin static void
2667*ec0cd287SJohn Baldwin che_free_qpair(struct nvmf_qpair *nq)
2668*ec0cd287SJohn Baldwin {
2669*ec0cd287SJohn Baldwin 	struct nvmf_che_qpair *qp = CQP(nq);
2670*ec0cd287SJohn Baldwin 	struct nvmf_che_command_buffer *ncb, *cb;
2671*ec0cd287SJohn Baldwin 	struct nvmf_che_capsule *ncc, *cc;
2672*ec0cd287SJohn Baldwin 	struct socket *so = qp->so;
2673*ec0cd287SJohn Baldwin 	struct toepcb *toep = qp->toep;
2674*ec0cd287SJohn Baldwin 	struct inpcb *inp = sotoinpcb(so);
2675*ec0cd287SJohn Baldwin 
2676*ec0cd287SJohn Baldwin 	/* Shut down kthreads. */
2677*ec0cd287SJohn Baldwin 	SOCKBUF_LOCK(&so->so_snd);
2678*ec0cd287SJohn Baldwin 	qp->tx_shutdown = true;
2679*ec0cd287SJohn Baldwin 	if (qp->tx_thread != NULL) {
2680*ec0cd287SJohn Baldwin 		cv_signal(&qp->tx_cv);
2681*ec0cd287SJohn Baldwin 		mtx_sleep(qp->tx_thread, SOCKBUF_MTX(&so->so_snd), 0,
2682*ec0cd287SJohn Baldwin 		    "nvchetx", 0);
2683*ec0cd287SJohn Baldwin 	}
2684*ec0cd287SJohn Baldwin 	SOCKBUF_UNLOCK(&so->so_snd);
2685*ec0cd287SJohn Baldwin 
2686*ec0cd287SJohn Baldwin 	SOCKBUF_LOCK(&so->so_rcv);
2687*ec0cd287SJohn Baldwin 	qp->rx_shutdown = true;
2688*ec0cd287SJohn Baldwin 	if (qp->rx_thread != NULL) {
2689*ec0cd287SJohn Baldwin 		cv_signal(&qp->rx_cv);
2690*ec0cd287SJohn Baldwin 		mtx_sleep(qp->rx_thread, SOCKBUF_MTX(&so->so_rcv), 0,
2691*ec0cd287SJohn Baldwin 		    "nvcherx", 0);
2692*ec0cd287SJohn Baldwin 	}
2693*ec0cd287SJohn Baldwin 	soupcall_clear(so, SO_RCV);
2694*ec0cd287SJohn Baldwin 	SOCKBUF_UNLOCK(&so->so_rcv);
2695*ec0cd287SJohn Baldwin 	mbufq_drain(&qp->rx_data);
2696*ec0cd287SJohn Baldwin 	mbufq_drain(&qp->rx_pdus);
2697*ec0cd287SJohn Baldwin 
2698*ec0cd287SJohn Baldwin 	STAILQ_FOREACH_SAFE(cc, &qp->tx_capsules, link, ncc) {
2699*ec0cd287SJohn Baldwin 		nvmf_abort_capsule_data(&cc->nc, ECONNABORTED);
2700*ec0cd287SJohn Baldwin 		che_release_capsule(cc);
2701*ec0cd287SJohn Baldwin 	}
2702*ec0cd287SJohn Baldwin 
2703*ec0cd287SJohn Baldwin 	cv_destroy(&qp->tx_cv);
2704*ec0cd287SJohn Baldwin 	cv_destroy(&qp->rx_cv);
2705*ec0cd287SJohn Baldwin 
2706*ec0cd287SJohn Baldwin 	if (qp->open_fl_ttags != NULL) {
2707*ec0cd287SJohn Baldwin 		for (u_int i = 0; i < qp->num_fl_ttags; i++) {
2708*ec0cd287SJohn Baldwin 			cb = qp->open_fl_ttags[i];
2709*ec0cd287SJohn Baldwin 			if (cb != NULL) {
2710*ec0cd287SJohn Baldwin 				cb->cc->active_r2ts--;
2711*ec0cd287SJohn Baldwin 				cb->error = ECONNABORTED;
2712*ec0cd287SJohn Baldwin 				che_release_command_buffer(cb);
2713*ec0cd287SJohn Baldwin 			}
2714*ec0cd287SJohn Baldwin 		}
2715*ec0cd287SJohn Baldwin 		free(qp->open_fl_ttags, M_NVMF_CHE);
2716*ec0cd287SJohn Baldwin 	}
2717*ec0cd287SJohn Baldwin 	if (qp->num_ddp_tags != 0) {
2718*ec0cd287SJohn Baldwin 		for (u_int i = 0; i < qp->num_ddp_tags; i++) {
2719*ec0cd287SJohn Baldwin 			cb = qp->open_ddp_tags[i];
2720*ec0cd287SJohn Baldwin 			if (cb != NULL) {
2721*ec0cd287SJohn Baldwin 				if (cb->cc != NULL)
2722*ec0cd287SJohn Baldwin 					cb->cc->active_r2ts--;
2723*ec0cd287SJohn Baldwin 				cb->error = ECONNABORTED;
2724*ec0cd287SJohn Baldwin 				mtx_lock(&qp->rx_buffers.lock);
2725*ec0cd287SJohn Baldwin 				che_free_ddp_tag(qp, cb, cb->ttag);
2726*ec0cd287SJohn Baldwin 				mtx_unlock(&qp->rx_buffers.lock);
2727*ec0cd287SJohn Baldwin 				che_release_command_buffer(cb);
2728*ec0cd287SJohn Baldwin 			}
2729*ec0cd287SJohn Baldwin 		}
2730*ec0cd287SJohn Baldwin 		free(qp->open_ddp_tags, M_NVMF_CHE);
2731*ec0cd287SJohn Baldwin 	}
2732*ec0cd287SJohn Baldwin 
2733*ec0cd287SJohn Baldwin 	mtx_lock(&qp->rx_buffers.lock);
2734*ec0cd287SJohn Baldwin 	TAILQ_FOREACH_SAFE(cb, &qp->rx_buffers.head, link, ncb) {
2735*ec0cd287SJohn Baldwin 		che_remove_command_buffer(&qp->rx_buffers, cb);
2736*ec0cd287SJohn Baldwin 		mtx_unlock(&qp->rx_buffers.lock);
2737*ec0cd287SJohn Baldwin #ifdef INVARIANTS
2738*ec0cd287SJohn Baldwin 		if (cb->cc != NULL)
2739*ec0cd287SJohn Baldwin 			cb->cc->pending_r2ts--;
2740*ec0cd287SJohn Baldwin #endif
2741*ec0cd287SJohn Baldwin 		cb->error = ECONNABORTED;
2742*ec0cd287SJohn Baldwin 		che_release_command_buffer(cb);
2743*ec0cd287SJohn Baldwin 		mtx_lock(&qp->rx_buffers.lock);
2744*ec0cd287SJohn Baldwin 	}
2745*ec0cd287SJohn Baldwin 	mtx_destroy(&qp->rx_buffers.lock);
2746*ec0cd287SJohn Baldwin 
2747*ec0cd287SJohn Baldwin 	mtx_lock(&qp->tx_buffers.lock);
2748*ec0cd287SJohn Baldwin 	TAILQ_FOREACH_SAFE(cb, &qp->tx_buffers.head, link, ncb) {
2749*ec0cd287SJohn Baldwin 		che_remove_command_buffer(&qp->tx_buffers, cb);
2750*ec0cd287SJohn Baldwin 		mtx_unlock(&qp->tx_buffers.lock);
2751*ec0cd287SJohn Baldwin 		cb->error = ECONNABORTED;
2752*ec0cd287SJohn Baldwin 		che_release_command_buffer(cb);
2753*ec0cd287SJohn Baldwin 		mtx_lock(&qp->tx_buffers.lock);
2754*ec0cd287SJohn Baldwin 	}
2755*ec0cd287SJohn Baldwin 	mtx_destroy(&qp->tx_buffers.lock);
2756*ec0cd287SJohn Baldwin 
2757*ec0cd287SJohn Baldwin 	if (qp->num_ddp_tags != 0)
2758*ec0cd287SJohn Baldwin 		t4_stag_free(qp->nca->sc, qp->tpt_offset, qp->num_ddp_tags);
2759*ec0cd287SJohn Baldwin 
2760*ec0cd287SJohn Baldwin 	if (!qp->qp.nq_controller) {
2761*ec0cd287SJohn Baldwin 		free(qp->fl_cids, M_NVMF_CHE);
2762*ec0cd287SJohn Baldwin 		free(qp->fl_cid_set, M_NVMF_CHE);
2763*ec0cd287SJohn Baldwin 		mtx_destroy(&qp->fl_cid_lock);
2764*ec0cd287SJohn Baldwin 	}
2765*ec0cd287SJohn Baldwin 
2766*ec0cd287SJohn Baldwin 	INP_WLOCK(inp);
2767*ec0cd287SJohn Baldwin 	toep->ulpcb = NULL;
2768*ec0cd287SJohn Baldwin 	mbufq_drain(&toep->ulp_pduq);
2769*ec0cd287SJohn Baldwin 
2770*ec0cd287SJohn Baldwin 	/*
2771*ec0cd287SJohn Baldwin 	 * Grab a reference to use when waiting for the final CPL to
2772*ec0cd287SJohn Baldwin 	 * be received.  If toep->inp is NULL, then
2773*ec0cd287SJohn Baldwin 	 * final_cpl_received() has already been called (e.g.  due to
2774*ec0cd287SJohn Baldwin 	 * the peer sending a RST).
2775*ec0cd287SJohn Baldwin 	 */
2776*ec0cd287SJohn Baldwin 	if (toep->inp != NULL) {
2777*ec0cd287SJohn Baldwin 		toep = hold_toepcb(toep);
2778*ec0cd287SJohn Baldwin 		toep->flags |= TPF_WAITING_FOR_FINAL;
2779*ec0cd287SJohn Baldwin 	} else
2780*ec0cd287SJohn Baldwin 		toep = NULL;
2781*ec0cd287SJohn Baldwin 	INP_WUNLOCK(inp);
2782*ec0cd287SJohn Baldwin 
2783*ec0cd287SJohn Baldwin 	soclose(so);
2784*ec0cd287SJohn Baldwin 
2785*ec0cd287SJohn Baldwin 	/*
2786*ec0cd287SJohn Baldwin 	 * Wait for the socket to fully close.  This ensures any
2787*ec0cd287SJohn Baldwin 	 * pending received data has been received (and in particular,
2788*ec0cd287SJohn Baldwin 	 * any data that would be received by DDP has been handled).
2789*ec0cd287SJohn Baldwin 	 */
2790*ec0cd287SJohn Baldwin 	if (toep != NULL) {
2791*ec0cd287SJohn Baldwin 		struct mtx *lock = mtx_pool_find(mtxpool_sleep, toep);
2792*ec0cd287SJohn Baldwin 
2793*ec0cd287SJohn Baldwin 		mtx_lock(lock);
2794*ec0cd287SJohn Baldwin 		while ((toep->flags & TPF_WAITING_FOR_FINAL) != 0)
2795*ec0cd287SJohn Baldwin 			mtx_sleep(toep, lock, PSOCK, "conclo2", 0);
2796*ec0cd287SJohn Baldwin 		mtx_unlock(lock);
2797*ec0cd287SJohn Baldwin 		free_toepcb(toep);
2798*ec0cd287SJohn Baldwin 	}
2799*ec0cd287SJohn Baldwin 
2800*ec0cd287SJohn Baldwin 	che_release_qpair(qp);
2801*ec0cd287SJohn Baldwin }
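/*
 * Teardown order in che_free_qpair() above: stop the tx and rx
 * kthreads under their respective socket buffer locks, clear the
 * receive upcall, drain queued mbufs, fail outstanding capsules and
 * command buffers with ECONNABORTED, return any DDP tag range, detach
 * the qpair from the toepcb, and then close the socket.  The
 * TPF_WAITING_FOR_FINAL wait ensures the final CPL for the connection
 * has been processed (so no DDP placement is still in flight) before
 * the last qpair reference is dropped.
 */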
2802*ec0cd287SJohn Baldwin 
2803*ec0cd287SJohn Baldwin static uint32_t
2804*ec0cd287SJohn Baldwin che_max_ioccsz(struct nvmf_qpair *nq)
2805*ec0cd287SJohn Baldwin {
2806*ec0cd287SJohn Baldwin 	struct nvmf_che_qpair *qp = CQP(nq);
2807*ec0cd287SJohn Baldwin 
2808*ec0cd287SJohn Baldwin 	/*
2809*ec0cd287SJohn Baldwin 	 * Limit the command capsule size so that with maximum ICD it
2810*ec0cd287SJohn Baldwin 	 * fits within the limit of the largest PDU the adapter can
2811*ec0cd287SJohn Baldwin 	 * receive.
2812*ec0cd287SJohn Baldwin 	 */
2813*ec0cd287SJohn Baldwin 	return (qp->max_ioccsz);
2814*ec0cd287SJohn Baldwin }
2815*ec0cd287SJohn Baldwin 
2816*ec0cd287SJohn Baldwin static uint64_t
2817*ec0cd287SJohn Baldwin che_max_xfer_size(struct nvmf_qpair *nq)
2818*ec0cd287SJohn Baldwin {
2819*ec0cd287SJohn Baldwin 	struct nvmf_che_qpair *qp = CQP(nq);
2820*ec0cd287SJohn Baldwin 
2821*ec0cd287SJohn Baldwin 	/*
2822*ec0cd287SJohn Baldwin 	 * Limit host transfers to the size of the data payload in the
2823*ec0cd287SJohn Baldwin 	 * largest PDU the adapter can receive.
2824*ec0cd287SJohn Baldwin 	 */
2825*ec0cd287SJohn Baldwin 	return (qp->max_rx_data);
2826*ec0cd287SJohn Baldwin }
2827*ec0cd287SJohn Baldwin 
2828*ec0cd287SJohn Baldwin static struct nvmf_capsule *
2829*ec0cd287SJohn Baldwin che_allocate_capsule(struct nvmf_qpair *nq, int how)
2830*ec0cd287SJohn Baldwin {
2831*ec0cd287SJohn Baldwin 	struct nvmf_che_qpair *qp = CQP(nq);
2832*ec0cd287SJohn Baldwin 	struct nvmf_che_capsule *cc;
2833*ec0cd287SJohn Baldwin 
2834*ec0cd287SJohn Baldwin 	cc = malloc(sizeof(*cc), M_NVMF_CHE, how | M_ZERO);
2835*ec0cd287SJohn Baldwin 	if (cc == NULL)
2836*ec0cd287SJohn Baldwin 		return (NULL);
2837*ec0cd287SJohn Baldwin 	refcount_init(&cc->refs, 1);
2838*ec0cd287SJohn Baldwin 	refcount_acquire(&qp->refs);
2839*ec0cd287SJohn Baldwin 	return (&cc->nc);
2840*ec0cd287SJohn Baldwin }
2841*ec0cd287SJohn Baldwin 
2842*ec0cd287SJohn Baldwin static void
2843*ec0cd287SJohn Baldwin che_release_capsule(struct nvmf_che_capsule *cc)
2844*ec0cd287SJohn Baldwin {
2845*ec0cd287SJohn Baldwin 	struct nvmf_che_qpair *qp = CQP(cc->nc.nc_qpair);
2846*ec0cd287SJohn Baldwin 
2847*ec0cd287SJohn Baldwin 	if (!refcount_release(&cc->refs))
2848*ec0cd287SJohn Baldwin 		return;
2849*ec0cd287SJohn Baldwin 
2850*ec0cd287SJohn Baldwin 	MPASS(cc->active_r2ts == 0);
2851*ec0cd287SJohn Baldwin 	MPASS(cc->pending_r2ts == 0);
2852*ec0cd287SJohn Baldwin 
2853*ec0cd287SJohn Baldwin 	nvmf_che_free_pdu(&cc->rx_pdu);
2854*ec0cd287SJohn Baldwin 	free(cc, M_NVMF_CHE);
2855*ec0cd287SJohn Baldwin 	che_release_qpair(qp);
2856*ec0cd287SJohn Baldwin }
2857*ec0cd287SJohn Baldwin 
2858*ec0cd287SJohn Baldwin static void
2859*ec0cd287SJohn Baldwin che_free_capsule(struct nvmf_capsule *nc)
2860*ec0cd287SJohn Baldwin {
2861*ec0cd287SJohn Baldwin 	che_release_capsule(CCAP(nc));
2862*ec0cd287SJohn Baldwin }
2863*ec0cd287SJohn Baldwin 
2864*ec0cd287SJohn Baldwin static int
2865*ec0cd287SJohn Baldwin che_transmit_capsule(struct nvmf_capsule *nc)
2866*ec0cd287SJohn Baldwin {
2867*ec0cd287SJohn Baldwin 	struct nvmf_che_qpair *qp = CQP(nc->nc_qpair);
2868*ec0cd287SJohn Baldwin 	struct nvmf_che_capsule *cc = CCAP(nc);
2869*ec0cd287SJohn Baldwin 	struct socket *so = qp->so;
2870*ec0cd287SJohn Baldwin 
2871*ec0cd287SJohn Baldwin 	refcount_acquire(&cc->refs);
2872*ec0cd287SJohn Baldwin 	SOCKBUF_LOCK(&so->so_snd);
2873*ec0cd287SJohn Baldwin 	STAILQ_INSERT_TAIL(&qp->tx_capsules, cc, link);
2874*ec0cd287SJohn Baldwin 	cv_signal(&qp->tx_cv);
2875*ec0cd287SJohn Baldwin 	SOCKBUF_UNLOCK(&so->so_snd);
2876*ec0cd287SJohn Baldwin 	return (0);
2877*ec0cd287SJohn Baldwin }
2878*ec0cd287SJohn Baldwin 
2879*ec0cd287SJohn Baldwin static uint8_t
2880*ec0cd287SJohn Baldwin che_validate_command_capsule(struct nvmf_capsule *nc)
2881*ec0cd287SJohn Baldwin {
2882*ec0cd287SJohn Baldwin 	struct nvmf_che_capsule *cc = CCAP(nc);
2883*ec0cd287SJohn Baldwin 	struct nvme_sgl_descriptor *sgl;
2884*ec0cd287SJohn Baldwin 
2885*ec0cd287SJohn Baldwin 	KASSERT(cc->rx_pdu.hdr != NULL, ("capsule wasn't received"));
2886*ec0cd287SJohn Baldwin 
2887*ec0cd287SJohn Baldwin 	sgl = &nc->nc_sqe.sgl;
2888*ec0cd287SJohn Baldwin 	switch (sgl->type) {
2889*ec0cd287SJohn Baldwin 	case NVME_SGL_TYPE_ICD:
2890*ec0cd287SJohn Baldwin 		if (cc->rx_pdu.data_len != le32toh(sgl->length)) {
2891*ec0cd287SJohn Baldwin 			printf("NVMe/TCP: Command Capsule with mismatched ICD length\n");
2892*ec0cd287SJohn Baldwin 			return (NVME_SC_DATA_SGL_LENGTH_INVALID);
2893*ec0cd287SJohn Baldwin 		}
2894*ec0cd287SJohn Baldwin 		break;
2895*ec0cd287SJohn Baldwin 	case NVME_SGL_TYPE_COMMAND_BUFFER:
2896*ec0cd287SJohn Baldwin 		if (cc->rx_pdu.data_len != 0) {
2897*ec0cd287SJohn Baldwin 			printf("NVMe/TCP: Command Buffer SGL with ICD\n");
2898*ec0cd287SJohn Baldwin 			return (NVME_SC_INVALID_FIELD);
2899*ec0cd287SJohn Baldwin 		}
2900*ec0cd287SJohn Baldwin 		break;
2901*ec0cd287SJohn Baldwin 	default:
2902*ec0cd287SJohn Baldwin 		printf("NVMe/TCP: Invalid SGL type in Command Capsule\n");
2903*ec0cd287SJohn Baldwin 		return (NVME_SC_SGL_DESCRIPTOR_TYPE_INVALID);
2904*ec0cd287SJohn Baldwin 	}
2905*ec0cd287SJohn Baldwin 
2906*ec0cd287SJohn Baldwin 	if (sgl->address != 0) {
2907*ec0cd287SJohn Baldwin 		printf("NVMe/TCP: Invalid SGL offset in Command Capsule\n");
2908*ec0cd287SJohn Baldwin 		return (NVME_SC_SGL_OFFSET_INVALID);
2909*ec0cd287SJohn Baldwin 	}
2910*ec0cd287SJohn Baldwin 
2911*ec0cd287SJohn Baldwin 	return (NVME_SC_SUCCESS);
2912*ec0cd287SJohn Baldwin }
2913*ec0cd287SJohn Baldwin 
2914*ec0cd287SJohn Baldwin static size_t
2915*ec0cd287SJohn Baldwin che_capsule_data_len(const struct nvmf_capsule *nc)
2916*ec0cd287SJohn Baldwin {
2917*ec0cd287SJohn Baldwin 	MPASS(nc->nc_qe_len == sizeof(struct nvme_command));
2918*ec0cd287SJohn Baldwin 	return (le32toh(nc->nc_sqe.sgl.length));
2919*ec0cd287SJohn Baldwin }
2920*ec0cd287SJohn Baldwin 
2921*ec0cd287SJohn Baldwin static void
2922*ec0cd287SJohn Baldwin che_receive_r2t_data(struct nvmf_capsule *nc, uint32_t data_offset,
2923*ec0cd287SJohn Baldwin     struct nvmf_io_request *io)
2924*ec0cd287SJohn Baldwin {
2925*ec0cd287SJohn Baldwin 	struct nvmf_che_qpair *qp = CQP(nc->nc_qpair);
2926*ec0cd287SJohn Baldwin 	struct nvmf_che_capsule *cc = CCAP(nc);
2927*ec0cd287SJohn Baldwin 	struct nvmf_che_command_buffer *cb;
2928*ec0cd287SJohn Baldwin 
2929*ec0cd287SJohn Baldwin 	cb = che_alloc_command_buffer(qp, io, data_offset, io->io_len,
2930*ec0cd287SJohn Baldwin 	    nc->nc_sqe.cid);
2931*ec0cd287SJohn Baldwin 
2932*ec0cd287SJohn Baldwin 	cb->cc = cc;
2933*ec0cd287SJohn Baldwin 	refcount_acquire(&cc->refs);
2934*ec0cd287SJohn Baldwin 
2935*ec0cd287SJohn Baldwin 	/*
2936*ec0cd287SJohn Baldwin 	 * If this command has too many active R2Ts or there are no
2937*ec0cd287SJohn Baldwin 	 * available transfer tags, queue the request for later.
2938*ec0cd287SJohn Baldwin 	 *
2939*ec0cd287SJohn Baldwin 	 * NB: maxr2t is 0's based.
2940*ec0cd287SJohn Baldwin 	 */
2941*ec0cd287SJohn Baldwin 	mtx_lock(&qp->rx_buffers.lock);
2942*ec0cd287SJohn Baldwin 	if (cc->active_r2ts > qp->maxr2t ||
2943*ec0cd287SJohn Baldwin 	    !nvmf_che_allocate_ttag(qp, cb)) {
2944*ec0cd287SJohn Baldwin #ifdef INVARIANTS
2945*ec0cd287SJohn Baldwin 		cc->pending_r2ts++;
2946*ec0cd287SJohn Baldwin #endif
2947*ec0cd287SJohn Baldwin 		TAILQ_INSERT_TAIL(&qp->rx_buffers.head, cb, link);
2948*ec0cd287SJohn Baldwin 		mtx_unlock(&qp->rx_buffers.lock);
2949*ec0cd287SJohn Baldwin 		return;
2950*ec0cd287SJohn Baldwin 	}
2951*ec0cd287SJohn Baldwin 	mtx_unlock(&qp->rx_buffers.lock);
2952*ec0cd287SJohn Baldwin 
2953*ec0cd287SJohn Baldwin 	che_send_r2t(qp, nc->nc_sqe.cid, cb->ttag, data_offset, io->io_len);
2954*ec0cd287SJohn Baldwin }
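/*
 * che_receive_r2t_data() above implements controller-side R2T flow
 * control: each command may have at most maxr2t + 1 R2Ts outstanding
 * (maxr2t is zero-based) and a transfer tag must be available.  When
 * either limit is hit, the command buffer is parked on qp->rx_buffers
 * and the R2T is sent later, presumably when an earlier transfer frees
 * a tag; otherwise che_send_r2t() asks the host for the data right
 * away.
 */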
2955*ec0cd287SJohn Baldwin 
2956*ec0cd287SJohn Baldwin static void
2957*ec0cd287SJohn Baldwin che_receive_icd_data(struct nvmf_capsule *nc, uint32_t data_offset,
2958*ec0cd287SJohn Baldwin     struct nvmf_io_request *io)
2959*ec0cd287SJohn Baldwin {
2960*ec0cd287SJohn Baldwin 	struct nvmf_che_capsule *cc = CCAP(nc);
2961*ec0cd287SJohn Baldwin 
2962*ec0cd287SJohn Baldwin 	/*
2963*ec0cd287SJohn Baldwin 	 * The header is in rx_pdu.m, the padding is discarded, and
2964*ec0cd287SJohn Baldwin 	 * the data starts at rx_pdu.m->m_next.
2965*ec0cd287SJohn Baldwin 	 */
2966*ec0cd287SJohn Baldwin 	mbuf_copyto_io(cc->rx_pdu.m->m_next, data_offset, io->io_len, io, 0);
2967*ec0cd287SJohn Baldwin 	nvmf_complete_io_request(io, io->io_len, 0);
2968*ec0cd287SJohn Baldwin }
2969*ec0cd287SJohn Baldwin 
2970*ec0cd287SJohn Baldwin static int
2971*ec0cd287SJohn Baldwin che_receive_controller_data(struct nvmf_capsule *nc, uint32_t data_offset,
2972*ec0cd287SJohn Baldwin     struct nvmf_io_request *io)
2973*ec0cd287SJohn Baldwin {
2974*ec0cd287SJohn Baldwin 	struct nvme_sgl_descriptor *sgl;
2975*ec0cd287SJohn Baldwin 	size_t data_len;
2976*ec0cd287SJohn Baldwin 
2977*ec0cd287SJohn Baldwin 	if (nc->nc_qe_len != sizeof(struct nvme_command) ||
2978*ec0cd287SJohn Baldwin 	    !nc->nc_qpair->nq_controller)
2979*ec0cd287SJohn Baldwin 		return (EINVAL);
2980*ec0cd287SJohn Baldwin 
2981*ec0cd287SJohn Baldwin 	sgl = &nc->nc_sqe.sgl;
2982*ec0cd287SJohn Baldwin 	data_len = le32toh(sgl->length);
2983*ec0cd287SJohn Baldwin 	if (data_offset + io->io_len > data_len)
2984*ec0cd287SJohn Baldwin 		return (EFBIG);
2985*ec0cd287SJohn Baldwin 
2986*ec0cd287SJohn Baldwin 	if (sgl->type == NVME_SGL_TYPE_ICD)
2987*ec0cd287SJohn Baldwin 		che_receive_icd_data(nc, data_offset, io);
2988*ec0cd287SJohn Baldwin 	else
2989*ec0cd287SJohn Baldwin 		che_receive_r2t_data(nc, data_offset, io);
2990*ec0cd287SJohn Baldwin 	return (0);
2991*ec0cd287SJohn Baldwin }
2992*ec0cd287SJohn Baldwin 
2993*ec0cd287SJohn Baldwin /* NB: cid is little-endian already. */
2994*ec0cd287SJohn Baldwin static void
2995*ec0cd287SJohn Baldwin che_send_c2h_pdu(struct nvmf_che_qpair *qp, uint16_t cid, uint32_t data_offset,
2996*ec0cd287SJohn Baldwin     struct mbuf *m, size_t len, bool last_pdu, bool success)
2997*ec0cd287SJohn Baldwin {
2998*ec0cd287SJohn Baldwin 	struct nvme_tcp_c2h_data_hdr c2h;
2999*ec0cd287SJohn Baldwin 	struct mbuf *top;
3000*ec0cd287SJohn Baldwin 
3001*ec0cd287SJohn Baldwin 	memset(&c2h, 0, sizeof(c2h));
3002*ec0cd287SJohn Baldwin 	c2h.common.pdu_type = NVME_TCP_PDU_TYPE_C2H_DATA;
3003*ec0cd287SJohn Baldwin 	if (last_pdu)
3004*ec0cd287SJohn Baldwin 		c2h.common.flags |= NVME_TCP_C2H_DATA_FLAGS_LAST_PDU;
3005*ec0cd287SJohn Baldwin 	if (success)
3006*ec0cd287SJohn Baldwin 		c2h.common.flags |= NVME_TCP_C2H_DATA_FLAGS_SUCCESS;
3007*ec0cd287SJohn Baldwin 	c2h.cccid = cid;
3008*ec0cd287SJohn Baldwin 	c2h.datao = htole32(data_offset);
3009*ec0cd287SJohn Baldwin 	c2h.datal = htole32(len);
3010*ec0cd287SJohn Baldwin 
3011*ec0cd287SJohn Baldwin 	top = nvmf_che_construct_pdu(qp, &c2h, sizeof(c2h), m, len);
3012*ec0cd287SJohn Baldwin 	nvmf_che_write_pdu(qp, top);
3013*ec0cd287SJohn Baldwin }
3014*ec0cd287SJohn Baldwin 
3015*ec0cd287SJohn Baldwin static u_int
3016*ec0cd287SJohn Baldwin che_send_controller_data(struct nvmf_capsule *nc, uint32_t data_offset,
3017*ec0cd287SJohn Baldwin     struct mbuf *m, size_t len)
3018*ec0cd287SJohn Baldwin {
3019*ec0cd287SJohn Baldwin 	struct nvmf_che_qpair *qp = CQP(nc->nc_qpair);
3020*ec0cd287SJohn Baldwin 	struct nvme_sgl_descriptor *sgl;
3021*ec0cd287SJohn Baldwin 	uint32_t data_len;
3022*ec0cd287SJohn Baldwin 	bool last_pdu, last_xfer;
3023*ec0cd287SJohn Baldwin 
3024*ec0cd287SJohn Baldwin 	if (nc->nc_qe_len != sizeof(struct nvme_command) ||
3025*ec0cd287SJohn Baldwin 	    !qp->qp.nq_controller) {
3026*ec0cd287SJohn Baldwin 		m_freem(m);
3027*ec0cd287SJohn Baldwin 		return (NVME_SC_INVALID_FIELD);
3028*ec0cd287SJohn Baldwin 	}
3029*ec0cd287SJohn Baldwin 
3030*ec0cd287SJohn Baldwin 	sgl = &nc->nc_sqe.sgl;
3031*ec0cd287SJohn Baldwin 	data_len = le32toh(sgl->length);
3032*ec0cd287SJohn Baldwin 	if (data_offset + len > data_len) {
3033*ec0cd287SJohn Baldwin 		m_freem(m);
3034*ec0cd287SJohn Baldwin 		return (NVME_SC_INVALID_FIELD);
3035*ec0cd287SJohn Baldwin 	}
3036*ec0cd287SJohn Baldwin 	last_xfer = (data_offset + len == data_len);
3037*ec0cd287SJohn Baldwin 
3038*ec0cd287SJohn Baldwin 	if (sgl->type != NVME_SGL_TYPE_COMMAND_BUFFER) {
3039*ec0cd287SJohn Baldwin 		m_freem(m);
3040*ec0cd287SJohn Baldwin 		return (NVME_SC_INVALID_FIELD);
3041*ec0cd287SJohn Baldwin 	}
3042*ec0cd287SJohn Baldwin 
3043*ec0cd287SJohn Baldwin 	KASSERT(data_offset == CCAP(nc)->tx_data_offset,
3044*ec0cd287SJohn Baldwin 	    ("%s: starting data_offset %u doesn't match end of previous xfer %u",
3045*ec0cd287SJohn Baldwin 	    __func__, data_offset, CCAP(nc)->tx_data_offset));
3046*ec0cd287SJohn Baldwin 
3047*ec0cd287SJohn Baldwin 	/* Queue one or more C2H_DATA PDUs containing the data from 'm'. */
3048*ec0cd287SJohn Baldwin 	while (m != NULL) {
3049*ec0cd287SJohn Baldwin 		struct mbuf *n;
3050*ec0cd287SJohn Baldwin 		uint32_t todo;
3051*ec0cd287SJohn Baldwin 
3052*ec0cd287SJohn Baldwin 		if (m->m_len > qp->max_tx_data) {
3053*ec0cd287SJohn Baldwin 			n = m_split(m, qp->max_tx_data, M_WAITOK);
3054*ec0cd287SJohn Baldwin 			todo = m->m_len;
3055*ec0cd287SJohn Baldwin 		} else {
3056*ec0cd287SJohn Baldwin 			struct mbuf *p;
3057*ec0cd287SJohn Baldwin 
3058*ec0cd287SJohn Baldwin 			todo = m->m_len;
3059*ec0cd287SJohn Baldwin 			p = m;
3060*ec0cd287SJohn Baldwin 			n = p->m_next;
3061*ec0cd287SJohn Baldwin 			while (n != NULL) {
3062*ec0cd287SJohn Baldwin 				if (todo + n->m_len > qp->max_tx_data) {
3063*ec0cd287SJohn Baldwin 					p->m_next = NULL;
3064*ec0cd287SJohn Baldwin 					break;
3065*ec0cd287SJohn Baldwin 				}
3066*ec0cd287SJohn Baldwin 				todo += n->m_len;
3067*ec0cd287SJohn Baldwin 				p = n;
3068*ec0cd287SJohn Baldwin 				n = p->m_next;
3069*ec0cd287SJohn Baldwin 			}
3070*ec0cd287SJohn Baldwin 			MPASS(m_length(m, NULL) == todo);
3071*ec0cd287SJohn Baldwin 		}
3072*ec0cd287SJohn Baldwin 
3073*ec0cd287SJohn Baldwin 		last_pdu = (n == NULL && last_xfer);
3074*ec0cd287SJohn Baldwin 		che_send_c2h_pdu(qp, nc->nc_sqe.cid, data_offset, m, todo,
3075*ec0cd287SJohn Baldwin 		    last_pdu, last_pdu && qp->send_success);
3076*ec0cd287SJohn Baldwin 
3077*ec0cd287SJohn Baldwin 		data_offset += todo;
3078*ec0cd287SJohn Baldwin 		data_len -= todo;
3079*ec0cd287SJohn Baldwin 		m = n;
3080*ec0cd287SJohn Baldwin 	}
3081*ec0cd287SJohn Baldwin 	MPASS(data_len == 0);
3082*ec0cd287SJohn Baldwin 
3083*ec0cd287SJohn Baldwin #ifdef INVARIANTS
3084*ec0cd287SJohn Baldwin 	CCAP(nc)->tx_data_offset = data_offset;
3085*ec0cd287SJohn Baldwin #endif
3086*ec0cd287SJohn Baldwin 	if (!last_xfer)
3087*ec0cd287SJohn Baldwin 		return (NVMF_MORE);
3088*ec0cd287SJohn Baldwin 	else if (qp->send_success)
3089*ec0cd287SJohn Baldwin 		return (NVMF_SUCCESS_SENT);
3090*ec0cd287SJohn Baldwin 	else
3091*ec0cd287SJohn Baldwin 		return (NVME_SC_SUCCESS);
3092*ec0cd287SJohn Baldwin }
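/*
 * che_send_controller_data() above splits the supplied mbuf chain into
 * C2H_DATA PDUs no larger than qp->max_tx_data, preserving existing
 * mbuf boundaries where possible.  The final PDU of the final transfer
 * carries the LAST_PDU flag and, when SQ flow control is disabled, the
 * SUCCESS flag so no separate response capsule is needed; the return
 * value (NVMF_MORE, NVMF_SUCCESS_SENT or NVME_SC_SUCCESS) tells the
 * caller which case applied.
 */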
3093*ec0cd287SJohn Baldwin 
3094*ec0cd287SJohn Baldwin struct nvmf_transport_ops che_ops = {
3095*ec0cd287SJohn Baldwin 	.allocate_qpair = che_allocate_qpair,
3096*ec0cd287SJohn Baldwin 	.free_qpair = che_free_qpair,
3097*ec0cd287SJohn Baldwin 	.max_ioccsz = che_max_ioccsz,
3098*ec0cd287SJohn Baldwin 	.max_xfer_size = che_max_xfer_size,
3099*ec0cd287SJohn Baldwin 	.allocate_capsule = che_allocate_capsule,
3100*ec0cd287SJohn Baldwin 	.free_capsule = che_free_capsule,
3101*ec0cd287SJohn Baldwin 	.transmit_capsule = che_transmit_capsule,
3102*ec0cd287SJohn Baldwin 	.validate_command_capsule = che_validate_command_capsule,
3103*ec0cd287SJohn Baldwin 	.capsule_data_len = che_capsule_data_len,
3104*ec0cd287SJohn Baldwin 	.receive_controller_data = che_receive_controller_data,
3105*ec0cd287SJohn Baldwin 	.send_controller_data = che_send_controller_data,
3106*ec0cd287SJohn Baldwin 	.trtype = NVMF_TRTYPE_TCP,
3107*ec0cd287SJohn Baldwin 	.priority = 10,
3108*ec0cd287SJohn Baldwin };
3109*ec0cd287SJohn Baldwin 
3110*ec0cd287SJohn Baldwin NVMF_TRANSPORT(che, che_ops);
3111*ec0cd287SJohn Baldwin 
3112*ec0cd287SJohn Baldwin static void
3113*ec0cd287SJohn Baldwin read_pdu_limits(struct adapter *sc, u_int *max_tx_pdu_len,
3114*ec0cd287SJohn Baldwin     uint32_t *max_rx_pdu_len)
3115*ec0cd287SJohn Baldwin {
3116*ec0cd287SJohn Baldwin 	uint32_t tx_len, rx_len, r, v;
3117*ec0cd287SJohn Baldwin 
3118*ec0cd287SJohn Baldwin 	/* Copied from cxgbei, but not sure if this is correct. */
3119*ec0cd287SJohn Baldwin 	rx_len = t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE);
3120*ec0cd287SJohn Baldwin 	tx_len = t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE);
3121*ec0cd287SJohn Baldwin 
3122*ec0cd287SJohn Baldwin 	r = t4_read_reg(sc, A_TP_PARA_REG2);
3123*ec0cd287SJohn Baldwin 	rx_len = min(rx_len, G_MAXRXDATA(r));
3124*ec0cd287SJohn Baldwin 	tx_len = min(tx_len, G_MAXRXDATA(r));
3125*ec0cd287SJohn Baldwin 
3126*ec0cd287SJohn Baldwin 	r = t4_read_reg(sc, A_TP_PARA_REG7);
3127*ec0cd287SJohn Baldwin 	v = min(G_PMMAXXFERLEN0(r), G_PMMAXXFERLEN1(r));
3128*ec0cd287SJohn Baldwin 	rx_len = min(rx_len, v);
3129*ec0cd287SJohn Baldwin 	tx_len = min(tx_len, v);
3130*ec0cd287SJohn Baldwin 
3131*ec0cd287SJohn Baldwin 	/* Cannot be larger than 32KB - 256. */
3132*ec0cd287SJohn Baldwin 	rx_len = min(rx_len, 32512);
3133*ec0cd287SJohn Baldwin 	tx_len = min(tx_len, 32512);
3134*ec0cd287SJohn Baldwin 
3135*ec0cd287SJohn Baldwin 	*max_tx_pdu_len = tx_len;
3136*ec0cd287SJohn Baldwin 	*max_rx_pdu_len = rx_len;
3137*ec0cd287SJohn Baldwin }
3138*ec0cd287SJohn Baldwin 
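/*
 * Per-adapter setup: clamp the hardware PDU limits to the driver-wide
 * che_max_transmit_pdu/che_max_receive_pdu caps, note whether the SGE
 * delivers NVMT data completions in the IQE (F_RXCPLMODE_NVMT), and
 * publish the tunables under the dev.che.X.nvme sysctl tree.
 */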
3139*ec0cd287SJohn Baldwin static int
3140*ec0cd287SJohn Baldwin nvmf_che_init(struct adapter *sc, struct nvmf_che_adapter *nca)
3141*ec0cd287SJohn Baldwin {
3142*ec0cd287SJohn Baldwin 	struct sysctl_oid *oid;
3143*ec0cd287SJohn Baldwin 	struct sysctl_oid_list *children;
3144*ec0cd287SJohn Baldwin 	uint32_t val;
3145*ec0cd287SJohn Baldwin 
3146*ec0cd287SJohn Baldwin 	read_pdu_limits(sc, &nca->max_transmit_pdu, &nca->max_receive_pdu);
3147*ec0cd287SJohn Baldwin 	if (nca->max_transmit_pdu > che_max_transmit_pdu)
3148*ec0cd287SJohn Baldwin 		nca->max_transmit_pdu = che_max_transmit_pdu;
3149*ec0cd287SJohn Baldwin 	if (nca->max_receive_pdu > che_max_receive_pdu)
3150*ec0cd287SJohn Baldwin 		nca->max_receive_pdu = che_max_receive_pdu;
3151*ec0cd287SJohn Baldwin 	val = t4_read_reg(sc, A_SGE_CONTROL2);
3152*ec0cd287SJohn Baldwin 	nca->nvmt_data_iqe = (val & F_RXCPLMODE_NVMT) != 0;
3153*ec0cd287SJohn Baldwin 
3154*ec0cd287SJohn Baldwin 	sysctl_ctx_init(&nca->ctx);
3155*ec0cd287SJohn Baldwin 	oid = device_get_sysctl_tree(sc->dev);	/* dev.che.X */
3156*ec0cd287SJohn Baldwin 	children = SYSCTL_CHILDREN(oid);
3157*ec0cd287SJohn Baldwin 
3158*ec0cd287SJohn Baldwin 	oid = SYSCTL_ADD_NODE(&nca->ctx, children, OID_AUTO, "nvme",
3159*ec0cd287SJohn Baldwin 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "NVMe ULP settings");
3160*ec0cd287SJohn Baldwin 	children = SYSCTL_CHILDREN(oid);
3161*ec0cd287SJohn Baldwin 
3162*ec0cd287SJohn Baldwin 	nca->ddp_threshold = 8192;
3163*ec0cd287SJohn Baldwin 	SYSCTL_ADD_UINT(&nca->ctx, children, OID_AUTO, "ddp_threshold",
3164*ec0cd287SJohn Baldwin 	    CTLFLAG_RW, &nca->ddp_threshold, 0, "Rx zero copy threshold");
3165*ec0cd287SJohn Baldwin 
3166*ec0cd287SJohn Baldwin 	SYSCTL_ADD_UINT(&nca->ctx, children, OID_AUTO, "max_transmit_pdu",
3167*ec0cd287SJohn Baldwin 	    CTLFLAG_RW, &nca->max_transmit_pdu, 0,
3168*ec0cd287SJohn Baldwin 	    "Maximum size of a transmitted PDU");
3169*ec0cd287SJohn Baldwin 
3170*ec0cd287SJohn Baldwin 	SYSCTL_ADD_UINT(&nca->ctx, children, OID_AUTO, "max_receive_pdu",
3171*ec0cd287SJohn Baldwin 	    CTLFLAG_RW, &nca->max_receive_pdu, 0,
3172*ec0cd287SJohn Baldwin 	    "Maximum size of a received PDU");
3173*ec0cd287SJohn Baldwin 
3174*ec0cd287SJohn Baldwin 	return (0);
3175*ec0cd287SJohn Baldwin }
3176*ec0cd287SJohn Baldwin 
3177*ec0cd287SJohn Baldwin static void
3178*ec0cd287SJohn Baldwin nvmf_che_destroy(struct nvmf_che_adapter *nca)
3179*ec0cd287SJohn Baldwin {
3180*ec0cd287SJohn Baldwin 	sysctl_ctx_free(&nca->ctx);
3181*ec0cd287SJohn Baldwin 	free(nca, M_CXGBE);
3182*ec0cd287SJohn Baldwin }
3183*ec0cd287SJohn Baldwin 
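/*
 * ULD activation: allocate the per-adapter NVMe softc once the firmware
 * advertises NVMe/TCP offload capability and hang it off
 * sc->nvme_ulp_softc.  nvmf_che_deactivate() below tears it back down.
 */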
3184*ec0cd287SJohn Baldwin static int
3185*ec0cd287SJohn Baldwin nvmf_che_activate(struct adapter *sc)
3186*ec0cd287SJohn Baldwin {
3187*ec0cd287SJohn Baldwin 	struct nvmf_che_adapter *nca;
3188*ec0cd287SJohn Baldwin 	int rc;
3189*ec0cd287SJohn Baldwin 
3190*ec0cd287SJohn Baldwin 	ASSERT_SYNCHRONIZED_OP(sc);
3191*ec0cd287SJohn Baldwin 
3192*ec0cd287SJohn Baldwin 	if (uld_active(sc, ULD_NVME)) {
3193*ec0cd287SJohn Baldwin 		KASSERT(0, ("%s: NVMe offload already enabled on adapter %p",
3194*ec0cd287SJohn Baldwin 		    __func__, sc));
3195*ec0cd287SJohn Baldwin 		return (0);
3196*ec0cd287SJohn Baldwin 	}
3197*ec0cd287SJohn Baldwin 
3198*ec0cd287SJohn Baldwin 	if ((sc->nvmecaps & FW_CAPS_CONFIG_NVME_TCP) == 0) {
3199*ec0cd287SJohn Baldwin 		device_printf(sc->dev,
3200*ec0cd287SJohn Baldwin 		    "not NVMe offload capable, or capability disabled\n");
3201*ec0cd287SJohn Baldwin 		return (ENOSYS);
3202*ec0cd287SJohn Baldwin 	}
3203*ec0cd287SJohn Baldwin 
3204*ec0cd287SJohn Baldwin 	/* per-adapter softc for NVMe */
3205*ec0cd287SJohn Baldwin 	nca = malloc(sizeof(*nca), M_CXGBE, M_ZERO | M_WAITOK);
3206*ec0cd287SJohn Baldwin 	nca->sc = sc;
3207*ec0cd287SJohn Baldwin 
3208*ec0cd287SJohn Baldwin 	rc = nvmf_che_init(sc, nca);
3209*ec0cd287SJohn Baldwin 	if (rc != 0) {
3210*ec0cd287SJohn Baldwin 		free(nca, M_CXGBE);
3211*ec0cd287SJohn Baldwin 		return (rc);
3212*ec0cd287SJohn Baldwin 	}
3213*ec0cd287SJohn Baldwin 
3214*ec0cd287SJohn Baldwin 	sc->nvme_ulp_softc = nca;
3215*ec0cd287SJohn Baldwin 
3216*ec0cd287SJohn Baldwin 	return (0);
3217*ec0cd287SJohn Baldwin }
3218*ec0cd287SJohn Baldwin 
3219*ec0cd287SJohn Baldwin static int
3220*ec0cd287SJohn Baldwin nvmf_che_deactivate(struct adapter *sc)
3221*ec0cd287SJohn Baldwin {
3222*ec0cd287SJohn Baldwin 	struct nvmf_che_adapter *nca = sc->nvme_ulp_softc;
3223*ec0cd287SJohn Baldwin 
3224*ec0cd287SJohn Baldwin 	ASSERT_SYNCHRONIZED_OP(sc);
3225*ec0cd287SJohn Baldwin 
3226*ec0cd287SJohn Baldwin 	if (nca != NULL) {
3227*ec0cd287SJohn Baldwin 		nvmf_che_destroy(nca);
3228*ec0cd287SJohn Baldwin 		sc->nvme_ulp_softc = NULL;
3229*ec0cd287SJohn Baldwin 	}
3230*ec0cd287SJohn Baldwin 
3231*ec0cd287SJohn Baldwin 	return (0);
3232*ec0cd287SJohn Baldwin }
3233*ec0cd287SJohn Baldwin 
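/*
 * t4_iterate() callbacks used at module load/unload: bring the NVMe ULD
 * up or down on each adapter under its synchronized-op lock.
 */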
3234*ec0cd287SJohn Baldwin static void
3235*ec0cd287SJohn Baldwin nvmf_che_activate_all(struct adapter *sc, void *arg __unused)
3236*ec0cd287SJohn Baldwin {
3237*ec0cd287SJohn Baldwin 	if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t7nvact") != 0)
3238*ec0cd287SJohn Baldwin 		return;
3239*ec0cd287SJohn Baldwin 
3240*ec0cd287SJohn Baldwin 	/* Activate NVMe if any port on this adapter has IFCAP_TOE enabled. */
3241*ec0cd287SJohn Baldwin 	if (sc->offload_map && !uld_active(sc, ULD_NVME))
3242*ec0cd287SJohn Baldwin 		(void) t4_activate_uld(sc, ULD_NVME);
3243*ec0cd287SJohn Baldwin 
3244*ec0cd287SJohn Baldwin 	end_synchronized_op(sc, 0);
3245*ec0cd287SJohn Baldwin }
3246*ec0cd287SJohn Baldwin 
3247*ec0cd287SJohn Baldwin static void
3248*ec0cd287SJohn Baldwin nvmf_che_deactivate_all(struct adapter *sc, void *arg __unused)
3249*ec0cd287SJohn Baldwin {
3250*ec0cd287SJohn Baldwin 	if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t7nvdea") != 0)
3251*ec0cd287SJohn Baldwin 		return;
3252*ec0cd287SJohn Baldwin 
3253*ec0cd287SJohn Baldwin 	if (uld_active(sc, ULD_NVME))
3254*ec0cd287SJohn Baldwin 		(void) t4_deactivate_uld(sc, ULD_NVME);
3255*ec0cd287SJohn Baldwin 
3256*ec0cd287SJohn Baldwin 	end_synchronized_op(sc, 0);
3257*ec0cd287SJohn Baldwin }
3258*ec0cd287SJohn Baldwin 
3259*ec0cd287SJohn Baldwin static struct uld_info nvmf_che_uld_info = {
3260*ec0cd287SJohn Baldwin 	.uld_activate = nvmf_che_activate,
3261*ec0cd287SJohn Baldwin 	.uld_deactivate = nvmf_che_deactivate,
3262*ec0cd287SJohn Baldwin };
3263*ec0cd287SJohn Baldwin 
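/*
 * Module load: hook the NVMT CPL messages, register as a ULD, and
 * activate the offload on adapters that already have TOE enabled;
 * unload reverses the steps in the opposite order.
 */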
3264*ec0cd287SJohn Baldwin static int
3265*ec0cd287SJohn Baldwin nvmf_che_mod_load(void)
3266*ec0cd287SJohn Baldwin {
3267*ec0cd287SJohn Baldwin 	int rc;
3268*ec0cd287SJohn Baldwin 
3269*ec0cd287SJohn Baldwin 	t4_register_cpl_handler(CPL_NVMT_CMP, do_nvmt_cmp);
3270*ec0cd287SJohn Baldwin 	t4_register_cpl_handler(CPL_NVMT_DATA, do_nvmt_data);
3271*ec0cd287SJohn Baldwin 
3272*ec0cd287SJohn Baldwin 	rc = t4_register_uld(&nvmf_che_uld_info, ULD_NVME);
3273*ec0cd287SJohn Baldwin 	if (rc != 0)
3274*ec0cd287SJohn Baldwin 		return (rc);
3275*ec0cd287SJohn Baldwin 
3276*ec0cd287SJohn Baldwin 	t4_iterate(nvmf_che_activate_all, NULL);
3277*ec0cd287SJohn Baldwin 
3278*ec0cd287SJohn Baldwin 	return (rc);
3279*ec0cd287SJohn Baldwin }
3280*ec0cd287SJohn Baldwin 
3281*ec0cd287SJohn Baldwin static int
3282*ec0cd287SJohn Baldwin nvmf_che_mod_unload(void)
3283*ec0cd287SJohn Baldwin {
3284*ec0cd287SJohn Baldwin 	t4_iterate(nvmf_che_deactivate_all, NULL);
3285*ec0cd287SJohn Baldwin 
3286*ec0cd287SJohn Baldwin 	if (t4_unregister_uld(&nvmf_che_uld_info, ULD_NVME) == EBUSY)
3287*ec0cd287SJohn Baldwin 		return (EBUSY);
3288*ec0cd287SJohn Baldwin 
3289*ec0cd287SJohn Baldwin 	t4_register_cpl_handler(CPL_NVMT_CMP, NULL);
3290*ec0cd287SJohn Baldwin 	t4_register_cpl_handler(CPL_NVMT_DATA, NULL);
3291*ec0cd287SJohn Baldwin 
3292*ec0cd287SJohn Baldwin 	return (0);
3293*ec0cd287SJohn Baldwin }
3294*ec0cd287SJohn Baldwin #endif
3295*ec0cd287SJohn Baldwin 
3296*ec0cd287SJohn Baldwin static int
3297*ec0cd287SJohn Baldwin nvmf_che_modevent(module_t mod, int cmd, void *arg)
3298*ec0cd287SJohn Baldwin {
3299*ec0cd287SJohn Baldwin 	int rc;
3300*ec0cd287SJohn Baldwin 
3301*ec0cd287SJohn Baldwin #ifdef TCP_OFFLOAD
3302*ec0cd287SJohn Baldwin 	switch (cmd) {
3303*ec0cd287SJohn Baldwin 	case MOD_LOAD:
3304*ec0cd287SJohn Baldwin 		rc = nvmf_che_mod_load();
3305*ec0cd287SJohn Baldwin 		break;
3306*ec0cd287SJohn Baldwin 	case MOD_UNLOAD:
3307*ec0cd287SJohn Baldwin 		rc = nvmf_che_mod_unload();
3308*ec0cd287SJohn Baldwin 		break;
3309*ec0cd287SJohn Baldwin 	default:
3310*ec0cd287SJohn Baldwin 		rc = EOPNOTSUPP;
3311*ec0cd287SJohn Baldwin 		break;
3312*ec0cd287SJohn Baldwin 	}
3313*ec0cd287SJohn Baldwin #else
3314*ec0cd287SJohn Baldwin 	printf("nvmf_che: compiled without TCP_OFFLOAD support.\n");
3315*ec0cd287SJohn Baldwin 	rc = EOPNOTSUPP;
3316*ec0cd287SJohn Baldwin #endif
3317*ec0cd287SJohn Baldwin 
3318*ec0cd287SJohn Baldwin 	return (rc);
3319*ec0cd287SJohn Baldwin }
3320*ec0cd287SJohn Baldwin 
3321*ec0cd287SJohn Baldwin static moduledata_t nvmf_che_mod = {
3322*ec0cd287SJohn Baldwin 	"nvmf_che",
3323*ec0cd287SJohn Baldwin 	nvmf_che_modevent,
3324*ec0cd287SJohn Baldwin 	NULL,
3325*ec0cd287SJohn Baldwin };
3326*ec0cd287SJohn Baldwin 
3327*ec0cd287SJohn Baldwin MODULE_VERSION(nvmf_che, 1);
3328*ec0cd287SJohn Baldwin DECLARE_MODULE(nvmf_che, nvmf_che_mod, SI_SUB_EXEC, SI_ORDER_ANY);
3329*ec0cd287SJohn Baldwin MODULE_DEPEND(nvmf_che, t4_tom, 1, 1, 1);
3330*ec0cd287SJohn Baldwin MODULE_DEPEND(nvmf_che, cxgbe, 1, 1, 1);
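/*
 * Example usage (sketch; the adapter unit number is illustrative):
 * loading nvmf_che pulls in its t4_tom and cxgbe dependencies and
 * activates the ULD on any adapter with TOE enabled, after which the
 * per-adapter knobs created in nvmf_che_init() are available, e.g.:
 *
 *	kldload nvmf_che
 *	sysctl dev.che.0.nvme.ddp_threshold
 *	sysctl dev.che.0.nvme.max_transmit_pdu
 *	sysctl dev.che.0.nvme.max_receive_pdu
 */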
3331